# 04 — Transfer Patterns
**Requires:** Run `01_load_and_filter.ipynb` first.

Where do Greek players go? Which leagues attract them? How have transfer fees evolved?

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Apply one plotting theme for every figure produced in this notebook.
plt.style.use('seaborn-v0_8-whitegrid')

# Directory prefixes; the trailing slash is kept so file names can be appended with `+`.
OUTPUTS_PATH = "../outputs/"
FIGURES_PATH = "../outputs/figures/"

# Read the three parquet inputs in one pass, in the order of the names on the left.
greek_transfers, clubs, greek_all = (
    pd.read_parquet(OUTPUTS_PATH + parquet_name)
    for parquet_name in (
        "greek_transfers.parquet",
        "clubs.parquet",
        "greek_all.parquet",
    )
)

# Quick sanity check on the size of the transfers table.
print("greek_transfers:", greek_transfers.shape)

## 4.1 Basic Overview

In [None]:
# Show the column dtypes as plain text, then preview the first five rows as the cell output.
column_dtypes = greek_transfers.dtypes
print(column_dtypes)
greek_transfers.head(5)

In [None]:
# Headline counts for the transfers table.
# NOTE(review): assumes a missing `transfer_fee` means "fee not recorded" and 0 means
# "free transfer" — confirm against the dataset documentation.
transfer_fees = greek_transfers["transfer_fee"]

print(f"Total transfers:       {len(greek_transfers)}")
print(f"Unique players:        {greek_transfers['player_id'].nunique()}")
# Count strictly positive fees: `notna()` would also count zero-fee rows, which are
# reported separately on the next line.
print(f"Transfers with a fee:  {(transfer_fees > 0).sum()}")
print(f"Free transfers:        {(transfer_fees == 0).sum()}")
print(f"Fee not recorded:      {transfer_fees.isna().sum()}")

## 4.2 Join with Clubs → Get Destination League

In [None]:
# Two-column lookup: club_id -> domestic competition of that club.
club_leagues = clubs[["club_id", "domestic_competition_id"]]

# Attach the destination league, then the origin league, via left joins so transfers
# whose club is absent from `clubs` are kept (their league is NaN).
# `validate="many_to_one"` makes pandas raise a MergeError if `club_id` is duplicated in
# `clubs`; without it a duplicate would silently multiply transfer rows and inflate
# every count computed from this frame.
greek_transfers_enriched = greek_transfers.merge(
    club_leagues.rename(columns={"domestic_competition_id": "to_league"}),
    left_on="to_club_id",
    right_on="club_id",
    how="left",
    validate="many_to_one",
).merge(
    club_leagues.rename(
        columns={"domestic_competition_id": "from_league", "club_id": "club_id_from"}),
    left_on="from_club_id",
    right_on="club_id_from",
    how="left",
    validate="many_to_one",
)

# Calendar year of each transfer; `.dt` requires `transfer_date` to be a datetime column.
greek_transfers_enriched["year"] = greek_transfers_enriched["transfer_date"].dt.year
print("Shape after joins:", greek_transfers_enriched.shape)
greek_transfers_enriched[["player_name", "from_club_name", "to_club_name",
                           "from_league", "to_league", "transfer_fee", "year"]].head()

## 4.3 Top Destination Leagues

In [None]:
# The 12 most frequent destination leagues (value_counts sorts descending and skips NaN).
top_leagues = greek_transfers_enriched["to_league"].value_counts().iloc[:12]

# Draw on an explicit Axes rather than the implicit pyplot "current figure".
fig, ax = plt.subplots(figsize=(10, 5))
top_leagues.plot(kind="bar", ax=ax, color="steelblue", edgecolor="black")
ax.set_title("Top Destination Leagues for Greek Players")
ax.set_xlabel("League")
ax.set_ylabel("Number of Transfers")
plt.setp(ax.get_xticklabels(), rotation=45)
fig.tight_layout()
fig.savefig(FIGURES_PATH + "04_destination_leagues.png", dpi=150)
plt.show()

## 4.4 Transfer Flow: From Where → To Where

In [None]:
# Count transfers per (origin league, destination league) pair and keep the 15 largest.
route_sizes = greek_transfers_enriched.groupby(["from_league", "to_league"]).size()
routes = (
    route_sizes
    .rename("count")
    .reset_index()
    .sort_values("count", ascending=False)
    .iloc[:15]
    # Human-readable label used on the y-axis of the chart below.
    .assign(route=lambda pairs: pairs["from_league"] + " → " + pairs["to_league"])
)

fig, ax = plt.subplots(figsize=(10, 6))
ax.barh(routes["route"], routes["count"], color="coral", edgecolor="black")
ax.set_title("Top 15 Transfer Routes for Greek Players")
ax.set_xlabel("Number of Transfers")
# Flip the axis so the most frequent route is drawn at the top.
ax.invert_yaxis()
fig.tight_layout()
fig.savefig(FIGURES_PATH + "04_transfer_routes.png", dpi=150)
plt.show()

## 4.5 Transfer Fees Over Time

In [None]:
# Per-year summary of paid transfers: total fee, number of transfers, mean fee.
fee_by_year = (
    greek_transfers_enriched
    .dropna(subset=["transfer_fee"])
    # Build the mask from the frame being filtered (callable form) so it cannot be
    # misaligned with the rows left after dropna; keeps only strictly positive fees,
    # i.e. excludes free transfers.
    .loc[lambda transfers: transfers["transfer_fee"] > 0]
    .groupby("year")["transfer_fee"]
    .agg(total="sum", count="count", mean="mean")
    # Convert only the monetary columns to millions. Dividing the whole aggregate by
    # 1e6 would also scale `count`, which is a number of transfers, not an amount.
    .assign(
        total=lambda yearly: yearly["total"] / 1e6,
        mean=lambda yearly: yearly["mean"] / 1e6,
    )
)

# One panel per metric, sharing the year index on the x-axis.
fig, axes = plt.subplots(1, 3, figsize=(16, 4))

fee_by_year["total"].plot(ax=axes[0], marker="o", color="green")
axes[0].set_title("Total Fees per Year (€M)")

fee_by_year["count"].plot(ax=axes[1], marker="o", color="steelblue")
axes[1].set_title("Number of Paid Transfers per Year")

fee_by_year["mean"].plot(ax=axes[2], marker="o", color="orange")
axes[2].set_title("Avg Transfer Fee per Year (€M)")

plt.tight_layout()
plt.savefig(FIGURES_PATH + "04_fees_over_time.png", dpi=150)
plt.show()

## 4.6 Most Transferred Greek Players

In [None]:
# Ten players with the most transfer rows, with their summed fees in millions.
most_transferred = (
    greek_transfers_enriched.groupby("player_id")
    .agg(
        # "size" counts every row in the group; "count" would skip rows whose
        # player_name is null and so under-report the number of transfers.
        transfers      = ("player_name", "size"),
        player_name    = ("player_name",  "first"),
        # Sum skips missing fees, so a player with no recorded fee shows 0 here.
        total_fees     = ("transfer_fee", "sum"),
    )
    .sort_values("transfers", ascending=False)
    .head(10)
    .reset_index(drop=True)
    # Derived inside the chain so re-running the cell always rebuilds the column.
    .assign(total_fees_M=lambda players: players["total_fees"] / 1e6)
)

most_transferred[["player_name", "transfers", "total_fees_M"]]