# 01 — Load & Filter
Load all raw CSVs, filter Greek players, split active vs retired, and save intermediate files for the next notebooks.

In [None]:
import pandas as pd

# Relative folder the raw CSV files are read from.
DATA_PATH = "../data/"
# Relative folder the filtered intermediate files are written to.
OUTPUTS_PATH = "../outputs/"

## Load CSVs

In [None]:
# Read every raw table; date columns are parsed on load so later cells can
# do datetime arithmetic on them directly.
players = pd.read_csv(
    DATA_PATH + "players.csv",
    parse_dates=["date_of_birth"],
)
valuations = pd.read_csv(
    DATA_PATH + "player_valuations.csv",
    parse_dates=["date"],
)
transfers = pd.read_csv(
    DATA_PATH + "transfers.csv",
    parse_dates=["transfer_date"],
)
clubs = pd.read_csv(DATA_PATH + "clubs.csv")
appearances = pd.read_csv(
    DATA_PATH + "appearances.csv",
    parse_dates=["date"],
)

# Quick sanity check: (rows, columns) of each table that was loaded.
for label, frame in (
    ("players:    ", players),
    ("valuations: ", valuations),
    ("transfers:  ", transfers),
    ("clubs:      ", clubs),
    ("appearances:", appearances),
):
    print(label, frame.shape)

## Check exact country name

In [None]:
# Show the 20 most frequent citizenship values so the exact spelling used
# for the country filter can be confirmed.
citizenship_counts = players["country_of_citizenship"].value_counts()
citizenship_counts.head(20)

## Filter Greek Players

In [None]:
# Keep only players whose citizenship is exactly "Greece"; .copy() so the
# new "age" column is added to an independent frame, not a view of `players`.
greek = players[players["country_of_citizenship"] == "Greece"].copy()

# Age in completed years. Dividing the day count by 365 ignores leap days
# and overstates the age by one year shortly before a birthday, so compare
# calendar fields instead: year difference, minus one if this year's
# birthday has not happened yet. Missing birth dates (NaT) stay NaN.
today = pd.Timestamp.today()
birth = greek["date_of_birth"]
birthday_pending = (birth.dt.month > today.month) | (
    (birth.dt.month == today.month) & (birth.dt.day > today.day)
)
greek["age"] = today.year - birth.dt.year - birthday_pending.astype(int)

# Check what seasons exist to find the current/latest one
print("last_season value counts (top 10):")
print(greek["last_season"].value_counts().head(10))
print(f"\nMax last_season: {greek['last_season'].max()}")

In [None]:
# The latest season found among Greek players is treated as the current one.
# Active  = last_season equals that season.
# Retired = last_season is strictly earlier.
# Rows with a missing last_season match neither comparison and so land in
# neither group.
CURRENT_SEASON = greek["last_season"].max()

in_current_season = greek["last_season"] == CURRENT_SEASON
before_current_season = greek["last_season"] < CURRENT_SEASON

greek_active = greek.loc[in_current_season].copy()
greek_retired = greek.loc[before_current_season].copy()

print(f"Current season: {CURRENT_SEASON}")
print(f"Total Greek players: {len(greek)}")
print(f"  Active:  {len(greek_active)}")
print(f"  Retired: {len(greek_retired)}")

# Preview the first rows with the columns most relevant downstream.
preview_columns = [
    "name",
    "last_season",
    "position",
    "age",
    "market_value_in_eur",
    "highest_market_value_in_eur",
    "current_club_name",
]
greek[preview_columns].head(10)

## Filter related tables to Greek players only

In [None]:
# Distinct ids of the Greek players; used to restrict the other tables.
greek_ids = greek["player_id"].unique()

# Keep only the rows of each related table that belong to a Greek player.
valuation_is_greek = valuations["player_id"].isin(greek_ids)
transfer_is_greek = transfers["player_id"].isin(greek_ids)
appearance_is_greek = appearances["player_id"].isin(greek_ids)

greek_valuations = valuations.loc[valuation_is_greek].copy()
greek_transfers = transfers.loc[transfer_is_greek].copy()
greek_appearances = appearances.loc[appearance_is_greek].copy()

# Report the (rows, columns) of each filtered table.
for label, frame in (
    ("greek_valuations: ", greek_valuations),
    ("greek_transfers:  ", greek_transfers),
    ("greek_appearances:", greek_appearances),
):
    print(label, frame.shape)

## Save intermediate files

In [None]:
import os

# to_parquet does not create missing folders, so make sure the output
# directory exists before writing (no-op when it is already there).
os.makedirs(OUTPUTS_PATH, exist_ok=True)

# File name -> frame to persist. The row index is dropped on write because
# it only reflects row positions in the unfiltered source tables.
frames_to_save = {
    "greek_all.parquet": greek,
    "greek_active.parquet": greek_active,
    "greek_retired.parquet": greek_retired,
    "greek_valuations.parquet": greek_valuations,
    "greek_transfers.parquet": greek_transfers,
    "greek_appearances.parquet": greek_appearances,
    "clubs.parquet": clubs,
}
for file_name, frame in frames_to_save.items():
    frame.to_parquet(OUTPUTS_PATH + file_name, index=False)

print("All files saved to outputs/")