In [None]:
import os
from pathlib import Path

from loguru import logger
from soccerdata import FBref

In [None]:
# ----- Project directory layout -----
# The project root is taken to be two levels above the current working
# directory (i.e. the directory the notebook is executed from).
PROJECT_ROOT = Path.cwd().parent.parent
DATA_DIR = PROJECT_ROOT / "datasets"
FANTACALCIO_DATA_DIR = DATA_DIR / "fantacalcio_data"
# Passed to the FBref reader as its data directory.
FBREF_CACHE_DIR = PROJECT_ROOT / ".cache" / "fbref"

# Create every directory up front; already-existing ones are left untouched.
DATA_DIR.mkdir(parents=True, exist_ok=True)
FANTACALCIO_DATA_DIR.mkdir(parents=True, exist_ok=True)
FBREF_CACHE_DIR.mkdir(parents=True, exist_ok=True)

# Report the resolved locations so a wrong working directory is easy to spot.
logger.info(f"Project root: {PROJECT_ROOT}")
logger.info(f"Data directory: {DATA_DIR}")
logger.info(f"Fantacalcio data directory: {FANTACALCIO_DATA_DIR}")
logger.info(f"FBRef cache directory: {FBREF_CACHE_DIR}")

In [None]:
# FBref reader for Serie A, using season codes 21 through 25.
# NOTE(review): confirm in the soccerdata docs which calendar years each
# two-digit season code maps to.
fbref = FBref(
    leagues="ITA-Serie A",
    seasons=list(range(21, 26)),
    data_dir=FBREF_CACHE_DIR,
)

In [None]:
# Load the schedule table through the FBref reader.
# The following cells expect "season", "week", "home_team" and "away_team"
# to be available as index levels or columns of this frame.
# NOTE(review): exact index layout and dtypes come from soccerdata — confirm there.
df_schedule = fbref.read_schedule()

In [None]:
# Flatten the index into ordinary columns, then keep only the four fields
# that make up the calendar.
df_calendar = df_schedule.reset_index()[
    ["season", "week", "home_team", "away_team"]
]

In [None]:
# Build the final calendar table in a single chain:
#   1. rename the columns to the names used in the exported CSV;
#   2. drop every row containing a missing value — non-championship games
#      (tiebreaks/play-outs) are excluded this way;
#   3. shorten "Hellas Verona" to "Verona" wherever it appears.
df_calendar = (
    df_calendar
    .rename(columns={"week": "matchday", "home_team": "team1", "away_team": "team2"})
    .dropna()
    .replace("Hellas Verona", "Verona")
)

# Write the calendar without the DataFrame index.
df_calendar.to_csv(FANTACALCIO_DATA_DIR / "seriea_calendar.csv", index=False)