# Generate Unified Calendar

Generate a single calendar file with all the matches for each league

In [17]:
from io import StringIO
import json
from pathlib import Path

import pandas as pd
import requests

# Folder holding the per-league calendar CSVs (files named Calendar_*.csv).
CALENDAR_DIR = Path("Input/Calendars")
# Destination of the merged calendar; also the default output of merge_calendars().
output_merged_calendar = Path("Input/Combined_Country_League_Calendar_25_26.csv")

def merge_calendars(
    calendar_dir: Path = CALENDAR_DIR,
    output_path: Path = output_merged_calendar,
) -> pd.DataFrame:
    """Merge all calendar CSVs into one file with unified columns.

    Every ``Calendar_*.csv`` file in ``calendar_dir`` is read, reduced to the
    columns Country, League, Matchday, Hometeam and Awayteam, and the pieces
    are concatenated in file-name order.

    Args:
        calendar_dir: Folder that is searched for ``Calendar_*.csv`` files.
        output_path: Where the merged CSV is written. Missing parent folders
            are created.

    Returns:
        The merged calendar with a fresh 0..n-1 index.

    Raises:
        ValueError: If a calendar file lacks one of the required columns, or
            if no calendar file is found at all.
    """
    # Single source of truth for both the validation and the column selection.
    required_columns = ["Country", "League", "Matchday", "Hometeam", "Awayteam"]

    frames = []
    # glob() yields files in an arbitrary, filesystem-dependent order; sorting
    # makes the row order of the merged file reproducible across machines.
    for path in sorted(calendar_dir.glob("Calendar_*.csv")):
        if path.name == output_path.name:
            continue  # avoid self-inclusion if rerun
        df = pd.read_csv(path)
        missing = set(required_columns) - set(df.columns)
        if missing:
            raise ValueError(f"{path.name} missing required columns: {missing}")
        frames.append(df[required_columns])

    if not frames:
        raise ValueError("No calendar CSVs found to merge.")

    combined = pd.concat(frames, ignore_index=True)
    # Make sure the destination folder exists before writing.
    output_path.parent.mkdir(parents=True, exist_ok=True)
    combined.to_csv(output_path, index=False)
    print(f"Saved combined calendar to {output_path} with {len(combined)} rows.")
    return combined

# Run to build the combined calendar CSV. The call is the cell's last
# expression, so the returned DataFrame is displayed as the cell output.
merge_calendars()

Saved combined calendar to Input\Combined_Country_League_Calendar_25_26.csv with 3766 rows.


Unnamed: 0,Country,League,Matchday,Hometeam,Awayteam
0,Germany,Bundesliga 1,1,Bayern Munich,RB Leipzig
1,Germany,Bundesliga 1,1,Heidenheim,Wolfsburg
2,Germany,Bundesliga 1,1,Union Berlin,Stuttgart
3,Germany,Bundesliga 1,1,Freiburg,Augsburg
4,Germany,Bundesliga 1,1,Ein Frankfurt,Werder Bremen
...,...,...,...,...,...
3761,Turkey,Super Lig,34,Gaziantep,Buyuksehyr
3762,Turkey,Super Lig,34,Samsunspor,Goztep
3763,Turkey,Super Lig,34,Kasimpasa,Galatasaray
3764,Turkey,Super Lig,34,Trabzonspor,Genclerbirligi


# Pull Latest Results

Pull the latest results for each league 

In [18]:
# Download all league CSVs to Input/results folder
# JSON configuration file listing the leagues and their download URLs.
LEAGUE_CFG_PATH = Path("Input/league_urls.json")
# Folder that receives one downloaded CSV per league.
OUTPUT_DIR = Path("Input/results")
# Create the folder (and any missing parents) up front; no-op if it already exists.
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)


def fetch_league_data(url: str) -> pd.DataFrame:
    """Download the CSV served at ``url`` and parse it into a DataFrame.

    The request uses a 30-second timeout, and ``raise_for_status()`` is
    called on the response before its text is parsed.

    Args:
        url: Address of the CSV file to download.

    Returns:
        The downloaded CSV parsed by ``pd.read_csv``.
    """
    reply = requests.get(url, timeout=30)
    reply.raise_for_status()
    # Wrap the decoded body in a file-like buffer so pandas can read it.
    csv_buffer = StringIO(reply.text)
    return pd.read_csv(csv_buffer)

# Read the league configuration; entries are looked up under the "leagues" key.
league_data = json.loads(LEAGUE_CFG_PATH.read_text(encoding="utf-8"))

leagues_cfg = league_data.get("leagues", [])
for entry in leagues_cfg:
    league_name = entry["league"]
    url = entry.get("url")
    if url:
        # Spaces and slashes become underscores so the league name is usable as a file name.
        safe_filename = f'{league_name.translate(str.maketrans(" /", "__"))}.csv'
        output_path = OUTPUT_DIR / safe_filename

        # Best effort: a failure for one league is reported and the loop moves on.
        try:
            print(f"Downloading {league_name}...")
            df = fetch_league_data(url)
            df.to_csv(output_path, index=False)
            print(f"  ✓ Saved to {output_path}")
        except Exception as e:
            print(f"  ✗ Error downloading {league_name}: {e}")



Downloading Bundesliga 1...
  ✓ Saved to Input\results\Bundesliga_1.csv
Downloading Bundesliga 2...
  ✓ Saved to Input\results\Bundesliga_1.csv
Downloading Bundesliga 2...
  ✓ Saved to Input\results\Bundesliga_2.csv
Downloading Serie A...
  ✓ Saved to Input\results\Bundesliga_2.csv
Downloading Serie A...
  ✓ Saved to Input\results\Serie_A.csv
Downloading Serie B...
  ✓ Saved to Input\results\Serie_A.csv
Downloading Serie B...
  ✓ Saved to Input\results\Serie_B.csv
Downloading La Liga Primera...
  ✓ Saved to Input\results\Serie_B.csv
Downloading La Liga Primera...
  ✓ Saved to Input\results\La_Liga_Primera.csv
Downloading La Liga Segunda...
  ✓ Saved to Input\results\La_Liga_Primera.csv
Downloading La Liga Segunda...
  ✓ Saved to Input\results\La_Liga_Segunda.csv
Downloading Ligue 1...
  ✓ Saved to Input\results\La_Liga_Segunda.csv
Downloading Ligue 1...
  ✓ Saved to Input\results\Ligue_1.csv
Downloading Ligue 2...
  ✓ Saved to Input\results\Ligue_1.csv
Downloading Ligue 2...
  ✓ Saved 

# Condense Results into a Single CSV

In [19]:
# Columns kept from every results CSV.
col_of_interest = ["HomeTeam", "AwayTeam", "FTR", "FTHG", "FTAG", "HTR", "HTHG", "HTAG"]

# Folder holding the per-league results CSVs.
RESULTS_DIR = Path("Input/results")

def condense_results(
    results_dir: Path = RESULTS_DIR,
    output_path: Path = Path("Input/combined_results.csv"),
    columns_of_interest: list[str] = col_of_interest,
) -> pd.DataFrame:
    """Concatenate all results CSVs keeping only the selected columns.

    Every ``*.csv`` file in ``results_dir`` is read in file-name order. Files
    that lack any of ``columns_of_interest`` are skipped with a printed
    notice. In the combined frame ``HomeTeam``/``AwayTeam`` are renamed to
    ``Hometeam``/``Awayteam``.

    Args:
        results_dir: Folder that is searched for ``*.csv`` files.
        output_path: Where the combined CSV is written. Missing parent folders
            are created.
        columns_of_interest: Columns to keep from each file. The argument is
            not modified.

    Returns:
        The combined results with a fresh 0..n-1 index and renamed team columns.

    Raises:
        ValueError: If no file contains all the requested columns.
    """
    wanted = list(columns_of_interest)

    frames = []
    # glob() order is filesystem-dependent; sort for a reproducible row order.
    for path in sorted(results_dir.glob("*.csv")):
        df = pd.read_csv(path)
        missing = set(wanted) - set(df.columns)
        if missing:
            print(f"Skipping {path.name}: missing columns {missing}")
            continue
        frames.append(df[wanted])

    if not frames:
        raise ValueError("No result CSVs found with the required columns.")

    # rename() returns a new frame, so no in-place mutation is needed.
    combined = pd.concat(frames, ignore_index=True).rename(
        columns={"HomeTeam": "Hometeam", "AwayTeam": "Awayteam"}
    )

    output_path.parent.mkdir(parents=True, exist_ok=True)
    combined.to_csv(output_path, index=False)
    # Report success only after the file has actually been written.
    print(f"Saved condensed results to {output_path} with {len(combined)} rows.")

    return combined

# Run to build the condensed results CSV. The call is the cell's last
# expression, so the returned DataFrame is displayed as the cell output.
condense_results()


Saved condensed results to Input\combined_results.csv with 1995 rows.


Unnamed: 0,Hometeam,Awayteam,FTR,FTHG,FTAG,HTR,HTHG,HTAG
0,Bayern Munich,RB Leipzig,H,6,0,H,3,0
1,Ein Frankfurt,Werder Bremen,H,4,1,H,2,0
2,Freiburg,Augsburg,A,1,3,A,0,3
3,Heidenheim,Wolfsburg,A,1,3,D,1,1
4,Leverkusen,Hoffenheim,A,1,2,D,1,1
...,...,...,...,...,...,...,...,...
1990,Venezia,Mantova,H,3,0,H,1,0
1991,Catanzaro,Virtus Entella,H,3,2,D,2,2
1992,Palermo,Carrarese,H,5,0,H,2,0
1993,Juve Stabia,Monza,D,2,2,D,1,1


# Left Join the Results on the Calendar

In [20]:
# Load the combined calendar
calendar = pd.read_csv(Path("Input/Combined_Country_League_Calendar_25_26.csv"))

# Load the combined results
results = pd.read_csv(Path("Input/combined_results.csv"))

# The join key is the (Hometeam, Awayteam) pair. If the results contain the
# same pair more than once, the left join repeats the matching calendar row
# for every occurrence, so surface that before merging.
duplicate_pairs = int(results.duplicated(subset=["Hometeam", "Awayteam"]).sum())
if duplicate_pairs:
    print(
        f"Warning: {duplicate_pairs} repeated Hometeam/Awayteam pairs in results; "
        "matching calendar rows will be duplicated by the join."
    )

# Left join results onto calendar using Hometeam and Awayteam as keys.
# Calendar rows with no matching result keep NaN in the result columns.
merged = calendar.merge(
    results,
    on=["Hometeam", "Awayteam"],
    how="left"
)

print(f"Merged dataset has {len(merged)} rows with {len(merged.columns)} columns.")

# Create the output folder if it is missing so the write works on a fresh checkout.
merged_output_path = Path("Output/Calendar_with_Results_25_26.csv")
merged_output_path.parent.mkdir(parents=True, exist_ok=True)
merged.to_csv(merged_output_path, index=False)

# Kept as the last expression so the preview is actually rendered by the cell.
merged.head(10)

Merged dataset has 3766 rows with 11 columns.


In [21]:
# Sanity check: list the distinct countries and leagues present in the merged data.
for heading, column in (("Unique Countries:", "Country"), ("\nUnique Leagues:", "League")):
    print(heading)
    print(merged[column].unique())

Unique Countries:
['Germany' 'England' 'Netherlands' 'Belgium' 'Spain' 'France' 'Italy'
 'Turkey']

Unique Leagues:
['Bundesliga 1' 'Bundesliga 2' 'Championship' 'Eredivisie'
 'Jupiler League' 'La Liga Primera' 'Ligue 1' 'Premier League' 'Serie A'
 'Serie B' 'Super Lig']
