In [14]:
# =============================================================================
# SETUP
# =============================================================================
import pandas as pd
import polars as pl
from utils.constants import get_open_data_dirs, get_j1_league_dirs

# Look up the project's data locations via the helpers in utils.constants.
J1_LEAGUE_DIR = get_j1_league_dirs()
OPEN_DATA_DIR = get_open_data_dirs()

# Lazily scan the bronze-layer *matches* parquet data; nothing is materialised
# until `.collect()` is called on the LazyFrame.
# `extra_columns="ignore"` / `missing_columns="insert"` make the scan tolerant
# of files whose column sets differ from one another.
df = pl.scan_parquet(
    OPEN_DATA_DIR["bronze_matches"],
    extra_columns="ignore",
    missing_columns="insert",
)

# Resolve the schema explicitly: `LazyFrame.schema` raises a PerformanceWarning
# because resolving a lazy schema can be expensive, whereas `collect_schema()`
# returns the same Schema object without the warning.
df.collect_schema()


  df.schema


Schema([('match_id', Int64),
        ('match_date', String),
        ('kick_off', String),
        ('home_score', Int64),
        ('away_score', Int64),
        ('match_status', String),
        ('match_status_360', String),
        ('last_updated', String),
        ('last_updated_360', String),
        ('match_week', Int64),
        ('competition_competition_id', Int64),
        ('competition_country_name', String),
        ('competition_competition_name', String),
        ('season_season_id', Int64),
        ('season_season_name', String),
        ('home_team_home_team_id', Int64),
        ('home_team_home_team_name', String),
        ('home_team_home_team_gender', String),
        ('home_team_home_team_group', String),
        ('home_team_country_id', Int64),
        ('home_team_country_name', String),
        ('home_team_managers',
         List(Struct({'country': Struct({'id': Int64, 'name': String}), 'dob': String, 'id': Int64, 'name': String, 'nickname': String}))),
        ('aw

In [None]:
# Distinct (season, competition) pairs present in the matches data, ordered by
# competition name and then season name.
# NOTE(review): no competition filter is applied here, so despite the
# "bundesliga" name and print label this frame covers every competition in
# `df` -- confirm whether a Bundesliga-only filter was intended.
season_competition_pairs = df.select(
    ["season_season_name", "competition_competition_name"]
).unique()
bundesliga_match_seasons = season_competition_pairs.sort(
    ["competition_competition_name", "season_season_name"]
).collect()

# Shorten the column names so they line up with competitions_summary.
column_renames = {
    "competition_competition_name": "competition_name",
    "season_season_name": "season_name",
}
matches_summary = bundesliga_match_seasons.rename(column_renames)
print("Bundesliga seasons in matches:", matches_summary)


Bundesliga seasons in matches: shape: (47, 2)
┌─────────────┬────────────────────┐
│ season_name ┆ competition_name   │
│ ---         ┆ ---                │
│ str         ┆ str                │
╞═════════════╪════════════════════╡
│ 2015/2016   ┆ 1. Bundesliga      │
│ 2023/2024   ┆ 1. Bundesliga      │
│ 1970/1971   ┆ Champions League   │
│ 1971/1972   ┆ Champions League   │
│ 1972/1973   ┆ Champions League   │
│ …           ┆ …                  │
│ 2020        ┆ UEFA Euro          │
│ 2024        ┆ UEFA Euro          │
│ 1988/1989   ┆ UEFA Europa League │
│ 2019        ┆ Women's World Cup  │
│ 2023        ┆ Women's World Cup  │
└─────────────┴────────────────────┘
