In [None]:
# Build silver/open_data/lineups/lineups_by_match.parquet (all files)
from football_pipeline.utils.constants import BRONZE_OPEN_DATA_LINEUPS_DIR, SILVER_OPEN_DATA_LINEUPS_DIR
from pathlib import Path
import re
import polars as pl

# paths
# Bronze input directory (read) and silver output directory (created if missing).
bronze_lineups_dir = BRONZE_OPEN_DATA_LINEUPS_DIR
silver_lineups_dir = SILVER_OPEN_DATA_LINEUPS_DIR
silver_lineups_dir.mkdir(parents=True, exist_ok=True)
out_file = silver_lineups_dir / "lineups_by_match.parquet"

# collect rows from all lineup files (keep lineup JSON as-is)
# match_id is not read from the file contents; it is parsed from the file name.
rx = re.compile(r"lineups_(\d+)\.parquet$", re.I)
rows = []
for fp in sorted(Path(bronze_lineups_dir).glob("lineups_*.parquet")):
    m = rx.search(fp.name)
    if not m:
        # The glob is looser than the regex (it also matches non-numeric
        # suffixes), so skip anything that does not carry a numeric id.
        continue
    mid = int(m.group(1))
    # Only these three columns are carried over; match_id is attached as a
    # constant column for every row of the file.
    # NOTE(review): pl.lit on a Python int does not produce Int64 by default;
    # the dtype is left as-is here so the existing silver schema is unchanged.
    df = pl.read_parquet(fp, columns=["team_id", "team_name", "lineup"]).with_columns(
        pl.lit(mid).alias("match_id")
    )
    rows.append(df)

if not rows:
    raise RuntimeError(f"No lineup files found in {bronze_lineups_dir}")

all_lineups = (
    pl.concat(rows)
    # Drop accidental dupes. keep="first" makes the surviving row
    # deterministic (the default keep="any" may pick a different one per run).
    .unique(subset=["match_id", "team_id"], keep="first")
    # unique() does not preserve row order by default; sort explicitly so the
    # written parquet file is reproducible from run to run.
    .sort(["match_id", "team_id"])
)

all_lineups.write_parquet(out_file)
print(f"✓ wrote {out_file}")
print(all_lineups.shape)
# Frame is already ordered by match_id; the sort is kept so this preview
# stays correct even if the ordering above is changed later.
print(all_lineups.sort("match_id").head(5))



✓ wrote /Users/architmanek/Desktop/DataEngineering/football_pipeline/data/silver/open_data/lineups/lineups_by_match.parquet
(6928, 4)
shape: (5, 4)
┌─────────┬────────────────────────┬─────────────────────────────────┬──────────┐
│ team_id ┆ team_name              ┆ lineup                          ┆ match_id │
│ ---     ┆ ---                    ┆ ---                             ┆ ---      │
│ i64     ┆ str                    ┆ str                             ┆ i32      │
╞═════════╪════════════════════════╪═════════════════════════════════╪══════════╡
│ 746     ┆ Manchester City WFC    ┆ [{"player_id": 4635, "player_n… ┆ 7298     │
│ 971     ┆ Chelsea FCW            ┆ [{"player_id": 4633, "player_n… ┆ 7298     │
│ 759     ┆ Washington Spirit      ┆ [{"player_id": 4940, "player_n… ┆ 7430     │
│ 766     ┆ North Carolina Courage ┆ [{"player_id": 5034, "player_n… ┆ 7430     │
│ 765     ┆ Portland Thorns        ┆ [{"player_id": 4995, "player_n… ┆ 7443     │
└─────────┴─────────────────────