4. What is the longest match recorded in terms of duration? 


In [2]:
import pandas as pd
from pathlib import Path

# Resolve the project layout: the project root is the parent of the current
# working directory, inputs live under data/processed and answers are written
# to reports/answers (created on demand).
project_root = Path.cwd().parent
proc_dir = project_root.joinpath("data", "processed")
out_dir = project_root.joinpath("reports", "answers")
out_dir.mkdir(parents=True, exist_ok=True)

# Load the match-period table and the match-results table.
df_periods = pd.read_parquet(proc_dir.joinpath("all_match_periods.parquet"))
df_results = pd.read_parquet(
    proc_dir.joinpath("match_results_player_filled.parquet")
)

# Every column whose name starts with "period_" is treated as a period column.
period_cols = [name for name in df_periods.columns if name.startswith("period_")]

# Coerce each period column to numeric; values that cannot be parsed (and
# missing values) end up as 0 so they do not contribute to the row totals.
for name in period_cols:
    as_numeric = pd.to_numeric(df_periods[name], errors="coerce")
    df_periods[name] = as_numeric.fillna(0)

# normalization
def normalize_row(row: pd.Series, *, ms_above: float = 10000,
                  minutes_below: float = 300) -> pd.Series:
    """Rescale one row of period values to seconds using a magnitude heuristic.

    The unit of the row is guessed from the sum of its values:

    * sum > ``ms_above``       -> treated as milliseconds, divided by 1000
    * sum < ``minutes_below``  -> treated as minutes, multiplied by 60
    * anything else            -> treated as seconds, returned unchanged

    Both comparisons are strict, so a sum exactly equal to either threshold
    falls into the "seconds" case.

    Args:
        row: Numeric period values of a single match (one row when used via
            ``DataFrame.apply(normalize_row, axis=1)``).
        ms_above: Row sums strictly above this are treated as milliseconds.
        minutes_below: Row sums strictly below this are treated as minutes.

    Returns:
        The row expressed in seconds under the guessed unit.

    NOTE(review): the guess is inherently ambiguous. With the defaults, a row
    that really is in seconds but sums to more than 10000 (roughly 2h47m) is
    classified as milliseconds, and a seconds row summing to less than 300 is
    classified as minutes. Confirm the source units if they are known.
    """
    total = row.sum()
    if total > ms_above:        # milliseconds
        return row / 1000
    if total < minutes_below:   # minutes
        return row * 60
    return row                  # already seconds

# Upper bound (in minutes) above which a match duration is considered
# unrealistic and dropped.
MAX_DURATION_MIN = 600

# Bring every row's period values to seconds (unit guessed per row).
df_periods[period_cols] = df_periods[period_cols].apply(normalize_row, axis=1)

# Total match duration is the sum of its periods; derive both units on the
# full frame so no column is ever assigned onto a filtered slice.
df_periods["duration_sec"] = df_periods[period_cols].sum(axis=1)
df_periods["duration_min"] = df_periods["duration_sec"] / 60

# Keep only matches with a positive duration that is not unrealistically
# long. The explicit copy makes the result an independent frame, which avoids
# SettingWithCopyWarning on any later column assignment.
has_duration = df_periods["duration_sec"] > 0
is_realistic = df_periods["duration_min"] <= MAX_DURATION_MIN
df_periods = df_periods[has_duration & is_realistic].copy()

# Find the longest match.
if df_periods.empty:
    # Fail with a clear message instead of an opaque argmax error.
    raise ValueError("No matches with a valid duration remain after filtering.")

# Positional argmax + iloc always yields a single scalar match_id, even if the
# frame's index contains duplicate labels (idxmax + loc would return a Series
# in that case). Ties resolve to the first occurrence.
longest_pos = df_periods["duration_min"].argmax()
longest_id = df_periods["match_id"].iloc[longest_pos]

# Select the longest match first and only then attach the result columns, so
# the merge touches a handful of rows instead of the whole periods table.
# The left merge keeps the match even when it has no row in df_results.
df_longest = df_periods[df_periods["match_id"] == longest_id].merge(
    df_results, on="match_id", how="left"
)

# The merge result is a fresh frame, so adding a column here is safe.
# start_datetime is parsed as epoch seconds; unparseable values become NaT.
if "start_datetime" in df_longest.columns:
    df_longest["start_datetime_dt"] = pd.to_datetime(
        df_longest["start_datetime"], unit="s", errors="coerce"
    )

df_longest.to_csv(out_dir / "q4_longest_match.csv", index=False, encoding="utf-8-sig")

# Report the first row of the longest-match table.
row = df_longest.iloc[0]
duration_min = row["duration_min"]
# Split the duration into whole hours and the remaining minutes.
hours, minutes = divmod(duration_min, 60)

print("The longest valid match with player info:")
print("Match ID:", row["match_id"])

# Optional fields are printed only when the merged table provides them.
optional_fields = (
    ("Tournament:", "tournament_name"),
    ("Start time:", "start_datetime_dt"),
)
for label, column in optional_fields:
    if column in row:
        print(label, row[column])

print(f"Duration: {duration_min:.2f} minutes "
      f"({int(hours)}h {int(minutes)}m)")


The longest valid match with player info:
Match ID: 12102290
Start time: 2024-02-26 10:00:00
Duration: 274.00 minutes (4h 34m)
