In [None]:
import pandas as pd
from collections import defaultdict
import numpy as np

def _bucket_mean(part, col):
    """Return the mean of ``col`` in ``part``, or NaN when ``part`` has no rows."""
    return part[col].mean() if not part.empty else np.nan


def _blend(recent_val, earlier_val, weight_recent):
    """Weight two bucket means, falling back to whichever one is not NaN."""
    if np.isnan(recent_val):
        return earlier_val
    if np.isnan(earlier_val):
        return recent_val
    return weight_recent * recent_val + (1 - weight_recent) * earlier_val


def ground_over_averages(
    df,
    weight_recent=0.5,
    recent_games=30,
    innings_runs_col="inns_runs",
    innings_wkts_col="inns_wkts",
):
    """
    Build per-ground, per-over average "runs-wkts" lines, blending recent
    and earlier matches.

    For every (ground, match, innings, over) the last row of that over is
    kept as a snapshot.  At each ground the snapshots of one over are
    averaged separately for the ``recent_games`` most recent matches
    (ordered by year, then match id) and for all earlier matches; the two
    means are combined as
    ``weight_recent * recent + (1 - weight_recent) * earlier``.
    When only one of the two buckets has a value for a metric, that
    bucket's mean is used on its own.  Overs with no runs value in either
    bucket are left out.

    A final ``"Neutral Venue"`` entry holds, per over, the unweighted mean
    of the per-ground values (it replaces any ground of the same name).

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns "ground", "year", "p_match", "inns",
        "over" plus the two innings columns named below.
        NOTE(review): the last row of each over is treated as the
        end-of-over state, so rows are presumably in delivery order --
        confirm against the data source.
    weight_recent : float, default 0.5
        Weight given to the recent-match mean; the earlier-match mean gets
        ``1 - weight_recent``.
    recent_games : int, default 30
        Number of most recent matches per ground that form the "recent"
        bucket.  ``0`` puts every match in the "earlier" bucket.
    innings_runs_col : str, default "inns_runs"
        Column read as the innings runs value.
    innings_wkts_col : str, default "inns_wkts"
        Column read as the innings wickets value.

    Returns
    -------
    dict
        {
            "Wankhede Stadium": {1: "10.2-0.3", 2: "19.8-0.5", ...},
            ...
            "Neutral Venue":    {1: "11.0-0.4", 2: "21.2-0.6", ...}
        }

    Raises
    ------
    ValueError
        If ``recent_games`` is negative.
    """
    if recent_games < 0:
        # A negative count would silently swap the two buckets via slicing.
        raise ValueError("recent_games must be non-negative")

    # ---------- collect per-innings end-of-over snapshots -----------------
    cols_needed = [
        "ground", "year", "p_match", "inns", "over",
        innings_runs_col, innings_wkts_col,
    ]
    snap = (
        df[cols_needed]
        .sort_values(["ground", "year", "p_match", "inns", "over"])
        .groupby(["ground", "p_match", "inns", "over"], as_index=False)
        .tail(1)                     # last row of each over
    )

    # ---------- weighted mean per ground ----------------------------------
    gdict = {}

    for ground, gdf in snap.groupby("ground"):
        # Matches ordered chronologically; a match listed under more than
        # one year is kept once (at its earliest year) so it cannot land in
        # both buckets.
        match_ids = (
            gdf[["p_match", "year"]]
            .drop_duplicates()
            .sort_values(["year", "p_match"])
            .drop_duplicates(subset="p_match")["p_match"]
            .tolist()
        )

        # Explicit split index: unlike match_ids[-n:], this is also correct
        # for n == 0 and for n larger than the number of matches.
        split = max(len(match_ids) - recent_games, 0)
        recent_set = set(match_ids[split:])
        earlier_set = set(match_ids[:split])

        per_over = {}
        for over_no, odf in gdf.groupby("over"):
            recent = odf[odf["p_match"].isin(recent_set)]
            earlier = odf[odf["p_match"].isin(earlier_set)]

            runs = _blend(
                _bucket_mean(recent, innings_runs_col),
                _bucket_mean(earlier, innings_runs_col),
                weight_recent,
            )
            if np.isnan(runs):
                # No runs value in either bucket for this over.
                continue

            # Wickets are blended independently so that a bucket lacking
            # wicket values does not turn the combined figure into NaN.
            wkts = _blend(
                _bucket_mean(recent, innings_wkts_col),
                _bucket_mean(earlier, innings_wkts_col),
                weight_recent,
            )
            per_over[over_no] = (runs, wkts)

        if per_over:
            gdict[ground] = per_over

    # ---------- build the Neutral Venue averages --------------------------
    # Every over number that appears at any ground.
    all_overs = set().union(*(d.keys() for d in gdict.values()))

    neutral = {}
    for ov in sorted(all_overs):
        pairs = [d[ov] for d in gdict.values() if ov in d]
        neutral[ov] = (
            np.nanmean([r for r, _ in pairs]),
            np.nanmean([w for _, w in pairs]),
        )

    # convert tuples → "runs-wkts" strings & round
    out = {
        g: {ov: f"{r:.1f}-{w:.1f}" for ov, (r, w) in d.items()}
        for g, d in gdict.items()
    }
    out["Neutral Venue"] = {ov: f"{r:.1f}-{w:.1f}" for ov, (r, w) in neutral.items()}
    return out


  # Read the source CSV into a DataFrame (presumably T20 ball-by-ball data,
  # judging by the file name -- confirm).  The path is machine-specific.
  df = pd.read_csv(
      filepath_or_buffer="C:/Users/Arnav Jain/Desktop/cricket/data/t20_bbb.csv"
  )
