In [8]:
import json
import os

import pandas as pd

from mm_analytics.utilities import NpEncoder
# Directory holding the Kaggle March Madness 2023 "Stage2" CSV files.
# The default is the original machine-specific location; set the MM_DATA_ROOT
# environment variable to run the notebook elsewhere without editing this cell.
DATA_ROOT = os.environ.get(
    "MM_DATA_ROOT",
    "/Users/andrewgrowney/data/kaggle/marchmadness-2023/Stage2",
)

In [5]:
def _read_table(file_stem):
    """Read ``<DATA_ROOT>/<file_stem>.csv`` into a DataFrame."""
    return pd.read_csv(f"{DATA_ROOT}/{file_stem}.csv")


# Columns: Season, DayZero, RegionW, RegionX, RegionY, RegionZ
seasons_df = _read_table("MSeasons")

# Columns: Season, Seed, TeamID
seeds_df = _read_table("MNCAATourneySeeds")

# Columns: Season, Slot[R1W4], StrongSeed[W04], WeakSeed[W13]
slots_df = _read_table("MNCAATourneySlots")

# Columns: Seed, GameRound[1], GameSlot[R1W4], EarlyDayNum, LateDayNum
seed_round_slots_df = _read_table("MNCAATourneySeedRoundSlots")

# Columns: Season, DayNum, WTeamID, LTeamID, WScore, LScore, WLoc, NumOT
results_df = _read_table("MNCAATourneyCompactResults")

In [16]:
TEAM_DATA_ROOT = "../data/web/ts"


def get_team_data(tid, season):
    """Load one team's per-season JSON file and return a trimmed summary.

    Reads ``<TEAM_DATA_ROOT>/<tid>_<season>.json`` and keeps only the overall
    record, five entries of ``stat_rankings`` and a compact form of each
    ``similar_teams`` entry.

    Args:
        tid: Team id used in the file name.
        season: Season used in the file name.

    Returns:
        dict with keys ``Rec``, ``AdjOE``, ``AdjDE``, ``AdjNE``, ``SOS``,
        ``Poss`` and ``Sims`` (a list of ``{"i", "y", "s", "e"}`` dicts taken
        from each similar team's ``id``, ``year``, ``avg`` and ``er``).

    Raises:
        OSError: If the team file cannot be opened.
        KeyError: If an expected key is missing from the file.
    """
    team_file = f"{TEAM_DATA_ROOT}/{tid}_{season}.json"
    with open(team_file) as handle:
        raw = json.load(handle)

    summary = {"Rec": raw["record"]["overall"]}

    rankings = raw["stat_rankings"]
    for stat_name in ("AdjOE", "AdjDE", "AdjNE", "SOS", "Poss"):
        summary[stat_name] = rankings[stat_name]

    # Short keys keep the per-team payload small.
    summary["Sims"] = [
        {"i": sim["id"], "y": sim["year"], "s": sim["avg"], "e": sim["er"]}
        for sim in raw["similar_teams"]
    ]
    return summary

In [19]:
# Build a JSON view of each season's tournament from the dataframes.
#
# For every season in 2013..2022 this writes ../data/web/tourney/<season>.json with:
#   "regions": the RegionW/X/Y/Z values from seasons_df,
#   "slots":   one entry per bracket slot holding round_start, the two team ids
#              (strong_seed / weak_seed), the winner and the final score,
#   "teams":   the get_team_data() summary for every seeded team.


def _resolve_team_id(seed, slot_winners, seed_teams):
    """Return the team id that fills one side of a slot.

    ``seed`` is either the name of an earlier slot whose winner advances
    (looked up in ``slot_winners``) or a seed label from the seeding data
    (looked up in ``seed_teams``).

    Raises:
        KeyError: If ``seed`` is in neither mapping, or names a slot that has
            not been resolved yet.
    """
    if seed.startswith("R") or seed in slot_winners:
        return slot_winners[seed]
    return seed_teams[seed]


# Loop-invariant: the EarlyDayNum of the first row listed for each GameSlot.
# It is only used to order slots so earlier games are resolved first.
slot_round_start = (
    seed_round_slots_df.drop_duplicates(subset="GameSlot")
    .set_index("GameSlot")["EarlyDayNum"]
)

for season in range(2013, 2023):
    print(f"Season: {season}")
    season_regions = seasons_df[seasons_df["Season"] == season]
    season_slots = slots_df[slots_df["Season"] == season]
    season_seeds = seeds_df[seeds_df["Season"] == season]
    season_results = results_df[results_df["Season"] == season]
    season_teams = sorted(season_seeds["TeamID"].unique())

    # Use the slots table to build the tournament structure;
    # each slot has a strong seed and a weak seed.
    tournament = {
        "regions": {
            region: season_regions[f"Region{region}"].values[0]
            for region in ("W", "X", "Y", "Z")
        },
        "slots": {},
        "teams": {
            int(team_id): get_team_data(int(team_id), season)
            for team_id in season_teams
        },
    }
    for _, row in season_slots.iterrows():
        slot = row["Slot"]
        slot_entry = tournament["slots"].setdefault(slot, {})
        slot_entry["round_start"] = slot_round_start[slot]
        # Seed labels for now; replaced by team ids once the slot is resolved below.
        slot_entry["strong_seed"] = row["StrongSeed"]
        slot_entry["weak_seed"] = row["WeakSeed"]

    # Seed label -> team id for this season.
    seed_teams = dict(zip(season_seeds["Seed"], season_seeds["TeamID"]))

    # Resolve slots in order of round start so that every slot feeding a later
    # one already has its winner recorded in slot_winners.
    slot_winners = {}
    slots_by_round_start = sorted(
        tournament["slots"],
        key=lambda name: tournament["slots"][name]["round_start"],
    )
    for slot in slots_by_round_start:
        slot_entry = tournament["slots"][slot]
        # Read both labels before the try block so the error message below
        # always describes the slot that actually failed.
        strong_seed = slot_entry["strong_seed"]
        weak_seed = slot_entry["weak_seed"]
        try:
            strong_seed_team_id = _resolve_team_id(strong_seed, slot_winners, seed_teams)
            weak_seed_team_id = _resolve_team_id(weak_seed, slot_winners, seed_teams)

            print(f"{strong_seed}[{strong_seed_team_id}] vs {weak_seed}[{weak_seed_team_id}][{season}]")
            slot_entry["strong_seed"] = strong_seed_team_id
            slot_entry["weak_seed"] = weak_seed_team_id

            # Fetch the slot winner from season_results.
            if season == 2021 and slot == "R1X7":
                # VCU vs Oregon forfeit: winner and score are hard-coded
                # instead of being looked up in season_results.
                slot_winner, wscore, lscore = 1332, 2, 0
            else:
                matchup = [strong_seed_team_id, weak_seed_team_id]
                game_row = season_results[
                    season_results["WTeamID"].isin(matchup)
                    & season_results["LTeamID"].isin(matchup)
                ]
                if game_row.empty:
                    raise LookupError(
                        f"no result for {strong_seed_team_id} vs {weak_seed_team_id}"
                    )
                slot_winner = game_row["WTeamID"].values[0]
                wscore = game_row["WScore"].values[0]
                lscore = game_row["LScore"].values[0]
            slot_entry["winner"] = slot_winner
            slot_entry["wscore"] = wscore
            slot_entry["lscore"] = lscore

            # Print the recorded score rather than re-reading game_row, which
            # is not set for the hard-coded forfeit above.
            print(f"{slot} winner: {slot_winner} [{wscore}-{lscore}]")
            slot_winners[slot] = slot_winner
        except Exception as e:
            print(f"Error {e}: {slot} {strong_seed} {weak_seed}, {slot_winners}")
            raise

    # NpEncoder is passed so json.dump can serialize the numpy values in the dict.
    with open(f"../data/web/tourney/{season}.json", "w") as f:
        json.dump(tournament, f, cls=NpEncoder)

Season: 2013
W16a[1241] vs W16b[1254][2013]
W16 winner: 1241 [68-55]
Y11a[1292] vs Y11b[1388][2013]
Y11 winner: 1388 [67-54]
Y16a[1251] vs Y16b[1299][2013]
Y16 winner: 1299 [73-72]
Z13a[1129] vs Z13b[1247][2013]
Z13 winner: 1247 [80-71]
W01[1231] vs W16[1241][2013]
R1W1 winner: 1231 [83-62]
W02[1274] vs W15[1334][2013]
R1W2 winner: 1274 [78-49]
W03[1266] vs W14[1172][2013]
R1W3 winner: 1266 [59-58]
W04[1393] vs W13[1285][2013]
R1W4 winner: 1393 [81-34]
W05[1424] vs W12[1143][2013]
R1W5 winner: 1143 [64-61]
W06[1139] vs W11[1137][2013]
R1W6 winner: 1139 [68-56]
W07[1228] vs W10[1160][2013]
R1W7 winner: 1228 [57-49]
W08[1301] vs W09[1396][2013]
R1W8 winner: 1396 [76-72]
X01[1242] vs X16[1443][2013]
R1X1 winner: 1242 [64-57]
X02[1207] vs X15[1195][2013]
R1X2 winner: 1195 [78-68]
X03[1196] vs X14[1322][2013]
R1X3 winner: 1196 [79-47]
X04[1276] vs X13[1355][2013]
R1X4 winner: 1276 [71-56]
X05[1433] vs X12[1103][2013]
R1X5 winner: 1433 [88-42]
X06[1417] vs X11[1278][2013]
R1X6 winner: 1278 [