In [1]:
import requests
import json
import math
import pandas as pd
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry

In [16]:
def to_finish(round_id, final_round):
    if round_id == 1 and final_round == 7:
        return 68
    else:
        return 2 ** (final_round + 1 - round_id)
    
def get_winner(game):
    if len(game["teams"]) == 0:
        return None
    winner_list = list(filter(lambda x: x["isWinner"], game["teams"]))
    if len(winner_list) == 1:
        return winner_list[0]
    else:
        score0 = game["teams"][0]["score"]
        score1 = game["teams"][1]["score"]
        if score0 and score1 and score0 > score1:
            return game["teams"][0]
        else:
            return game["teams"][1]
        
def get_loser(game):
    if len(game["teams"]) == 0:
        return None
    loser_list = list(filter(lambda x: not x["isWinner"], game["teams"]))
    if len(loser_list) == 1:
        return loser_list[0]
    else:
        score0 = game["teams"][0]["score"]
        score1 = game["teams"][1]["score"]
        if score0 and score1 and score0 > score1:
            return game["teams"][1]
        else:
            return game["teams"][0]

def convert(game, final_round):
    winner = get_winner(game)
    loser = get_loser(game)
    finish = to_finish(math.floor(game["bracketId"] / 100), final_round)
    if not winner:
        return ["", 0, finish, "", ""]
    teamName = loser["nameFull"]
    seed = loser["seed"]
    opponent = winner["nameFull"]
    score = str(loser["score"]) + "-" + str(winner["score"])
    return [teamName, seed, finish, opponent, score]


def get_champion(games, final_round):
    games.sort(key=lambda x: x["bracketId"], reverse=True)
    championship = games[0]
    winner = get_winner(championship)
    loser = get_loser(championship)
    finish = to_finish(math.floor(championship["bracketId"] / 100) + 1, final_round)
    if not winner:
        return ["", 0, finish, "", ""]
    teamName = winner["nameFull"]
    seed = winner["seed"]
    
    opponent = loser["nameFull"]
    score = str(winner["score"]) + "-" + str(loser["score"])
    return [teamName, seed, finish, opponent, score]

In [17]:
ids = [
    6251,  # 2023
    6200,  # 2022
    6124,  # 2021
    5761,  # 2019
    5489,  # 2018
    4761,  # 2017
    3921,  # 2016
]
url = "https://sdataprod.ncaa.com/?operationName=get_championship_by_id_ncaa&variables=%7B%22championshipId%22%3A{champ_id}%2C%22sportUrl%22%3A%22basketball-women%22%2C%22division%22%3A1%2C%22year%22%3A{year}%2C%22showUnstaged%22%3Afalse%2C%22staticTestEnv%22%3Anull%7D&extensions=%7B%22persistedQuery%22%3A%7B%22version%22%3A1%2C%22sha256Hash%22%3A%22f08bb9619f4e1b045d0c2f6fa82261fce5cf4e16bdd2e61f9d9f9772b9a40ec0%22%7D%7D"

In [18]:
def load_season(url):
    data = requests.get(url).json()
    year = data["data"]["championships"][0]["year"]
    games = data["data"]["championships"][0]["games"]
    games.sort(key=lambda x: x["bracketId"])
    final_round = math.floor(games[-1]["bracketId"] / 100)

    df = pd.DataFrame(
        list(map(lambda x: convert(x, final_round), games))
        + [get_champion(games, final_round)],
        columns=["team", "seed", "finish", "opponent", "score"],
    )
    df["year"] = year
    return df


df = pd.concat(list(map(lambda x: load_season(url.format(champ_id=x, year=0)), ids)))
df.reset_index(drop=True)
df = df[["year", "team", "seed", "finish", "opponent", "score"]]
df

Unnamed: 0,year,team,seed,finish,opponent,score
0,2023,"Southern University, Baton Rouge",16,68,Sacred Heart University,47-57
1,2023,Monmouth University,16,68,Tennessee Technological University,69-79
2,2023,University of Illinois Urbana-Champaign,11,68,Mississippi State University,56-70
3,2023,Purdue University,11,68,St. John's University (New York),64-66
4,2023,Norfolk State University,16,64,"University of South Carolina, Columbia",40-72
...,...,...,...,...,...,...
59,2016,Stanford University,4,8,University of Washington,76-85
60,2016,Oregon State University,2,4,University of Connecticut,51-80
61,2016,University of Washington,7,4,Syracuse University,59-80
62,2016,Syracuse University,4,2,University of Connecticut,51-82


In [19]:
df.to_csv("recent_matches.csv", index=False)