In [7]:
import pandas as pd

matchup_code = 'R6CH' # championship matchup
year = 2025

bracket_sims = pd.read_csv('./bracket_simulations.csv')
bracket_sims_w = pd.read_csv('./bracket_simulations_w.csv')

m_bracket_sims = bracket_sims[bracket_sims['Tournament'] == 'M']
w_bracket_sims = bracket_sims_w[bracket_sims_w['Tournament'] == 'W']

seeds_m = pd.read_csv("march-machine-learning-mania-2025/MNCAATourneySeeds.csv")
seeds_w = pd.read_csv("march-machine-learning-mania-2025/WNCAATourneySeeds.csv")

# Have to manually pick outcome of first four
seeds_m = pd.concat([seeds_m, pd.DataFrame([{'Season': 2025, 'Seed': 'Y11', 'TeamID': 1314}])], ignore_index=True) # Alabama St
seeds_m = pd.concat([seeds_m, pd.DataFrame([{'Season': 2025, 'Seed': 'Y16', 'TeamID': 1106}])], ignore_index=True) # UNC
seeds_m = pd.concat([seeds_m, pd.DataFrame([{'Season': 2025, 'Seed': 'X11', 'TeamID': 1462}])], ignore_index=True) # Xavier
seeds_m = pd.concat([seeds_m, pd.DataFrame([{'Season': 2025, 'Seed': 'W16', 'TeamID': 1110}])], ignore_index=True) # American

seeds_w = pd.concat([seeds_w, pd.DataFrame([{'Season': 2025, 'Seed': 'W11', 'TeamID': 3162}])], ignore_index=True) # Columbia
seeds_w = pd.concat([seeds_w, pd.DataFrame([{'Season': 2025, 'Seed': 'X11', 'TeamID': 3235}])], ignore_index=True) # Iowa St
seeds_w = pd.concat([seeds_w, pd.DataFrame([{'Season': 2025, 'Seed': 'X16', 'TeamID': 3219}])], ignore_index=True) # Xavier
seeds_w = pd.concat([seeds_w, pd.DataFrame([{'Season': 2025, 'Seed': 'Y16', 'TeamID': 3471}])], ignore_index=True) # American

teams = pd.read_csv("march-machine-learning-mania-2025/MTeams.csv")
seeds_m = seeds_m.merge(teams[['TeamID', 'TeamName']], on='TeamID')
seeds_m = seeds_m[seeds_m['Season'] == year]

teams_w = pd.read_csv("march-machine-learning-mania-2025/WTeams.csv")
seeds_w = seeds_w.merge(teams_w[['TeamID', 'TeamName']], on='TeamID')
seeds_w = seeds_w[seeds_w['Season'] == year]

# Determining how many brackets were generated
num_brackets = bracket_sims['Bracket'].max()

# Labeling teams with the percentages of winning a particular matchup, given how many times they won in the simulation
value_counts = m_bracket_sims[m_bracket_sims['Slot'] == matchup_code]['Team'].value_counts()
percentages = (value_counts / num_brackets) * 100
team_names = seeds_m.set_index('Seed')['TeamName']
percentages.index = percentages.index.map(team_names)

# Represents the conditional probability that a team wins a particular matchup
percentages


Team
Duke              26.112
Houston           12.322
Auburn            10.917
Florida            9.564
St John's          5.234
Michigan St        4.632
Texas Tech         3.867
Iowa St            3.035
Tennessee          2.992
Wisconsin          2.041
Maryland           1.920
Alabama            1.885
St Mary's CA       1.584
Kentucky           1.481
Arizona            1.278
Clemson            0.977
Gonzaga            0.915
Texas A&M          0.899
Illinois           0.893
Purdue             0.880
Marquette          0.737
UCLA               0.602
BYU                0.571
Connecticut        0.516
Oregon             0.445
Kansas             0.418
Michigan           0.402
Missouri           0.391
Mississippi        0.352
Memphis            0.293
Louisville         0.282
VCU                0.200
Utah St            0.199
Mississippi St     0.176
Baylor             0.132
UC San Diego       0.123
Creighton          0.106
North Carolina     0.099
Vanderbilt         0.075
McNeese St         0

In [8]:
# Labeling teams with the percentages of winning a particular matchup, given how many times they won in the simulation
value_counts = w_bracket_sims[w_bracket_sims['Slot'] == matchup_code]['Team'].value_counts()
percentages = (value_counts / num_brackets) * 100
team_names = seeds_w.set_index('Seed')['TeamName']
percentages.index = percentages.index.map(team_names)

# Represents the conditional probability that a team wins a particular matchup
percentages

Team
Connecticut       37.828
South Carolina    16.180
Texas             13.252
USC                9.967
UCLA               9.964
TCU                3.119
Notre Dame         2.162
Duke               2.004
LSU                1.351
NC State           1.146
Ohio St            0.541
Oklahoma           0.412
Baylor             0.391
North Carolina     0.347
Kansas St          0.286
West Virginia      0.264
Mississippi        0.250
Tennessee          0.128
Alabama            0.085
Kentucky           0.070
Maryland           0.058
Vanderbilt         0.043
Michigan St        0.043
Florida St         0.021
Oklahoma St        0.019
Utah               0.017
Michigan           0.015
Iowa               0.007
Harvard            0.005
Louisville         0.004
Georgia Tech       0.004
Columbia           0.004
Richmond           0.003
Illinois           0.003
Creighton          0.002
Murray St          0.002
Iowa St            0.001
Oregon             0.001
California         0.001
Name: count, dtype: 

In [9]:
# Make a copy of bracket_simulations.csv FIRST, name it remaining_perfect_brackets.csv

def count_perfect_brackets(team_id, slot):
    """
    Count the number of perfect brackets remaining for a given team ID and slot.
    
    Parameters:
    team_id (int): The ID of the team.
    slot (str): The slot corresponding to a particular matchup.
    
    Returns:
    int: The number of perfect brackets remaining.
    """
    # Load the remaining perfect brackets
    remaining_brackets = pd.read_csv('remaining_perfect_brackets.csv')
    
    # Filter the bracket simulations for the given slot and team ID
    matching_brackets = m_bracket_sims[(m_bracket_sims['Slot'] == slot) & (m_bracket_sims['Team'] == team_id)]
    
    # Get the list of brackets that have the given team ID and slot
    matching_bracket_ids = matching_brackets['Bracket'].unique()
    
    # Remove brackets that don't have this pick from the remaining perfect brackets
    remaining_brackets = remaining_brackets[remaining_brackets['Bracket'].isin(matching_bracket_ids)]
    
    # Save the updated remaining perfect brackets
    remaining_brackets.to_csv('remaining_perfect_brackets.csv', index=False)
    
    # Return the count of remaining perfect brackets
    return remaining_brackets['Bracket'].nunique()

# Example usage:
team_id = 'W02'  # Replace with the desired team seed identifier (this is Uconn)
slot = 'R6CH'  # Replace with the desired slot (this is the championship)
perfect_brackets = 0

try:
    perfect_brackets = count_perfect_brackets(team_id, slot)
except FileNotFoundError:
    print("Make sure you have a copy of bracket_simulations.csv named remaining_perfect_brackets.csv")
    print()
    perfect_brackets = 0

print(f"Percent of perfect brackets remaining for team ID {team_id} in slot {slot}: {(perfect_brackets / num_brackets)*100}%")

Make sure you have a copy of bracket_simulations.csv named remaining_perfect_brackets.csv

Percent of perfect brackets remaining for team ID W02 in slot R6CH: 0.0%
