In [2]:
import pandas as pd

matchup_code = 'R6CH' # championship matchup
year = 2025

bracket_sims = pd.read_csv('./bracket_simulations.csv')

m_bracket_sims = bracket_sims[bracket_sims['Tournament'] == 'M']
# w_bracket_sims = bracket_sims[bracket_sims['Tournament'] == 'W']
    
seeds_m = pd.read_csv("march-machine-learning-mania-2025/MNCAATourneySeeds.csv")

# Have to manually pick outcome of first four
seeds_m = pd.concat([seeds_m, pd.DataFrame([{'Season': 2025, 'Seed': 'Y11', 'TeamID': 1314}])], ignore_index=True) # Alabama St
seeds_m = pd.concat([seeds_m, pd.DataFrame([{'Season': 2025, 'Seed': 'Y16', 'TeamID': 1106}])], ignore_index=True) # UNC
seeds_m = pd.concat([seeds_m, pd.DataFrame([{'Season': 2025, 'Seed': 'X11', 'TeamID': 1462}])], ignore_index=True) # Xavier
seeds_m = pd.concat([seeds_m, pd.DataFrame([{'Season': 2025, 'Seed': 'W16', 'TeamID': 1110}])], ignore_index=True) # American

teams = pd.read_csv("march-machine-learning-mania-2025/MTeams.csv")
seeds_m = seeds_m.merge(teams[['TeamID', 'TeamName']], on='TeamID')
seeds_m = seeds_m[seeds_m['Season'] == year]

seeds_m.head()

# Determining how many brackets were generated
num_brackets = bracket_sims['Bracket'].max()

# Labeling teams with the percentages of winning a particular matchup, given how many times they won in the simulation
value_counts = m_bracket_sims[m_bracket_sims['Slot'] == matchup_code]['Team'].value_counts()
percentages = (value_counts / num_brackets) * 100
team_names = seeds_m.set_index('Seed')['TeamName']
percentages.index = percentages.index.map(team_names)

# Represents the conditional probability that a team wins a particular matchup
percentages


Team
Duke              26.284
Houston           12.328
Auburn            10.786
Florida            9.646
St John's          5.038
Michigan St        4.597
Texas Tech         3.831
Iowa St            3.130
Tennessee          3.069
Maryland           1.963
Wisconsin          1.948
Alabama            1.907
St Mary's CA       1.559
Kentucky           1.553
Arizona            1.310
Texas A&M          0.951
Purdue             0.950
Clemson            0.950
Gonzaga            0.847
Illinois           0.836
Marquette          0.703
BYU                0.627
Connecticut        0.526
UCLA               0.525
Kansas             0.456
Michigan           0.424
Oregon             0.408
Mississippi        0.407
Missouri           0.385
Memphis            0.312
Louisville         0.288
VCU                0.205
Mississippi St     0.195
Utah St            0.187
Baylor             0.121
Creighton          0.108
UC San Diego       0.106
North Carolina     0.080
New Mexico         0.066
Vanderbilt         0

In [3]:
# Make a copy of bracket_simulations.csv FIRST, name it remaining_perfect_brackets.csv

def count_perfect_brackets(team_id, slot):
    """
    Count the number of perfect brackets remaining for a given team ID and slot.
    
    Parameters:
    team_id (int): The ID of the team.
    slot (str): The slot corresponding to a particular matchup.
    
    Returns:
    int: The number of perfect brackets remaining.
    """
    # Load the remaining perfect brackets
    remaining_brackets = pd.read_csv('remaining_perfect_brackets.csv')
    
    # Filter the bracket simulations for the given slot and team ID
    matching_brackets = m_bracket_sims[(m_bracket_sims['Slot'] == slot) & (m_bracket_sims['Team'] == team_id)]
    
    # Get the list of brackets that have the given team ID and slot
    matching_bracket_ids = matching_brackets['Bracket'].unique()
    
    # Remove brackets that don't have this pick from the remaining perfect brackets
    remaining_brackets = remaining_brackets[remaining_brackets['Bracket'].isin(matching_bracket_ids)]
    
    # Save the updated remaining perfect brackets
    remaining_brackets.to_csv('remaining_perfect_brackets.csv', index=False)
    
    # Return the count of remaining perfect brackets
    return remaining_brackets['Bracket'].nunique()

# Example usage:
team_id = 'W02'  # Replace with the desired team seed identifier (this is Uconn)
slot = 'R6CH'  # Replace with the desired slot (this is the championship)
perfect_brackets = 0

try:
    perfect_brackets = count_perfect_brackets(team_id, slot)
except FileNotFoundError:
    print("Make sure you have a copy of bracket_simulations.csv named remaining_perfect_brackets.csv")
    print()
    perfect_brackets = 0

print(f"Percent of perfect brackets remaining for team ID {team_id} in slot {slot}: {(perfect_brackets / num_brackets)*100}%")

Make sure you have a copy of bracket_simulations.csv named remaining_perfect_brackets.csv

Percent of perfect brackets remaining for team ID W02 in slot R6CH: 0.0%
