In [35]:
import pandas as pd

matchup_code = 'R6CH' # championship matchup
year = 2025

# Simulated brackets. Columns used in this notebook: Tournament, Bracket, Slot, Team.
bracket_sims = pd.read_csv('./bracket_simulations.csv')
bracket_sims_w = pd.read_csv('./bracket_simulations_w.csv')

m_bracket_sims = bracket_sims[bracket_sims['Tournament'] == 'M']
w_bracket_sims = bracket_sims_w[bracket_sims_w['Tournament'] == 'W']

seeds_m = pd.read_csv("march-machine-learning-mania-2025/MNCAATourneySeeds.csv")
seeds_w = pd.read_csv("march-machine-learning-mania-2025/WNCAATourneySeeds.csv")

# Have to manually pick outcome of first four: each record assigns the chosen
# play-in winner to the plain seed (e.g. 'Y11').
# Built as one frame per tournament and appended with a single concat instead of
# one concat per row.
# NOTE(review): the team labels below are carried over from the original notebook and
# have not been checked against MTeams.csv / WTeams.csv; some look mismatched with
# their TeamIDs (e.g. the Y11/Y16 labels may be swapped, and the women's 16-seed
# labels repeat names from the men's list) -- TODO confirm.
first_four_m = pd.DataFrame([
    {'Season': 2025, 'Seed': 'Y11', 'TeamID': 1314},  # labelled "Alabama St"
    {'Season': 2025, 'Seed': 'Y16', 'TeamID': 1106},  # labelled "UNC"
    {'Season': 2025, 'Seed': 'X11', 'TeamID': 1462},  # labelled "Xavier"
    {'Season': 2025, 'Seed': 'W16', 'TeamID': 1291},  # labelled "Mt. St. Mary's"
])
seeds_m = pd.concat([seeds_m, first_four_m], ignore_index=True)

first_four_w = pd.DataFrame([
    {'Season': 2025, 'Seed': 'W11', 'TeamID': 3162},  # labelled "Columbia"
    {'Season': 2025, 'Seed': 'X11', 'TeamID': 3235},  # labelled "Iowa St"
    {'Season': 2025, 'Seed': 'X16', 'TeamID': 3219},  # labelled "Xavier"
    {'Season': 2025, 'Seed': 'Y16', 'TeamID': 3471},  # labelled "American"
])
seeds_w = pd.concat([seeds_w, first_four_w], ignore_index=True)

# Attach team names to the seeds and keep only the season being analysed
teams = pd.read_csv("march-machine-learning-mania-2025/MTeams.csv")
seeds_m = seeds_m.merge(teams[['TeamID', 'TeamName']], on='TeamID')
seeds_m = seeds_m[seeds_m['Season'] == year]

teams_w = pd.read_csv("march-machine-learning-mania-2025/WTeams.csv")
seeds_w = seeds_w.merge(teams_w[['TeamID', 'TeamName']], on='TeamID')
seeds_w = seeds_w[seeds_w['Season'] == year]

# Determining how many brackets were generated
# (takes the largest bracket ID -- assumes IDs are numbered 1..N; TODO confirm)
num_brackets = bracket_sims['Bracket'].max()

# Labeling teams with the percentages of winning a particular matchup, given how many times they won in the simulation
value_counts = m_bracket_sims.loc[m_bracket_sims['Slot'] == matchup_code, 'Team'].value_counts()
percentages = (value_counts / num_brackets) * 100
# The 'Team' values are seed strings; swap them for readable team names
team_names = seeds_m.set_index('Seed')['TeamName']
percentages.index = percentages.index.map(team_names)

# Share (in %) of all simulated brackets in which each team wins `matchup_code`.
# This is not conditioned on the team reaching the matchup.
percentages


Team
Duke             27.1710
Houston          12.2594
Auburn           10.7660
Florida           9.4839
St John's         5.0292
                  ...   
Mt St Mary's      0.0001
NE Omaha          0.0001
Norfolk St        0.0001
Robert Morris     0.0001
Montana           0.0001
Name: count, Length: 62, dtype: float64

In [36]:
# Labeling teams with the percentages of winning a particular matchup, given how many times they won in the simulation
# The denominator is taken from the women's simulations themselves: `num_brackets`
# is derived from the men's file, which only gives the right percentages if both
# files happen to contain the same number of brackets.
# (largest bracket ID -- assumes IDs are numbered 1..N; TODO confirm)
num_brackets_w = w_bracket_sims['Bracket'].max()

value_counts = w_bracket_sims.loc[w_bracket_sims['Slot'] == matchup_code, 'Team'].value_counts()
percentages = (value_counts / num_brackets_w) * 100
# The 'Team' values are seed strings; swap them for readable team names
team_names = seeds_w.set_index('Seed')['TeamName']
percentages.index = percentages.index.map(team_names)

# Share (in %) of all simulated women's brackets in which each team wins `matchup_code`.
# This is not conditioned on the team reaching the matchup.
percentages

Team
Connecticut       37.7899
South Carolina    16.0958
Texas             13.3498
USC               10.0099
UCLA               9.9265
TCU                3.1133
Notre Dame         2.1554
Duke               2.0435
LSU                1.3555
NC State           1.1249
Ohio St            0.5504
Oklahoma           0.4265
Baylor             0.4067
Kansas St          0.2973
North Carolina     0.2862
West Virginia      0.2646
Mississippi        0.2366
Tennessee          0.1398
Alabama            0.0934
Maryland           0.0609
Kentucky           0.0521
Vanderbilt         0.0515
Michigan St        0.0460
Florida St         0.0253
Utah               0.0213
Oklahoma St        0.0212
Michigan           0.0106
Louisville         0.0091
Columbia           0.0061
Richmond           0.0047
Georgia Tech       0.0047
Iowa               0.0042
Harvard            0.0030
Mississippi St     0.0029
S Dakota St        0.0023
Illinois           0.0016
Murray St          0.0012
Creighton          0.0011
Oregon 

In [None]:
# Make a copy of bracket_simulations.csv FIRST, name it remaining_perfect_brackets.csv

def count_perfect_brackets(team_id, slot, sims=None, remaining_path='remaining_perfect_brackets.csv'):
    """
    Count the perfect brackets remaining after a result, and persist the pruned set.

    Brackets that did not pick `team_id` in `slot` are removed from the CSV at
    `remaining_path`, and the file is overwritten with the survivors. Calls are
    therefore cumulative and cannot be undone, so point `remaining_path` at a copy
    of the simulation file, never at the original.

    Parameters:
    team_id (str): Identifier of the winning team as stored in the 'Team' column of
        the simulations (a seed string such as 'W01', not a numeric TeamID).
    slot (str): The slot corresponding to a particular matchup.
    sims (pandas.DataFrame, optional): Simulated picks with 'Bracket', 'Slot' and
        'Team' columns. Defaults to the notebook-level `m_bracket_sims`.
    remaining_path (str, optional): CSV of brackets that are still perfect; must
        contain a 'Bracket' column. Defaults to 'remaining_perfect_brackets.csv'.

    Returns:
    int: The number of perfect brackets remaining.

    Raises:
    FileNotFoundError: If `remaining_path` does not exist.
    """
    if sims is None:
        # Backward-compatible default: the men's simulations loaded earlier in the notebook
        sims = m_bracket_sims

    # Load the remaining perfect brackets (raises FileNotFoundError if the copy was never made)
    remaining = pd.read_csv(remaining_path)

    # IDs of every simulated bracket that picked this team in this slot
    picked_correctly = (sims['Slot'] == slot) & (sims['Team'] == team_id)
    surviving_ids = sims.loc[picked_correctly, 'Bracket'].unique()

    # Drop every row belonging to a bracket that got this pick wrong
    remaining = remaining[remaining['Bracket'].isin(surviving_ids)]

    # Persist the pruned set so the next call starts from here
    remaining.to_csv(remaining_path, index=False)

    # Return the count of remaining perfect brackets
    return int(remaining['Bracket'].nunique())

# Example usage:
# NOTE: every call permanently prunes remaining_perfect_brackets.csv, so only run
# this for results that have actually happened.
team_id = 'W01'  # Replace with the desired team seed identifier (this is Duke)
# Replace with the slot of the game that was decided. 'R1W1' is NOT the
# championship -- that slot is 'R6CH' (see matchup_code); presumably 'R1W1' is
# W01's round-1 game -- TODO confirm against the slots file.
slot = 'R1W1'
perfect_brackets = 0  # stays 0 if the working copy is missing

try:
    perfect_brackets = count_perfect_brackets(team_id, slot)
except FileNotFoundError:
    print("Make sure you have a copy of bracket_simulations.csv named remaining_perfect_brackets.csv")
    print()

print(f"Percent of perfect brackets remaining for team ID {team_id} in slot {slot}: {(perfect_brackets / num_brackets)*100}%")

Percent of perfect brackets remaining for team ID W01 in slot R6CH: 27.171%
