In [141]:
import pandas as pd
import numpy as np
import sklearn
import random
from collections import Counter
import matplotlib.pyplot as plt

# Load the bracket order and the Elo ratings, then join them on the shared "Team" column.
bracket = pd.read_csv("Bracket.csv")
mm_elos = pd.read_csv("MM_Team_Elos.csv")

combined = bracket.merge(mm_elos, on="Team")
print(combined)

# Map every team name to [rating, seed], visiting the merged rows in order.
# Later cells pair consecutive keys of this mapping into first-round games.
teams_ratings_seeds_dict = {
    team_row["Team"]: [team_row["Rating"], team_row["Seed"]]
    for _row_label, team_row in combined.iterrows()
}
print(teams_ratings_seeds_dict)

# Before any game is played each team is given an appearance probability of 1.
teams_og_appearance_probs = dict.fromkeys(teams_ratings_seeds_dict, 1)
print(teams_og_appearance_probs)

                        Team      Rating  Seed
0                    Alabama  2820.05248     1
1   Texas A&M-Corpus Christi  2130.04288    16
2                   Maryland  2574.20800     8
3              West Virginia  2563.54560     9
4            San Diego State  2621.12256     5
5      College of Charleston  2430.11328    12
6                   Virginia  2553.18784     4
7                     Furman  2383.19872    13
8                  Creighton  2667.42784     6
9       North Carolina State  2480.98816    11
10                    Baylor  2654.32832     3
11          UC-Santa Barbara  2300.64128    14
12                  Missouri  2479.46496     7
13                Utah State  2543.13472    10
14                   Arizona  2711.29600     2
15                 Princeton  2297.29024    15
16                    Purdue  2725.91872     1
17       Fairleigh Dickinson  1961.88160    16
18                   Memphis  2587.30752     8
19          Florida Atlantic  2515.41248     9
20           

In [142]:
#elo calculation of the chance of one team winning against the other    
def calculateWinExpectation(rating1, rating2):
    """Return the Elo win expectation of the first rating against the second.

    Args:
        rating1: Rating of the team whose win chance is wanted.
        rating2: Rating of the opposing team.

    Returns:
        ``1 / (1 + 10 ** ((rating2 - rating1) / 400))``; equal ratings give 0.5.
    """
    rating_gap = rating2 - rating1
    return 1 / (1 + 10 ** (rating_gap / 400))

#calculates the points expected in the current round
def calculateExpectedPoints(win_probs_dict, team_base_stats_dict, round_no):
    """Compute each team's expected points for winning in the given round.

    Args:
        win_probs_dict: Mapping of team name to its probability of winning.
        team_base_stats_dict: Mapping of team name to a sequence whose item at
            index 1 is used as the per-win multiplier (the notebook stores the
            seed there).
        round_no: Round number; the result is scaled by ``2 ** (round_no - 1)``.

    Returns:
        Dict with the same keys (and key order) as ``win_probs_dict`` holding
        ``probability * stats[1] * 2 ** (round_no - 1)``.
    """
    return {
        name: probability * team_base_stats_dict[name][1] * 2 ** (round_no - 1)
        for name, probability in win_probs_dict.items()
    }

def calculateAdvancementExpectation(teams1, teams2, teams_dict):
    """Compute, for every team on either side of a slot, its chance of advancing.

    A team's result is its own appearance probability multiplied by the
    appearance-weighted sum of its win expectations against each possible
    opponent from the other side.

    Args:
        teams1: Mapping of team name to appearance probability for one side.
        teams2: Same mapping for the opposing side.
        teams_dict: Mapping of team name to a sequence whose item at index 0 is
            the team's rating.

    Returns:
        Dict of team name to advancement probability, with the ``teams1``
        entries first and the ``teams2`` entries after them.
    """

    def advance_probs(side, opponents):
        # Advancement probability of every team in `side` against the field in `opponents`.
        probs = {}
        for team, appearance_prob in side.items():
            rating = teams_dict[team][0]
            beats_field = 0
            # Accumulate one opponent at a time so the floating-point order is stable.
            for rival, rival_appearance_prob in opponents.items():
                rival_rating = teams_dict[rival][0]
                beats_field += rival_appearance_prob * calculateWinExpectation(rating, rival_rating)
            probs[team] = beats_field * appearance_prob
        return probs

    win_probs_dict = advance_probs(teams1, teams2)
    win_probs_dict.update(advance_probs(teams2, teams1))
    return win_probs_dict


def createRound1Dict(teams_og_appearance_probs):
    """Build the first-round games by pairing consecutive teams in key order.

    Keys 0 and 1 form match 1, keys 2 and 3 form match 2, and so on. Ratings
    and seeds are read from the notebook-level ``teams_ratings_seeds_dict``.

    Args:
        teams_og_appearance_probs: Mapping of team name to its appearance
            probability, ordered the way the bracket pairs teams.

    Returns:
        List of game dicts with keys ``match_no``, ``round`` (always 1),
        ``team_a``, ``team_b``, ``win_probabilities`` and ``expected_scores``.

    Raises:
        IndexError: If the mapping holds an odd number of teams, because the
            last team has no opponent.
    """
    # Every game built here is a first-round game. The round number must stay
    # fixed for the whole loop; bumping it on the last match would scale that
    # match's expected scores as if it were a second-round game.
    round_no = 1

    # The key order never changes inside the loop, so materialise it once.
    team_names = list(teams_og_appearance_probs)

    games = []
    for match_no, first_idx in enumerate(range(0, len(team_names), 2), start=1):
        team_a_name = team_names[first_idx]
        team_b_name = team_names[first_idx + 1]

        game = {
            "match_no": match_no,
            "round": round_no,
            "team_a": {team_a_name: teams_og_appearance_probs[team_a_name]},
            "team_b": {team_b_name: teams_og_appearance_probs[team_b_name]},
            "win_probabilities": None,
            "expected_scores": {}
        }

        advancement_preds = calculateAdvancementExpectation(game["team_a"], game["team_b"], teams_ratings_seeds_dict)
        game["win_probabilities"] = advancement_preds
        game["expected_scores"] = calculateExpectedPoints(advancement_preds, teams_ratings_seeds_dict, round_no)
        games.append(game)

    return games

def iterateSubsequentRounds(games, round_num, match_num):
    """Append every round from ``round_num`` onward to ``games`` and return it.

    For each round, consecutive games of the previous round are paired up: the
    ``win_probabilities`` of the two feeder games become the two sides of the
    new game. Ratings and seeds are read from the notebook-level
    ``teams_ratings_seeds_dict``. Rounds keep being added until the previous
    round consists of a single game.

    Args:
        games: List of game dicts; it is extended in place.
        round_num: Number of the first round to build. Games whose ``round``
            equals ``round_num - 1`` are used as its feeders.
        match_num: ``match_no`` assigned to the first game created; later
            games count up from it.

    Returns:
        The same ``games`` list. It is returned unchanged when the previous
        round has no games or exactly one game.

    Raises:
        IndexError: If a previous round has an odd number (three or more) of
            games, because the last one has no partner.
    """
    round_no = round_num
    match_no = match_num

    while True:
        prev_round_games = [game for game in games if game["round"] == round_no - 1]

        # One feeder game means the bracket is decided; zero means there is
        # nothing to build from. Either way always hand back the list rather
        # than falling off the end of the function with None.
        if len(prev_round_games) <= 1:
            return games

        for first_idx in range(0, len(prev_round_games), 2):
            teams_a = prev_round_games[first_idx]["win_probabilities"]
            teams_b = prev_round_games[first_idx + 1]["win_probabilities"]

            advancement_preds = calculateAdvancementExpectation(teams_a, teams_b, teams_ratings_seeds_dict)
            games.append({
                "match_no": match_no,
                "round": round_no,
                "team_a": teams_a,
                "team_b": teams_b,
                "win_probabilities": advancement_preds,
                "expected_scores": calculateExpectedPoints(advancement_preds, teams_ratings_seeds_dict, round_no),
            })
            match_no += 1

        round_no += 1


In [143]:
#create dict for round 1
games = createRound1Dict(teams_og_appearance_probs)
print(games)

#iterate through remaining rounds based on round 1 dict
# Match numbering continues right after the first-round games, so the starting
# number is derived from the bracket size instead of being hard-coded.
games_final = iterateSubsequentRounds(games, 2, len(games) + 1)
print(games_final)

# The last game appended is the championship; its win probabilities are each
# team's chance of winning the whole bracket.
final_game = games_final[-1]
win_probs_list = [final_game["win_probabilities"]]
expected_points_list = [final_game["expected_scores"]]

# Look both columns up by team name so they stay aligned with "Teams" even if
# the two dicts were ever built in different orders.
keys = list(win_probs_list[0].keys())
values1 = [final_game["win_probabilities"][team] for team in keys]
values2 = [final_game["expected_scores"][team] for team in keys]

pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)

df = pd.DataFrame({
    "Teams": keys,
    "Final Win Probs": values1,
    "Expected Points": values2,
})

df_sorted = df.sort_values(by="Final Win Probs", ascending=False)
display(df_sorted)

# Sanity check: the championship probabilities should add up to roughly 1.
win_prob_sum = df_sorted["Final Win Probs"].sum()
print(win_prob_sum)

[{'match_no': 1, 'round': 1, 'team_a': {'Alabama': 1}, 'team_b': {'Texas A&M-Corpus Christi': 1}, 'win_probabilities': {'Alabama': 0.9815127654018506, 'Texas A&M-Corpus Christi': 0.01848723459814948}, 'expected_scores': {'Alabama': 0.9815127654018506, 'Texas A&M-Corpus Christi': 0.2957957535703917}}, {'match_no': 2, 'round': 1, 'team_a': {'Maryland': 1}, 'team_b': {'West Virginia': 1}, 'win_probabilities': {'Maryland': 0.5153396117171185, 'West Virginia': 0.48466038828288144}, 'expected_scores': {'Maryland': 4.122716893736948, 'West Virginia': 4.361943494545933}}, {'match_no': 3, 'round': 1, 'team_a': {'San Diego State': 1}, 'team_b': {'College of Charleston': 1}, 'win_probabilities': {'San Diego State': 0.7501734935719745, 'College of Charleston': 0.2498265064280255}, 'expected_scores': {'San Diego State': 3.7508674678598726, 'College of Charleston': 2.997918077136306}}, {'match_no': 4, 'round': 1, 'team_a': {'Virginia': 1}, 'team_b': {'Furman': 1}, 'win_probabilities': {'Virginia': 0

Unnamed: 0,Teams,Final Win Probs,Expected Points
32,Houston,0.2203234,7.050349
0,Alabama,0.194584,6.226687
46,Texas,0.07493687,4.795959
16,Purdue,0.06934899,2.219168
58,Gonzaga,0.06451879,6.193804
48,Kansas,0.0569754,1.823213
14,Arizona,0.05029805,3.219075
54,Connecticut,0.04093687,5.239919
62,UCLA,0.03433916,2.197707
30,Marquette,0.03067709,1.963334


1.0000000000000004


In [169]:
initial_predictions = []

#get initial win-loss predictions based on round 1 win probs
games_in_round = [game for game in games_final if game["round"] == 1]

for match in games_in_round:
    win_probs = match["win_probabilities"]

    # Pick the winner and loser from this game's own probabilities only.
    # max() returns the first team on an exact tie, so an evenly matched game
    # still gets a winner from this game instead of reusing the previous
    # game's pick (or hitting an undefined name on the very first game).
    greater_score_team = max(win_probs, key=win_probs.get)
    lesser_score_team = next(team for team in win_probs if team != greater_score_team)

    game_winner_loser = {
        "winner" : greater_score_team,
        "loser" : lesser_score_team,
    }
    initial_predictions.append(game_winner_loser)

print("initial predictions round 1: ", initial_predictions)

def calculateStartIndex(round_no):
    """Return the cumulative offset used when predicting round ``round_no``.

    The value is 0 for round 2 and grows by ``2 ** (8 - r)`` for every round
    ``r`` from 3 up to ``round_no`` (32, 48, 56, 60 for rounds 3 to 6).
    NOTE(review): the constant 8 presumably encodes a 64-team bracket with 32
    first-round games; confirm before using a different bracket size.

    Args:
        round_no: Round number, 2 or greater.

    Returns:
        The accumulated offset.

    Raises:
        ValueError: If ``round_no`` is below 2. The recursive form never
            reached its base case for such values.
    """
    if round_no < 2:
        raise ValueError(f"round_no must be 2 or greater, got {round_no!r}")

    start_index = 0
    # Accumulate from round 3 upward, the same order the recursion unwound in.
    for earlier_round in range(3, round_no + 1):
        start_index = 2 ** (8 - earlier_round) + start_index
    return start_index

def getSubsequentRoundPreds(round_no, count, games_final, initial_predictions):
    """Extend the winner/loser picks with one pick per game of each later round.

    For every game of ``round_no`` the two candidates are the predicted winners
    stored at ``initial_predictions[count]`` and ``initial_predictions[count + 1]``;
    ``count`` then moves forward by two. The candidate with the strictly larger
    value in that game's ``win_probabilities`` is recorded as the winner,
    otherwise the second candidate is. A game is skipped without a pick when
    either candidate is missing from its ``win_probabilities``. The function
    then recurses into the next round and stops once ``round_no`` equals 7.

    Args:
        round_no: Round to predict in this call.
        count: Index in ``initial_predictions`` of the first feeder pick.
        games_final: List of game dicts carrying ``round`` and
            ``win_probabilities``.
        initial_predictions: List of ``{"winner", "loser"}`` dicts; it is
            extended in place.

    Returns:
        The same ``initial_predictions`` list.
    """
    if round_no == 7:
        return initial_predictions

    # Picks are only made while the cursor stays below this bound.
    cursor_limit = calculateStartIndex(round_no) + 2 ** (8 - round_no)
    current_round_games = [g for g in games_final if g["round"] == round_no]

    for game in current_round_games:
        round_win_probs = game["win_probabilities"]

        if count < cursor_limit:
            first_pick = initial_predictions[count]["winner"]
            second_pick = initial_predictions[count + 1]["winner"]

            if first_pick in round_win_probs and second_pick in round_win_probs:
                # A tie goes to the second candidate because the comparison is strict.
                if round_win_probs[first_pick] > round_win_probs[second_pick]:
                    advancing, eliminated = first_pick, second_pick
                else:
                    advancing, eliminated = second_pick, first_pick

                initial_predictions.append({"winner": advancing, "loser": eliminated})

        # Move on to the next pair of feeder picks whether or not a pick was made.
        count += 2

    return getSubsequentRoundPreds(round_no + 1, count, games_final, initial_predictions)

# Fill in the picks for rounds 2 onward (the list is extended in place), then
# show the full list and how many picks it now holds.
print(
    "predictions for all following rounds: ",
    getSubsequentRoundPreds(
        round_no=2,
        count=0,
        games_final=games_final,
        initial_predictions=initial_predictions,
    ),
)
print(len(initial_predictions))
            

initial predictions round 1:  [{'winner': 'Alabama', 'loser': 'Texas A&M-Corpus Christi'}, {'winner': 'Maryland', 'loser': 'West Virginia'}, {'winner': 'San Diego State', 'loser': 'College of Charleston'}, {'winner': 'Virginia', 'loser': 'Furman'}, {'winner': 'Creighton', 'loser': 'North Carolina State'}, {'winner': 'Baylor', 'loser': 'UC-Santa Barbara'}, {'winner': 'Utah State', 'loser': 'Missouri'}, {'winner': 'Arizona', 'loser': 'Princeton'}, {'winner': 'Purdue', 'loser': 'Fairleigh Dickinson'}, {'winner': 'Memphis', 'loser': 'Florida Atlantic'}, {'winner': 'Duke', 'loser': 'Oral Roberts'}, {'winner': 'Tennessee', 'loser': 'Louisiana-Lafayette'}, {'winner': 'Kentucky', 'loser': 'Providence'}, {'winner': 'Kansas State', 'loser': 'Montana State'}, {'winner': 'Michigan State', 'loser': 'Southern California'}, {'winner': 'Marquette', 'loser': 'Vermont'}, {'winner': 'Houston', 'loser': 'Northern Kentucky'}, {'winner': 'Auburn', 'loser': 'Iowa'}, {'winner': 'Miami (FL)', 'loser': 'Drake'}