In [1]:
import pandas as pd
import numpy as np
import random

In [2]:
lameness_setting = 1.6
# float suggested to be between 1 and ~6
# a lower number will result in more upsets, a higher number will result in fewer upsets
# generate_winners divides each team's KenPom adjustment by
# lameness_setting ** (round + 1) * seed, so a larger value shrinks that adjustment

In [3]:
# Load the three input tables from CSV files in the working directory:
# per-seed win likelihoods, the KenPom table, and the bracket.
seeds, kenpom, tournament = (
    pd.read_csv(csv_name)
    for csv_name in ('seed_win_likelihoods.csv', 'NCAA2021_Kenpom.csv', 'tournament.csv')
)

In [4]:
# Tidy the bracket: discard fully empty rows, drop the leftover
# 'Unnamed: 0' column, and label the two remaining columns.
tournament = (
    tournament
    .dropna(how='all')
    .drop(columns='Unnamed: 0')
    .set_axis(['seed', 'team'], axis=1)
)

In [5]:
# Preview the first rows of the cleaned bracket (columns: seed, team).
tournament.head()

Unnamed: 0,seed,team
0,1.0,Gonzaga
2,16.0,Appalachian St
5,8.0,Oklahoma
7,9.0,Missouri
10,5.0,Creighton


In [6]:
# Strip periods from the team-name columns so the bracket and the KenPom table
# can be matched by name.
# regex=False makes '.' a literal period. As a regular expression '.' matches
# every character, and pandas >= 2.0 no longer treats single-character patterns
# as literals, so regex=True would blank out every name.
tournament['team'] = tournament['team'].str.replace('.', '', regex=False)
kenpom['TeamName'] = kenpom['TeamName'].str.replace('.', '', regex=False)
kenpom['team'] = kenpom['team'].str.replace('.', '', regex=False)

In [7]:
# Keep only the 2021 season rows; generate_winners looks up team ranks here.
kenpom_2021 = kenpom.loc[kenpom['Season'].eq(2021)]

In [8]:
# Label the columns: the seed, then one column per round ('1' through '6').
# Round labels are strings because generate_winners selects them via str(game).
seeds.columns = ['seed', *(str(round_no) for round_no in range(1, 7))]

In [9]:
# Average KenPom rank for each NCAA seed, computed over all rows of `kenpom`
# (not only the 2021 season).
# Select 'rank' before aggregating: calling .mean() on the whole grouped frame
# also tries to average the text columns, which raises TypeError on
# pandas >= 2.0 (numeric_only now defaults to False).
rank_avg = kenpom.groupby('ncaa_seed')['rank'].mean()

In [10]:
def generate_winners(tournament, game):
    '''
    Simulate one round of the bracket and return the teams that advance.

    Relies on the notebook-level `kenpom_2021`, `seeds`, `rank_avg` and
    `lameness_setting` objects, and prints one line per game played.

    Parameters
    tournament: the remaining teams in the tournament, with 'seed' and 'team'
    columns (ordered such that the first two rows are playing each other, and so on)
    game: what game # it is (i.e. 1 for the round of 64, 2 for the round of 32, etc.)

    Returns
    tournament: the remaining teams after this round

    Raises
    IndexError: if a team has no row in `kenpom_2021`
    '''

    round_col = str(game)   # `seeds` labels its per-round columns with strings
    round_no = int(game)

    def kenpom_rank(team):
        # A team may be listed under either name column of the KenPom table.
        name_matches = (kenpom_2021['TeamName'] == team) | (kenpom_2021['team'] == team)
        ranks = kenpom_2021.loc[name_matches, 'rank']
        if ranks.empty:
            raise IndexError('no 2021 KenPom row found for team ' + repr(team))
        return ranks.iloc[0]

    def win_chance(seed, rank):
        # base odds that the team wins = based on game # and seed
        base_odds = seeds.loc[seeds['seed'] == seed, round_col].iloc[0]

        # adjust up/down based on kenpom rank compared to avg kenpom rank for that seed;
        # np.cbrt is a signed cube root, so a rank numerically lower than the
        # seed's average gives a positive adjustment and a higher one a negative
        # adjustment (the earlier `** 1/3` parsed as `(x ** 1) / 3`)
        adjustment = np.cbrt(rank_avg.loc[seed] - rank)

        return (base_odds + adjustment / (lameness_setting ** (round_no + 1) * seed)) * 100

    winner_inds = []

    # Rows are paired off as (0, 1), (2, 3), ...; an unpaired trailing row is ignored.
    for i in range(len(tournament) // 2):
        row_1 = tournament.iloc[i * 2]
        row_2 = tournament.iloc[i * 2 + 1]

        team_1, team_1_seed = row_1['team'], row_1['seed']
        team_2, team_2_seed = row_2['team'], row_2['seed']

        team_1_chance = win_chance(team_1_seed, kenpom_rank(team_1))
        team_2_chance = win_chance(team_2_seed, kenpom_rank(team_2))

        # Draw uniformly over the combined weight; team 1 wins when the draw
        # lands in its share. The total is not truncated to an int, which
        # would shrink team 2's share.
        y = random.random() * (team_1_chance + team_2_chance)

        if y <= team_1_chance:
            print(team_1 + ' wins, ' + team_2 + ' loses')
            winner_inds.append(i * 2)
        else:
            winner_inds.append(i * 2 + 1)
            print(team_2 + ' wins, ' + team_1 + ' loses')

    return tournament.iloc[winner_inds]

In [11]:
# Play the bracket round by round; each pass replaces `tournament`
# with the teams that advanced from that round.
for n in (1, 2, 3, 4, 5, 6):
    print(f'ROUND {n}')
    tournament = generate_winners(tournament=tournament, game=n)
    print()

ROUND 1
Gonzaga wins, Appalachian St loses
Missouri wins, Oklahoma loses
Creighton wins, UC Santa Barbara loses
Virginia wins, Ohio loses
USC wins, Drake loses
Eastern Washington wins, Kansas loses
Oregon wins, VCU loses
Iowa wins, Grand Canyon loses
Michigan wins, Texas Southern loses
LSU wins, St Bonaventure loses
Colorado wins, Georgetown loses
UNC Greensboro wins, Florida St loses
Michigan St wins, BYU loses
Texas wins, Abilene Christian loses
Connecticut wins, Maryland loses
Alabama wins, Iona loses
Baylor wins, Hartford loses
North Carolina wins, Wisconsin loses
Winthrop wins, Villanova loses
North Texas wins, Purdue loses
Utah St wins, Texas Tech loses
Colgate wins, Arkansas loses
Florida wins, Virginia Tech loses
Ohio St wins, Oral Roberts loses
Illinois wins, Drexel loses
Loyola Chicago wins, Georgia Tech loses
Tennessee wins, Oregon St loses
Oklahoma St wins, Liberty loses
San Diego St wins, Syracuse loses
West Virginia wins, Morehead St loses
Rutgers wins, Clemson loses
Hous