In [3]:
# import libraries
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline

### 1) Generate Our Bracket Predictions

In [4]:
# read in the Kaggle tables used to build the bracket
# (paths are relative to the notebook's working directory)
teams = pd.read_csv("datasets/kaggle_data/Teams.csv")
# per-season seeds: Season / Seed / Team columns (see the preview below)
seeds = pd.read_csv("datasets/kaggle_data/TourneySeeds.csv")
# per-season bracket slots: Season / Slot / Strongseed / Weakseed columns
slots = pd.read_csv("datasets/kaggle_data/TourneySlots.csv")

In [5]:
# preview the first rows of the seeds table
seeds.head()

Unnamed: 0,Season,Seed,Team
0,1985,W01,1207
1,1985,W02,1210
2,1985,W03,1228
3,1985,W04,1260
4,1985,W05,1374


In [6]:
# preview the first rows of the slots table
slots.head()

Unnamed: 0,Season,Slot,Strongseed,Weakseed
0,1985,R1W1,W01,W16
1,1985,R1W2,W02,W15
2,1985,R1W3,W03,W14
3,1985,R1W4,W04,W13
4,1985,R1W5,W05,W12


In [3]:
# filtration function
def filter_season(data, season):
    """Return the rows of ``data`` whose "Season" value equals ``season``.

    The original row index is kept as-is (no re-indexing).
    """
    in_season = data["Season"] == season
    return data.loc[in_season]
    
def filter_into_seasons(data):
    """Split ``data`` into one frame per season, ordered by season.

    The result has one entry for every year between the smallest and the
    largest value of the "Season" column (inclusive), so index ``i`` always
    corresponds to season ``min_season + i``. Years inside that range with
    no rows yield an empty frame rather than being skipped.

    Parameters
    ----------
    data : pandas.DataFrame
        Table with a "Season" column.

    Returns
    -------
    list of pandas.DataFrame
        Per-season slices of ``data``; an empty list when ``data`` has no
        (non-null) seasons.
    """
    season_col = data["Season"]

    # min()/max() of an empty column are NaN, which range() cannot handle
    known_seasons = season_col.dropna()
    if known_seasons.empty:
        return []

    # int() keeps range() happy even if the column was loaded as float
    min_season = int(known_seasons.min())
    max_season = int(known_seasons.max())

    return [data[season_col == season]
            for season in range(min_season, max_season + 1)]

In [4]:
# one seeds frame per season, ordered from the earliest season upward
seeds_arr = filter_into_seasons(seeds)

In [122]:
# one slots frame per season, ordered from the earliest season upward
slots_arr = filter_into_seasons(slots)

In [487]:
# tournament class
class Tournament(object):
    """Simulate one season's bracket, round by round, with a pluggable model.

    Round 1 is generated in the constructor; rounds 2-6 are generated by
    ``generate_round_games`` (or all at once by ``simulate_tournament``).

    Parameters
    ----------
    seeds : pandas.DataFrame
        Seeds of a single season. Needs a "Seed" column and a team-id
        column spelled either "Team" or "TeamID".
    slots : pandas.DataFrame
        Bracket slots of a single season. Needs a "Slot" column and the two
        feeder columns, spelled either "Strongseed"/"Weakseed" or
        "StrongSeed"/"WeakSeed".
    model : object
        Any object exposing ``predict(team_1, team_2)`` that returns the
        winning team.

    Attributes
    ----------
    round_1_df ... round_6_df : pandas.DataFrame
        One row per game of that round (empty until the round is generated).
        In round 1 the "Strongseed"/"Weakseed"/"Prediction Seed" columns hold
        seeds; from round 2 on they hold the names of the feeding slots,
        because that is what the slots table references.
    entire_bracket : pandas.DataFrame
        All generated rounds stacked; set by ``simulate_tournament``.

    Raises
    ------
    KeyError
        If a required column is missing from ``seeds`` or ``slots``.
    IndexError
        If a seed / feeding slot referenced by a slot cannot be resolved.

    Notes
    -----
    Team ids keep the type they have in ``seeds`` for every round, so the
    model always receives the same kind of value.
    """

    # Column layout shared by every per-round frame. "Weekseed Team" is a
    # long-standing misspelling kept so existing consumers keep working.
    _GAME_COLUMNS = ["Slot",
                     "Strongseed",
                     "Strongseed Team",
                     "Weakseed",
                     "Weekseed Team",
                     "Prediction",
                     "Prediction Seed"]

    # Upper bound on the number of slot rows read per round.
    _MAX_SLOT_ROWS = {1: 32, 2: 32, 3: 16, 4: 8, 5: 4, 6: 2}

    # Accepted spellings of the input columns (first match wins).
    _STRONG_COLUMNS = ("Strongseed", "StrongSeed")
    _WEAK_COLUMNS = ("Weakseed", "WeakSeed")
    _TEAM_COLUMNS = ("Team", "TeamID")

    # init function
    def __init__(self, seeds, slots, model):
        self.seeds = seeds
        self.slots = slots
        self.model = model

        # resolve the column spellings once, up front
        self._strong_col = self._pick_column(slots, self._STRONG_COLUMNS)
        self._weak_col = self._pick_column(slots, self._WEAK_COLUMNS)
        self._team_col = self._pick_column(seeds, self._TEAM_COLUMNS)

        # first round: participants come straight from the seeds table
        self.round_1_df = self._build_round(1, seeds, "Seed", self._team_col)

        # later rounds stay empty until generate_round_games fills them
        self.round_2_df = pd.DataFrame()
        self.round_3_df = pd.DataFrame()
        self.round_4_df = pd.DataFrame()
        self.round_5_df = pd.DataFrame()
        self.round_6_df = pd.DataFrame()

    @staticmethod
    def _pick_column(frame, candidates):
        """Return the first name in ``candidates`` that is a column of ``frame``."""
        for name in candidates:
            if name in frame.columns:
                return name
        raise KeyError("expected one of the columns {!r}".format(candidates))

    @staticmethod
    def _round_attr(round_n):
        """Name of the attribute holding the frame of round ``round_n``."""
        return "round_{}_df".format(round_n)

    @staticmethod
    def _lookup(frame, key_column, key, value_column):
        """Return ``value_column`` of the first row where ``key_column == key``."""
        matches = frame.loc[frame[key_column] == key, value_column].values
        if len(matches) == 0:
            raise IndexError(
                "no row with {} == {!r}".format(key_column, key))
        return matches[0]

    def _slots_for_round(self, round_n):
        """Return the slot rows whose name starts with ``R<round_n>``."""
        is_round = self.slots["Slot"].str.startswith("R{}".format(round_n))
        return self.slots[is_round].iloc[:self._MAX_SLOT_ROWS[round_n]]

    def _build_round(self, round_n, source_df, key_column, team_column):
        """Play every game of ``round_n`` and return them as a frame.

        Each slot names two references (strong / weak). A reference is
        resolved to a team by matching it against ``key_column`` of
        ``source_df`` and reading ``team_column`` from the matching row.
        """
        games = []

        for _, slot in self._slots_for_round(round_n).iterrows():
            strong_ref = slot[self._strong_col]
            weak_ref = slot[self._weak_col]

            strong_team = self._lookup(source_df, key_column, strong_ref,
                                       team_column)
            weak_team = self._lookup(source_df, key_column, weak_ref,
                                     team_column)

            # predict winner under our model
            winner = self.model.predict(strong_team, weak_team)

            # anything other than the strong team is attributed to the weak side
            winner_ref = strong_ref if winner == strong_team else weak_ref

            games.append((slot["Slot"],
                          strong_ref,
                          strong_team,
                          weak_ref,
                          weak_team,
                          winner,
                          winner_ref))

        # building from the list of tuples keeps each value's own type and
        # yields a correctly labelled empty frame when there are no games
        return pd.DataFrame(games, columns=self._GAME_COLUMNS)

    # run a particular round
    def generate_round_games(self, round_n):
        """Generate round ``round_n`` (2-6) from the previous round's winners.

        The result is stored on ``self.round_<round_n>_df``. The previous
        round must already have been generated.

        Raises
        ------
        ValueError
            If ``round_n`` is not between 2 and 6.
        """
        if round_n not in (2, 3, 4, 5, 6):
            raise ValueError(
                "round_n must be between 2 and 6, got {!r}".format(round_n))

        # participants are the predicted winners of the feeding slots
        prev_round_df = getattr(self, self._round_attr(round_n - 1))
        cur_round_df = self._build_round(round_n, prev_round_df,
                                         "Slot", "Prediction")

        setattr(self, self._round_attr(round_n), cur_round_df)

    # simulate an entire tournament
    def simulate_tournament(self):
        """Generate rounds 2-6 and stack all rounds into ``entire_bracket``."""
        for round_n in range(2, 7):
            self.generate_round_games(round_n)

        all_rounds = [getattr(self, self._round_attr(round_n))
                      for round_n in range(1, 7)]
        # rounds without games carry no rows; leave them out of the concat
        played_rounds = [df for df in all_rounds if not df.empty]

        if played_rounds:
            self.entire_bracket = pd.concat(played_rounds, ignore_index=True)
        else:
            self.entire_bracket = pd.DataFrame(columns=self._GAME_COLUMNS)
        

In [488]:
class Predictor_Basic(object):
    """Baseline model: always picks the second team of a matchup."""

    def __init__(self):
        # stateless, nothing to set up
        pass

    def predict(self, team_1, team_2):
        """Return ``team_2`` no matter which teams are playing."""
        winner = team_2
        return winner

In [489]:
# index 0 is the earliest season in each list (the lists start at the
# minimum "Season" value; 1985 in the previews above)
seeds_1985 = seeds_arr[0]
slots_1985 = slots_arr[0]

In [490]:
# test: simulate the 1985 bracket with the baseline predictor
tourney_1985 = Tournament(seeds_1985, slots_1985, Predictor_Basic())
tourney_1985.simulate_tournament()

In [491]:
# all simulated rounds of the predicted bracket, stacked in round order
tourney_1985.entire_bracket

Unnamed: 0,Slot,Strongseed,Strongseed Team,Weakseed,Weekseed Team,Prediction,Prediction Seed
0,R1W1,W01,1207,W16,1250,1250,W16
1,R1W2,W02,1210,W15,1273,1273,W15
2,R1W3,W03,1228,W14,1318,1318,W14
3,R1W4,W04,1260,W13,1233,1233,W13
4,R1W5,W05,1374,W12,1330,1330,W12
5,R1W6,W06,1208,W11,1455,1455,W11
6,R1W7,W07,1393,W10,1177,1177,W10
7,R1W8,W08,1396,W09,1439,1439,W09
8,R1X1,X01,1385,X16,1380,1380,X16
9,R1X2,X02,1433,X15,1267,1267,X15


In [377]:
# first rows of the simulated round-2 games
tourney_1985.round_2_df.head()

Unnamed: 0,Slot,Strongseed,Strongseed Team,Weakseed,Weekseed Team,Prediction,Prediction Seed
0,R2W1,R1W1,1250,R1W8,1439,1439,R1W8
1,R2W2,R1W2,1273,R1W7,1177,1177,R1W7
2,R2W3,R1W3,1318,R1W6,1455,1455,R1W6
3,R2W4,R1W4,1233,R1W5,1330,1330,R1W5
4,R2X1,R1X1,1380,R1X8,1116,1116,R1X8


In [378]:
# first rows of the simulated round-3 games
tourney_1985.round_3_df.head()

Unnamed: 0,Slot,Strongseed,Strongseed Team,Weakseed,Weekseed Team,Prediction,Prediction Seed
0,R3W1,R2W1,1439,R2W4,1330,1330,R2W4
1,R3W2,R2W2,1177,R2W3,1455,1455,R2W3
2,R3X1,R2X1,1116,R2X4,1246,1246,R2X4
3,R3X2,R2X2,1112,R2X3,1431,1431,R2X3
4,R3Y1,R2Y1,1229,R2Y4,1338,1338,R2Y4


In [379]:
# first rows of the simulated round-4 games
tourney_1985.round_4_df.head()

Unnamed: 0,Slot,Strongseed,Strongseed Team,Weakseed,Weekseed Team,Prediction,Prediction Seed
0,R4W1,R3W1,1330,R3W2,1455,1455,R3W2
1,R4X1,R3X1,1246,R3X2,1431,1431,R3X2
2,R4Y1,R3Y1,1338,R3Y2,1130,1130,R3Y2
3,R4Z1,R3Z1,1275,R3Z2,1120,1120,R3Z2


In [380]:
# first rows of the simulated round-5 games
tourney_1985.round_5_df.head()

Unnamed: 0,Slot,Strongseed,Strongseed Team,Weakseed,Weekseed Team,Prediction,Prediction Seed
0,R5WX,R4W1,1455,R4X1,1431,1431,R4X1
1,R5YZ,R4Y1,1130,R4Z1,1120,1120,R4Z1


In [374]:
# the simulated round-6 game (slot R6CH in the output below)
tourney_1985.round_6_df.head()

Unnamed: 0,Slot,Strongseed,Strongseed Team,Weakseed,Weekseed Team,Prediction,Prediction Seed
0,R6CH,R5WX,1431,R5YZ,1120,1120,R5YZ


---

### 2) Generate Actual Bracket

In [383]:
# load the recorded tournament results: one row per game with the winning
# and losing team ids (Wteam / Lteam) and their scores
tourney_games = pd.read_csv("datasets/kaggle_data/TourneyCompactResults.csv")

In [384]:
# preview the first rows of the results table
tourney_games.head()

Unnamed: 0,Season,Daynum,Wteam,Wscore,Lteam,Lscore,Wloc,Numot
0,1985,136,1116,63,1234,54,N,0
1,1985,136,1120,59,1345,58,N,0
2,1985,136,1207,68,1250,43,N,0
3,1985,136,1229,58,1425,55,N,0
4,1985,136,1242,49,1325,38,N,0


In [492]:
# one results frame per season, ordered from the earliest season upward
tourney_arr = filter_into_seasons(tourney_games)

In [493]:
# results and seeds of the earliest season (index 0 of each per-season list)
# NOTE(review): this rebinds `tourney_1985`, which held the simulated
# Tournament object in the cells above, to a results DataFrame; re-running
# the earlier display cells after this one will fail. Consider a distinct
# name such as `results_1985` (and update the cell that consumes it).
tourney_1985 = tourney_arr[0]
seeds_1985 = seeds_arr[0]

In [494]:
class ActualTournament(object):
    """Predictor that replays recorded results instead of guessing.

    It has the same ``predict(team_1, team_2)`` interface as the model
    classes, so it can be plugged into ``Tournament`` to rebuild the bracket
    as it was actually played.

    Parameters
    ----------
    data : pandas.DataFrame
        Recorded games with "Wteam" (winner id) and "Lteam" (loser id)
        columns. It should cover a single season so that a pair of teams
        meets at most once.
    """

    # init function
    def __init__(self, data):
        self.tourney = data

    def predict(self, team_1, team_2):
        """Return whichever of the two teams won their recorded game.

        Team ids are cast with ``int()`` before matching, so ids passed as
        strings are accepted. The returned value is the argument exactly as
        it was passed in.

        Returns ``-1`` (after printing "Error") when there is not exactly one
        recorded game between the two teams.
        """
        first, second = int(team_1), int(team_2)
        winners = self.tourney["Wteam"]
        losers = self.tourney["Lteam"]

        team_1_wins = int(((winners == first) & (losers == second)).sum())
        team_2_wins = int(((winners == second) & (losers == first)).sum())

        if team_1_wins == 1:
            return team_1
        elif team_2_wins == 1:
            return team_2
        else:
            # print() with a single argument behaves the same on Python 2 and 3
            print("Error")
            return -1

In [495]:
# rebuild the 1985 bracket using the recorded results as the "model"
# (here `tourney_1985` is the 1985 results frame assigned above)
actual_1985_tourney = Tournament(seeds_1985, slots_1985, ActualTournament(tourney_1985))

In [496]:
# generate rounds 2-6 and assemble `entire_bracket`
actual_1985_tourney.simulate_tournament()

In [497]:
# all rounds of the actual 1985 bracket, stacked in round order
actual_1985_tourney.entire_bracket

Unnamed: 0,Slot,Strongseed,Strongseed Team,Weakseed,Weekseed Team,Prediction,Prediction Seed
0,R1W1,W01,1207,W16,1250,1207,W01
1,R1W2,W02,1210,W15,1273,1210,W02
2,R1W3,W03,1228,W14,1318,1228,W03
3,R1W4,W04,1260,W13,1233,1260,W04
4,R1W5,W05,1374,W12,1330,1374,W05
5,R1W6,W06,1208,W11,1455,1208,W06
6,R1W7,W07,1393,W10,1177,1393,W07
7,R1W8,W08,1396,W09,1439,1396,W08
8,R1X1,X01,1385,X16,1380,1385,X01
9,R1X2,X02,1433,X15,1267,1433,X02
