In [1]:
import pandas as pd
import xgboost as xgb
import numpy as np
import datetime as dt
import random
from sportsreference.ncaab.teams import Teams
from sklearn.preprocessing import scale
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC

In [2]:
teams_df = pd.DataFrame()
i = 2010
while i <=2018:
    print("Downloading data for the", i, "season.")
    start = dt.datetime.now()
    teams = Teams(year = i)
    end = dt.datetime.now()
    teams_temp = teams.dataframes
    teams_temp['Season'] = i
    teams_df = pd.concat([teams_df, teams_temp])
    span = end - start
    print(i, "took", span, "seconds to download.")
    i += 1

Downloading data for the 2010 season.
2010 took 0:01:08.229259 seconds to download.
Downloading data for the 2011 season.
2011 took 0:01:09.568548 seconds to download.
Downloading data for the 2012 season.
2012 took 0:00:44.798424 seconds to download.
Downloading data for the 2013 season.
2013 took 0:00:28.536256 seconds to download.
Downloading data for the 2014 season.
2014 took 0:00:27.211948 seconds to download.
Downloading data for the 2015 season.
2015 took 0:00:29.511849 seconds to download.
Downloading data for the 2016 season.
2016 took 0:00:44.063620 seconds to download.
Downloading data for the 2017 season.
2017 took 0:00:36.770692 seconds to download.
Downloading data for the 2018 season.
2018 took 0:00:29.280486 seconds to download.


In [3]:
teams_df.head(10)

Unnamed: 0,abbreviation,assist_percentage,assists,away_losses,away_wins,block_percentage,blocks,conference,conference_losses,conference_wins,...,total_rebounds,true_shooting_percentage,turnover_percentage,turnovers,two_point_field_goal_attempts,two_point_field_goal_percentage,two_point_field_goals,win_percentage,wins,Season
AIR-FORCE,AIR-FORCE,61.6,389,10,0,6.2,51,mwc,15,1,...,851,0.529,19.0,394,866,0.527,456,0.323,10,2010
AKRON,AKRON,53.9,473,4,7,8.5,114,mac,4,12,...,1269,0.521,16.4,464,1330,0.483,642,0.686,24,2010
ALABAMA-AM,ALABAMA-AM,48.1,288,9,3,12.7,143,swac,10,8,...,1004,0.463,18.8,444,1197,0.41,491,0.407,11,2010
ALABAMA-BIRMINGHAM,ALABAMA-BIRMINGHAM,51.1,390,4,9,7.3,91,cusa,5,11,...,1248,0.518,17.0,451,1237,0.474,586,0.735,25,2010
ALABAMA-STATE,ALABAMA-STATE,60.0,400,11,6,11.1,129,swac,6,12,...,1101,0.499,20.0,501,1064,0.448,477,0.516,16,2010
ALABAMA,ALABAMA,54.2,429,6,3,10.9,127,sec,10,6,...,1126,0.525,16.4,410,1294,0.476,616,0.531,17,2010
ALBANY-NY,ALBANY-NY,53.5,380,15,3,8.2,97,america-east,14,2,...,1156,0.503,20.6,517,1151,0.448,516,0.219,7,2010
ALCORN-STATE,ALCORN-STATE,41.3,269,18,0,7.4,98,swac,16,2,...,1065,0.456,23.7,642,1280,0.403,516,0.065,2,2010
AMERICAN,AMERICAN,65.6,447,12,4,6.6,81,patriot,7,7,...,1097,0.518,19.6,458,1069,0.465,497,0.355,11,2010
APPALACHIAN-STATE,APPALACHIAN-STATE,46.9,447,8,8,7.7,102,southern,5,13,...,1424,0.581,18.6,544,1303,0.518,675,0.649,24,2010


In [4]:
team_names = teams_df[['abbreviation']]

In [5]:
team_names.head()

Unnamed: 0,abbreviation
AIR-FORCE,AIR-FORCE
AKRON,AKRON
ALABAMA-AM,ALABAMA-AM
ALABAMA-BIRMINGHAM,ALABAMA-BIRMINGHAM
ALABAMA-STATE,ALABAMA-STATE


In [6]:
#Only needed this on first run
#team_names.to_csv(r'data/team_names.csv')
#teams_df.to_csv(r'data/team_data.csv')

In [7]:
games = pd.read_csv(r"C:\Users\bdraus\Documents\Python Scripts\Practice\TourneyCompactResults.csv")

In [8]:
games = games[['Season','WTeamID','LTeamID']]
games['Winner'] = games['WTeamID']
games = games.rename(columns = {'WTeamID' : 'Team_A', 'LTeamID': 'Team_B'})
games.head()

Unnamed: 0,Season,Team_A,Team_B,Winner
0,1985,1116,1234,1116
1,1985,1120,1345,1120
2,1985,1207,1250,1207
3,1985,1229,1425,1229
4,1985,1242,1325,1242


In [9]:
i = 0
games['Team_X'] = 0
games['Team_Y'] = 0
while i < len(games):
    games['Team_X'][i] = random.choice(([games['Team_A'][i],games['Team_B'][i]]))
    i += 1
games['Team_Y'] = np.where(games['Team_X'] == games['Team_A'], games['Team_B'], games['Team_A'])
games['Result'] = np.where(games['Winner'] == games['Team_X'], 'X', 'Y')
games = games.drop(columns = ['Team_A','Team_B'], axis = 1)

In [10]:
games.head(25)

Unnamed: 0,Season,Winner,Team_X,Team_Y,Result
0,1985,1116,1234,1116,Y
1,1985,1120,1345,1120,Y
2,1985,1207,1207,1250,X
3,1985,1229,1229,1425,X
4,1985,1242,1325,1242,Y
5,1985,1246,1246,1449,X
6,1985,1256,1256,1338,X
7,1985,1260,1233,1260,Y
8,1985,1314,1292,1314,Y
9,1985,1323,1333,1323,Y


In [11]:
team_data = teams_df[['Season','abbreviation','assist_percentage','block_percentage','effective_field_goal_percentage','field_goal_percentage','free_throw_attempt_rate','free_throw_percentage','free_throws_per_field_goal_attempt','net_rating','offensive_rating','offensive_rebound_percentage','opp_assist_percentage','opp_block_percentage','opp_effective_field_goal_percentage','opp_field_goal_percentage','opp_free_throw_attempt_rate','opp_free_throw_percentage','opp_free_throws_per_field_goal_attempt','opp_offensive_rating','opp_offensive_rebound_percentage','opp_steal_percentage','opp_three_point_attempt_rate','opp_three_point_field_goal_percentage','opp_total_rebound_percentage','opp_true_shooting_percentage','opp_two_point_field_goal_percentage','pace','simple_rating_system','steal_percentage','strength_of_schedule','three_point_attempt_rate','three_point_field_goal_percentage','total_rebound_percentage','true_shooting_percentage','turnover_percentage','two_point_field_goal_percentage','win_percentage']]

In [12]:
games = games.loc[games['Season'] >= 2010]
games.head()

Unnamed: 0,Season,Winner,Team_X,Team_Y,Result
1584,2010,1115,1457,1115,Y
1585,2010,1124,1124,1358,X
1586,2010,1139,1139,1431,X
1587,2010,1140,1196,1140,Y
1588,2010,1242,1250,1242,Y


In [13]:
team_abb = pd.read_csv(r"C:\Users\bdraus\Documents\Python Scripts\Practice\team_names.csv")

In [14]:
team_abb = team_abb[['abbreviation','Team_Id']]

In [15]:
team_data_abb = team_data.merge(team_abb, left_on = 'abbreviation', right_on = 'abbreviation', how = 'left').dropna()
team_data_abb['Team_Id'] = team_data_abb['Team_Id'].astype(np.int64)

In [16]:
team_data_abb.head()

Unnamed: 0,Season,abbreviation,assist_percentage,block_percentage,effective_field_goal_percentage,field_goal_percentage,free_throw_attempt_rate,free_throw_percentage,free_throws_per_field_goal_attempt,net_rating,...,steal_percentage,strength_of_schedule,three_point_attempt_rate,three_point_field_goal_percentage,total_rebound_percentage,true_shooting_percentage,turnover_percentage,two_point_field_goal_percentage,win_percentage,Team_Id
0,2010,AIR-FORCE,61.6,6.2,0.504,0.443,0.367,0.635,0.233,-10.0,...,8.4,3.13,0.394,0.313,46.8,0.529,19.0,0.527,0.323,1102
1,2010,AKRON,53.9,8.5,0.491,0.433,0.363,0.657,0.239,7.6,...,9.0,-1.5,0.343,0.339,51.6,0.521,16.4,0.483,0.686,1103
2,2010,ALABAMA-AM,48.1,12.7,0.416,0.382,0.474,0.635,0.301,-5.2,...,12.5,-13.71,0.237,0.291,46.8,0.463,18.8,0.41,0.407,1105
3,2010,ALABAMA-BIRMINGHAM,51.1,7.3,0.471,0.422,0.457,0.694,0.317,10.0,...,10.0,2.9,0.315,0.311,53.6,0.518,17.0,0.474,0.735,1412
4,2010,ALABAMA-STATE,60.0,11.1,0.462,0.404,0.448,0.641,0.287,-2.1,...,10.8,-12.02,0.356,0.324,51.3,0.499,20.0,0.448,0.516,1106


In [17]:
games_a = games.merge(team_data_abb, left_on = ['Team_X','Season'], right_on = ['Team_Id','Season'], how = 'left')
games_b = games_a.merge(team_data_abb, left_on = ['Team_Y','Season'], right_on = ['Team_Id','Season'], how = 'left')
games_b = games_b.dropna()
print(games_b.head(15))

    Season  Winner  Team_X  Team_Y Result        abbreviation_x  \
0     2010    1115    1457    1115      Y              WINTHROP   
1     2010    1124    1124    1358      X                BAYLOR   
2     2010    1139    1139    1431      X                BUTLER   
3     2010    1140    1196    1140      Y               FLORIDA   
4     2010    1242    1250    1242      Y                LEHIGH   
5     2010    1243    1317    1243      Y           NORTH-TEXAS   
6     2010    1246    1190    1246      Y  EAST-TENNESSEE-STATE   
7     2010    1293    1293    1435      X          MURRAY-STATE   
8     2010    1307    1285    1307      Y               MONTANA   
9     2010    1320    1320    1424      X         NORTHERN-IOWA   
10    2010    1325    1325    1207      X                  OHIO   
11    2010    1330    1330    1323      X          OLD-DOMINION   
12    2010    1388    1350    1388      Y              RICHMOND   
13    2010    1397    1397    1361      X             TENNESSE

In [18]:
games_b['Winner'] = np.where(games_b['Winner'] == games_b['Team_X'], games_b['abbreviation_x'], games_b['abbreviation_y'])
games_b['Team_X'] = games_b['abbreviation_x']
games_b['Team_Y'] = games_b['abbreviation_y']

In [19]:
games_b.dtypes

Season                                        int64
Winner                                       object
Team_X                                       object
Team_Y                                       object
Result                                       object
abbreviation_x                               object
assist_percentage_x                         float64
block_percentage_x                          float64
effective_field_goal_percentage_x           float64
field_goal_percentage_x                     float64
free_throw_attempt_rate_x                   float64
free_throw_percentage_x                     float64
free_throws_per_field_goal_attempt_x        float64
net_rating_x                                float64
offensive_rating_x                          float64
offensive_rebound_percentage_x              float64
opp_assist_percentage_x                     float64
opp_block_percentage_x                      float64
opp_effective_field_goal_percentage_x       float64
opp_field_go

In [20]:
print(games_b.head(15))

    Season               Winner                Team_X               Team_Y  \
0     2010  ARKANSAS-PINE-BLUFF              WINTHROP  ARKANSAS-PINE-BLUFF   
1     2010               BAYLOR                BAYLOR    SAM-HOUSTON-STATE   
2     2010               BUTLER                BUTLER        TEXAS-EL-PASO   
3     2010        BRIGHAM-YOUNG               FLORIDA        BRIGHAM-YOUNG   
4     2010               KANSAS                LEHIGH               KANSAS   
5     2010         KANSAS-STATE           NORTH-TEXAS         KANSAS-STATE   
6     2010             KENTUCKY  EAST-TENNESSEE-STATE             KENTUCKY   
7     2010         MURRAY-STATE          MURRAY-STATE           VANDERBILT   
8     2010           NEW-MEXICO               MONTANA           NEW-MEXICO   
9     2010        NORTHERN-IOWA         NORTHERN-IOWA     NEVADA-LAS-VEGAS   
10    2010                 OHIO                  OHIO           GEORGETOWN   
11    2010         OLD-DOMINION          OLD-DOMINION           

In [21]:
ml_input = games_b.drop(columns = ['Season','Winner','Team_X','Team_Y','abbreviation_x','abbreviation_y','Team_Id_x','Team_Id_y','win_percentage_x','win_percentage_y'], axis = 1)
ml_input.dtypes

Result                                       object
assist_percentage_x                         float64
block_percentage_x                          float64
effective_field_goal_percentage_x           float64
field_goal_percentage_x                     float64
free_throw_attempt_rate_x                   float64
free_throw_percentage_x                     float64
free_throws_per_field_goal_attempt_x        float64
net_rating_x                                float64
offensive_rating_x                          float64
offensive_rebound_percentage_x              float64
opp_assist_percentage_x                     float64
opp_block_percentage_x                      float64
opp_effective_field_goal_percentage_x       float64
opp_field_goal_percentage_x                 float64
opp_free_throw_attempt_rate_x               float64
opp_free_throw_percentage_x                 float64
opp_free_throws_per_field_goal_attempt_x    float64
opp_offensive_rating_x                      float64
opp_offensiv

In [22]:
X_all = ml_input.drop(['Result'],1)
y_all = ml_input['Result']

In [23]:
cols = [['assist_percentage_x','block_percentage_x','effective_field_goal_percentage_x','field_goal_percentage_x','free_throw_attempt_rate_x','free_throw_percentage_x','free_throws_per_field_goal_attempt_x','net_rating_x','offensive_rating_x','offensive_rebound_percentage_x','opp_assist_percentage_x','opp_block_percentage_x','opp_effective_field_goal_percentage_x','opp_field_goal_percentage_x','opp_free_throw_attempt_rate_x','opp_free_throw_percentage_x','opp_free_throws_per_field_goal_attempt_x','opp_offensive_rating_x','opp_offensive_rebound_percentage_x','opp_steal_percentage_x','opp_three_point_attempt_rate_x','opp_three_point_field_goal_percentage_x','opp_total_rebound_percentage_x','opp_true_shooting_percentage_x','opp_two_point_field_goal_percentage_x','pace_x','simple_rating_system_x','steal_percentage_x','strength_of_schedule_x','three_point_attempt_rate_x','three_point_field_goal_percentage_x','total_rebound_percentage_x','true_shooting_percentage_x','turnover_percentage_x','two_point_field_goal_percentage_x','assist_percentage_y','block_percentage_y','effective_field_goal_percentage_y','field_goal_percentage_y','free_throw_attempt_rate_y','free_throw_percentage_y','free_throws_per_field_goal_attempt_y','net_rating_y','offensive_rating_y','offensive_rebound_percentage_y','opp_assist_percentage_y','opp_block_percentage_y','opp_effective_field_goal_percentage_y','opp_field_goal_percentage_y','opp_free_throw_attempt_rate_y','opp_free_throw_percentage_y','opp_free_throws_per_field_goal_attempt_y','opp_offensive_rating_y','opp_offensive_rebound_percentage_y','opp_steal_percentage_y','opp_three_point_attempt_rate_y','opp_three_point_field_goal_percentage_y','opp_total_rebound_percentage_y','opp_true_shooting_percentage_y','opp_two_point_field_goal_percentage_y','pace_y','simple_rating_system_y','steal_percentage_y','strength_of_schedule_y','three_point_attempt_rate_y','three_point_field_goal_percentage_y','total_rebound_percentage_y','true_shooting_percentage_y','turnover_percentage_y','two_point_field_goal_percentage_y']]
for col in cols:
    X_all[col] = scale(X_all[col])

In [24]:
len(X_all)

533

In [25]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X_all, y_all,
                                                   stratify = y_all)

In [26]:
#for measuring training time
from time import time 
# F1 score (also F-score or F-measure) is a measure of a test's accuracy. 
#It considers both the precision p and the recall r of the test to compute 
#the score: p is the number of correct positive results divided by the number of 
#all positive results, and r is the number of correct positive results divided by 
#the number of positive results that should have been returned. The F1 score can be 
#interpreted as a weighted average of the precision and recall, where an F1 score 
#reaches its best value at 1 and worst at 0.
from sklearn.metrics import f1_score

def train_classifier(clf, X_train, y_train):
    ''' Fits a classifier to the training data. '''
    
    # Start the clock, train the classifier, then stop the clock
    start = time()
    clf.fit(X_train, y_train)
    end = time()
    time_taken = end - start
    
    # Print the results
    print('Trained model', clf.__class__.__name__, 'in', time_taken, ' seconds')

    
def predict_labels(clf, features, target):
    ''' Makes predictions using a fit classifier based on F1 score. '''
    
    # Start the clock, make predictions, then stop the clock
    start = time()
    y_pred = clf.predict(features)
    
    end = time()
    # Print and return results
    time_taken = end - start
    print("Made predictions in", time_taken, "seconds.")
    
    return f1_score(target, y_pred, pos_label='X'), sum(target == y_pred) / float(len(y_pred))


def train_predict(clf, X_train, y_train, X_test, y_test):
    ''' Train and predict using a classifer based on F1 score. '''
    
    # Indicate the classifier and the training set size
    #print("Training something using a training set size of %d. . .").format(len(X_train))
    
    # Train the classifier
    train_classifier(clf, X_train, y_train)
    
    # Print the results of prediction for both training and testing
    f1, acc = predict_labels(clf, X_train, y_train)
    print("F1 score and accuracy score for training set:", f1, " , ", acc)
    
    f1, acc = predict_labels(clf, X_test, y_test)
    print("F1 score and accuracy score for test set:", f1, " , ", acc)

In [27]:
clf_A = LogisticRegression(random_state = 64)
clf_B = SVC(random_state = 64, kernel = 'rbf')
clf_C = xgb.XGBClassifier(seed = 64)

train_predict(clf_A, X_train, y_train, X_test, y_test)
print('')
train_predict(clf_B, X_train, y_train, X_test, y_test)
print('')
train_predict(clf_C, X_train, y_train, X_test, y_test)
print('')

Trained model LogisticRegression in 0.045624494552612305  seconds
Made predictions in 0.0010027885437011719 seconds.
F1 score and accuracy score for training set: 0.7641025641025642  ,  0.7694235588972431
Made predictions in 0.0 seconds.
F1 score and accuracy score for test set: 0.6370370370370371  ,  0.6343283582089553

Trained model SVC in 0.01804804801940918  seconds
Made predictions in 0.026586532592773438 seconds.
F1 score and accuracy score for training set: 0.9095744680851064  ,  0.9147869674185464
Made predictions in 0.014037132263183594 seconds.
F1 score and accuracy score for test set: 0.5625  ,  0.582089552238806

Trained model XGBClassifier in 0.2652130126953125  seconds
Made predictions in 0.009472846984863281 seconds.
F1 score and accuracy score for training set: 1.0  ,  1.0
Made predictions in 0.0025038719177246094 seconds.
F1 score and accuracy score for test set: 0.6616541353383459  ,  0.664179104477612



  if diff:
  if diff:


In [28]:
from sklearn.grid_search import GridSearchCV
from sklearn.metrics import make_scorer


# TODO: Create the parameters list you wish to tune
parameters = { 'learning_rate' : [0.1],
               'n_estimators' : [40],
               'max_depth': [3],
               'min_child_weight': [3],
               'gamma':[0.4],
               'subsample' : [0.8],
               'colsample_bytree' : [0.8],
               'scale_pos_weight' : [1],
               'reg_alpha':[1e-5]
             }  

# TODO: Initialize the classifier
clf = xgb.XGBClassifier(seed=2)

# TODO: Make an f1 scoring function using 'make_scorer' 
f1_scorer = make_scorer(f1_score,pos_label='X')

# TODO: Perform grid search on the classifier using the f1_scorer as the scoring method
grid_obj = GridSearchCV(clf,
                        scoring=f1_scorer,
                        param_grid=parameters,
                        cv=5)

# TODO: Fit the grid search object to the training data and find the optimal parameters
grid_obj = grid_obj.fit(X_train,y_train)

# Get the estimator
clf = grid_obj.best_estimator_
print(clf)

# Report the final F1 score for training and testing after parameter tuning
f1, acc = predict_labels(clf, X_train, y_train)
print("F1 score and accuracy score for training set:", f1, " , ", acc)
    
f1, acc = predict_labels(clf, X_test, y_test)
print("F1 score and accuracy score for test set:", f1, " , ", acc)

  if diff:
  if diff:
  if diff:
  if diff:
  if diff:


XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
       colsample_bytree=0.8, gamma=0.4, learning_rate=0.1,
       max_delta_step=0, max_depth=3, min_child_weight=3, missing=None,
       n_estimators=40, n_jobs=1, nthread=None,
       objective='binary:logistic', random_state=0, reg_alpha=1e-05,
       reg_lambda=1, scale_pos_weight=1, seed=2, silent=True,
       subsample=0.8)
Made predictions in 0.0040094852447509766 seconds.
F1 score and accuracy score for training set: 0.9198966408268734  ,  0.9223057644110275
Made predictions in 0.0019974708557128906 seconds.
F1 score and accuracy score for test set: 0.6323529411764707  ,  0.6268656716417911


  if diff:
  if diff:


In [29]:
X_all.head()

Unnamed: 0,assist_percentage_x,block_percentage_x,effective_field_goal_percentage_x,field_goal_percentage_x,free_throw_attempt_rate_x,free_throw_percentage_x,free_throws_per_field_goal_attempt_x,net_rating_x,offensive_rating_x,offensive_rebound_percentage_x,...,pace_y,simple_rating_system_y,steal_percentage_y,strength_of_schedule_y,three_point_attempt_rate_y,three_point_field_goal_percentage_y,total_rebound_percentage_y,true_shooting_percentage_y,turnover_percentage_y,two_point_field_goal_percentage_y
0,-1.553525,0.329605,-3.739824,-3.280079,-0.448014,-1.813924,-1.08064,-1.872149,-3.318104,0.119426,...,0.190034,-3.060074,-0.082088,-2.828226,-1.518109,-2.233258,0.867829,-2.533132,3.315223,-2.403302
1,-0.701879,2.333278,1.177715,1.306788,-0.490144,0.50281,-0.274387,0.643899,0.862411,1.245142,...,1.239993,-1.445131,0.58167,-1.911284,1.209209,0.306772,0.393335,0.41714,0.59339,0.470437
2,0.06668,-1.56673,-0.222168,-0.622268,1.848058,0.874631,2.201961,0.268836,-0.479957,-0.827199,...,0.977504,-0.240653,1.356054,-0.777339,-0.492802,-0.716822,-0.595195,-0.033596,0.714361,0.834201
3,-0.16181,-1.31627,-0.79648,-0.536533,-0.995701,-0.126427,-0.994256,-0.637566,-0.364896,0.861375,...,1.371238,0.588351,1.079488,-0.556082,-0.123691,2.088585,0.393335,1.605444,-0.555828,0.179425
4,0.876783,-1.49517,-0.078591,-0.107853,1.047593,0.302598,1.16535,-0.997002,-0.537487,-0.980705,...,0.846259,1.376981,1.134801,0.560843,-0.595332,1.595743,1.025994,1.03178,0.35145,0.579566


This section inputs all upsets from 2010-2018 and tests how the model performs

In [30]:
game_list = [[2017,'MIDDLE-TENNESSEE','MINNESOTA','MIDDLE-TENNESSEE'],
[2016,'ARKANSAS-LITTLE-ROCK','PURDUE','ARKANSAS-LITTLE-ROCK'],
[2016,'YALE','BAYLOR','YALE'],
[2014,'STEPHEN-F-AUSTIN','VIRGINIA-COMMONWEALTH','STEPHEN-F-AUSTIN'],
[2014,'NORTH-DAKOTA-STATE','OKLAHOMA','NORTH-DAKOTA-STATE'],
[2014,'HARVARD','CINCINNATI','HARVARD'],
[2013,'MISSISSIPPI','WISCONSIN','MISSISSIPPI'],
[2013,'CALIFORNIA','NEVADA-LAS-VEGAS','CALIFORNIA'],
[2013,'OREGON','OKLAHOMA-STATE','OREGON'],
[2012,'SOUTH-FLORIDA','TEMPLE','SOUTH-FLORIDA'],
[2012,'VIRGINIA-COMMONWEALTH','WICHITA-STATE','VIRGINIA-COMMONWEALTH'],
[2011,'RICHMOND','VANDERBILT','RICHMOND'],
[2010,'CORNELL','TEMPLE','CORNELL'],
[2016,'NORTHERN-IOWA','TEXAS','NORTHERN-IOWA'],
[2016,'GONZAGA','SETON-HALL','GONZAGA'],
[2016,'WICHITA-STATE','ARIZONA','WICHITA-STATE'],
[2015,'DAYTON','PROVIDENCE','DAYTON'],
[2015,'UCLA','SOUTHERN-METHODIST','UCLA'],
[2014,'TENNESSEE','MASSACHUSETTS','TENNESSEE'],
[2014,'DAYTON','OHIO-STATE','DAYTON'],
[2013,'MINNESOTA','UCLA','MINNESOTA'],
[2016,'MIDDLE-TENNESSEE','MICHIGAN-STATE','MIDDLE-TENNESSEE'],
[2013,'FLORIDA-GULF-COAST','GEORGETOWN','FLORIDA-GULF-COAST'],
[2012,'LEHIGH','DUKE','LEHIGH'],
[2012,'NORFOLK-STATE','MISSOURI','NORFOLK-STATE'],
[2016,'STEPHEN-F-AUSTIN','WEST-VIRGINIA','STEPHEN-F-AUSTIN'],
[2015,'GEORGIA-STATE','BAYLOR','GEORGIA-STATE'],
[2015,'ALABAMA-BIRMINGHAM','IOWA-STATE','ALABAMA-BIRMINGHAM'],
[2014,'MERCER','DUKE','MERCER'],
[2013,'HARVARD','NEW-MEXICO','HARVARD'],
[2010,'OHIO','GEORGETOWN','OHIO'],
[2018,'MARSHALL','WICHITA-STATE','MARSHALL'],
[2018,'BUFFALO','ARIZONA','BUFFALO'],
[2016,'HAWAII','CALIFORNIA','HAWAII'],
[2013,'LA-SALLE','KANSAS-STATE','LA-SALLE'],
[2012,'OHIO','MICHIGAN','OHIO'],
[2011,'MOREHEAD-STATE','LOUISVILLE','MOREHEAD-STATE'],
[2010,'MURRAY-STATE','VANDERBILT','MURRAY-STATE'],
[2018,'LOYOLA-IL','MIAMI-FL','LOYOLA-IL'],
[2018,'SYRACUSE','TEXAS-CHRISTIAN','SYRACUSE'],
[2017,'RHODE-ISLAND','CREIGHTON','RHODE-ISLAND'],
[2017,'SOUTHERN-CALIFORNIA','SOUTHERN-METHODIST','SOUTHERN-CALIFORNIA'],
[2017,'XAVIER','MARYLAND','XAVIER']]
game_df_teams = pd.DataFrame(game_list)
game_df_teams.columns = ['Season','Away','Home','Actual Winner']
game_df_teams

Unnamed: 0,Season,Away,Home,Actual Winner
0,2017,MIDDLE-TENNESSEE,MINNESOTA,MIDDLE-TENNESSEE
1,2016,ARKANSAS-LITTLE-ROCK,PURDUE,ARKANSAS-LITTLE-ROCK
2,2016,YALE,BAYLOR,YALE
3,2014,STEPHEN-F-AUSTIN,VIRGINIA-COMMONWEALTH,STEPHEN-F-AUSTIN
4,2014,NORTH-DAKOTA-STATE,OKLAHOMA,NORTH-DAKOTA-STATE
5,2014,HARVARD,CINCINNATI,HARVARD
6,2013,MISSISSIPPI,WISCONSIN,MISSISSIPPI
7,2013,CALIFORNIA,NEVADA-LAS-VEGAS,CALIFORNIA
8,2013,OREGON,OKLAHOMA-STATE,OREGON
9,2012,SOUTH-FLORIDA,TEMPLE,SOUTH-FLORIDA


In [31]:
game_df = pd.DataFrame(game_list)
game_df.columns = ['Season','Away','Home','Actual Winner']
game_df = game_df[['Season','Away','Home']]
game_df = game_df.merge(team_data_abb, left_on = ['Season','Home'], right_on = ['Season','abbreviation'], how = 'left')
game_df = game_df.merge(team_data_abb, left_on = ['Season','Away'], right_on = ['Season','abbreviation'], how = 'left')
print(game_df)

    Season                   Away                   Home  \
0     2017       MIDDLE-TENNESSEE              MINNESOTA   
1     2016   ARKANSAS-LITTLE-ROCK                 PURDUE   
2     2016                   YALE                 BAYLOR   
3     2014       STEPHEN-F-AUSTIN  VIRGINIA-COMMONWEALTH   
4     2014     NORTH-DAKOTA-STATE               OKLAHOMA   
5     2014                HARVARD             CINCINNATI   
6     2013            MISSISSIPPI              WISCONSIN   
7     2013             CALIFORNIA       NEVADA-LAS-VEGAS   
8     2013                 OREGON         OKLAHOMA-STATE   
9     2012          SOUTH-FLORIDA                 TEMPLE   
10    2012  VIRGINIA-COMMONWEALTH          WICHITA-STATE   
11    2011               RICHMOND             VANDERBILT   
12    2010                CORNELL                 TEMPLE   
13    2016          NORTHERN-IOWA                  TEXAS   
14    2016                GONZAGA             SETON-HALL   
15    2016          WICHITA-STATE       

In [32]:
game_input = game_df.drop(columns = ['Season','Away','Home','abbreviation_x','abbreviation_y','Team_Id_x','Team_Id_y','win_percentage_x','win_percentage_y'], axis = 1)

In [33]:
game_input

Unnamed: 0,assist_percentage_x,block_percentage_x,effective_field_goal_percentage_x,field_goal_percentage_x,free_throw_attempt_rate_x,free_throw_percentage_x,free_throws_per_field_goal_attempt_x,net_rating_x,offensive_rating_x,offensive_rebound_percentage_x,...,pace_y,simple_rating_system_y,steal_percentage_y,strength_of_schedule_y,three_point_attempt_rate_y,three_point_field_goal_percentage_y,total_rebound_percentage_y,true_shooting_percentage_y,turnover_percentage_y,two_point_field_goal_percentage_y
0,57.2,15.9,0.487,0.435,0.396,0.711,0.281,8.0,104.6,29.6,...,66.4,9.8,10.1,-0.48,0.299,0.367,53.9,0.567,13.6,0.54
1,64.1,11.7,0.536,0.47,0.354,0.744,0.263,18.5,113.5,34.5,...,64.6,7.12,10.1,-1.76,0.354,0.384,50.1,0.553,13.6,0.49
2,64.6,11.2,0.518,0.466,0.375,0.725,0.272,10.9,112.8,40.1,...,66.9,9.08,8.4,-1.03,0.309,0.363,57.8,0.555,16.4,0.517
3,49.8,11.5,0.479,0.42,0.352,0.674,0.237,13.7,105.0,34.3,...,65.5,4.82,11.1,-6.76,0.36,0.345,53.8,0.561,14.2,0.529
4,53.1,8.4,0.517,0.447,0.394,0.749,0.295,8.1,112.9,32.4,...,63.9,6.31,9.7,-0.02,0.275,0.364,53.2,0.593,13.4,0.558
5,52.8,16.0,0.477,0.425,0.387,0.708,0.274,15.6,107.5,39.1,...,64.6,10.79,11.6,-0.47,0.283,0.385,52.1,0.564,15.0,0.496
6,56.5,10.8,0.486,0.42,0.297,0.634,0.188,14.5,104.4,32.4,...,70.5,14.42,11.6,4.14,0.33,0.324,48.1,0.526,13.0,0.494
7,64.4,14.6,0.492,0.436,0.373,0.703,0.262,12.1,103.2,33.8,...,66.5,10.65,8.7,7.77,0.236,0.302,50.0,0.515,16.2,0.488
8,48.9,12.8,0.488,0.439,0.405,0.746,0.302,12.9,104.8,31.3,...,68.8,13.82,12.1,5.77,0.273,0.333,52.5,0.531,18.2,0.491
9,57.3,10.1,0.531,0.469,0.342,0.719,0.246,7.9,108.8,31.5,...,59.7,9.2,9.2,7.29,0.303,0.316,50.5,0.521,19.1,0.49


In [34]:
y_pred_C = clf_C.predict(game_input)
y_pred_B = clf_B.predict(game_input)
y_pred_A = clf_A.predict(game_input)
Y_pred = clf.predict(game_input)

  if diff:
  if diff:


In [35]:
print(y_pred_C,y_pred_B,y_pred_A,Y_pred)

['Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y'
 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'X' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y'
 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y'] ['Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y'
 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y'
 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y'] ['Y' 'Y' 'X' 'X' 'X' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'X' 'Y' 'Y' 'Y' 'Y'
 'Y' 'X' 'Y' 'X' 'X' 'X' 'X' 'X' 'X' 'X' 'X' 'X' 'X' 'X' 'Y' 'X' 'Y' 'Y'
 'X' 'X' 'Y' 'Y' 'Y' 'Y' 'Y'] ['Y' 'X' 'Y' 'X' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y'
 'Y' 'Y' 'Y' 'Y' 'X' 'X' 'X' 'X' 'X' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y'
 'X' 'X' 'Y' 'Y' 'Y' 'Y' 'Y']


In [36]:
test = pd.DataFrame(Y_pred)

In [37]:
games_df_out = game_df_teams.merge(test, left_index = True, right_index = True)
games_df_out.columns = ['Season','Away','Home','Actual Winner','Forecast']

In [38]:
games_df_out['Winner'] = np.where(games_df_out['Forecast'] == 'Y', games_df_out['Away'], games_df_out['Home'])
games_df_out['Correct'] = np.where(games_df_out['Actual Winner'] == games_df_out['Winner'], "Correct!", "¯\_(ツ)_/¯")

In [39]:
games_df_out[['Season','Away','Home','Actual Winner','Winner','Correct']]

Unnamed: 0,Season,Away,Home,Actual Winner,Winner,Correct
0,2017,MIDDLE-TENNESSEE,MINNESOTA,MIDDLE-TENNESSEE,MIDDLE-TENNESSEE,Correct!
1,2016,ARKANSAS-LITTLE-ROCK,PURDUE,ARKANSAS-LITTLE-ROCK,PURDUE,¯\_(ツ)_/¯
2,2016,YALE,BAYLOR,YALE,YALE,Correct!
3,2014,STEPHEN-F-AUSTIN,VIRGINIA-COMMONWEALTH,STEPHEN-F-AUSTIN,VIRGINIA-COMMONWEALTH,¯\_(ツ)_/¯
4,2014,NORTH-DAKOTA-STATE,OKLAHOMA,NORTH-DAKOTA-STATE,NORTH-DAKOTA-STATE,Correct!
5,2014,HARVARD,CINCINNATI,HARVARD,HARVARD,Correct!
6,2013,MISSISSIPPI,WISCONSIN,MISSISSIPPI,MISSISSIPPI,Correct!
7,2013,CALIFORNIA,NEVADA-LAS-VEGAS,CALIFORNIA,CALIFORNIA,Correct!
8,2013,OREGON,OKLAHOMA-STATE,OREGON,OREGON,Correct!
9,2012,SOUTH-FLORIDA,TEMPLE,SOUTH-FLORIDA,SOUTH-FLORIDA,Correct!


This section is where you can input current year games and get an output.

In [49]:
teams_19 = Teams(year = 2019)
teams_df_19 = teams.dataframes
teams_df_19 = teams_df_19[['abbreviation','assist_percentage','block_percentage','effective_field_goal_percentage','field_goal_percentage','free_throw_attempt_rate','free_throw_percentage','free_throws_per_field_goal_attempt','net_rating','offensive_rating','offensive_rebound_percentage','opp_assist_percentage','opp_block_percentage','opp_effective_field_goal_percentage','opp_field_goal_percentage','opp_free_throw_attempt_rate','opp_free_throw_percentage','opp_free_throws_per_field_goal_attempt','opp_offensive_rating','opp_offensive_rebound_percentage','opp_steal_percentage','opp_three_point_attempt_rate','opp_three_point_field_goal_percentage','opp_total_rebound_percentage','opp_true_shooting_percentage','opp_two_point_field_goal_percentage','pace','simple_rating_system','steal_percentage','strength_of_schedule','three_point_attempt_rate','three_point_field_goal_percentage','total_rebound_percentage','true_shooting_percentage','turnover_percentage','two_point_field_goal_percentage']]

In [256]:
teams_df_19.head()

Unnamed: 0,abbreviation,assist_percentage,block_percentage,effective_field_goal_percentage,field_goal_percentage,free_throw_attempt_rate,free_throw_percentage,free_throws_per_field_goal_attempt,net_rating,offensive_rating,...,pace,simple_rating_system,steal_percentage,strength_of_schedule,three_point_attempt_rate,three_point_field_goal_percentage,total_rebound_percentage,true_shooting_percentage,turnover_percentage,two_point_field_goal_percentage
ABILENE-CHRISTIAN,ABILENE-CHRISTIAN,55.4,11.6,0.521,0.464,0.309,0.701,0.217,3.5,102.2,...,71.6,-9.14,11.3,-6.82,0.35,0.325,49.7,0.549,17.7,0.539
AIR-FORCE,AIR-FORCE,60.7,8.1,0.49,0.419,0.318,0.734,0.233,-5.7,100.8,...,67.7,-4.31,9.5,1.72,0.431,0.331,48.7,0.527,16.3,0.485
AKRON,AKRON,52.7,7.5,0.518,0.435,0.319,0.696,0.222,-5.1,102.6,...,69.1,-6.82,8.4,-1.92,0.467,0.358,49.2,0.547,17.3,0.502
ALABAMA-AM,ALABAMA-AM,50.5,3.9,0.45,0.397,0.314,0.647,0.203,-23.2,88.1,...,68.3,-23.97,5.8,-8.04,0.354,0.303,48.2,0.48,20.9,0.448
ALABAMA-BIRMINGHAM,ALABAMA-BIRMINGHAM,59.3,11.6,0.545,0.488,0.291,0.75,0.218,10.1,109.8,...,69.5,4.9,7.7,-0.65,0.334,0.345,54.8,0.575,16.5,0.559


In [257]:
test = teams_df_19[teams_df_19['abbreviation']=='CENTRAL-FLORIDA']

In [258]:
test

Unnamed: 0,abbreviation,assist_percentage,block_percentage,effective_field_goal_percentage,field_goal_percentage,free_throw_attempt_rate,free_throw_percentage,free_throws_per_field_goal_attempt,net_rating,offensive_rating,...,pace,simple_rating_system,steal_percentage,strength_of_schedule,three_point_attempt_rate,three_point_field_goal_percentage,total_rebound_percentage,true_shooting_percentage,turnover_percentage,two_point_field_goal_percentage
CENTRAL-FLORIDA,CENTRAL-FLORIDA,49.1,9.4,0.478,0.424,0.431,0.641,0.277,1.5,96.2,...,64.9,4.75,8.1,3.75,0.331,0.329,50.5,0.512,18.0,0.471


In [289]:
first_round = [['NORTH_CAROLINA-CENTRAL','NORTH-DAKOTA-STATE'],['NORTH_DAKOTA-STATE','DUKE'],
               ['VIRGINIA-COMMONWEALTH','CENTRAL-FLORIDA'],['MISSISSIPPI_STATE','LIBERTY'],['VIRGINIA_TECH','SAINT-LOUIS'],
               ['BELMONT','TEMPLE'],['MARYLAND','TEMPLE'] ,['LOUISIANA-STATE','YALE'],['LOUISVILLE','MINNESOTA'],
               ['BRADLEY','MICHIGAN-STATE'],['FAIRLEIGH-DICKINSON','PRAIRIE-VIEW'],['PRAIRIE-VIEW','GONZAGA'],
               ['SYRACUSE','BAYLOR'],['MARQUETTE','MURRAY-STATE'],['FLORIDA-STATE','VERMONT'],['ST-JOHNS-NY','ARIZONA-STATE'],
               ['ST-JOHNS-NY','BUFFALO'],['TEXAS-TECH','NORTHERN-KENTUCKY'],['FLORIDA','NEVADA'],['MICHIGAN','MONTANA'],
               ['GARDNER-WEBB','VIRGINIA'],['MISSISSIPPI','OKLAHOMA'],['WISCONSIN','OREGON'], 
               ['KANSAS-STATE','CALIFORNIA-IRVINE'],['VILLANOVA','SAINT-MARYS-CA'], ['PURDUE','OLD-DOMINION'],
               ['CINCINNNATI','IOWA'],['TENNESSEE','COLGATE'],['NORTH-CAROLINA','IONA'],['UTAH-STATE','WASHINGTON'],
               ['AUBURN','NEW-MEXICO-STATE'],['KANSAS','NORTHEASTERN'],['IOWA-STATE','OHIO-STATE'],['HOUSTON','GEORGIA-STATE'],
               ['WOFFORD','SETON-HALL'],['KENTUCKY','ABILENE-CHRISTIAN']]

second_round = [['DUKE','VIRGINIA-COMMONWEALTH'],['LIBERTY','SAINT-LOUIS'],['LOUISIANA-STATE','MARYLAND'],
                  ['LOUISVILLE','BRADLEY'],['GONZAGA','SYRACUSE'],['MARQUETTE','FLORIDA-STATE'],['ST-JOHNS-NY','TEXAS-TECH']
                 ,['FLORIDA','MICHIGAN'],['VIRGINIA','MISSISSIPPI'],['WISCONSIN','KANSAS-STATE'],['VILLANOVA','PURDUE'],
                 ['IOWA','TENNESSEE'],['NORTH-CAROLINA','UTAH-STATE'],['AUBURN','KANSAS'],['IOWA-STATE','HOUSTON'],
                 ['SETON-HALL','KENTUCKY']]

sweet_sixteen = [['SAINT-LOUIS','VIRGINIA-COMMONWEALTH'],['MARYLAND','LOUISVILLE'],['SYRACUSE','FLORIDA-STATE'],
                  ['ST-JOHNS-NY','FLORIDA'],['VIRGINIA','WISCONSIN'],['TENNESSEE','PURDUE'],['NORTH-CAROLINA','AUBURN']
                 ,['IOWA-STATE','SETON-HALL']]

elite_eight = [['LOUISVILLE','VIRGINIA-COMMONWEALTH'],['FLORIDA-STATE','ST-JOHNS-NY'],['WISCONSIN','PURDUE'],
                  ['NORTH-CAROLINA','IOWA-STATE']]

final_four = [['ST-JOHNS-NY','VIRGINIA-COMMONWEALTH'],['PURDUE','NORTH-CAROLINA']]

finals = [['NORTH-CAROLINA','VIRGINIA-COMMONWEALTH']]


game_df_cur_team = pd.DataFrame(first_round)
game_df_cur_team.columns = ['Away','Home']

In [290]:
game_df_cur = pd.DataFrame(first_round)
game_df_cur.columns = ['Away','Home']
game_df_cur = game_df_cur[['Away','Home']]
game_df_cur = game_df_cur.merge(teams_df_19, left_on = ['Home'], right_on = ['abbreviation'], how = 'left')
game_df_cur = game_df_cur.merge(teams_df_19, left_on = ['Away'], right_on = ['abbreviation'], how = 'left')

In [291]:
game_df_cur

Unnamed: 0,Away,Home,abbreviation_x,assist_percentage_x,block_percentage_x,effective_field_goal_percentage_x,field_goal_percentage_x,free_throw_attempt_rate_x,free_throw_percentage_x,free_throws_per_field_goal_attempt_x,...,pace_y,simple_rating_system_y,steal_percentage_y,strength_of_schedule_y,three_point_attempt_rate_y,three_point_field_goal_percentage_y,total_rebound_percentage_y,true_shooting_percentage_y,turnover_percentage_y,two_point_field_goal_percentage_y
0,NORTH_CAROLINA-CENTRAL,NORTH-DAKOTA-STATE,NORTH-DAKOTA-STATE,50.4,6.2,0.552,0.469,0.305,0.761,0.232,...,,,,,,,,,,
1,NORTH_DAKOTA-STATE,DUKE,DUKE,57.6,12.4,0.559,0.492,0.346,0.71,0.246,...,,,,,,,,,,
2,VIRGINIA-COMMONWEALTH,CENTRAL-FLORIDA,CENTRAL-FLORIDA,49.1,9.4,0.478,0.424,0.431,0.641,0.277,...,71.4,3.55,8.8,2.42,0.386,0.352,51.0,0.549,16.3,0.524
3,MISSISSIPPI_STATE,LIBERTY,LIBERTY,58.0,9.1,0.539,0.461,0.339,0.792,0.268,...,,,,,,,,,,
4,VIRGINIA_TECH,SAINT-LOUIS,SAINT-LOUIS,53.8,13.1,0.475,0.422,0.463,0.652,0.302,...,,,,,,,,,,
5,BELMONT,TEMPLE,TEMPLE,49.8,9.0,0.493,0.425,0.256,0.682,0.174,...,68.5,5.54,7.1,-2.88,0.542,0.376,51.5,0.61,16.3,0.617
6,MARYLAND,TEMPLE,TEMPLE,49.8,9.0,0.493,0.425,0.256,0.682,0.174,...,66.6,13.78,6.5,7.94,0.364,0.371,53.6,0.584,17.9,0.539
7,LOUISIANA-STATE,YALE,YALE,61.0,8.2,0.533,0.462,0.307,0.722,0.221,...,69.4,10.93,11.0,8.03,0.402,0.346,49.4,0.575,14.7,0.562
8,LOUISVILLE,MINNESOTA,MINNESOTA,54.9,12.7,0.487,0.431,0.351,0.704,0.247,...,69.7,14.3,9.9,9.08,0.352,0.371,49.7,0.552,14.7,0.497
9,BRADLEY,MICHIGAN-STATE,MICHIGAN-STATE,67.6,18.5,0.569,0.496,0.359,0.747,0.268,...,66.3,2.08,9.8,0.36,0.343,0.354,51.8,0.528,17.3,0.469


In [292]:
game_input_cur = game_df_cur.drop(columns = ['Away','Home','abbreviation_x','abbreviation_y'], axis = 1)

In [293]:
out_19 = clf.predict(game_input_cur)

  if diff:


In [294]:
temp = pd.DataFrame(out_19)
games_df_out_cur = game_df_cur_team.merge(temp, left_index = True, right_index = True)
games_df_out_cur.columns = ['Away','Home','Forecast']
games_df_out_cur['Winner'] = np.where(games_df_out_cur['Forecast'] == 'Y', games_df_out_cur['Away'], games_df_out_cur['Home'])

In [295]:
games_df_out_cur

Unnamed: 0,Away,Home,Forecast,Winner
0,NORTH_CAROLINA-CENTRAL,NORTH-DAKOTA-STATE,X,NORTH-DAKOTA-STATE
1,NORTH_DAKOTA-STATE,DUKE,X,DUKE
2,VIRGINIA-COMMONWEALTH,CENTRAL-FLORIDA,Y,VIRGINIA-COMMONWEALTH
3,MISSISSIPPI_STATE,LIBERTY,X,LIBERTY
4,VIRGINIA_TECH,SAINT-LOUIS,X,SAINT-LOUIS
5,BELMONT,TEMPLE,X,TEMPLE
6,MARYLAND,TEMPLE,Y,MARYLAND
7,LOUISIANA-STATE,YALE,Y,LOUISIANA-STATE
8,LOUISVILLE,MINNESOTA,Y,LOUISVILLE
9,BRADLEY,MICHIGAN-STATE,Y,BRADLEY
