In [49]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Warnings
import warnings
warnings.simplefilter("ignore", UserWarning)

from sklearn.pipeline import make_pipeline
from sklearn.feature_selection import SelectKBest, f_regression

# Models
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.experimental import enable_hist_gradient_boosting
from sklearn.ensemble import HistGradientBoostingClassifier
from xgboost import XGBClassifier
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

# These models are voting models based off the above models
from sklearn.ensemble import VotingClassifier
from sklearn.ensemble import StackingRegressor

# Data prep
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler

# Model evaluations
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.model_selection import cross_val_score, cross_validate
from sklearn.model_selection import KFold,StratifiedKFold, ShuffleSplit, StratifiedShuffleSplit
from sklearn.model_selection import RandomizedSearchCV
from sklearn import metrics
from sklearn import datasets
from sklearn.model_selection import GridSearchCV
from sklearn.datasets import make_classification
from sklearn.feature_selection import RFE
from sklearn.inspection import permutation_importance
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import plot_confusion_matrix



In [50]:
# Models

svc = SVC(kernel='rbf', gamma=0.1, C=10, random_state=99) # 5% increase with these hyperparamters
KNC = KNeighborsClassifier(weights='distance', p=2, n_neighbors=10, metric='euclidean', leaf_size=40) # 2.7% increase with these hp
ADBC = AdaBoostClassifier(n_estimators=155, learning_rate=0.8, random_state=99) # 2% increase with these hp
RFC = RandomForestClassifier(n_estimators=1000, min_samples_split=5, random_state=99) # 1% better with these hyperparameters

GBC = GradientBoostingClassifier(n_estimators=500, learning_rate=0.15, random_state=99) # 2% better
HGBC = HistGradientBoostingClassifier(min_samples_leaf=25, max_leaf_nodes=80, max_iter=100, max_depth=None, learning_rate=0.1, l2_regularization=1.5, random_state=99) # 2% better
XGB = XGBClassifier(n_estimators=150, learning_rate=0.1, random_state=99) # 1.7% better with hp
QDA = QuadraticDiscriminantAnalysis() # Same with default hp

# Imputer
imputer = SimpleImputer()
MMScaler = MinMaxScaler()

In [51]:
# Read data
data = pd.read_excel('./content/NBA_COMBINED.xlsx', parse_dates=['Date'])

# Drop unneccesary columns
data = data.drop(columns=['PTS22', 'PTS3', 'Attend.'], axis=1)

# Add point diff column to predict. True or False
data['Home Points Differ'] = data['Home PTS'] > data['Vis PTS']

y_all = data['Home Points Differ']

data.loc[len(data.loc[data['Vis PTS'] > 0]):,'Home Points Differ'] = 0

# Add dates and time

# Get Day, Month and Year from date column
dates = pd.DataFrame()
dates['Year'] = data['Date'].dt.strftime('%Y')
dates['Month'] = data['Date'].dt.strftime('%m')
dates['Day'] = data['Date'].dt.strftime('%d')

# Add dates
data = pd.concat([data, dates], axis=1)

# Get start time
start_time = data['Start (ET)'].str[:-1]
start_time = start_time.str.replace(':', '.')
start_time = start_time.astype(float)
start_time.columns = ['Start Time']

# Add start time
data = pd.concat([data, start_time], axis=1)

# Drop Ranking columns from prev years

data.drop(['2016-17 Vis Rank', '2016-17 Home Rank', '2017-18 Vis Rank', '2017-18 Home Rank'], inplace=True, axis =1)
# data.columns

# Remove games before 22-10-19
first_games = data.loc[data['Date'] == '2019-10-22'].index ##### AUTOMATE
first_games[0]
data = data.loc[first_games[0]:,:]


# Did each team win their last game?

data["HomeLastWin"] = False
data["VisitorLastWin"] = False

from collections import defaultdict
won_last = defaultdict(int) # Create dictionary won last

for index, row in data.iterrows(): # for each row
    home_team = row['Home'] # Take the home team in the row
    visitor_team = row['Visitor'] # Take the vis team in each row
    row['HomeLastWin'] = won_last[home_team] # If HomeLastWin is true set that team to won in the won_last dict
    row['VisitorLastWin'] = won_last[visitor_team] # If VisitorLastWin is true set that team to won in won_last dict
    data.loc[index] = row # Set the index for the next row?
    # Set current win
    won_last[home_team] = row['Home Points Differ'] # If home won set that in the won_last dict
    won_last[visitor_team] = not row['Home Points Differ'] # if home did not win set that in the won_last dict
    
# Add WinStreaks

data['HomeWinStreak'] = 0
data['VisitorWinStreak'] = 0

win_streak = defaultdict(int) #  Create a dictionary for teams winning streaks

for index, row in data.iterrows():
    home_team = row['Home'] # Home team = home team for that row
    visitor_team = row['Visitor'] # Vis team = vis team for that row
    row['HomeWinStreak'] = win_streak[home_team] # HomeWinStreak for that row is looked up in the dictionary win_streak
    row['VisitorWinStreak'] = win_streak[visitor_team] # Set VisitorWinStreak in the row to dict value for that team
    data.loc[index] = row # Set row to next row
    # Set current win streak number
    if row['Home Points Differ']:
        win_streak[home_team] += 1
        win_streak[visitor_team] = 0
    else:
        win_streak[home_team] = 0
        win_streak[visitor_team] += 1
    
# Which team won in their last match?

last_match_winner = defaultdict(int)

def home_team_won_last(row):
    # Variables equal the team names
    home_team = row['Home']
    visitor_team = row['Visitor']

    teams = tuple(sorted([home_team, visitor_team])) # Tuple of the home and visitor team to search for
    result = 1 if last_match_winner[teams] == row['Home'] else 0 # Look in last_match_winner dict for if these teams have played before
    winner = row['Home'] if  row['Home Points Differ'] else row['Visitor'] # Winner variable is home team if the homewin column says it is

    last_match_winner[teams] = winner # Feed the winner into the last_match_winner dict

    return result

data['HomeTeamWonLast'] = data.apply(home_team_won_last, axis=1) # Apply the function on each row (axis=1)

data

Unnamed: 0,Date,Start (ET),Visitor,Vis PTS,Home,Home PTS,2018-19 Vis Rank,2018-19 Home Rank,2019-20 Vis Rank,2019-20 Home Rank,Home Points Differ,Year,Month,Day,Start (ET).1,HomeLastWin,VisitorLastWin,HomeWinStreak,VisitorWinStreak,HomeTeamWonLast
2624,2019-10-22,8:00p,New Orleans Pelicans,122.0,Toronto Raptors,130.0,24.0,2.0,,,True,2019,10,22,8.0,0,0,0,0,0
2625,2019-10-22,10:30p,Los Angeles Lakers,102.0,Los Angeles Clippers,112.0,20.0,12.0,,,True,2019,10,22,10.3,0,0,0,0,0
2626,2019-10-23,7:00p,Chicago Bulls,125.0,Charlotte Hornets,126.0,27.0,17.0,,,True,2019,10,23,7.0,0,0,0,0,0
2627,2019-10-23,7:00p,Detroit Pistons,119.0,Indiana Pacers,110.0,16.0,11.0,,,False,2019,10,23,7.0,0,0,0,0,0
2628,2019-10-23,7:00p,Cleveland Cavaliers,85.0,Orlando Magic,94.0,28.0,15.0,,,True,2019,10,23,7.0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4651,2021-04-23,8:00p,Cleveland Cavaliers,,Charlotte Hornets,,28.0,17.0,28.0,23.0,0,2021,04,23,8.0,True,0,2,0,0
4652,2021-04-23,8:00p,Los Angeles Clippers,,Houston Rockets,,12.0,5.0,4.0,8.0,0,2021,04,23,8.0,0,0,0,0,0
4653,2021-04-23,8:00p,Washington Wizards,,Oklahoma City Thunder,,25.0,10.0,22.0,10.0,0,2021,04,23,8.0,True,0,4,0,1
4654,2021-04-23,10:00p,Denver Nuggets,,Golden State Warriors,,4.0,3.0,6.0,30.0,0,2021,04,23,10.0,True,True,7,1,1


In [52]:
# Individual Player Rankings

# Team dictionary to change abbr. team names to full length ==== MADE IT WORSE
team_dict = {
    'ATL': 'Atlanta Hawks',
    'BOS': 'Boston Celtics',
    'BRK': 'Brooklyn Nets',
    'CHI': 'Chicago Bulls',
    'CHO': 'Charlotte Hornets',
    'CLE': 'Cleveland Cavaliers',
    'DAL': 'Dallas Mavericks',
    'DEN': 'Denver Nuggets',
    'DET': 'Detroit Pistons',
    'GSW': 'Golden State Warriors',
    'HOU': 'Houston Rockets',
    'IND': 'Indiana Pacers',
    'LAC': 'Los Angeles Clippers',
    'LAL': 'Los Angeles Lakers',
    'MEM': 'Memphis Grizzlies',
    'MIA': 'Miami Heat',
    'MIL': 'Milwaukee Bucks',
    'MIN': 'Minnesota Timberwolves',
    'NOP': 'New Orleans Pelicans',
    'NYK': 'New York Knicks',
    'OKC': 'Oklahoma City Thunder',
    'ORL': 'Orlando Magic',
    'PHI': 'Philadelphia 76ers',
    'PHO': 'Phoenix Suns',
    'POR': 'Portland Trail Blazers',
    'SAC': 'Sacramento Kings',
    'SAS': 'San Antonio Spurs',
    'TOR': 'Toronto Raptors',
    'UTA': 'Utah Jazz',
    'WAS': 'Washington Wizards'
}

# Function to add in player ranks

def add_player_ranks_from_excel(year):
    # Add in player ranks for home and vis teams
    player_ranks = pd.read_excel('./content/NBA_COMBINED.xlsx', sheet_name='Player Rank '+str(year),)

    # Map Team dictionary
    player_ranks['Team'] = player_ranks['Tm'].map(team_dict)

    # Drop multiple rank entries
    player_ranks.drop_duplicates(subset='Rk', inplace=True, keep='last')

    # Join the team and position columns so we only have 5 players per team
    player_ranks['Joined'] = player_ranks['Team'] + player_ranks['Pos']

    # drop duplicate team members
    player_ranks.drop_duplicates(subset='Joined', inplace=True)

    # drop other columns
    player_ranks = player_ranks[['Rk', 'Pos', 'Team']]

    # Make positions columns
    player_ranks = player_ranks.pivot_table(values='Rk', index='Team', columns='Pos', aggfunc='first')

    # Reset the index to numbers again
    player_ranks.reset_index(inplace=True)

    # Create Home and Vis columns
    columns = ['C', 'PF', 'PG', 'SF', 'SG']
    for col in columns:
      player_ranks['H'+col] = player_ranks[col]
      player_ranks['V'+col] = player_ranks[col]

    Home_ranks_df = player_ranks[['Team', 'HC', 'HPF', 'HPG', 'HSF', 'HSG']]
    Vis_ranks_df = player_ranks[['Team', 'VC', 'VPF', 'VPG', 'VSF', 'VSG']]
    Home_ranks_df.columns = ['Home', 'HC '+year, 'HPF '+year, 'HPG '+year, 'HSF '+year, 'HSG '+year]
    Vis_ranks_df.columns = ['Visitor', 'VC '+year, 'VPF '+year, 'VPG '+year, 'VSF '+year, 'VSG '+year]
    return Home_ranks_df, Vis_ranks_df

# Find individual player rankings from excel
homeranks18, visranks18 = add_player_ranks_from_excel('2018-19')

# Merge player rankings and data
X = data.copy()
X = X.merge(homeranks18, on='Home', how='left')
X = X.merge(visranks18, on='Visitor', how='left')
X = X.drop('Home Points Differ', axis=1)
X.columns

# Drop non-feature columns
X = X.loc[:,'2018-19 Vis Rank':]

X = X.astype(float)

# Drop games that havent been played

rows_with_results = len(data) - len(data[data['Vis PTS'].isna()])

# Training and testing
X_train_and_test = X.iloc[:rows_with_results-1,:]
y_train_and_test = y_all.iloc[:rows_with_results-1]
y_train_and_test = y_train_and_test.astype(bool)
# X = X.drop(['2019-20 Vis Rank'], axis=1)
# X = X.drop(['2019-20 Home Rank'], axis=1)

# Future Games
X_valid = X.iloc[rows_with_results:,:]

# Get future teams and dates
future_teams_and_dates = data.iloc[rows_with_results:,:]
future_teams_and_dates = future_teams_and_dates[['Date', 'Visitor','Home']]

In [53]:
# Train model function

def train_model(X_train_and_test, y_train_and_test, model):
    ''' Scale, Split, Impute and Train one model '''
    
    X_train, X_test, y_train, y_test = train_test_split(X_train_and_test, y_train_and_test, test_size=0.2, shuffle=False)

    pipe = make_pipeline(SimpleImputer(),StandardScaler(), model)
    pipe.fit(X_train, y_train)
    preds = pipe.predict(X_test)

    
    # Combine predictions with actuals
    preds_df = pd.DataFrame(preds, columns=['Predictions'])
    preds_df.index = pd.RangeIndex(start=y_train.last_valid_index()+1, stop=y_train.last_valid_index()+1 + len(y_test))
    predictions_array.append(preds_df)
    preds_and_true = pd.concat([y_test, preds_df], axis=1, ignore_index=True)

    
    # Accuracy
    wins = preds_and_true.apply(lambda x: True if x[0] == True and x[1] == True else False, axis=1)
    losses = preds_and_true.apply(lambda x: True if x[0] == False and x[1] == False else False, axis=1)
    print('Model: ',str(model))
    print('Total test games: ', len(y_test))
    print('Wins predicted correctly: ',len(wins[wins == True].index))
    print('Losses predicted correctly: ',len(losses[losses == True].index))
    print('Percentage predicted correctly: ', (len(wins[wins == True].index) + len(losses[losses == True].index)) / len(preds_and_true))
    
    return preds_df

In [54]:
# Without individual player rankings?

X_without_players = X_train_and_test.loc[:,:'HomeTeamWonLast']

# Train and test models

predictions_array = []

models_array = [svc, ADBC, RFC, GBC, HGBC, XGB, QDA, KNC]

for model in models_array:
    train_model(X_train_and_test, y_train_and_test, model)
    
# print('')
# print('')
# print('')
# print('Without player rankings')
# print('')
# print('')
# print('')

# for model in models_array:
#     train_model(X_without_players, y_train_and_test, model)

Model:  SVC(C=10, gamma=0.1, random_state=99)
Total test games:  394
Wins predicted correctly:  165
Losses predicted correctly:  49
Percentage predicted correctly:  0.5431472081218274
Model:  AdaBoostClassifier(learning_rate=0.8, n_estimators=155, random_state=99)
Total test games:  394
Wins predicted correctly:  199
Losses predicted correctly:  26
Percentage predicted correctly:  0.5710659898477157
Model:  RandomForestClassifier(min_samples_split=5, n_estimators=1000, random_state=99)
Total test games:  394
Wins predicted correctly:  208
Losses predicted correctly:  27
Percentage predicted correctly:  0.5964467005076142
Model:  GradientBoostingClassifier(learning_rate=0.15, n_estimators=500,
                           random_state=99)
Total test games:  394
Wins predicted correctly:  162
Losses predicted correctly:  66
Percentage predicted correctly:  0.5786802030456852
Model:  HistGradientBoostingClassifier(l2_regularization=1.5, max_leaf_nodes=80,
                               min_

In [55]:
# Make future predictions Without player rankings


# X_train, X_test, y_train, y_test = train_test_split(X_train_and_test, y_train_and_test, test_size=0.2, shuffle=False)

all_predictions = pd.DataFrame(y_test)
for i in predictions_array:
    all_predictions = pd.concat([all_predictions, i], axis=1)
    
all_predictions.columns =['Home Points Differ', 'SVC', 'ADBC', 'RFC', 'GBC', 'HGBC', 'XGB', 'QDA', 'KNC']

In [61]:
# Make future predictions

future_models = [svc, ADBC, RFC, GBC, HGBC, XGB, QDA, KNC]

def make_preds(X_train, y_train, X_predict, model):
    pipe = make_pipeline(SimpleImputer(),StandardScaler(), model)
    pipe.fit(X_train, y_train)
    preds = pipe.predict(X_predict)
    return preds

In [62]:
# Append the predictions onto the entire data and keep only date, teams and prediction columns
future_predictions_array = []

for model in models_array:
    preds = make_preds(X_train_and_test, y_train_and_test, X_valid, model)
    future_predictions_array.append(preds)




In [63]:
# Concat

future_predictions = pd.DataFrame(future_teams_and_dates)
future_predictions = future_predictions.reset_index(drop=True)

for i in future_predictions_array:
    df = pd.DataFrame(i)
    future_predictions = pd.concat([future_predictions, df], axis=1)

future_predictions.columns = ['Date', 'Visitor', 'Home' , 'SVC', 'ADBC', 'RFC', 'GBC', 'HGBC', 'XGB', 'QDA', 'KNC']
future_predictions

Unnamed: 0,Date,Visitor,Home,SVC,ADBC,RFC,GBC,HGBC,XGB,QDA,KNC
0,2021-04-16,Indiana Pacers,Utah Jazz,True,True,True,True,True,True,True,True
1,2021-04-16,Oklahoma City Thunder,Detroit Pistons,True,True,True,True,True,True,True,True
2,2021-04-16,Los Angeles Clippers,Philadelphia 76ers,True,True,True,True,True,True,True,True
3,2021-04-16,New Orleans Pelicans,Washington Wizards,True,True,True,True,True,True,True,True
4,2021-04-16,Charlotte Hornets,Brooklyn Nets,False,True,False,False,False,True,True,False
...,...,...,...,...,...,...,...,...,...,...,...
59,2021-04-23,Cleveland Cavaliers,Charlotte Hornets,True,True,False,True,False,True,True,True
60,2021-04-23,Los Angeles Clippers,Houston Rockets,True,True,True,True,True,True,True,True
61,2021-04-23,Washington Wizards,Oklahoma City Thunder,True,True,True,False,True,True,True,True
62,2021-04-23,Denver Nuggets,Golden State Warriors,True,False,True,True,True,True,False,False


In [64]:
future_predictions.to_excel('future_predictions.xlsx', index=False)

In [None]:
past_predictions = pd.DataFrame()
past_predictions = past_predictions.reset_index(drop=True)

for i in predictions_array:
    df = pd.DataFrame(i)
    past_predictions = pd.concat([past_predictions, df], axis=1)

past_predictions.columns = ['SVC', 'ADBC', 'RFC', 'GBC', 'HGBC', 'XGB', 'QDA', 'KNC']
past_predictions

In [None]:
past_predictions.to_excel('Past_predictions.xlsx')

In [25]:
## HYPER PARAM TUNING

models_array = [svc, ADBC, RFC, GBC, HGBC, XGB, QDA, KNC]

pipe_SVC = make_pipeline(SimpleImputer(),StandardScaler(),SelectKBest(f_regression, k='all'), svc)

pipe_ADBC = make_pipeline(SimpleImputer(),StandardScaler(),SelectKBest(f_regression, k='all'), ADBC)

pipe_RFC = make_pipeline(SimpleImputer(),StandardScaler(),SelectKBest(f_regression, k='all'), RFC)

pipe_GBC = make_pipeline(SimpleImputer(),StandardScaler(),SelectKBest(f_regression, k='all'), GBC)

pipe_HGBC = make_pipeline(SimpleImputer(),StandardScaler(),SelectKBest(f_regression, k='all'), HGBC)

pipe_XGB = make_pipeline(SimpleImputer(),StandardScaler(),SelectKBest(f_regression, k='all'), XGB)

pipe_QDA = make_pipeline(SimpleImputer(),StandardScaler(),SelectKBest(f_regression, k='all'), QDA)

pipe_KNC = make_pipeline(SimpleImputer(),StandardScaler(),SelectKBest(f_regression, k='all'), KNC)

param_range = [1,3,6,9,10]
param_range_fl = [1.0, 0.5]

grid_params_svc = [{'svc__kernel': ['linear', 'poly', 'rbf', 'sigmoid', 'precomputed'], 
                    'svc__C': param_range, 'svc__shrinking': [True, False],
                  'svc__probability': [True, False], 'svc__decision_function_shape': ['ovo', 'ovr']}]

grid_params_adbc = [{'adaboostclassifier__base_estimator': [None, 'svc', 'randomforestclassifier', 'knearestneighbors'],
                    'adaboostclassifier__n_estimators': [50, 100, 200], 'adaboostclassifier__learning_rate': [0.01, 0.1, 1, 3]}]

grid_params_rf = [{'randomforestclassifier__n_estimators': [50, 100, 150, 200], 'randomforestclassifier__criterion': ['gini', 'entropy'],
                'randomforestclassifier__max_depth': param_range,
                'randomforestclassifier__min_samples_split': param_range[1:], 'randomforestclassifier__max_leaf_nodes':[5, 10, 30, None],
                  'randomforestclassifier__oob_score': [True, False], 'randomforestclassifier__warm_start': [True, False]
                  }]

grid_params_gbc = [{'gradientboostingclassifier__loss': ['deviance', 'exponential'], 'gradientboostingclassifier__learning_rate': [0.01, 0.1, 1, 3],
                   'gradientboostingclassifier__n_estimators': [200, 250, 300], 'gradientboostingclassifier__subsample': [1,2,3,4],
                   'gradientboostingclassifier__criterion': ['friedman_mse', 'mse', 'mae']}]

grid_params_hgbc = [{'histgradientboostingclassifier__loss': ['auto', 'binary_crossentropy', 'categorical_crossentropy'],
                    'histgradientboostingclassifier__learning_rate': [0.01, 0.1, 1], 'histgradientboostingclassifier__max_iter': [80,100,150,200],
                    'histgradientboostingclassifier__max_leaf_nodes': [20, 31, 40, None], 'histgradientboostingclassifier__min_samples_leaf': [2, 5, 12, 15]}]

grid_params_xgb = [{'xgbclassifier__n_estimators': [100, 200, 400, 600], 'xgbclassifier__colsample_bytree':[0.4, 0.6, 0.8,1],
                   'xgbclassifier__max_depth': [15, 20, 25], 'xgbclassifier__reg_alpha': [1.1, 1.2, 1.3],
                   'xgbclassifier__reg_lambda':[1.1, 1.2, 1.3], 'xgbclassifier__subsample':[0.7, 0.8, 0.9]}]

grid_params_knc = [{'kneighborsclassifier__n_neighbors':[2,5,8,10], 'kneighborsclassifier__weights': ['uniform', 'distance'],
                   'kneighborsclassifier__algorithm':['auto', 'ball_tree', 'kd_tree', 'brute'], 'kneighborsclassifier__leaf_size':[20,30,40],
                   'kneighborsclassifier__p':[1,2]}]

svcp = GridSearchCV(estimator=pipe_SVC, param_grid=grid_params_svc,
                  scoring='accuracy', cv=10, n_jobs=-1)

RFCp = GridSearchCV(estimator=pipe_RFC,
            param_grid=grid_params_rf,
            scoring='accuracy', cv=10, 
            n_jobs=-1)

ADBCp = GridSearchCV(estimator=pipe_ADBC,
                   param_grid = grid_params_adbc,
                   scoring='accuracy', cv=10, n_jobs=-1)

GBCp = GridSearchCV(estimator=pipe_GBC,
                  param_grid = grid_params_gbc,
                  scoring='accuracy', cv=10, n_jobs=-1)

HGBCp = GridSearchCV(estimator=pipe_HGBC,
                   param_grid=grid_params_hgbc,
                   scoring='accuracy', cv=10, n_jobs=-1)

XGBp = GridSearchCV(estimator=pipe_XGB,
                  param_grid = grid_params_xgb,
                  scoring='accuracy', cv=10, n_jobs=-1)

KNCp = GridSearchCV(estimator=pipe_KNC,
                  param_grid = grid_params_knc,
                  scoring='accuracy', cv=10, n_jobs=-1)

grids = [ ADBCp, RFCp, GBCp, HGBCp, XGBp, KNCp, svcp]

grid_dict = { 
        0: 'ADBC', 1: 'RFC', 2: 'GBC', 3: 'HGBC', 4:'XGB', 5: 'KNC', 6: 'SVC'}

x_train, x_test, y_train, y_test = train_test_split(X_train_and_test, y_train_and_test, test_size=0.4, shuffle=False)

# Fit the grid search objects
print('Performing model optimizations...')
best_acc = 0.0
best_clf = 0
best_gs = ''
for idx, gs in enumerate(grids):
    print('\nEstimator: %s' % grid_dict[idx])
    gs.fit(x_train, y_train)
    print('Best params are : %s' % gs.best_params_)
    # Best training data accuracy
    print('Best training accuracy: %.3f' % gs.best_score_)
    # Predict on test data with best params
    y_pred = gs.predict(x_test)
    # Test data accuracy of model with best params
    print('Test set accuracy score for best params: %.3f ' % accuracy_score(y_test, y_pred))
    # Track best (highest test accuracy) model
    if accuracy_score(y_test, y_pred) > best_acc:
        best_acc = accuracy_score(y_test, y_pred)
        best_gs = gs
        best_clf = idx
print('\nClassifier with best test set accuracy: %s' % grid_dict[best_clf])

Performing model optimizations...

Estimator: ADBC
Best params are : {'adaboostclassifier__base_estimator': None, 'adaboostclassifier__learning_rate': 0.01, 'adaboostclassifier__n_estimators': 200}
Best training accuracy: 0.581
Test set accuracy score for best params: 0.609 

Estimator: RFC
Best params are : {'randomforestclassifier__criterion': 'entropy', 'randomforestclassifier__max_depth': 3, 'randomforestclassifier__max_leaf_nodes': 10, 'randomforestclassifier__min_samples_split': 6, 'randomforestclassifier__n_estimators': 150, 'randomforestclassifier__oob_score': True, 'randomforestclassifier__warm_start': True}
Best training accuracy: 0.584
Test set accuracy score for best params: 0.609 

Estimator: GBC
Best params are : {'gradientboostingclassifier__criterion': 'mae', 'gradientboostingclassifier__learning_rate': 0.01, 'gradientboostingclassifier__loss': 'deviance', 'gradientboostingclassifier__n_estimators': 200, 'gradientboostingclassifier__subsample': 1}
Best training accuracy