In [1]:
import pandas as pd
import numpy as np
from matplotlib import pyplot

# Warnings
import warnings
warnings.simplefilter("ignore", UserWarning)

# Models
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.experimental import enable_hist_gradient_boosting
from sklearn.ensemble import HistGradientBoostingClassifier
from xgboost import XGBClassifier
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

# These models are voting models based off the above models
from sklearn.ensemble import VotingClassifier
from sklearn.ensemble import StackingRegressor

# Data prep
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler

# Model evaluations
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.model_selection import cross_val_score, cross_validate
from sklearn.model_selection import KFold,StratifiedKFold, ShuffleSplit, StratifiedShuffleSplit
from sklearn.model_selection import RandomizedSearchCV
from sklearn import metrics
from sklearn import datasets
from sklearn.model_selection import GridSearchCV
from sklearn.datasets import make_classification
from sklearn.feature_selection import RFE
from sklearn.inspection import permutation_importance



In [2]:
# Models

svc = SVC(kernel='rbf', gamma=0.1, C=10) # 5% increase with these hyperparamters
KNC = KNeighborsClassifier(weights='distance', p=2, n_neighbors=10, metric='euclidean', leaf_size=40) # 2.7% increase with these hp
ADBC = AdaBoostClassifier(n_estimators=155, learning_rate=0.8) # 2% increase with these hp
RFC = RandomForestClassifier(n_estimators=1000, min_samples_split=5) # 1% better with these hyperparameters

GBC = GradientBoostingClassifier(n_estimators=500, learning_rate=0.15) # 2% better
HGBC = HistGradientBoostingClassifier(min_samples_leaf=25, max_leaf_nodes=80, max_iter=100, max_depth=None, learning_rate=0.1, l2_regularization=1.5) # 2% better
XGB = XGBClassifier(n_estimators=150, learning_rate=0.1) # 1.7% better with hp
QDA = QuadraticDiscriminantAnalysis() # Same with default hp

# Imputer
imputer = SimpleImputer()
MMScaler = MinMaxScaler()

In [3]:
# Read data
data = pd.read_excel('./content/NBA_COMBINED.xlsx', parse_dates=['Date'])

# Drop unneccesary columns
data = data.drop(columns=['PTS22', 'PTS3', 'Attend.'], axis=1)

# Add point diff column to predict. True or False
data['Home Points Differ'] = data['Home PTS'] > data['Vis PTS']

y_all = data['Home Points Differ']

# data = data.loc[2621:, :]
# y_all = y_all.loc[2621:]

# X_2019_valid = X_2019.loc[3951:, :]
# y_2019_valid = y_2019.loc[3951:]

data.loc[len(data.loc[data['Vis PTS'] > 0]):,'Home Points Differ'] = 0

data

Unnamed: 0,Date,Start (ET),Visitor,Vis PTS,Home,Home PTS,2016-17 Vis Rank,2016-17 Home Rank,2017-18 Vis Rank,2017-18 Home Rank,2018-19 Vis Rank,2018-19 Home Rank,2019-20 Vis Rank,2019-20 Home Rank,Home Points Differ
0,2017-10-17,8:01p,Boston Celtics,99.0,Cleveland Cavaliers,102.0,4,5,,,,,,,True
1,2017-10-17,10:30p,Houston Rockets,122.0,Golden State Warriors,121.0,3,1,,,,,,,False
2,2017-10-18,7:00p,Charlotte Hornets,90.0,Detroit Pistons,102.0,20,19,,,,,,,True
3,2017-10-18,7:00p,Brooklyn Nets,131.0,Indiana Pacers,140.0,30,13,,,,,,,True
4,2017-10-18,7:00p,Miami Heat,109.0,Orlando Magic,116.0,17,26,,,,,,,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4473,2021-03-31,8:00p,New York Knicks,,Minnesota Timberwolves,,25,24,22.0,12.0,30.0,21.0,25.0,29.0,0
4474,2021-03-31,8:00p,Toronto Raptors,,Oklahoma City Thunder,,7,10,2.0,10.0,2.0,10.0,2.0,10.0,0
4475,2021-03-31,8:30p,Sacramento Kings,,San Antonio Spurs,,23,2,25.0,13.0,19.0,13.0,20.0,19.0,0
4476,2021-03-31,10:00p,Milwaukee Bucks,,Los Angeles Lakers,,14,28,16.0,21.0,1.0,20.0,1.0,3.0,0


In [4]:
# Add dates and time

# Get Day, Month and Year from date column
dates = pd.DataFrame()
dates['Year'] = data['Date'].dt.strftime('%Y')
dates['Month'] = data['Date'].dt.strftime('%m')
dates['Day'] = data['Date'].dt.strftime('%d')

# Add dates
data = pd.concat([data, dates], axis=1)

# Get start time
start_time = data['Start (ET)'].str[:-1]
start_time = start_time.str.replace(':', '.')
start_time = start_time.astype(float)
start_time.columns = ['Start Time']

# Add start time
data = pd.concat([data, start_time], axis=1)
data

Unnamed: 0,Date,Start (ET),Visitor,Vis PTS,Home,Home PTS,2016-17 Vis Rank,2016-17 Home Rank,2017-18 Vis Rank,2017-18 Home Rank,2018-19 Vis Rank,2018-19 Home Rank,2019-20 Vis Rank,2019-20 Home Rank,Home Points Differ,Year,Month,Day,Start (ET).1
0,2017-10-17,8:01p,Boston Celtics,99.0,Cleveland Cavaliers,102.0,4,5,,,,,,,True,2017,10,17,8.01
1,2017-10-17,10:30p,Houston Rockets,122.0,Golden State Warriors,121.0,3,1,,,,,,,False,2017,10,17,10.30
2,2017-10-18,7:00p,Charlotte Hornets,90.0,Detroit Pistons,102.0,20,19,,,,,,,True,2017,10,18,7.00
3,2017-10-18,7:00p,Brooklyn Nets,131.0,Indiana Pacers,140.0,30,13,,,,,,,True,2017,10,18,7.00
4,2017-10-18,7:00p,Miami Heat,109.0,Orlando Magic,116.0,17,26,,,,,,,True,2017,10,18,7.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4473,2021-03-31,8:00p,New York Knicks,,Minnesota Timberwolves,,25,24,22.0,12.0,30.0,21.0,25.0,29.0,0,2021,03,31,8.00
4474,2021-03-31,8:00p,Toronto Raptors,,Oklahoma City Thunder,,7,10,2.0,10.0,2.0,10.0,2.0,10.0,0,2021,03,31,8.00
4475,2021-03-31,8:30p,Sacramento Kings,,San Antonio Spurs,,23,2,25.0,13.0,19.0,13.0,20.0,19.0,0,2021,03,31,8.30
4476,2021-03-31,10:00p,Milwaukee Bucks,,Los Angeles Lakers,,14,28,16.0,21.0,1.0,20.0,1.0,3.0,0,2021,03,31,10.00


In [5]:
# Fill in actual values for home and visitor last win
data["HomeLastWin"] = False
data["VisitorLastWin"] = False

from collections import defaultdict
won_last = defaultdict(int) # Create dictionary won last

for index, row in data.iterrows(): # for each row
  home_team = row['Home'] # Take the home team in the row
  visitor_team = row['Visitor'] # Take the vis team in each row
  row['HomeLastWin'] = won_last[home_team] # If HomeLastWin is true set that team to won in the won_last dict
  row['VisitorLastWin'] = won_last[visitor_team] # If VisitorLastWin is true set that team to won in won_last dict
  data.loc[index] = row # Set the index for the next row?
  # Set current win
  won_last[home_team] = row['Home Points Differ'] # If home won set that in the won_last dict
  won_last[visitor_team] = not row['Home Points Differ'] # if home did not win set that in the won_last dict

In [6]:
# Add WinStreaks

data['HomeWinStreak'] = 0
data['VisitorWinStreak'] = 0

win_streak = defaultdict(int) #  Create a dictionary for teams winning streaks

for index, row in data.iterrows():
  home_team = row['Home'] # Home team = home team for that row
  visitor_team = row['Visitor'] # Vis team = vis team for that row
  row['HomeWinStreak'] = win_streak[home_team] # HomeWinStreak for that row is looked up in the dictionary win_streak
  row['VisitorWinStreak'] = win_streak[visitor_team] # Set VisitorWinStreak in the row to dict value for that team
  data.loc[index] = row # Set row to next row
  # Set current win streak number
  if row['Home Points Differ']:
    win_streak[home_team] += 1
    win_streak[visitor_team] = 0
  else:
    win_streak[home_team] = 0
    win_streak[visitor_team] += 1

In [7]:
# Which team won in their last match?

last_match_winner = defaultdict(int)

def home_team_won_last(row):
  # Variables equal the team names
  home_team = row['Home']
  visitor_team = row['Visitor']

  teams = tuple(sorted([home_team, visitor_team])) # Tuple of the home and visitor team to search for
  result = 1 if last_match_winner[teams] == row['Home'] else 0 # Look in last_match_winner dict for if these teams have played before
  winner = row['Home'] if  row['Home Points Differ'] else row['Visitor'] # Winner variable is home team if the homewin column says it is

  last_match_winner[teams] = winner # Feed the winner into the last_match_winner dict

  return result

data['HomeTeamWonLast'] = data.apply(home_team_won_last, axis=1) # Apply the function on each row (axis=1)

In [8]:
# Individual Player Rankings

# Team dictionary to change abbr. team names to full length ==== MADE IT WORSE
team_dict = {
    'ATL': 'Atlanta Hawks',
    'BOS': 'Boston Celtics',
    'BRK': 'Brooklyn Nets',
    'CHI': 'Chicago Bulls',
    'CHO': 'Charlotte Hornets',
    'CLE': 'Cleveland Cavaliers',
    'DAL': 'Dallas Mavericks',
    'DEN': 'Denver Nuggets',
    'DET': 'Detroit Pistons',
    'GSW': 'Golden State Warriors',
    'HOU': 'Houston Rockets',
    'IND': 'Indiana Pacers',
    'LAC': 'Los Angeles Clippers',
    'LAL': 'Los Angeles Lakers',
    'MEM': 'Memphis Grizzlies',
    'MIA': 'Miami Heat',
    'MIL': 'Milwaukee Bucks',
    'MIN': 'Minnesota Timberwolves',
    'NOP': 'New Orleans Pelicans',
    'NYK': 'New York Knicks',
    'OKC': 'Oklahoma City Thunder',
    'ORL': 'Orlando Magic',
    'PHI': 'Philadelphia 76ers',
    'PHO': 'Phoenix Suns',
    'POR': 'Portland Trail Blazers',
    'SAC': 'Sacramento Kings',
    'SAS': 'San Antonio Spurs',
    'TOR': 'Toronto Raptors',
    'UTA': 'Utah Jazz',
    'WAS': 'Washington Wizards'
}

# Function to add in player ranks

def add_player_ranks_from_excel(year):
    # Add in player ranks for home and vis teams
    player_ranks = pd.read_excel('./content/NBA_COMBINED.xlsx', sheet_name='Player Rank '+str(year),)

    # Map Team dictionary
    player_ranks['Team'] = player_ranks['Tm'].map(team_dict)

    # Drop multiple rank entries
    player_ranks.drop_duplicates(subset='Rk', inplace=True, keep='last')

    # Join the team and position columns so we only have 5 players per team
    player_ranks['Joined'] = player_ranks['Team'] + player_ranks['Pos']

    # drop duplicate team members
    player_ranks.drop_duplicates(subset='Joined', inplace=True)

    # drop other columns
    player_ranks = player_ranks[['Rk', 'Pos', 'Team']]

    # Make positions columns
    player_ranks = player_ranks.pivot_table(values='Rk', index='Team', columns='Pos', aggfunc='first')

    # Reset the index to numbers again
    player_ranks.reset_index(inplace=True)

    # Create Home and Vis columns
    columns = ['C', 'PF', 'PG', 'SF', 'SG']
    for col in columns:
      player_ranks['H'+col] = player_ranks[col]
      player_ranks['V'+col] = player_ranks[col]

    Home_ranks_df = player_ranks[['Team', 'HC', 'HPF', 'HPG', 'HSF', 'HSG']]
    Vis_ranks_df = player_ranks[['Team', 'VC', 'VPF', 'VPG', 'VSF', 'VSG']]
    Home_ranks_df.columns = ['Home', 'HC '+year, 'HPF '+year, 'HPG '+year, 'HSF '+year, 'HSG '+year]
    Vis_ranks_df.columns = ['Visitor', 'VC '+year, 'VPF '+year, 'VPG '+year, 'VSF '+year, 'VSG '+year]
    return Home_ranks_df, Vis_ranks_df

In [9]:
# Find individual player rankings from excel
homeranks16, visranks16 = add_player_ranks_from_excel('2016-17')
homeranks17, visranks17 = add_player_ranks_from_excel('2017-18')
homeranks18, visranks18 = add_player_ranks_from_excel('2018-19')

# Merge player rankings and data
X = data.copy()
X = X.merge(homeranks16, on='Home', how='left')
X = X.merge(visranks16, on='Visitor', how='left')
X = X.merge(homeranks17, on='Home', how='left')
X = X.merge(visranks17, on='Visitor', how='left')
X = X.merge(homeranks18, on='Home', how='left')
X = X.merge(visranks18, on='Visitor', how='left')
X = X.drop('Home Points Differ', axis=1)
X.columns


Index(['Date', 'Start (ET)', 'Visitor', 'Vis PTS', 'Home', 'Home PTS',
       '2016-17 Vis Rank', '2016-17 Home Rank', '2017-18 Vis Rank',
       '2017-18 Home Rank', '2018-19 Vis Rank', '2018-19 Home Rank',
       '2019-20 Vis Rank', '2019-20 Home Rank', 'Year', 'Month', 'Day',
       'Start (ET)', 'HomeLastWin', 'VisitorLastWin', 'HomeWinStreak',
       'VisitorWinStreak', 'HomeTeamWonLast', 'HC 2016-17', 'HPF 2016-17',
       'HPG 2016-17', 'HSF 2016-17', 'HSG 2016-17', 'VC 2016-17',
       'VPF 2016-17', 'VPG 2016-17', 'VSF 2016-17', 'VSG 2016-17',
       'HC 2017-18', 'HPF 2017-18', 'HPG 2017-18', 'HSF 2017-18',
       'HSG 2017-18', 'VC 2017-18', 'VPF 2017-18', 'VPG 2017-18',
       'VSF 2017-18', 'VSG 2017-18', 'HC 2018-19', 'HPF 2018-19',
       'HPG 2018-19', 'HSF 2018-19', 'HSG 2018-19', 'VC 2018-19',
       'VPF 2018-19', 'VPG 2018-19', 'VSF 2018-19', 'VSG 2018-19'],
      dtype='object')

In [10]:
# Drop non-feature columns
X = X.loc[:,'2016-17 Vis Rank':]
X.columns

Index(['2016-17 Vis Rank', '2016-17 Home Rank', '2017-18 Vis Rank',
       '2017-18 Home Rank', '2018-19 Vis Rank', '2018-19 Home Rank',
       '2019-20 Vis Rank', '2019-20 Home Rank', 'Year', 'Month', 'Day',
       'Start (ET)', 'HomeLastWin', 'VisitorLastWin', 'HomeWinStreak',
       'VisitorWinStreak', 'HomeTeamWonLast', 'HC 2016-17', 'HPF 2016-17',
       'HPG 2016-17', 'HSF 2016-17', 'HSG 2016-17', 'VC 2016-17',
       'VPF 2016-17', 'VPG 2016-17', 'VSF 2016-17', 'VSG 2016-17',
       'HC 2017-18', 'HPF 2017-18', 'HPG 2017-18', 'HSF 2017-18',
       'HSG 2017-18', 'VC 2017-18', 'VPF 2017-18', 'VPG 2017-18',
       'VSF 2017-18', 'VSG 2017-18', 'HC 2018-19', 'HPF 2018-19',
       'HPG 2018-19', 'HSF 2018-19', 'HSG 2018-19', 'VC 2018-19',
       'VPF 2018-19', 'VPG 2018-19', 'VSF 2018-19', 'VSG 2018-19'],
      dtype='object')

In [11]:
X = X.astype(float)
X.dtypes

2016-17 Vis Rank     float64
2016-17 Home Rank    float64
2017-18 Vis Rank     float64
2017-18 Home Rank    float64
2018-19 Vis Rank     float64
2018-19 Home Rank    float64
2019-20 Vis Rank     float64
2019-20 Home Rank    float64
Year                 float64
Month                float64
Day                  float64
Start (ET)           float64
HomeLastWin          float64
VisitorLastWin       float64
HomeWinStreak        float64
VisitorWinStreak     float64
HomeTeamWonLast      float64
HC 2016-17           float64
HPF 2016-17          float64
HPG 2016-17          float64
HSF 2016-17          float64
HSG 2016-17          float64
VC 2016-17           float64
VPF 2016-17          float64
VPG 2016-17          float64
VSF 2016-17          float64
VSG 2016-17          float64
HC 2017-18           float64
HPF 2017-18          float64
HPG 2017-18          float64
HSF 2017-18          float64
HSG 2017-18          float64
VC 2017-18           float64
VPF 2017-18          float64
VPG 2017-18   

In [12]:
# Drop games that havent been played
rows_with_results = len(data) - len(data[data['Vis PTS'].isna()])

# Training and testing
X_train_and_test = X.iloc[:rows_with_results-1,:]
y_train_and_test = y_all.iloc[:rows_with_results-1]
y_train_and_test = y_train_and_test.astype(bool)
X = X.drop(['2019-20 Vis Rank'], axis=1)
X = X.drop(['2019-20 Home Rank'], axis=1)

# Future Games
X_valid = X.iloc[rows_with_results:,:]

In [13]:
future_teams_and_dates = data.iloc[rows_with_results:,:]
future_teams_and_dates = future_teams_and_dates[['Date', 'Visitor','Home']]
future_teams_and_dates
# X_valid

Unnamed: 0,Date,Visitor,Home
4368,2021-03-19,Sacramento Kings,Boston Celtics
4369,2021-03-19,San Antonio Spurs,Cleveland Cavaliers
4370,2021-03-19,Detroit Pistons,Houston Rockets
4371,2021-03-19,Golden State Warriors,Memphis Grizzlies
4372,2021-03-19,Indiana Pacers,Miami Heat
...,...,...,...
4473,2021-03-31,New York Knicks,Minnesota Timberwolves
4474,2021-03-31,Toronto Raptors,Oklahoma City Thunder
4475,2021-03-31,Sacramento Kings,San Antonio Spurs
4476,2021-03-31,Milwaukee Bucks,Los Angeles Lakers


In [14]:
# Train model function

def train_model(X_train_and_test, y_train_and_test, model):
    ''' Scale, Split, Impute and Train one model '''
    
    X_train, X_test, y_train, y_test = train_test_split(X_train_and_test, y_train_and_test, test_size=0.2, shuffle=False)


    # Impute
    colsT = X_train.columns
    colsV = X_test.columns
    X_train = pd.DataFrame(imputer.fit_transform(X_train))
    X_test = pd.DataFrame(imputer.transform(X_test))
    X_train.columns = colsT
    X_test.columns = colsV
    
    # Scale and replace column names
    X_scaled_train = MMScaler.fit_transform(X_train)
    X_scaled_test = MMScaler.transform(X_test)
    X_scaled_train = pd.DataFrame(X_scaled_train, columns=colsT)
    X_scaled_test = pd.DataFrame(X_scaled_test, columns=colsV)
    
    # Train
    model.fit(X_scaled_train, y_train)
    preds = model.predict(X_scaled_test)

    
    # Combine predictions with actuals
    preds_df = pd.DataFrame(preds, columns=['Predictions'])
    preds_df.index = pd.RangeIndex(start=y_train.last_valid_index()+1, stop=y_train.last_valid_index()+1 + len(y_test))
    predictions_array.append(preds_df)
    preds_and_true = pd.concat([y_test, preds_df], axis=1, ignore_index=True)

    
    # Accuracy
    wins = preds_and_true.apply(lambda x: True if x[0] == True and x[1] == True else False, axis=1)
    losses = preds_and_true.apply(lambda x: True if x[0] == False and x[1] == False else False, axis=1)
    print('Model: ',str(model))
    print('Total test games: ', len(y_test))
    print('Wins predicted correctly: ',len(wins[wins == True].index))
    print('Losses predicted correctly: ',len(losses[losses == True].index))
    print('Percentage predicted correctly: ', (len(wins[wins == True].index) + len(losses[losses == True].index)) / len(preds_and_true))

In [15]:
# Train and test models

predictions_array = []

models_array = [svc, ADBC, RFC, GBC, HGBC, XGB, QDA, KNC]
X_train_and_test = X_train_and_test.drop(['2019-20 Vis Rank'], axis=1)
X_train_and_test = X_train_and_test.drop(['2019-20 Home Rank'], axis=1)
for model in models_array:
    train_model(X_train_and_test, y_train_and_test, model)

Model:  SVC(C=10, gamma=0.1)
Total test games:  874
Wins predicted correctly:  270
Losses predicted correctly:  210
Percentage predicted correctly:  0.5491990846681922
Model:  AdaBoostClassifier(learning_rate=0.8, n_estimators=155)
Total test games:  874
Wins predicted correctly:  301
Losses predicted correctly:  167
Percentage predicted correctly:  0.5354691075514875
Model:  RandomForestClassifier(min_samples_split=5, n_estimators=1000)
Total test games:  874
Wins predicted correctly:  308
Losses predicted correctly:  181
Percentage predicted correctly:  0.5594965675057209
Model:  GradientBoostingClassifier(learning_rate=0.15, n_estimators=500)
Total test games:  874
Wins predicted correctly:  279
Losses predicted correctly:  212
Percentage predicted correctly:  0.5617848970251716
Model:  HistGradientBoostingClassifier(l2_regularization=1.5, max_leaf_nodes=80,
                               min_samples_leaf=25)
Total test games:  874
Wins predicted correctly:  287
Losses predicted cor

In [16]:
# Make future predictions

count = 0

X_train, X_test, y_train, y_test = train_test_split(X_train_and_test, y_train_and_test, test_size=0.2, shuffle=False)

all_predictions = pd.DataFrame(y_test)
for i in predictions_array:
    all_predictions = pd.concat([all_predictions, i], axis=1)
    count +=1
all_predictions.columns =['Home Points Differ', 'SVC', 'ADBC', 'RFC', 'GBC', 'HGBC', 'XGB', 'QDA', 'KNC']

In [17]:
# Make future predictions

future_models = [svc, ADBC, RFC, GBC, HGBC, XGB, QDA, KNC]

def make_preds(X_train, y_train, X_predict, model):
    # Impute
    colsT = X_train.columns
    colsV = X_predict.columns
    X_train = pd.DataFrame(imputer.fit_transform(X_train))
    X_predict = pd.DataFrame(imputer.transform(X_predict))
    X_train.columns = colsT
    X_predict.columns = colsV


    # Train
    model.fit(X_train, y_train)
    preds = model.predict(X_predict)
    return preds

In [18]:
# Append the predictions onto the entire data and keep only date, teams and prediction columns
future_predictions_array = []

for model in future_models:
    preds = make_preds(X_train_and_test, y_train_and_test, X_valid, model)
    future_predictions_array.append(preds)

future_predictions_array



[array([ True,  True,  True,  True,  True,  True,  True,  True, False,
         True,  True,  True,  True,  True,  True,  True, False,  True,
         True,  True,  True,  True,  True,  True, False,  True,  True,
         True,  True,  True,  True,  True,  True,  True,  True,  True,
         True,  True,  True,  True,  True,  True,  True,  True,  True,
         True,  True,  True,  True,  True,  True,  True,  True,  True,
         True, False,  True,  True,  True, False, False,  True,  True,
         True,  True,  True,  True, False,  True,  True,  True,  True,
         True,  True,  True,  True,  True,  True,  True,  True,  True,
         True,  True,  True,  True,  True,  True,  True,  True,  True,
         True,  True,  True,  True,  True,  True,  True,  True,  True,
         True,  True,  True,  True,  True,  True,  True,  True,  True,
         True,  True]),
 array([ True,  True,  True,  True,  True, False,  True,  True, False,
         True,  True,  True,  True,  True,  True, Fal

In [19]:
# Concat

future_predictions = pd.DataFrame(future_teams_and_dates)
future_predictions = future_predictions.reset_index(drop=True)

for i in future_predictions_array:
    df = pd.DataFrame(i)
    future_predictions = pd.concat([future_predictions, df], axis=1)

future_predictions.columns = ['Date', 'Visitor', 'Home' , 'SVC', 'ADBC', 'RFC', 'GBC', 'HGBC', 'XGB', 'QDA', 'KNC']
future_predictions

Unnamed: 0,Date,Visitor,Home,SVC,ADBC,RFC,GBC,HGBC,XGB,QDA,KNC
0,2021-03-19,Sacramento Kings,Boston Celtics,True,True,True,True,True,True,True,True
1,2021-03-19,San Antonio Spurs,Cleveland Cavaliers,True,True,True,True,True,True,False,False
2,2021-03-19,Detroit Pistons,Houston Rockets,True,True,True,False,True,False,True,True
3,2021-03-19,Golden State Warriors,Memphis Grizzlies,True,True,False,True,True,True,False,True
4,2021-03-19,Indiana Pacers,Miami Heat,True,True,True,True,True,True,True,True
...,...,...,...,...,...,...,...,...,...,...,...
105,2021-03-31,New York Knicks,Minnesota Timberwolves,True,True,False,False,False,False,True,True
106,2021-03-31,Toronto Raptors,Oklahoma City Thunder,True,False,False,True,False,True,False,False
107,2021-03-31,Sacramento Kings,San Antonio Spurs,True,True,True,True,True,True,True,True
108,2021-03-31,Milwaukee Bucks,Los Angeles Lakers,True,False,False,True,True,True,False,False


In [20]:
future_predictions.to_excel('future_predictions.xlsx')