# Logistic Regression Model

In [19]:
# Silence every Python warning for the rest of the session.
# NOTE(review): this filter is global, so warnings emitted by the libraries
# used in later cells are hidden as well -- consider narrowing it.

import warnings
warnings.simplefilter('ignore')

# Third-party imports. pandas drives the loading and preparation cells below;
# numpy is not referenced in the cells visible here.

import pandas as pd
import numpy as np

### Read in CSV Files

In [20]:
# Load both season tables in one pass.
#   df     <- "80-19" file: the historical seasons used for training/holdout
#   df2020 <- "19-20" file: the 2020 season to be predicted
df, df2020 = (
    pd.read_csv(csv_path)
    for csv_path in (
        "Result/NBA_data_all_80-19.csv",
        "Result/NBA_data_all_19-20.csv",
    )
)

### Set Up Training DataFrame

In [21]:
# In the CSV, a team that reached the playoffs has an asterisk in its name.
# Turn that marker into a 0/1 label, then strip the marker from the name.
#
# NOTE: run this cell once per load of `df`. After the asterisks are stripped,
# re-running it would relabel every team as 0.
#
# `regex=False` makes pandas treat "*" as a literal character regardless of
# the pandas version's default (a lone "*" is not a valid regular expression).
# `na=False` labels a missing team name as 0 instead of raising.
df["playoffs_y_n"] = (
    df["Team"].str.contains("*", regex=False, na=False).astype("int64")
)
df["Team"] = df["Team"].str.replace("*", "", regex=False)

# Discard every row that has a missing value in any column.
df = df.dropna()

# Training data: every season except the held-out ones.
model_df = df.loc[~df["Year"].isin([2017, 2018, 2019, 2020])]

# Split by conference. Each mask is built from the frame being filtered, so
# the selection does not depend on index alignment with the parent `df`.
model_east_df = model_df.loc[model_df["Conf"] == "East"]
model_west_df = model_df.loc[model_df["Conf"] == "West"]

### Set Up Holdout DataFrame

In [22]:
# Holdout data: the seasons excluded from training.
recent_years_df = df.loc[df["Year"].isin([2017, 2018, 2019, 2020])]

# Split by conference, masking with the holdout frame's own "Conf" column.
# `.copy()` gives each conference an independent frame, because a later cell
# adds a "playoff prediction" column to `recent_east` / `recent_west`.
recent_east = recent_years_df.loc[recent_years_df["Conf"] == "East"].copy()
recent_west = recent_years_df.loc[recent_years_df["Conf"] == "West"].copy()

# Feature matrices for the holdout seasons: drop the identifier columns and
# the label, leaving the same columns the models are trained on.
recent_east_test = recent_east.drop(columns=["RK","Team","Year","Conf","playoffs_y_n"])
recent_west_test = recent_west.drop(columns=["RK","Team","Year","Conf","playoffs_y_n"])

### Set Up Prediction DataFrame

In [23]:
# Same labelling as for the historical data: an asterisk in the team name
# becomes a 0/1 flag, then the asterisk is removed.
# `regex=False` treats "*" as a literal character on every pandas version;
# `na=False` labels a missing team name as 0 instead of raising.
# NOTE: run once per load of `df2020` -- re-running after the asterisks are
# stripped would relabel every team as 0.
df2020["playoffs_y_n"] = (
    df2020["Team"].str.contains("*", regex=False, na=False).astype("int64")
)
df2020["Team"] = df2020["Team"].str.replace("*", "", regex=False)
df2020 = df2020.dropna()

# Split the 2020 data into east/west. `.copy()` gives each conference an
# independent frame, because a later cell adds a "playoff prediction" column.
df2020_east = df2020.loc[df2020["Conf"] == "East"].copy()
df2020_west = df2020.loc[df2020["Conf"] == "West"].copy()

# Feature matrices for predicting the 2020 east/west data: drop the
# identifier columns and the label.
prediction_east = df2020_east.drop(columns=["RK","Team","Year","Conf","playoffs_y_n"])
prediction_west = df2020_west.drop(columns=["RK","Team","Year","Conf","playoffs_y_n"])

### Train/Test Split

In [24]:
from sklearn.model_selection import train_test_split

# East: features are everything except the identifier columns and the label.
X_east = model_east_df.drop(columns=["RK","Team","Year","Conf","playoffs_y_n"])
y_east = model_east_df["playoffs_y_n"]

(
    X_train_east,
    X_test_east,
    y_train_east,
    y_test_east,
) = train_test_split(X_east, y_east, random_state=42)

# West: identical recipe, with the same fixed seed for a reproducible split.
X_west = model_west_df.drop(columns=["RK","Team","Year","Conf","playoffs_y_n"])
y_west = model_west_df["playoffs_y_n"]

(
    X_train_west,
    X_test_west,
    y_train_west,
    y_test_west,
) = train_test_split(X_west, y_west, random_state=42)



### Create Logistic Regression Models

In [25]:
from sklearn.linear_model import LogisticRegression

# One default-configured classifier per conference; `fit` returns the
# estimator itself, so construction and training can be chained.
model_east = LogisticRegression().fit(X_train_east, y_train_east)
model_west = LogisticRegression().fit(X_train_west, y_train_west)

# End on the West estimator so the cell still displays its configuration.
model_west

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

In [26]:
# Report each model's score on its own training and testing split.
for heading, clf, X_tr, y_tr, X_te, y_te in (
    ("Model East", model_east, X_train_east, y_train_east, X_test_east, y_test_east),
    ("Model West", model_west, X_train_west, y_train_west, X_test_west, y_test_west),
):
    print(heading)
    print(f"Training Data Score: {clf.score(X_tr, y_tr)}")
    print(f"Testing Data Score: {clf.score(X_te, y_te)}")

Model East
Training Data Score: 0.9396325459317585
Testing Data Score: 0.8976377952755905
Model West
Training Data Score: 0.9656992084432717
Testing Data Score: 0.8740157480314961


### Predict Holdout Seasons and 2020 Playoffs

In [27]:
# Class probabilities for the holdout seasons, one array per conference.
# Column 1 of each array is the probability of the positive class (playoffs).
predictions_east, predictions_west = (
    model_east.predict_proba(recent_east_test),
    model_west.predict_proba(recent_west_test),
)

In [28]:
# Attach the playoff probability (column 1 of predict_proba) to each holdout
# frame. The values are wrapped in a Series carrying the index of the feature
# matrix they were predicted from, so they are matched to teams by row label
# rather than by position. That keeps the cell correct when it is re-run after
# the frames have been sorted, and `assign` builds a new frame instead of
# writing into a slice of `df`.
# Assumes the row labels are unique -- TODO confirm for the loaded CSV.
recent_east = recent_east.assign(
    **{"playoff prediction": pd.Series(predictions_east[:, 1], index=recent_east_test.index)}
)
recent_west = recent_west.assign(
    **{"playoff prediction": pd.Series(predictions_west[:, 1], index=recent_west_test.index)}
)

# Order by season (oldest first), then by probability (most likely first).
recent_east = recent_east.sort_values(by=["Year","playoff prediction"],ascending=[True,False])
recent_west = recent_west.sort_values(by=["Year","playoff prediction"],ascending=[True,False])

In [36]:
# List the column labels of the East holdout frame, including the label and
# the appended "playoff prediction" column.
recent_east.columns

Index(['RK', 'Team', 'Year', 'G', 'W', 'L', 'Conf', 'MP', 'FG', 'FGA', 'FG%',
       '3P', '3PA', '3P%', '2P', '2PA', '2P%', 'FT', 'FTA', 'FT%', 'ORB',
       'DRB', 'AST', 'STL', 'BLK', 'TOV', 'PF', 'PTS', 'playoffs_y_n',
       'playoff prediction'],
      dtype='object')

In [30]:
# Display the West holdout frame, ordered by season and then by descending
# playoff probability.
recent_west

Unnamed: 0,RK,Team,Year,G,W,L,Conf,MP,FG,FGA,...,ORB,DRB,AST,STL,BLK,TOV,PF,PTS,playoffs_y_n,playoff prediction
1014,1,Golden State Warriors,2017,82,67,15,West,241.2,43.1,87.1,...,9.4,35.0,30.4,9.6,6.8,14.8,19.3,115.9,1,1.0
1027,14,San Antonio Spurs,2017,82,61,21,West,241.5,39.3,83.7,...,10.0,33.9,23.8,8.0,5.9,13.4,18.3,105.3,1,0.999996
1015,2,Houston Rockets,2017,82,55,27,West,241.2,40.3,87.2,...,10.9,33.5,25.2,8.2,4.3,15.1,19.9,115.3,1,0.9999821
1019,6,Los Angeles Clippers,2017,82,51,31,West,240.9,39.5,83.2,...,9.0,34.0,22.5,7.5,4.2,13.0,19.8,108.7,1,0.9936505
1041,28,Utah Jazz,2017,82,51,31,West,240.9,37.0,79.5,...,9.4,33.8,20.1,6.7,5.0,13.6,18.8,100.7,1,0.9308043
1024,11,Oklahoma City Thunder,2017,82,47,35,West,241.5,39.5,87.4,...,12.2,34.4,21.0,7.9,5.0,15.0,20.9,106.6,1,0.8463719
1042,29,Memphis Grizzlies,2017,82,43,39,West,242.7,36.4,83.6,...,10.8,32.0,21.3,8.0,4.2,12.9,22.4,100.5,1,0.8076295
1016,3,Denver Nuggets,2017,82,40,42,West,240.9,41.2,87.7,...,11.8,34.6,25.3,6.9,3.9,15.0,19.1,111.7,0,0.1133497
1021,8,Portland Trail Blazers,2017,82,41,41,West,243.0,39.5,86.1,...,10.1,33.5,21.1,7.0,5.0,13.7,21.2,107.9,1,0.07002903
1031,18,New Orleans Pelicans,2017,82,34,48,West,242.7,39.1,87.0,...,8.6,35.1,22.8,7.8,5.5,12.9,18.2,104.3,0,0.007617332


In [31]:
# Class probabilities for the 2020 season, one array per conference.
# Column 1 of each array is the probability of qualifying for the playoffs.
predictions_east_2020, predictions_west_2020 = (
    model_east.predict_proba(prediction_east),
    model_west.predict_proba(prediction_west),
)


In [32]:
# Attach the 2020 playoff probability (column 1 of predict_proba) to each
# conference frame. The values are wrapped in a Series carrying the index of
# the feature matrix they were predicted from, so they are matched to teams by
# row label rather than by position. That keeps the cell correct when it is
# re-run after the frames have been sorted, and `assign` builds a new frame
# instead of writing into a slice of `df2020`.
# Assumes the row labels are unique -- TODO confirm for the loaded CSV.
df2020_east = df2020_east.assign(
    **{"playoff prediction": pd.Series(predictions_east_2020[:, 1], index=prediction_east.index)}
)
df2020_west = df2020_west.assign(
    **{"playoff prediction": pd.Series(predictions_west_2020[:, 1], index=prediction_west.index)}
)

In [33]:
# Rank each conference from most to least likely to make the playoffs.
df2020_east = df2020_east.sort_values("playoff prediction", ascending=False)
df2020_west = df2020_west.sort_values("playoff prediction", ascending=False)

In [34]:
# Display the 2020 East teams, ranked by descending playoff probability.
df2020_east

Unnamed: 0,RK,Team,Year,G,W,L,Conf,MP,FG,FGA,...,ORB,DRB,AST,STL,BLK,TOV,PF,PTS,playoffs_y_n,playoff prediction
1,2,Milwaukee Bucks,2020,65,53,12,East,240.8,43.5,91.2,...,9.5,42.2,25.9,7.4,6.0,14.9,19.2,118.6,1,1.0
13,14,Toronto Raptors,2020,64,46,18,East,241.6,40.6,88.5,...,9.7,35.5,25.4,8.8,4.9,14.4,21.5,113.0,1,0.999998
12,13,Boston Celtics,2020,64,43,21,East,242.0,41.2,89.6,...,10.7,35.3,22.8,8.3,5.6,13.6,21.4,113.0,1,0.999985
10,11,Miami Heat,2020,65,41,24,East,243.5,39.6,84.4,...,8.5,36.0,26.0,7.4,4.5,14.9,20.4,112.2,0,0.999825
18,19,Indiana Pacers,2020,65,39,26,East,241.5,42.2,88.4,...,8.8,34.0,25.9,7.2,5.1,13.1,19.6,109.3,0,0.999236
17,18,Philadelphia 76ers,2020,65,39,26,East,241.2,40.8,87.7,...,10.4,35.1,25.9,8.2,5.4,14.2,20.6,109.6,0,0.998713
21,22,Brooklyn Nets,2020,64,30,34,East,243.1,40.0,90.0,...,10.8,37.6,24.0,6.5,4.6,15.5,20.7,110.8,0,0.576703
27,28,Orlando Magic,2020,65,30,35,East,240.4,39.2,88.8,...,10.4,34.2,24.0,8.4,5.7,12.6,17.6,106.4,0,0.313626
7,8,Washington Wizards,2020,64,24,40,East,241.2,41.9,91.0,...,10.1,31.6,25.3,8.1,4.3,14.1,22.6,115.6,0,0.000724
29,30,Charlotte Hornets,2020,65,23,42,East,242.3,37.3,85.9,...,11.0,31.8,23.8,6.6,4.1,14.6,18.8,102.9,0,0.000447


In [35]:
# Display the 2020 West teams, ranked by descending playoff probability.
df2020_west

Unnamed: 0,RK,Team,Year,G,W,L,Conf,MP,FG,FGA,...,ORB,DRB,AST,STL,BLK,TOV,PF,PTS,playoffs_y_n,playoff prediction
14,15,Los Angeles Lakers,2020,63,49,14,West,240.8,42.9,88.6,...,10.6,35.5,25.9,8.6,6.8,15.1,20.6,114.3,1,0.999999
15,16,Denver Nuggets,2020,65,43,22,West,242.3,41.8,88.9,...,10.8,33.5,26.5,8.1,4.6,13.7,20.0,110.4,0,0.999932
6,7,Los Angeles Clippers,2020,64,44,20,West,241.2,41.6,89.7,...,11.0,37.0,23.8,7.1,5.0,14.8,22.0,116.2,0,0.999583
2,3,Houston Rockets,2020,64,40,24,West,241.2,41.1,90.7,...,10.4,34.6,21.5,8.5,5.1,14.7,21.6,118.1,0,0.999154
20,21,Oklahoma City Thunder,2020,64,40,24,West,241.6,40.3,85.1,...,8.1,34.6,21.9,7.6,5.0,13.5,18.8,110.8,0,0.994597
0,1,Dallas Mavericks,2020,67,40,27,West,241.5,41.6,90.0,...,10.6,36.4,24.5,6.3,5.0,12.8,19.0,116.4,0,0.985878
19,20,Utah Jazz,2020,64,41,23,West,240.4,40.1,84.6,...,8.8,36.3,22.2,5.9,4.0,14.9,20.0,111.0,0,0.949979
8,9,Memphis Grizzlies,2020,65,32,33,West,240.4,42.8,91.0,...,10.4,36.3,27.0,8.0,5.6,15.3,20.8,112.6,0,0.889463
5,6,New Orleans Pelicans,2020,64,28,36,West,242.3,42.6,92.2,...,11.2,35.8,27.0,7.6,5.1,16.2,21.0,116.2,0,0.56205
9,10,Phoenix Suns,2020,65,26,39,West,241.2,40.8,87.8,...,9.8,33.3,27.2,7.8,4.0,15.1,22.1,112.6,0,0.478635
