In [17]:
# Libraries being used

import warnings
warnings.simplefilter('ignore')

import numpy as np
import pandas as pd

from sklearn.tree import DecisionTreeClassifier 
from sklearn.model_selection import train_test_split 
from sklearn import metrics
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import AdaBoostClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report

In [18]:
# Loading model dataset and viewing the first 20 lines

model_df = pd.read_csv('model_data.csv')
model_df.head(10)

Unnamed: 0.1,Unnamed: 0,RK,Team,Year,G,W,L,Conference,MP,FG,...,FT%,ORB,DRB,AST,STL,BLK,TOV,PF,PTS,Playoffs
0,0,1,Golden State Warriors,1990,82,37,45,West,240.3,42.5,...,0.809,11.2,29.1,24.1,9.2,6.0,17.3,24.5,116.3,0
1,1,2,Phoenix Suns,1990,82,54,28,West,242.1,43.2,...,0.795,12.8,32.3,25.7,8.1,6.1,15.5,22.3,114.9,1
2,2,3,Denver Nuggets,1990,82,43,39,West,241.5,45.3,...,0.789,14.3,30.9,27.7,9.9,4.0,13.9,25.0,114.6,1
3,3,4,Portland Trail Blazers,1990,82,59,23,West,242.4,43.6,...,0.743,16.5,31.1,25.4,9.1,4.4,16.5,25.0,114.2,1
4,4,5,Orlando Magic,1990,82,18,64,East,241.5,42.2,...,0.756,15.9,30.1,24.3,7.5,3.6,17.2,24.1,110.9,0
5,5,6,Los Angeles Lakers,1990,82,63,19,West,242.1,41.9,...,0.787,13.4,30.0,27.2,8.0,5.4,15.0,21.2,110.7,1
6,6,7,Philadelphia 76ers,1990,82,53,29,East,241.5,41.9,...,0.788,13.5,29.3,23.6,8.4,4.5,14.7,20.7,110.2,1
7,7,8,Boston Celtics,1990,82,52,30,East,240.3,43.5,...,0.832,13.0,33.0,29.5,6.6,5.5,15.3,20.9,110.0,1
8,8,9,Chicago Bulls,1990,82,55,27,East,241.8,43.1,...,0.778,13.1,27.8,26.5,9.9,4.7,15.2,23.2,109.5,1
9,9,10,Indiana Pacers,1990,82,42,40,East,242.4,41.2,...,0.816,11.5,29.1,24.7,6.7,4.3,16.4,24.0,109.3,1


In [19]:
# Loading test dataset and viewing the first 20 lines

test_df = pd.read_csv('test_data.csv')
test_df.head(10)

Unnamed: 0.1,Unnamed: 0,RK,Team,Year,G,W,L,Conference,MP,FG,...,FT%,ORB,DRB,AST,STL,BLK,TOV,PF,PTS,Playoffs
0,903,1,Brooklyn Nets,2021,38,25,13,East,243.3,43.8,...,0.804,8.7,35.6,27.1,6.4,5.3,14.4,19.2,121.1,0
1,904,2,Los Angeles Clippers,2021,39,25,14,West,240.0,41.9,...,0.843,9.5,35.0,24.4,7.0,4.6,13.3,19.4,115.2,0
2,905,3,Milwaukee Bucks,2021,37,23,14,East,240.0,44.4,...,0.74,10.4,37.6,25.8,8.1,4.9,13.5,17.9,119.2,0
3,906,4,Philadelphia 76ers,2021,38,26,12,East,242.0,41.8,...,0.788,10.2,35.6,23.5,8.8,6.2,15.5,19.9,115.3,0
4,907,5,New Orleans Pelicans,2021,38,16,22,West,241.3,42.9,...,0.729,12.1,34.8,25.6,7.2,4.1,14.0,17.6,115.2,0
5,908,6,Utah Jazz,2021,37,28,9,West,240.7,41.3,...,0.781,10.9,37.2,23.9,6.4,5.4,14.4,18.9,116.5,0
6,909,7,Golden State Warriors,2021,38,19,19,West,240.7,41.1,...,0.776,8.0,35.4,27.6,7.9,5.0,14.8,21.7,113.1,0
7,910,8,Denver Nuggets,2021,37,22,15,West,243.4,43.4,...,0.785,10.0,33.7,26.4,8.2,4.4,13.6,19.1,115.5,0
8,911,9,Sacramento Kings,2021,37,15,22,West,240.7,42.9,...,0.723,10.4,32.3,26.0,6.7,4.8,13.7,19.8,115.0,0
9,912,10,Los Angeles Lakers,2021,38,25,13,West,243.3,41.6,...,0.746,9.8,35.5,24.4,7.3,6.1,15.1,19.0,111.2,0


In [20]:
# Prepping data for testing

y = model_df['Playoffs']  # Playoff column only for 1990 - 2020  (labels)
X = model_df.drop(['Unnamed: 0', 'Playoffs', 'Year', 'Team', 'G', 'MP', 'Conference', ], axis = 1) # (Features)


# Split dataset into training set and test set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)  # 70% training and 30% test)

X_train.columns

Index(['RK', 'W', 'L', 'FG', 'FGA', 'FG%', '3P', '3PA', '3P%', '2P', '2PA',
       '2P%', 'FT', 'FTA', 'FT%', 'ORB', 'DRB', 'AST', 'STL', 'BLK', 'TOV',
       'PF', 'PTS'],
      dtype='object')

In [21]:
y_test.value_counts()

1    167
0    104
Name: Playoffs, dtype: int64

In [22]:
X_test.shape

(271, 23)

In [23]:
y_test.shape

(271,)

In [24]:
# Random Forest


#Create a Gaussian Classifier
rfc_model = RandomForestClassifier(n_estimators=100)

#Train the model using the training sets y_pred=clf.predict(X_test)
rfc_model.fit(X_train,y_train)

y_pred = rfc_model.predict(X_test)

print("Random Forest")
print("-------------")

# Model Accuracy, how often is the classifier correct?
print("Accuracy Score:",metrics.accuracy_score(y_test, y_pred) * 100)
print("-------------")

# Confusion Matrix - Shows True Positive, True negative, False Positive, False Negative
print("Confusion Matrix:")
print(metrics.confusion_matrix(y_test, y_pred))
print("-------------")

rfc_predict = rfc_model.predict(X_test)

print("Classification Report")
print(classification_report(y_test, rfc_predict))

Random Forest
-------------
Accuracy Score: 93.35793357933579
-------------
Confusion Matrix:
[[101   3]
 [ 15 152]]
-------------
Classification Report
              precision    recall  f1-score   support

           0       0.87      0.97      0.92       104
           1       0.98      0.91      0.94       167

    accuracy                           0.93       271
   macro avg       0.93      0.94      0.93       271
weighted avg       0.94      0.93      0.93       271



In [25]:
test_df2 = test_df.drop(['Unnamed: 0','Playoffs', 'Year', 'Team', 'G', 'MP', 'Conference'], axis = 1) 

y_predict = rfc_model.predict(test_df2)
print(y_predict)

[1 1 0 1 1 1 0 1 0 0 1 1 0 0 0 1 1 0 1 0 0 0 0 0 0 0 0 0 0 0]


In [26]:
predict_data = test_df[['Team', 'Year', 'Playoffs', "Conference"]]
predict_data['Playoffs'] = y_predict
predict_data

Unnamed: 0,Team,Year,Playoffs,Conference
0,Brooklyn Nets,2021,1,East
1,Los Angeles Clippers,2021,1,West
2,Milwaukee Bucks,2021,0,East
3,Philadelphia 76ers,2021,1,East
4,New Orleans Pelicans,2021,1,West
5,Utah Jazz,2021,1,West
6,Golden State Warriors,2021,0,West
7,Denver Nuggets,2021,1,West
8,Sacramento Kings,2021,0,West
9,Los Angeles Lakers,2021,0,West


In [27]:
predict_2021 = predict_data.query("Playoffs == 1")

predict_2021.sort_values('Conference')

Unnamed: 0,Team,Year,Playoffs,Conference
0,Brooklyn Nets,2021,1,East
3,Philadelphia 76ers,2021,1,East
10,Toronto Raptors,2021,1,East
11,Atlanta Hawks,2021,1,East
15,Chicago Bulls,2021,1,East
18,Charlotte Hornets,2021,1,East
1,Los Angeles Clippers,2021,1,West
4,New Orleans Pelicans,2021,1,West
5,Utah Jazz,2021,1,West
7,Denver Nuggets,2021,1,West


In [28]:
predict_2021.shape

(11, 4)