In [8]:
# Random Forest Model
# Import our dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import OneHotEncoder
import pandas as pd
import tensorflow as tf

# Load the historical team-stats dataset (CSV expected in the working directory)
stats_df = pd.read_csv(filepath_or_buffer='stats_target_clean2.csv')

# Preview the first five rows
stats_df.head(n=5)

Unnamed: 0,Position,Season,Tm,Team_Name,Lg,G,W,L,W/L%,MOV,...,eFG%,TS%,TOV%,ORB%,FT/FGA,opp_eFG%,opp_TS%,opp_TOV%,opp_ORB%,opp_FT/FGA
0,0,2020-21,WAS,Washington Wizards,NBA,72,34,38,0.472,-1.83,...,0.531,0.569,12.3,21.3,0.221,0.539,0.576,12.5,22.4,0.217
1,0,2020-21,UTA,Utah Jazz,NBA,72,52,20,0.722,9.25,...,0.563,0.597,12.7,24.5,0.195,0.507,0.537,10.3,20.7,0.159
2,0,2020-21,TOR,Toronto Raptors,NBA,72,27,45,0.375,-0.47,...,0.529,0.567,11.9,20.8,0.196,0.543,0.584,14.4,23.7,0.234
3,0,2020-21,SAS,San Antonio Spurs,NBA,72,33,39,0.458,-1.74,...,0.517,0.554,10.2,20.0,0.192,0.541,0.57,11.8,22.7,0.174
4,0,2020-21,SAC,Sacramento Kings,NBA,72,31,41,0.431,-3.68,...,0.549,0.578,12.0,21.3,0.185,0.557,0.591,12.1,25.0,0.199


In [9]:
# Keep the identifying columns (Season, Tm, Team_Name) in their own frame
# before they are removed from the modelling data.
id_columns = ['Season', 'Tm', 'Team_Name']
name_df = stats_df.loc[:, id_columns]
name_df.head(n=5)

Unnamed: 0,Season,Tm,Team_Name
0,2020-21,WAS,Washington Wizards
1,2020-21,UTA,Utah Jazz
2,2020-21,TOR,Toronto Raptors
3,2020-21,SAS,San Antonio Spurs
4,2020-21,SAC,Sacramento Kings


In [10]:
# Drop Lg, Season, Tm, Team_Name, G, W, L columns.
# Rebind to a new frame instead of mutating in place, and tolerate columns that
# are already gone, so re-running this cell does not raise a KeyError.
stats_df = stats_df.drop(
    columns=['Lg', 'Season', 'Tm', 'Team_Name', 'G', 'W', 'L'],
    errors='ignore',
)

In [11]:
# Preview the first five rows of the remaining columns
stats_df.head(n=5)

Unnamed: 0,Position,W/L%,MOV,SOS,SRS,Pace,ORtg,DRtg,eFG%,TS%,TOV%,ORB%,FT/FGA,opp_eFG%,opp_TS%,opp_TOV%,opp_ORB%,opp_FT/FGA
0,0,0.472,-1.83,-0.01,-1.85,104.1,111.2,113.0,0.531,0.569,12.3,21.3,0.221,0.539,0.576,12.5,22.4,0.217
1,0,0.722,9.25,-0.29,8.97,98.5,117.6,108.3,0.563,0.597,12.7,24.5,0.195,0.507,0.537,10.3,20.7,0.159
2,0,0.375,-0.47,-0.07,-0.54,99.2,112.0,112.5,0.529,0.567,11.9,20.8,0.196,0.543,0.584,14.4,23.7,0.234
3,0,0.458,-1.74,0.15,-1.58,98.9,111.0,112.8,0.517,0.554,10.2,20.0,0.192,0.541,0.57,11.8,22.7,0.174
4,0,0.431,-3.68,0.23,-3.45,100.0,113.6,117.2,0.549,0.578,12.0,21.3,0.185,0.557,0.591,12.1,25.0,0.199


In [12]:
# Separate the Position target from the feature columns
y = stats_df["Position"]
X = stats_df.drop("Position", axis=1)

# Hold out a stratified test split (seeded so the split is repeatable)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, stratify=y, random_state=1
)

# Standardize the features using statistics learned from the training split only.
# fit() returns the scaler itself, so `scaler` and `X_scaler` are the same object.
scaler = StandardScaler()
X_scaler = scaler.fit(X_train)

# Apply the fitted scaler to both splits
X_train_scaled, X_test_scaled = (
    X_scaler.transform(split) for split in (X_train, X_test)
)

In [13]:
# Build a 128-tree random forest (fixed seed) and train it on the scaled
# training split; fit() returns the fitted estimator.
rf_model = RandomForestClassifier(
    random_state=78,
    n_estimators=128,
).fit(X_train_scaled, y_train)

# Score the held-out split and report accuracy
y_pred = rf_model.predict(X_test_scaled)
print(f" Random forest predictive accuracy: {accuracy_score(y_test,y_pred):.3f}")

 Random forest predictive accuracy: 0.970


In [26]:
# Predict WAS stats from cy_stats.csv
# The forest was trained on standardized features, so the raw stat line has to go
# through the already-fitted scaler before prediction. Use transform() only:
# calling fit() here would re-fit the shared scaler on this single row and throw
# away the statistics learned from the training split.
WAS_scale = X_scaler.transform([[
    0.714, 2.57, 1.11, 3.68, 99, 108.8, 106.3, 0.505, 0.549,
    11.5, 20.4, 0.213, 0.492, 0.535, 11, 20.5, 0.197,
]])
rf_model.predict(WAS_scale)

array([0], dtype=int64)

In [27]:
# Predict MIA stats from cy_stats.csv
# The model was trained on standardized features, so scale the raw stat line with
# the fitted scaler (transform only, no re-fit) before predicting.
rf_model.predict(X_scaler.transform([[
    0.833, 17, 1.07, 18.07, 99.9, 112.2, 95.5, 0.525, 0.565,
    13.6, 27.5, 0.197, 0.464, 0.501, 13.4, 15.9, 0.181,
]]))

array([0], dtype=int64)

In [31]:
# Predict UTA stats from cy_stats.csv
# The model was trained on standardized features, so scale the raw stat line with
# the fitted scaler (transform only, no re-fit) before predicting.
rf_model.predict(X_scaler.transform([[
    0.833, 12.83, -3.08, 9.76, 99.3, 111.9, 99, 0.526, 0.574,
    14.2, 24.6, 0.236, 0.45, 0.485, 10.8, 23.1, 0.164,
]]))

array([0], dtype=int64)

In [30]:
# Predict TOR stats from cy_stats.csv
# The model was trained on standardized features, so scale the raw stat line with
# the fitted scaler (transform only, no re-fit) before predicting.
rf_model.predict(X_scaler.transform([[
    0.625, 4.63, 1.25, 5.88, 97.6, 107.4, 102.7, 0.487, 0.518,
    12.1, 29.8, 0.158, 0.521, 0.554, 16.8, 20.6, 0.178,
]]))

array([0], dtype=int64)

In [29]:
# Predict SAS stats from cy_stats.csv
# The model was trained on standardized features, so scale the raw stat line with
# the fitted scaler (transform only, no re-fit) before predicting.
rf_model.predict(X_scaler.transform([[
    0.286, -0.43, -1.5, -1.93, 101.7, 106.6, 107, 0.526, 0.542,
    12.2, 22.2, 0.113, 0.549, 0.568, 15.2, 21.4, 0.13,
]]))

array([0], dtype=int64)

In [28]:
# Predict SAC stats from cy_stats.csv
# The model was trained on standardized features, so scale the raw stat line with
# the fitted scaler (transform only, no re-fit) before predicting.
rf_model.predict(X_scaler.transform([[
    0.5, -2.83, -1.06, -3.89, 101.1, 107.8, 110.6, 0.508, 0.543,
    12.1, 21.7, 0.177, 0.504, 0.54, 10.1, 24.7, 0.187,
]]))

array([0], dtype=int64)

# Import New data set

In [15]:
# Load the current-year team stats (CSV expected in the working directory)
cy_df = pd.read_csv(filepath_or_buffer='cy_stats.csv')

# Preview the first five rows
cy_df.head(n=5)

Unnamed: 0,Position,Season,Tm,Lg,G,W,L,W/L%,MOV,SOS,...,eFG%,TS%,TOV%,ORB%,FT/FGA,opp_eFG%,opp_TS%,opp_TOV%,opp_ORB%,opp_FT/FGA
0,0,2021-22,MIA,NBA,6,5,1,0.833,17.0,1.07,...,0.525,0.565,13.6,27.5,0.197,0.464,0.501,13.4,15.9,0.181
1,0,2021-22,WAS,NBA,7,5,2,0.714,2.57,1.11,...,0.505,0.549,11.5,20.4,0.213,0.492,0.535,11.0,20.5,0.197
2,0,2021-22,UTA,NBA,6,5,1,0.833,12.83,-3.08,...,0.526,0.574,14.2,24.6,0.236,0.45,0.485,10.8,23.1,0.164
3,0,2021-22,TOR,NBA,8,5,3,0.625,4.63,1.25,...,0.487,0.518,12.1,29.8,0.158,0.521,0.554,16.8,20.6,0.178
4,0,2021-22,SAS,NBA,7,2,5,0.286,-0.43,-1.5,...,0.526,0.542,12.2,22.2,0.113,0.549,0.568,15.2,21.4,0.13


In [16]:
# Keep the identifying columns (Season, Tm) in their own frame before they are
# removed from the modelling data.
cy_id_columns = ['Season', 'Tm']
cy_name_df = cy_df.loc[:, cy_id_columns]
cy_name_df.head(n=5)

Unnamed: 0,Season,Tm
0,2021-22,MIA
1,2021-22,WAS
2,2021-22,UTA
3,2021-22,TOR
4,2021-22,SAS


In [17]:
# Drop Lg, Season, and Tm columns.
# Rebind to a new frame instead of mutating in place, and tolerate columns that
# are already gone, so re-running this cell does not raise a KeyError.
cy_df = cy_df.drop(columns=['Lg', 'Season', 'Tm'], errors='ignore')

In [19]:
# Drop G, W, and L columns.
# Rebind to a new frame instead of mutating in place, and tolerate columns that
# are already gone, so re-running this cell does not raise a KeyError.
cy_df = cy_df.drop(columns=['G', 'W', 'L'], errors='ignore')

In [20]:
# Preview the first five rows of the remaining columns
cy_df.head(n=5)

Unnamed: 0,Position,W/L%,MOV,SOS,SRS,Pace,ORtg,DRtg,eFG%,TS%,TOV%,ORB%,FT/FGA,opp_eFG%,opp_TS%,opp_TOV%,opp_ORB%,opp_FT/FGA
0,0,0.833,17.0,1.07,18.07,99.9,112.2,95.5,0.525,0.565,13.6,27.5,0.197,0.464,0.501,13.4,15.9,0.181
1,0,0.714,2.57,1.11,3.68,99.0,108.8,106.3,0.505,0.549,11.5,20.4,0.213,0.492,0.535,11.0,20.5,0.197
2,0,0.833,12.83,-3.08,9.76,99.3,111.9,99.0,0.526,0.574,14.2,24.6,0.236,0.45,0.485,10.8,23.1,0.164
3,0,0.625,4.63,1.25,5.88,97.6,107.4,102.7,0.487,0.518,12.1,29.8,0.158,0.521,0.554,16.8,20.6,0.178
4,0,0.286,-0.43,-1.5,-1.93,101.7,106.6,107.0,0.526,0.542,12.2,22.2,0.113,0.549,0.568,15.2,21.4,0.13


In [21]:
# Separate the Position target from the current-year feature columns.
# NOTE(review): this cell rebinds X, y, the train/test splits, scaler and X_scaler,
# which the earlier stats_df cells also define; any cell run after this one sees
# the current-year versions. Confirm that is intended.
y = cy_df["Position"]
X = cy_df.drop("Position", axis=1)

# Hold out a stratified test split (seeded so the split is repeatable)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, stratify=y, random_state=1
)

# Standardize the features using statistics learned from the training split only.
# fit() returns the scaler itself, so `scaler` and `X_scaler` are the same object.
scaler = StandardScaler()
X_scaler = scaler.fit(X_train)

# Apply the fitted scaler to both splits
X_train_scaled, X_test_scaled = (
    X_scaler.transform(split) for split in (X_train, X_test)
)

In [22]:
# Build a 128-tree random forest (fixed seed) and train it on whatever
# X_train_scaled / y_train currently hold; fit() returns the fitted estimator.
# NOTE(review): this rebinds rf_model and y_pred, replacing the objects created by
# the earlier model cell. Confirm that overwriting the first model is intended.
rf_model = RandomForestClassifier(
    random_state=78,
    n_estimators=128,
).fit(X_train_scaled, y_train)

# Score the held-out split and report accuracy
y_pred = rf_model.predict(X_test_scaled)
print(f" Random forest predictive accuracy: {accuracy_score(y_test,y_pred):.3f}")

 Random forest predictive accuracy: 1.000
