# Logestic Regression Model

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sqlalchemy import create_engine
from config import db_password

In [2]:
db_string = f"postgresql://postgres:{db_password}@127.0.0.1:5433/nba_champs"
engine = create_engine(db_string)

In [3]:
stats_df = pd.read_sql("select * from \"stats\"", db_string);
stats_df.head()

Unnamed: 0,index,Position,Season,Tm,G,W,L,Win_pct,MOV,SOS,...,eFG_pct,TS_pct,TOV_pct,ORB_pct,FT_FGA,Opp_eFG_pct,Opp_TS_pct,Opp_TOV_pct,Opp_ORB_pct,Opp_FT_FGA
0,0,0,2020-21,WAS,72,34,38,0.472,-1.83,-0.01,...,0.531,0.569,12.3,21.3,0.221,0.539,0.576,12.5,22.4,0.217
1,1,0,2020-21,UTA,72,52,20,0.722,9.25,-0.29,...,0.563,0.597,12.7,24.5,0.195,0.507,0.537,10.3,20.7,0.159
2,2,0,2020-21,TOR,72,27,45,0.375,-0.47,-0.07,...,0.529,0.567,11.9,20.8,0.196,0.543,0.584,14.4,23.7,0.234
3,3,0,2020-21,SAS,72,33,39,0.458,-1.74,0.15,...,0.517,0.554,10.2,20.0,0.192,0.541,0.57,11.8,22.7,0.174
4,4,0,2020-21,SAC,72,31,41,0.431,-3.68,0.23,...,0.549,0.578,12.0,21.3,0.185,0.557,0.591,12.1,25.0,0.199


In [4]:
# Pull out Season, TM and Team_Name into DF
name_df = stats_df[['Season', 'Tm',]]
name_df.head()

Unnamed: 0,Season,Tm
0,2020-21,WAS
1,2020-21,UTA
2,2020-21,TOR
3,2020-21,SAS
4,2020-21,SAC


In [5]:
# Drop LG, Season, TM, Team_Name, G, W, L columns
stats_df.drop(['Season','Tm', 'G', 'W', 'L'], axis=1, inplace=True)

In [6]:
stats_df.head()

Unnamed: 0,index,Position,Win_pct,MOV,SOS,SRS,Pace,ORtg,DRtg,eFG_pct,TS_pct,TOV_pct,ORB_pct,FT_FGA,Opp_eFG_pct,Opp_TS_pct,Opp_TOV_pct,Opp_ORB_pct,Opp_FT_FGA
0,0,0,0.472,-1.83,-0.01,-1.85,104.1,111.2,113.0,0.531,0.569,12.3,21.3,0.221,0.539,0.576,12.5,22.4,0.217
1,1,0,0.722,9.25,-0.29,8.97,98.5,117.6,108.3,0.563,0.597,12.7,24.5,0.195,0.507,0.537,10.3,20.7,0.159
2,2,0,0.375,-0.47,-0.07,-0.54,99.2,112.0,112.5,0.529,0.567,11.9,20.8,0.196,0.543,0.584,14.4,23.7,0.234
3,3,0,0.458,-1.74,0.15,-1.58,98.9,111.0,112.8,0.517,0.554,10.2,20.0,0.192,0.541,0.57,11.8,22.7,0.174
4,4,0,0.431,-3.68,0.23,-3.45,100.0,113.6,117.2,0.549,0.578,12.0,21.3,0.185,0.557,0.591,12.1,25.0,0.199


In [7]:
# Remove Position target from features data
y = stats_df.Position
X = stats_df.drop(columns=["Position"])

# Split training/test datasets
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1, stratify=y)

# Create a StandardScaler instance
scaler = StandardScaler()

# Fit the StandardScaler
X_scaler = scaler.fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [8]:
# Create a logistic regression model
classifier = LogisticRegression(max_iter=1000)
classifier

LogisticRegression(max_iter=1000)

In [9]:
# Fit (train) our model by using the training data
classifier.fit(X_train, y_train)

LogisticRegression(max_iter=1000)

In [10]:
# Validate the model by using the test data
print(f"Training Data Score: {classifier.score(X_train, y_train)}")
print(f"Testing Data Score: {classifier.score(X_test, y_test)}")

Training Data Score: 0.9670958512160229
Testing Data Score: 0.9743589743589743
