In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sqlalchemy import create_engine
from config import db_password
import psycopg2

In [None]:
# Connection URL for the PostgreSQL server at 127.0.0.1:5433, database
# "nba_champs"; the password comes from the local `config` module instead of
# being written into the notebook.
db_string = f"postgresql://postgres:{db_password}@127.0.0.1:5433/nba_champs"
# SQLAlchemy engine built from that URL.
# NOTE(review): `engine` is not referenced again in the visible cells; the
# table read that follows passes `db_string` directly.
engine = create_engine(db_string)

In [None]:
# Read every row of the "stats" table into a DataFrame (pandas opens the
# connection from the URL string), then preview the first rows.
stats_df = pd.read_sql(sql="select * from \"stats\"", con=db_string)
stats_df.head(n=5)

Definition of Stats: 

    1. Position -- 1 = winner, 0 = not winner --- TARGET
    2. Season -- Season --- Save for reference
    3. Tm -- Team name abbreviation --- Save for reference
    4. Team_Name -- Name --- Save for reference
    5. Lg -- League --- Not needed
    6. G -- Games --- Not needed
    7. W -- Wins --- Not needed
    8. L -- Losses --- Not needed
    9. W/L% -- Win-Loss Percentage --- Needed
    10. MOV -- Margin of Victory --- Needed
    11. SOS -- Strength of Schedule; a rating of strength of schedule. The rating is denominated in points above/below average, where zero is average. --- Needed
    12. SRS -- Simple Rating System; a team rating that takes into account average point differential and strength of schedule. The rating is denominated in points above/below average, where zero is average. --- Needed
    13. Pace -- Pace Factor: An estimate of possessions per 48 minutes --- Needed
    14. ORtg -- Offensive Rating: an estimate of points produced (players) or scored (teams) per 100 possessions. --- Needed
    15. DRtg -- Defensive Rating: an estimate of points allowed per 100 possessions. --- Needed
    16. eFG% -- Effective Field Goal Percentage: this statistic adjusts for the fact that a 3-point field goal is worth one more point than a 2-point field goal. --- Needed
    17. TS% -- True Shooting Percentage: a measure of shooting efficiency that takes into account 2-point field goals, 3-point field goals, and free throws. --- Needed
    18. TOV% -- Turnover Percentage: an estimate of turnovers committed per 100 plays. --- Needed
    19. ORB% -- Offensive Rebound Percentage: an estimate of the percentage of available offensive rebounds the team grabbed. --- Needed
    20. FT/FGA -- Free Throws Per Field Goal Attempt --- Needed
    21. opp_eFG% -- Opponent Effective Field Goal Percentage --- Needed
    22. opp_TS% -- Opponent True Shooting Percentage --- Needed
    23. opp_TOV% -- Opponent Turnover Percentage --- Needed
    24. opp_ORB% -- Opponent Offensive Rebound Percentage --- Needed
    25. opp_FT/FGA -- Opponent Free Throws Per Field Goal Attempt --- Needed

In [None]:
# Keep the identifying columns (season, team abbreviation, team name) in their
# own frame for later reference, then preview it.
name_df = stats_df.loc[:, ['Season', 'Tm', 'Team_Name']]
name_df.head(n=5)

In [None]:
# Discard the league/identifier columns (Lg, Season, Tm, Team_Name) and the
# raw game counts (G, W, L); every other column is kept.
stats_df = stats_df.drop(columns=['Lg', 'Season', 'Tm', 'Team_Name', 'G', 'W', 'L'])

In [None]:
# Preview the frame after the column drop.
stats_df.head(n=5)

In [None]:
# The Position column is the target; every remaining column is a feature.
y = stats_df["Position"]
X = stats_df.drop(labels="Position", axis="columns")

# Hold out a test set (default split size), stratified on the target, with a
# fixed random_state so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, stratify=y, random_state=1
)

# Fit the standardizer on the training partition only.
scaler = StandardScaler()
X_scaler = scaler.fit(X_train)

# Apply the training-set scaling to both partitions.
X_train_scaled, X_test_scaled = (
    X_scaler.transform(split) for split in (X_train, X_test)
)

In [None]:
# Create a logistic regression model with an explicit iteration limit of 1000
# for the solver; the bare name on the last line displays the estimator.
classifier = LogisticRegression(max_iter=1000)
classifier

In [None]:
# Fit (train) our model by using the training data
classifier.fit(X_train, y_train)

In [None]:
# Validate the model by using the test data
print(f"Training Data Score: {classifier.score(X_train, y_train)}")
print(f"Testing Data Score: {classifier.score(X_test, y_test)}")

## Import new data set

In [None]:
# Load the current-year stats from the CSV (path is relative to the working
# directory) and preview the first rows.
cy_df = pd.read_csv('cy_stats.csv')
cy_df.head(n=5)

In [None]:
# Keep the season and team abbreviation in a separate frame for reference,
# then preview it.
cy_name_df = cy_df.loc[:, ['Season', 'Tm']]
cy_name_df.head(n=5)

In [None]:
# Discard the league, season and team-abbreviation columns.
cy_df = cy_df.drop(columns=['Lg', 'Season', 'Tm'])

In [None]:
# Discard the raw games / wins / losses counts.
cy_df = cy_df.drop(columns=['G', 'W', 'L'])

In [None]:
# Preview the current-year frame after the column drops.
cy_df.head(n=5)

In [None]:
# Separate the Position target from the current-year feature columns.
y1 = cy_df["Position"]
X1 = cy_df.drop(labels="Position", axis="columns")

In [None]:
# Preview the current-year feature matrix.
X1.head(n=5)

In [None]:
# Display the current-year target values (the Position column).
y1

In [None]:
# Hold out a test set from the current-year data (default split size, no
# stratification), with a fixed random_state so the split is repeatable.
X1_train, X1_test, y1_train, y1_test = train_test_split(
    X1, y1, random_state=1
)

# Fresh standardizer, fitted on the current-year training partition only.
scaler = StandardScaler()
X1_scaler = scaler.fit(X1_train)

# Apply that scaling to both partitions.
X1_train_scaled, X1_test_scaled = (
    X1_scaler.transform(split) for split in (X1_train, X1_test)
)

In [None]:
# Create a logistic regression model
# NOTE(review): this rebinds `classifier`, so the model fitted on the
# historical stats earlier in the notebook is no longer reachable by name.
classifier = LogisticRegression(max_iter=1000)
classifier

In [None]:
# Fit (train) the model on the standardized current-year training features.
# Fitting on the raw X1_train bypassed the scaler fitted in the previous cell
# and left X1_train_scaled unused.
classifier.fit(X1_train_scaled, y1_train)

In [None]:
# NOTE(review): disabled experiment -- `test_df` is not defined in any visible
# cell, so these lines cannot be re-enabled as written.
# joined_df = test_df.join(stats_df)
# joined_df.head(100)

In [None]:
# full_df = joined_df.join(name_df)
# full_df.head()

In [None]:
# full_df.columns

In [None]:
# full2_df = full_df[['Season',
#        'Tm', 'Team_Name','Prediction', 'Actual', 'Position', 'G', 'W', 'L', 'W/L%', 'MOV', 'SOS',
#        'SRS', 'Pace', 'ORtg', 'DRtg', 'eFG%', 'TS%', 'TOV%', 'ORB%', 'FT/FGA',
#        'opp_eFG%', 'opp_TS%', 'opp_TOV%', 'opp_ORB%', 'opp_FT/FGA' 
# ]]
# full2_df.head()

In [None]:
# full2_df.groupby('Season').sum()

In [None]:
# y = name_df.Team_Name
# X = name_df.drop(columns=["Team_Name"])
# y

In [None]:
# # Split training/test datasets
# X1_train, X1_test, y1_train, y1_test = train_test_split(X, y, random_state=1, stratify=y)

In [None]:
# y1_test