In [1]:
from nba_api.stats.static import players
from nba_api.stats.endpoints import (
    PlayerGameLog,
    BoxScoreTraditionalV2,
    BoxScoreAdvancedV2,
    DraftCombinePlayerAnthro,
    CommonPlayerInfo
)
import pandas as pd


In [2]:
# Look the player up by name so the cell does not depend on a hard-coded id.
PLAYER_NAME = "Stephen Curry"
matches = players.find_players_by_full_name(PLAYER_NAME)
if not matches:
    # Fail with a readable message instead of a bare IndexError on `[0]`.
    raise ValueError(f"No player found matching {PLAYER_NAME!r}")
pid = matches[0]["id"]

print(pid)

# Per-game playoff log for the 2016-17 season; the endpoint's first data frame
# is the one used here.
logs = PlayerGameLog(
    player_id=pid,
    season="2016-17",
    season_type_all_star="Playoffs"
).get_data_frames()[0]

# GSW played CLE in the 2017 Finals, so filtering MATCHUP on the opponent code
# keeps the games of that series.
finals_logs = logs[logs["MATCHUP"].str.contains("CLE")]
print(finals_logs)

201939
  SEASON_ID  Player_ID     Game_ID     GAME_DATE      MATCHUP WL  MIN  FGM  \
0     42016     201939  0041600405  Jun 12, 2017  GSW vs. CLE  W   41   10   
1     42016     201939  0041600404  Jun 09, 2017    GSW @ CLE  L   38    4   
2     42016     201939  0041600403  Jun 07, 2017    GSW @ CLE  W   39    8   
3     42016     201939  0041600402  Jun 04, 2017  GSW vs. CLE  W   36    7   
4     42016     201939  0041600401  Jun 01, 2017  GSW vs. CLE  W   34   11   

   FGA  FG_PCT  ...  DREB  REB  AST  STL  BLK  TOV  PF  PTS  PLUS_MINUS  \
0   20   0.500  ...     4    6   10    3    0    4   3   34           3   
1   13   0.308  ...     4    5   10    2    0    4   2   14         -24   
2   19   0.421  ...     8   13    6    2    0    1   3   26          11   
3   17   0.412  ...     8   10   11    1    0    8   3   32          21   
4   22   0.500  ...     5    6   10    3    0    2   3   28          20   

   VIDEO_AVAILABLE  
0                1  
1                1  
2         

In [1]:
# Scratch expression; presumably just a quick check that the kernel runs cells.
1 + 1 

2

In [2]:
import sqlite3
import pandas as pd
import os
from contextlib import closing

# Location of the SQLite database, relative to the notebook's working directory.
db_path = os.path.join("..", "BALL.db")

print("CWD:", os.getcwd())
print("DB exists:", os.path.exists(db_path), "->", db_path)

# sqlite3.connect() creates a new, empty database file when the path does not
# exist, so stop here rather than querying a freshly created empty file.
if not os.path.exists(db_path):
    raise FileNotFoundError(f"SQLite database not found: {db_path}")

# A sqlite3 connection used directly in `with` only commits/rolls back the
# transaction; closing() is what actually releases the file handle on exit.
with closing(sqlite3.connect(db_path)) as conn:
    # sqlite_master is SQLite's schema catalog; list the names of all tables.
    tables = pd.read_sql_query(
        "SELECT name FROM sqlite_master WHERE type='table';",
        conn
    )

tables  # last line -> VS Code will display this DataFrame under the cell


CWD: c:\Users\nevat\OneDrive\Documents\GitHub\BALL\Machine_Learning
DB exists: True -> ..\BALL.db


Unnamed: 0,name
0,players
1,anthro
2,injury_list
3,player_injury_profile


In [4]:
import sqlite3
import pandas as pd
import os
from contextlib import closing

# Location of the SQLite database, relative to the notebook's working directory.
db_path = os.path.join("..", "BALL.db")

# A sqlite3 connection used directly in `with` only commits/rolls back the
# transaction; closing() is what actually releases the file handle on exit.
with closing(sqlite3.connect(db_path)) as conn:
    # Load the whole player_injury_profile table into memory.
    df = pd.read_sql_query("SELECT * FROM player_injury_profile;", conn)

# Only the last expression of a cell is rendered, so the column index is the
# single thing shown here (a bare `df.head()` above it would display nothing).
df.columns


Index(['TEMP_PLAYER_ID', 'PLAYER_ID', 'FIRST_NAME', 'LAST_NAME', 'PLAYER_NAME',
       'POSITION', 'HEIGHT_WO_SHOES', 'HEIGHT_WO_SHOES_FT_IN',
       'HEIGHT_W_SHOES', 'HEIGHT_W_SHOES_FT_IN', 'WEIGHT', 'WINGSPAN',
       'WINGSPAN_FT_IN', 'STANDING_REACH', 'STANDING_REACH_FT_IN',
       'BODY_FAT_PCT', 'HAND_LENGTH', 'HAND_WIDTH', 'SEASON_YEAR', 'player',
       'total_days_out', 'injury_count'],
      dtype='object')

In [5]:
import sqlite3
import pandas as pd
import os
from contextlib import closing

# Location of the SQLite database, relative to the notebook's working directory.
db_path = os.path.join("..", "BALL.db")

# A sqlite3 connection used directly in `with` only commits/rolls back the
# transaction; closing() is what actually releases the file handle on exit.
with closing(sqlite3.connect(db_path)) as conn:
    # Load the whole player_injury_profile table into memory.
    df = pd.read_sql_query("SELECT * FROM player_injury_profile;", conn)

# Only the last expression of a cell is rendered: (rows, columns) of the table.
df.shape


(1795, 22)

In [None]:
# Column the model is trained to predict.
TARGET_COL = "total_days_out"

# Rows without a target value cannot be used for supervised training. The
# explicit copy makes df_model independent of the loaded frame `df`.
df_model = df.dropna(subset=[TARGET_COL]).copy()

y = df_model[TARGET_COL]

# Use all numeric columns except the target as features. Selecting on the
# generic "number" kind instead of a hard-coded int64/float64 list also covers
# other numeric widths; drop() raises KeyError if the target is not numeric.
X = df_model.select_dtypes(include="number").drop(columns=[TARGET_COL])
numeric_cols = X.columns.tolist()

# NOTE(review): the output recorded for this cell shows `injury_count` as the
# only feature column, so the body-measurement columns are presumably not
# stored with a numeric dtype -- confirm, and convert them if they are meant to
# be features. Also verify that using injury_count to predict total_days_out
# is intended, since both look like summaries of the same injury history.
X.head(), y.head()


(   injury_count
 0          24.0
 1           0.0
 2           0.0
 3           0.0
 4          13.0,
 0    317.0
 1      0.0
 2      0.0
 3      0.0
 4    494.0
 Name: total_days_out, dtype: float64)

In [11]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_absolute_error, root_mean_squared_error, r2_score

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Two-step pipeline: fill missing feature values with the column median, then
# fit a 200-tree random forest using every available core.
median_imputer = SimpleImputer(strategy="median")
forest = RandomForestRegressor(n_estimators=200, random_state=42, n_jobs=-1)
model = Pipeline(steps=[("imputer", median_imputer), ("rf", forest)])

# fit() returns the pipeline itself, so training and prediction can be chained.
y_pred = model.fit(X_train, y_train).predict(X_test)

# Score the held-out predictions.
mae = mean_absolute_error(y_test, y_pred)
rmse = root_mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

for report_line in (
    f"MAE:  {mae:.1f} days",
    f"RMSE: {rmse:.1f} days",
    f"R²:   {r2:.3f}",
):
    print(report_line)


MAE:  23.1 days
RMSE: 56.6 days
R²:   0.598
