In [158]:
import team_individual_stat
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import time
from nba_api.stats.endpoints import playergamelog

In [159]:
!pip install nba_api
!pip install seaborn

Defaulting to user installation because normal site-packages is not writeable

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.3.2[0m[39;49m -> [0m[32;49m25.0.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49m/Applications/Xcode.app/Contents/Developer/usr/bin/python3 -m pip install --upgrade pip[0m
Defaulting to user installation because normal site-packages is not writeable

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.3.2[0m[39;49m -> [0m[32;49m25.0.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49m/Applications/Xcode.app/Contents/Developer/usr/bin/python3 -m pip install --upgrade pip[0m


## RETRIEVE DATA FOR ML PART

In [270]:
player_id = 201939  # Stephen Curry
opponent_abbreviation = 'SUNS'  # Denver Nuggets

# Fetching Curry's performance against Denver Nuggets for multiple seasons
curry_vs_denver_df_2425 = team_individual_stat.fetch_player_vs_team_stats(player_id, '2024-25', opponent_abbreviation)
curry_vs_denver_df_2324 = team_individual_stat.fetch_player_vs_team_stats(player_id, '2023-24', opponent_abbreviation)
curry_vs_denver_df_2223 = team_individual_stat.fetch_player_vs_team_stats(player_id, '2022-23', opponent_abbreviation)


In [271]:
curry_vs_denver_all = pd.concat([curry_vs_denver_df_2425, curry_vs_denver_df_2324, curry_vs_denver_df_2223], ignore_index=True)

In [272]:
curry_vs_denver_all.head()

Unnamed: 0,SEASON_ID,Player_ID,Game_ID,GAME_DATE,MATCHUP,WL,MIN,FGM,FGA,FG_PCT,...,AST,STL,BLK,TOV,PF,PTS,PLUS_MINUS,VIDEO_AVAILABLE,SEASON,SEASON_TYPE


In [273]:
curry_vs_denver_all.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 0 entries
Data columns (total 29 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   SEASON_ID        0 non-null      object
 1   Player_ID        0 non-null      object
 2   Game_ID          0 non-null      object
 3   GAME_DATE        0 non-null      object
 4   MATCHUP          0 non-null      object
 5   WL               0 non-null      object
 6   MIN              0 non-null      object
 7   FGM              0 non-null      object
 8   FGA              0 non-null      object
 9   FG_PCT           0 non-null      object
 10  FG3M             0 non-null      object
 11  FG3A             0 non-null      object
 12  FG3_PCT          0 non-null      object
 13  FTM              0 non-null      object
 14  FTA              0 non-null      object
 15  FT_PCT           0 non-null      object
 16  OREB             0 non-null      object
 17  DREB             0 non-null      object
 18  RE

In [274]:
def fetch_last_n_games(player_id, season, n, season_type):
    """
    Fetches the last N completed games of a player from NBA API for a given season.
    
    Parameters:
        player_id (int): The unique ID of the player.
        season (str): NBA season in 'YYYY-YY' format (e.g., '2024-25').
        n (int): Number of last games to retrieve.
        season_type (str): Type of season ('Regular Season' or 'Playoffs').
        
    Returns:
        pd.DataFrame: A DataFrame containing the last N completed games.
    """
    try:
        # Fetch the game logs
        time.sleep(1)  # Prevent hitting the rate limit
        logs = playergamelog.PlayerGameLog(player_id=player_id, season=season, season_type_all_star=season_type)
        logs_df = logs.get_data_frames()[0]
        
        # Ensure GAME_DATE is a datetime object and sort by date
        logs_df['GAME_DATE'] = pd.to_datetime(logs_df['GAME_DATE'])
        logs_df = logs_df.sort_values(by='GAME_DATE', ascending=False)
        
        # Filter out games that have not been completed (those missing 'MIN')
        completed_games = logs_df[logs_df['MIN'].notna()]
        
        # Return only the last n completed games
        last_n_games = completed_games.head(n)
        
        return last_n_games
    except Exception as e:
        print(f"Error retrieving last {n} completed games for player {player_id}: {e}")
        return pd.DataFrame()

In [275]:
# Fetching the last 10 games for Stephen Curry in the 2024-25 Regular Season
curry_last_50_games_2425 = fetch_last_n_games(player_id, '2024-25', n=100, season_type='Regular Season')
curry_last_50_games_2425.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 66 entries, 0 to 65
Data columns (total 27 columns):
 #   Column           Non-Null Count  Dtype         
---  ------           --------------  -----         
 0   SEASON_ID        66 non-null     object        
 1   Player_ID        66 non-null     int64         
 2   Game_ID          66 non-null     object        
 3   GAME_DATE        66 non-null     datetime64[ns]
 4   MATCHUP          66 non-null     object        
 5   WL               66 non-null     object        
 6   MIN              66 non-null     int64         
 7   FGM              66 non-null     int64         
 8   FGA              66 non-null     int64         
 9   FG_PCT           66 non-null     float64       
 10  FG3M             66 non-null     int64         
 11  FG3A             66 non-null     int64         
 12  FG3_PCT          66 non-null     float64       
 13  FTM              66 non-null     int64         
 14  FTA              66 non-null     int64      

In [276]:
curry_all_games_2223_playoff = fetch_last_n_games(player_id, '2022-23', n=13, season_type='Playoffs')
curry_all_games_2122_playoff = fetch_last_n_games(player_id, '2021-22', n=13, season_type='Playoffs')
#merge the two dataframes
curry_all_games_playoff = pd.concat([curry_all_games_2223_playoff, curry_all_games_2122_playoff], ignore_index=True)
curry_all_games_playoff.head()


Unnamed: 0,SEASON_ID,Player_ID,Game_ID,GAME_DATE,MATCHUP,WL,MIN,FGM,FGA,FG_PCT,...,DREB,REB,AST,STL,BLK,TOV,PF,PTS,PLUS_MINUS,VIDEO_AVAILABLE
0,42022,201939,42200236,2023-05-12,GSW @ LAL,L,39,11,28,0.393,...,5,6,5,1,1,4,3,32,-21,1
1,42022,201939,42200235,2023-05-10,GSW vs. LAL,W,39,12,24,0.5,...,3,3,8,0,1,2,0,27,12,1
2,42022,201939,42200234,2023-05-08,GSW @ LAL,L,42,12,30,0.4,...,7,10,14,3,0,2,5,31,1,1
3,42022,201939,42200233,2023-05-06,GSW @ LAL,L,32,9,21,0.429,...,2,4,3,1,1,3,2,23,-26,1
4,42022,201939,42200232,2023-05-04,GSW vs. LAL,W,30,7,12,0.583,...,4,4,12,1,0,3,3,20,24,1


In [277]:
curry_all_games_playoff.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 26 entries, 0 to 25
Data columns (total 27 columns):
 #   Column           Non-Null Count  Dtype         
---  ------           --------------  -----         
 0   SEASON_ID        26 non-null     object        
 1   Player_ID        26 non-null     int64         
 2   Game_ID          26 non-null     object        
 3   GAME_DATE        26 non-null     datetime64[ns]
 4   MATCHUP          26 non-null     object        
 5   WL               26 non-null     object        
 6   MIN              26 non-null     int64         
 7   FGM              26 non-null     int64         
 8   FGA              26 non-null     int64         
 9   FG_PCT           26 non-null     float64       
 10  FG3M             26 non-null     int64         
 11  FG3A             26 non-null     int64         
 12  FG3_PCT          26 non-null     float64       
 13  FTM              26 non-null     int64         
 14  FTA              26 non-null     int64      

In [278]:
# Fill missing SEASON_TYPE for curry_last_50_games_2425
curry_last_50_games_2425['SEASON_TYPE'] = 'Regular Season'

# Fill missing SEASON_TYPE for curry_all_games_playoff
curry_all_games_playoff['SEASON_TYPE'] = 'Playoffs'
# Assuming you've already preprocessed your data
combined_df = pd.concat([curry_vs_denver_all, curry_last_50_games_2425, curry_all_games_playoff], ignore_index=True)

# Sorting by GAME_DATE to create rolling statistics
combined_df = combined_df.sort_values(by='GAME_DATE')

In [279]:
combined_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 92 entries, 91 to 0
Data columns (total 29 columns):
 #   Column           Non-Null Count  Dtype         
---  ------           --------------  -----         
 0   SEASON_ID        92 non-null     object        
 1   Player_ID        92 non-null     object        
 2   Game_ID          92 non-null     object        
 3   GAME_DATE        92 non-null     datetime64[ns]
 4   MATCHUP          92 non-null     object        
 5   WL               92 non-null     object        
 6   MIN              92 non-null     object        
 7   FGM              92 non-null     object        
 8   FGA              92 non-null     object        
 9   FG_PCT           92 non-null     float64       
 10  FG3M             92 non-null     object        
 11  FG3A             92 non-null     object        
 12  FG3_PCT          92 non-null     float64       
 13  FTM              92 non-null     object        
 14  FTA              92 non-null     object        
 

# TIME SERIES

In [280]:
!pip install tensorflow


Defaulting to user installation because normal site-packages is not writeable

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.3.2[0m[39;49m -> [0m[32;49m25.0.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49m/Applications/Xcode.app/Contents/Developer/usr/bin/python3 -m pip install --upgrade pip[0m


In [281]:
combined_df.tail()

Unnamed: 0,SEASON_ID,Player_ID,Game_ID,GAME_DATE,MATCHUP,WL,MIN,FGM,FGA,FG_PCT,...,AST,STL,BLK,TOV,PF,PTS,PLUS_MINUS,VIDEO_AVAILABLE,SEASON,SEASON_TYPE
4,22024,201939,22401086,2025-03-30,GSW @ SAS,W,26,4,10,0.4,...,6,1,1,0,2,13,24,1,,Regular Season
3,22024,201939,22401100,2025-04-01,GSW @ MEM,W,37,16,31,0.516,...,8,5,1,2,2,52,17,1,,Regular Season
2,22024,201939,22401117,2025-04-03,GSW @ LAL,W,34,10,21,0.476,...,6,0,0,2,1,37,-1,1,,Regular Season
1,22024,201939,22401125,2025-04-04,GSW vs. DEN,W,32,13,24,0.542,...,5,2,0,2,2,36,6,1,,Regular Season
0,22024,201939,22401143,2025-04-06,GSW vs. HOU,L,33,1,10,0.1,...,8,0,0,4,0,3,-4,1,,Regular Season


In [282]:
combined_df.tail()

Unnamed: 0,SEASON_ID,Player_ID,Game_ID,GAME_DATE,MATCHUP,WL,MIN,FGM,FGA,FG_PCT,...,AST,STL,BLK,TOV,PF,PTS,PLUS_MINUS,VIDEO_AVAILABLE,SEASON,SEASON_TYPE
4,22024,201939,22401086,2025-03-30,GSW @ SAS,W,26,4,10,0.4,...,6,1,1,0,2,13,24,1,,Regular Season
3,22024,201939,22401100,2025-04-01,GSW @ MEM,W,37,16,31,0.516,...,8,5,1,2,2,52,17,1,,Regular Season
2,22024,201939,22401117,2025-04-03,GSW @ LAL,W,34,10,21,0.476,...,6,0,0,2,1,37,-1,1,,Regular Season
1,22024,201939,22401125,2025-04-04,GSW vs. DEN,W,32,13,24,0.542,...,5,2,0,2,2,36,6,1,,Regular Season
0,22024,201939,22401143,2025-04-06,GSW vs. HOU,L,33,1,10,0.1,...,8,0,0,4,0,3,-4,1,,Regular Season


In [283]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

# --- Step 1: Select Only 'PTS' and 'AST' from the DataFrame ---
features = ['PTS', 'AST']

# Assume your DataFrame `df` is sorted chronologically by GAME_DATE.
# Extract the values for PTS and AST.
data = combined_df[features].values  # Shape: (num_games, 2)

# --- Step 2: Normalize the Data ---
# (Scaling is optional but can help training; here we use StandardScaler)
scaler = StandardScaler()
data_scaled = scaler.fit_transform(data)

# --- Step 3: Create Sequences for RNN Training ---
sequence_length = 5  # Number of previous games to consider
X_rnn = []
y_rnn = []

# For each index starting from sequence_length, use the previous sequence_length games as input,
# and the current game (next game) as the target.
for i in range(sequence_length, len(data_scaled)):
    X_rnn.append(data_scaled[i-sequence_length:i])
    y_rnn.append(data_scaled[i])  # Target: the PTS and AST of the next game

X_rnn = np.array(X_rnn)  # Shape: (num_samples, sequence_length, 2)
y_rnn = np.array(y_rnn)  # Shape: (num_samples, 2)

# --- Step 4: Build the RNN Model (LSTM) ---
model = Sequential()
model.add(LSTM(units=50, return_sequences=True, input_shape=(sequence_length, 2)))
model.add(Dropout(0.2))
model.add(LSTM(units=50))
model.add(Dropout(0.2))
model.add(Dense(units=2))  # Output layer predicting 2 values: PTS and AST

model.compile(optimizer='adam', loss='mean_squared_error')

# --- Step 5: Train the Model ---
model.fit(X_rnn, y_rnn, epochs=200, batch_size=32)

# --- Step 6: Predict Next Game's PTS and AST ---
# Prepare the latest sequence of the last 'sequence_length' games
latest_sequence = data_scaled[-sequence_length:]
latest_sequence = np.expand_dims(latest_sequence, axis=0)  # Shape: (1, sequence_length, 2)

# Predict the next game (in scaled space)
predicted_scaled = model.predict(latest_sequence)[0]

# Inverse-transform the prediction to get back to original scale
predicted = scaler.inverse_transform(predicted_scaled.reshape(1, -1))[0]

print(f"Predicted Points: {predicted[0]:.2f}")
print(f"Predicted Assists: {predicted[1]:.2f}")


Epoch 1/200
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 1.0455  
Epoch 2/200
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.9175
Epoch 3/200
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 1.0281
Epoch 4/200
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 1.0609
Epoch 5/200
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 1.0087
Epoch 6/200
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.9849 
Epoch 7/200
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 1.0083 
Epoch 8/200
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 1.0037
Epoch 9/200
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.9131
Epoch 10/200
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.9397
Epoch 11/200
[