In [1]:
USER_AGENT = {'Host':'stats.nba.com',
              'Connection': 'keep-alive',
              'Cache-Control': 'max-age=0',
              'Upgrade-Insecure-Requests': '1',
              'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)',
              'Referer': 'stats.nba.com',
              'Accept-Encoding': 'gzip, deflate, br',
              'Accept-Language': 'en-US,en;q=0.9',
              'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8'}

In [2]:
import pandas as pd
from nba_api.stats.static import players, teams
from nba_api.stats.library import data
from nba_api.stats.endpoints import boxscoreadvancedv3, leaguegamefinder, boxscoretraditionalv3


TEAM = "NYK"
FROM_DATE = "10/05/2025"
PLAYER_NAME = "Karl-Anthony Towns"
FIRST_NAME = PLAYER_NAME.split()[0]
LAST_NAME = PLAYER_NAME.split()[1]

# data.players
data.teams
ids = {}

for team in data.teams:
     ids[team[1]] = team[0]
games = leaguegamefinder.LeagueGameFinder(headers=USER_AGENT,team_id_nullable=ids[TEAM], date_from_nullable=FROM_DATE).get_data_frames()

In [None]:
from time import sleep

GAMES_RECORDED = 15
box_scores = list(map(lambda x: boxscoretraditionalv3.BoxScoreTraditionalV3(timeout=60, headers=USER_AGENT,game_id=x).get_data_frames()[0], games[0]["GAME_ID"].tolist()[:GAMES_RECORDED]))
sleep(12)
advanced_box_scores = list(map(lambda x: boxscoreadvancedv3.BoxScoreAdvancedV3(timeout=10, headers=USER_AGENT,game_id=x).get_data_frames()[0], games[0]["GAME_ID"].tolist()[:GAMES_RECORDED]))


In [4]:
player_games = list(map(lambda x: x[x["familyName"] == LAST_NAME], advanced_box_scores))

In [5]:
import pandas as pd

combined_games = []

for trad_df, adv_df in zip(box_scores, advanced_box_scores):
    # Filter for player
    player_trad = trad_df[(trad_df["firstName"] == FIRST_NAME) & (trad_df["familyName"] == LAST_NAME)]
    player_adv = adv_df[(adv_df["firstName"] == FIRST_NAME) & (adv_df["familyName"] == LAST_NAME)]
    
    if player_trad.empty or player_adv.empty:
        continue
    
    # Drop columns from advanced that already exist in traditional
    adv_unique = player_adv.drop(columns=[c for c in player_adv.columns if c in player_trad.columns and c not in ["gameId", "personId"]])
    
    # Merge without suffixes
    merged = pd.merge(player_trad, adv_unique, on=["gameId", "personId"], how="inner")
    
    combined_games.append(merged)

# Concatenate all games
player_df = pd.concat(combined_games, ignore_index=True)

# Optional: reset index
player_df.reset_index(drop=True, inplace=True)

# Preview
print(player_df.columns)
print(player_df.head())

Index(['gameId', 'teamId', 'teamCity', 'teamName', 'teamTricode', 'teamSlug',
       'personId', 'firstName', 'familyName', 'nameI', 'playerSlug',
       'position', 'comment', 'jerseyNum', 'minutes', 'fieldGoalsMade',
       'fieldGoalsAttempted', 'fieldGoalsPercentage', 'threePointersMade',
       'threePointersAttempted', 'threePointersPercentage', 'freeThrowsMade',
       'freeThrowsAttempted', 'freeThrowsPercentage', 'reboundsOffensive',
       'reboundsDefensive', 'reboundsTotal', 'assists', 'steals', 'blocks',
       'turnovers', 'foulsPersonal', 'points', 'plusMinusPoints',
       'estimatedOffensiveRating', 'offensiveRating',
       'estimatedDefensiveRating', 'defensiveRating', 'estimatedNetRating',
       'netRating', 'assistPercentage', 'assistToTurnover', 'assistRatio',
       'offensiveReboundPercentage', 'defensiveReboundPercentage',
       'reboundPercentage', 'turnoverRatio', 'effectiveFieldGoalPercentage',
       'trueShootingPercentage', 'usagePercentage', 'estimated

In [6]:
def convert_minutes(min_str):
    """Convert 'MM:SS' string to float minutes."""
    if isinstance(min_str, str):
        mins, secs = map(int, min_str.split(":"))
        return mins + secs / 60
    return float(min_str)  # if already numeric
player_df['minutes'] = player_df['minutes'].apply(convert_minutes)

In [7]:
from sklearn.linear_model import LinearRegression
import pandas as pd
import numpy as np

# Features and targets
feature_cols = ['minutes', 'usagePercentage', 'effectiveFieldGoalPercentage']
target_stats = ['points', 'reboundsTotal', 'assists', 'PIE', 'trueShootingPercentage']

# Number of past games to use
N = 5
num_games = len(player_df)
if num_games == 0:
    raise ValueError("No games available to train the model!")

# Use NumPy slicing instead of iloc
start_idx = max(0, num_games - N)
X_train = pd.DataFrame(player_df[feature_cols].values[start_idx:], columns=feature_cols)
predicted_next_game = {}

for stat in target_stats:
    y_train = pd.Series(player_df[stat].values[start_idx:])
    
    # Train model
    model = LinearRegression()
    model.fit(X_train, y_train)
    
    # Predict next game using last available row
    next_game_features = X_train.values[-1].reshape(1, -1)
    predicted_value = model.predict(next_game_features)[0]
    
    predicted_next_game[stat] = predicted_value

# Print predicted stats
print(f"Predicted stats for {PLAYER_NAME}'s next game:")
for stat, value in predicted_next_game.items():
    if 'Percentage' in stat:
        print(f"{stat}: {value:.3f}")
    else:
        print(f"{stat}: {value:.1f}")

Predicted stats for Karl-Anthony Towns's next game:
points: 17.6
reboundsTotal: 12.9
assists: 3.4
PIE: 0.2
trueShootingPercentage: 0.495


In [16]:
games[0]["GAME_ID"].tolist()[1:GAMES_RECORDED]


['0022500835',
 '0022500825',
 '0022500816',
 '0022500796',
 '0022500780',
 '0022500771',
 '0022500757',
 '0022500742',
 '0022500726',
 '0022500718',
 '0022500708',
 '0022500690',
 '0022500675',
 '0022500665']