In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler, KBinsDiscretizer
from hmmlearn import hmm

In [2]:
# import numpy as np
# import pandas as pd
# from sklearn.preprocessing import StandardScaler, KBinsDiscretizer
# from hmmlearn import hmm

# def predict_next_game_stats(player_df):
#     df = player_df.copy()
#     stat_columns = ['Rebounds', 'Assists', 'Steals', 'Blocks', 'Turnovers', 'Points']

#     # Ensure 'Opponent' column exists
#     if 'Opponent' not in df.columns:
#         raise ValueError("DataFrame must include an 'Opponent' column.")

#     # Compute per-minute stats and 5-game rolling averages
#     for stat in stat_columns:
#         df[f'{stat}_per_min'] = df[stat] / df['Minutes'].replace(0, np.nan)
#         df[f'rolling_5_{stat}'] = df[stat].rolling(5, min_periods=1).mean()

#     df.dropna(inplace=True)

#     # Select feature columns (excluding basic columns)
#     feature_columns = [col for col in df.columns if any(key in col for key in ['per_min', 'rolling_5'])]

#     # Standardize
#     scaler = StandardScaler()
#     df[feature_columns] = scaler.fit_transform(df[feature_columns])

#     # Discretize
#     discretizer = KBinsDiscretizer(n_bins=4, encode='ordinal', strategy='quantile')
#     discrete_features = discretizer.fit_transform(df[feature_columns]).astype(int)

#     # Train HMM on all but last row
#     model = hmm.CategoricalHMM(n_components=4, random_state=42, n_iter=200)
#     model.fit(discrete_features[:-1])

#     # Predict hidden states
#     all_states = model.predict(discrete_features)
#     recent_state = all_states[-1]
#     upcoming_opponent = df.iloc[-1]['Opponent']

#     # Match previous games with same hidden state
#     match_indices = np.where(all_states[:-1] == recent_state)[0]
#     next_game_indices = match_indices + 1
#     valid_indices = next_game_indices[next_game_indices < len(df)]

#     # Assign weights based on opponent match
#     weights = []
#     for idx in valid_indices:
#         future_opponent = df.iloc[idx]['Opponent']
#         if future_opponent == upcoming_opponent:
#             weights.append(3.0)  # boost weight if opponent matches
#         else:
#             weights.append(1.0)

#     weights = np.array(weights)
#     state_next_stats = df.iloc[valid_indices][stat_columns].to_numpy()

#     # Compute weighted average (you could also try weighted median with interpolation)
#     weighted_stats = np.average(state_next_stats, axis=0, weights=weights)

#     return dict(zip(stat_columns, weighted_stats))


In [3]:
team_def_ratings = {
        'OKC': 106.6,
        'ORL': 109.1,
        'LAC': 109.4,
        'BOS': 110.1,
        'HOU': 110.3,
        'MIN': 110.8,
        'GSW': 111.0,
        'CLE': 111.8,
        'MIA': 112.0,
        'DET': 112.5,
        'MEM': 112.6,
        'MIL': 112.7,
        'NYK': 113.3,
        'IND': 113.3,
        'TOR': 113.6,
        'POR': 113.7,
        'LAL': 113.8,
        'CHI': 114.8,
        'ATL': 115.7,
        'BKN': 114.8,
        'DAL': 115.0,
        'DEN': 115.1,
        'SAC': 115.3,
        'BKN': 115.4,
        'CHA': 115.7,
        'SAS': 116.3,
        'PHI': 117.3,
        'PHO': 117.7,
        'WAS': 118.0,
        'NOP': 119.1,
        'UTA': 119.4
    }

In [4]:
def predict_stats_vs_opponent_with_def_rtg(player_df, target_opponent, team_def_ratings, def_rtg_tolerance=2.0):
    """
    Predict next game stats for a player against a specific opponent,
    considering all past games against teams with similar defensive ratings.
    
    Args:
        player_df (pd.DataFrame): Player game log, must include 'Opponent' and 'Minutes' columns.
        target_opponent (str): Opponent team name for the next game (used to get their DRTG).
        team_def_ratings (dict): Map from team name to defensive rating.
        def_rtg_tolerance (float): How close the opponent DEF_RTG must be to be considered similar.
    
    Returns:
        dict: Predicted stats (median) for next game against target opponent.
    """
    df = player_df.copy()
    stat_columns = ['Rebounds', 'Assists', 'Steals', 'Blocks', 'Turnovers', 'Points']

    # Map opponent defensive rating to each game
    df['Opponent_DEF_RTG'] = df['Opponent'].map(team_def_ratings)
    if df['Opponent_DEF_RTG'].isnull().any():
        missing_teams = df[df['Opponent_DEF_RTG'].isnull()]['Opponent'].unique()
        raise ValueError(f"Defensive ratings missing for teams: {missing_teams}")

    # Get target opponent's defensive rating
    target_drtg = team_def_ratings.get(target_opponent)
    if target_drtg is None:
        raise ValueError(f"Defensive rating not found for {target_opponent}")

    # Find all games where opponent's DRTG was similar to target opponent's DRTG
    similar_drtg_games = df[
        (df['Opponent_DEF_RTG'] >= target_drtg - def_rtg_tolerance) &
        (df['Opponent_DEF_RTG'] <= target_drtg + def_rtg_tolerance)
    ]

    if len(similar_drtg_games) == 0:
        print(f"No past games against teams with DRTG within {def_rtg_tolerance} of {target_opponent}'s {target_drtg}")
        return None

    # Calculate rolling averages for recent form (last 5 games)
    for stat in stat_columns:
        df[f'rolling_5_{stat}'] = df[stat].rolling(5, min_periods=1).mean()
    
    # Get player's recent form (last game's rolling averages)
    recent_form = df.iloc[-1][[f'rolling_5_{stat}' for stat in stat_columns]].to_dict()
    
    # Calculate stats against similar DRTG teams
    similar_stats = similar_drtg_games[stat_columns].median().to_dict()
    
    # Combine recent form and similar DRTG performance (weighted average)
    predicted_stats = {}
    for stat in stat_columns:
        # Weight recent form 60% and similar DRTG performance 40%
        predicted_stats[stat] = 0.6 * recent_form[f'rolling_5_{stat}'] + 0.4 * similar_stats[stat]
    
    return predicted_stats

In [5]:
def print_predicted_stats(predicted_stats):
    if predicted_stats is None:
        print("No matching historical data found for prediction.")
        return

    print("📊 Predicted Stats:")
    for stat in ['Points', 'Assists', 'Rebounds', 'Steals', 'Blocks', 'Turnovers']:
        value = predicted_stats.get(stat, None)
        if value is not None:
            print(f"- {stat:<10}: {int(round(value))}")

In [6]:
tyrese_df = pd.read_csv("/Users/visve/Downloads/CS275 Project/Datasets/TyreseSznStats.csv")  # Must include stat columns
shai_df = pd.read_csv("/Users/visve/Downloads/CS275 Project/Datasets/ShaiSznStats.csv")  # Must include stat columns
siakam_df = pd.read_csv("/Users/visve/Downloads/CS275 Project/Datasets/SiakamSznStats.csv")  # Must include stat columns
jdub_df = pd.read_csv("/Users/visve/Downloads/CS275 Project/Datasets/JalenWilliamsSznStats.csv")  # Must include stat columns
nesmith_df = pd.read_csv("/Users/visve/Downloads/CS275 Project/Datasets/NesmithSznStats.csv")  # Must include stat columns
chet_df = pd.read_csv("/Users/visve/Downloads/CS275 Project/Datasets/ChetSznStats.csv")  # Must include stat columns

nemhard_df = pd.read_csv("/Users/visve/Downloads/CS275 Project/Datasets/NemhardSznStats.csv")  # Must include stat columns
hartenstein_df = pd.read_csv("/Users/visve/Downloads/CS275 Project/Datasets/iHartSznStats.csv")  # Must include stat columns
turner_df = pd.read_csv("/Users/visve/Downloads/CS275 Project/Datasets/TurnerSznStats.csv")  # Must include stat columns
tj_df = pd.read_csv("/Users/visve/Downloads/CS275 Project/Datasets/TJSznStats.csv")  # Must include stat columns


In [7]:
prediction = predict_stats_vs_opponent_with_def_rtg(player_df=tyrese_df, 
                                                   target_opponent='OKC', 
                                                   team_def_ratings=team_def_ratings, 
                                                   def_rtg_tolerance=0.5)

print('Tyrese Haliburton Prediction for Game 1 vs OKC:')
print_predicted_stats(prediction)

Tyrese Haliburton Prediction for Game 1 vs OKC:
📊 Predicted Stats:
- Points    : 18
- Assists   : 8
- Rebounds  : 5
- Steals    : 2
- Blocks    : 1
- Turnovers : 3


In [8]:
prediction1 = predict_stats_vs_opponent_with_def_rtg(player_df=shai_df, 
                                                   target_opponent='IND', 
                                                   team_def_ratings=team_def_ratings, 
                                                   def_rtg_tolerance=0.5)

print('Shai-Gilgeous Alexader Prediction for Game 1 vs IND:')
print_predicted_stats(prediction1)

Shai-Gilgeous Alexader Prediction for Game 1 vs IND:
📊 Predicted Stats:
- Points    : 33
- Assists   : 5
- Rebounds  : 5
- Steals    : 2
- Blocks    : 1
- Turnovers : 2


In [9]:
prediction2 = predict_stats_vs_opponent_with_def_rtg(player_df=siakam_df, 
                                                   target_opponent='OKC', 
                                                   team_def_ratings=team_def_ratings, 
                                                   def_rtg_tolerance=0.5)

print('Pascal Siakam Prediction for Game 1 vs OKC:')
print_predicted_stats(prediction2)

Pascal Siakam Prediction for Game 1 vs OKC:
📊 Predicted Stats:
- Points    : 21
- Assists   : 4
- Rebounds  : 8
- Steals    : 1
- Blocks    : 1
- Turnovers : 1


In [10]:
prediction3 = predict_stats_vs_opponent_with_def_rtg(player_df=nesmith_df, 
                                                   target_opponent='OKC', 
                                                   team_def_ratings=team_def_ratings, 
                                                   def_rtg_tolerance=0.5)

print('Aaron Nesmith Prediction for Game 1 vs OKC:')
print_predicted_stats(prediction3)

Aaron Nesmith Prediction for Game 1 vs OKC:
📊 Predicted Stats:
- Points    : 9
- Assists   : 0
- Rebounds  : 5
- Steals    : 1
- Blocks    : 1
- Turnovers : 1


In [11]:
prediction4 = predict_stats_vs_opponent_with_def_rtg(player_df=jdub_df, 
                                                   target_opponent='IND', 
                                                   team_def_ratings=team_def_ratings, 
                                                   def_rtg_tolerance=0.5)

print('Jalen Williams Prediction for Game 1 vs IND:')
print_predicted_stats(prediction4)

Jalen Williams Prediction for Game 1 vs IND:
📊 Predicted Stats:
- Points    : 21
- Assists   : 4
- Rebounds  : 5
- Steals    : 1
- Blocks    : 0
- Turnovers : 2


In [12]:
######################

In [13]:
####SECOND APPROACH####
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
import warnings
warnings.filterwarnings('ignore')

def enhanced_drtg_prediction(player_df, team_def_ratings, target_opponent, player_name, use_recent_form=True):
    """
    Enhanced prediction that properly accounts for defensive matchups.
    Uses multiple approaches and combines them intelligently.
    """
    print(f"\n{'='*50}")
    print(f"ENHANCED PREDICTION: {player_name} vs {target_opponent}")
    print(f"{'='*50}")
    
    # Data preparation
    player_df = player_df.copy()
    player_df['Opponent_DEF_RTG'] = player_df['Opponent'].map(team_def_ratings)
    player_df = player_df.dropna(subset=['Opponent_DEF_RTG'])
    
    stat_cols = ['Points', 'Assists', 'Rebounds', 'Steals', 'Blocks', 'Turnovers']
    player_df = player_df.dropna(subset=stat_cols)
    
    target_drtg = team_def_ratings[target_opponent]
    avg_drtg = player_df['Opponent_DEF_RTG'].mean()
    
    print(f"📊 Matchup Analysis:")
    print(f"  - Target opponent: {target_opponent} (DRTG: {target_drtg:.1f})")
    print(f"  - Season avg DRTG faced: {avg_drtg:.1f}")
    print(f"  - Defensive difficulty: {target_drtg/avg_drtg:.3f} (lower = harder)")
    
    # Approach 1: Recent Form + Defensive Adjustment
    recent_games = 10 if use_recent_form else len(player_df)
    recent_avg = player_df[stat_cols].tail(recent_games).mean()
    
    # Simple defensive factor
    def_factor = avg_drtg / target_drtg
    
    # Apply stronger adjustment to offense-sensitive stats
    prediction_1 = recent_avg.copy()
    offensive_stats = ['Points', 'Assists']
    for stat in offensive_stats:
        prediction_1[stat] *= def_factor
    
    print(f"\n🎯 Approach 1 - Recent Form + Defense:")
    for stat in stat_cols:
        print(f"  {stat}: {prediction_1[stat]:.1f}")
    
    # Approach 2: Historical Matchup Analysis
    def get_similar_matchups(player_df, target_drtg, tolerance=3.0):
        """Find games against teams with similar defensive ratings."""
        similar_mask = abs(player_df['Opponent_DEF_RTG'] - target_drtg) <= tolerance
        return player_df[similar_mask]
    
    similar_games = get_similar_matchups(player_df, target_drtg)
    
    if len(similar_games) >= 3:
        prediction_2 = similar_games[stat_cols].mean()
        print(f"\n🎯 Approach 2 - Similar Matchups ({len(similar_games)} games):")
        for stat in stat_cols:
            print(f"  {stat}: {prediction_2[stat]:.1f}")
    else:
        prediction_2 = prediction_1.copy()
        print(f"\n🎯 Approach 2 - Insufficient similar matchups ({len(similar_games)} games)")
        print("  Using Approach 1 values")
    
    # Approach 3: Regression-Based Prediction
    def regression_prediction(player_df, stat_cols, target_drtg):
        """Use linear regression to predict based on DRTG."""
        predictions = {}
        
        for stat in stat_cols:
            # Only use regression if there's some correlation
            correlation = player_df[stat].corr(player_df['Opponent_DEF_RTG'])
            
            if abs(correlation) > 0.1:  # Only if correlation > 0.1
                X = player_df[['Opponent_DEF_RTG']].values
                y = player_df[stat].values
                
                # Fit regression
                reg = LinearRegression()
                reg.fit(X, y)
                
                # Predict for target DRTG
                pred_value = reg.predict([[target_drtg]])[0]
                predictions[stat] = pred_value
            else:
                # Use recent average if no correlation
                predictions[stat] = player_df[stat].tail(10).mean()
        
        return predictions
    
    prediction_3 = regression_prediction(player_df, stat_cols, target_drtg)
    
    print(f"\n🎯 Approach 3 - Regression-Based:")
    for stat in stat_cols:
        print(f"  {stat}: {prediction_3[stat]:.1f}")
    
    # Combined Prediction (weighted average)
    weights = [0.4, 0.3, 0.3]  # Recent form gets highest weight
    
    final_prediction = {}
    for stat in stat_cols:
        combined_value = (
            weights[0] * prediction_1[stat] +
            weights[1] * prediction_2[stat] +
            weights[2] * prediction_3[stat]
        )
        final_prediction[stat] = combined_value
    
    print(f"\n🏆 FINAL COMBINED PREDICTION:")
    for stat in stat_cols:
        print(f"  {stat}: {int(round(final_prediction[stat]))}")
    
    # Confidence assessment
    print(f"\n📈 Confidence Factors:")
    similar_games_count = len(similar_games)
    recent_consistency = player_df[stat_cols].tail(5).std().mean()
    
    confidence_score = 0
    if similar_games_count >= 5:
        confidence_score += 30
    elif similar_games_count >= 3:
        confidence_score += 20
    else:
        confidence_score += 10
    
    if recent_consistency < 3:
        confidence_score += 25  # Consistent recent performance
    elif recent_consistency < 5:
        confidence_score += 15
    else:
        confidence_score += 5
    
    # Check if target defense is significantly different
    drtg_diff = abs(target_drtg - avg_drtg)
    if drtg_diff < 2:
        confidence_score += 25  # Similar to usual opponents
    elif drtg_diff < 4:
        confidence_score += 15
    else:
        confidence_score += 5
    
    print(f"  - Similar matchups: {similar_games_count} games")
    print(f"  - Recent consistency: {recent_consistency:.1f}")
    print(f"  - Defensive difference: {drtg_diff:.1f}")
    print(f"  - Overall confidence: {confidence_score}/80")
    
    return final_prediction

def team_based_adjustments(player_df, team_def_ratings, target_opponent, player_name):
    """
    Additional team-specific adjustments based on playing style.
    """
    # Define team characteristics (you can expand this)
    team_styles = {
        'OKC': {'pace': 'fast', 'defense': 'elite', 'style': 'athletic'},
        'IND': {'pace': 'medium', 'defense': 'good', 'style': 'balanced'},
        'BOS': {'pace': 'medium', 'defense': 'elite', 'style': 'versatile'},
        'MIA': {'pace': 'slow', 'defense': 'good', 'style': 'physical'},
        # Add more teams as needed
    }
    
    adjustments = {}
    target_style = team_styles.get(target_opponent, {'pace': 'medium', 'defense': 'average', 'style': 'balanced'})
    
    # Pace adjustments
    if target_style['pace'] == 'fast':
        adjustments['pace_factor'] = 1.05  # Slightly more opportunities
    elif target_style['pace'] == 'slow':
        adjustments['pace_factor'] = 0.95  # Fewer opportunities
    else:
        adjustments['pace_factor'] = 1.0
    
    # Defense adjustments
    if target_style['defense'] == 'elite':
        adjustments['def_factor'] = 0.90  # Harder to score
    elif target_style['defense'] == 'good':
        adjustments['def_factor'] = 0.95
    else:
        adjustments['def_factor'] = 1.0
    
    return adjustments

def make_final_prediction(player_df, team_def_ratings, target_opponent, player_name):
    """
    Complete prediction system combining all approaches.
    """
    # Get base prediction
    base_prediction = enhanced_drtg_prediction(player_df, team_def_ratings, target_opponent, player_name)
    
    # Get team-specific adjustments
    adjustments = team_based_adjustments(player_df, team_def_ratings, target_opponent, player_name)
    
    # Apply adjustments
    final_prediction = {}
    stat_cols = ['Points', 'Assists', 'Rebounds', 'Steals', 'Blocks', 'Turnovers']
    
    for stat in stat_cols:
        adjusted_value = base_prediction[stat]
        
        # Apply pace adjustment to counting stats
        if stat in ['Points', 'Assists', 'Rebounds']:
            adjusted_value *= adjustments['pace_factor']
        
        # Apply defensive adjustment to offensive stats
        if stat in ['Points', 'Assists']:
            adjusted_value *= adjustments['def_factor']
        
        final_prediction[stat] = adjusted_value
    
    print(f"\n🎯 FINAL ADJUSTED PREDICTION:")
    for stat in stat_cols:
        print(f"  {stat}: {int(round(final_prediction[stat]))}")
    
    return final_prediction

In [14]:
# Replace your prediction calls with:
make_final_prediction(tyrese_df, team_def_ratings, "OKC", "Tyrese Haliburton")
make_final_prediction(shai_df, team_def_ratings, "IND", "Shai Gilgeous-Alexander")
make_final_prediction(siakam_df, team_def_ratings, "OKC", "Pascal Siakam")
make_final_prediction(nesmith_df, team_def_ratings, "OKC", "Aaron Nesmith")
make_final_prediction(chet_df, team_def_ratings, "IND", "Chet Holmgren")
make_final_prediction(hartenstein_df, team_def_ratings, "IND", "Isaiah Hartenstein")
make_final_prediction(nemhard_df, team_def_ratings, "OKC", "Andrew Nembhard")
make_final_prediction(jdub_df, team_def_ratings, "IND", "Jalen Williams")
make_final_prediction(turner_df, team_def_ratings, "OKC", "Myles Turner")
make_final_prediction(tj_df, team_def_ratings, "OKC", "T.J. McConnell")

# ... etc


ENHANCED PREDICTION: Tyrese Haliburton vs OKC
📊 Matchup Analysis:
  - Target opponent: OKC (DRTG: 106.6)
  - Season avg DRTG faced: 113.3
  - Defensive difficulty: 0.940 (lower = harder)

🎯 Approach 1 - Recent Form + Defense:
  Points: 20.9
  Assists: 9.7
  Rebounds: 6.2
  Steals: 2.1
  Blocks: 0.6
  Turnovers: 2.3

🎯 Approach 2 - Similar Matchups (11 games):
  Points: 16.5
  Assists: 7.6
  Rebounds: 4.5
  Steals: 1.5
  Blocks: 0.5
  Turnovers: 2.0

🎯 Approach 3 - Regression-Based:
  Points: 19.7
  Assists: 9.1
  Rebounds: 4.9
  Steals: 2.1
  Blocks: 0.4
  Turnovers: 2.3

🏆 FINAL COMBINED PREDICTION:
  Points: 19
  Assists: 9
  Rebounds: 5
  Steals: 2
  Blocks: 1
  Turnovers: 2

📈 Confidence Factors:
  - Similar matchups: 11 games
  - Recent consistency: 2.3
  - Defensive difference: 6.7
  - Overall confidence: 60/80

🎯 FINAL ADJUSTED PREDICTION:
  Points: 18
  Assists: 8
  Rebounds: 6
  Steals: 2
  Blocks: 1
  Turnovers: 2

ENHANCED PREDICTION: Shai Gilgeous-Alexander vs IND
📊 Matchu

{'Points': np.float64(8.442926581690877),
 'Assists': np.float64(3.499153245965516),
 'Rebounds': np.float64(2.1445207604895105),
 'Steals': np.float64(0.9550000000000001),
 'Blocks': np.float64(0.1431936367204223),
 'Turnovers': np.float64(1.777290722670187)}