In [2]:
"""
05_advanced_models.ipynb
Train advanced ML models (XGBoost, LightGBM) with 
hyperparameter tuning.

Goals:
- PTS: ≤5.09 MAE (Tier 1), ≤4.50 (Tier 2)
- REB: ≤1.97 MAE (Tier 1), ≤2.00 (Tier 2)
- AST: ≤1.51 MAE (Tier 1), ≤1.50 (Tier 2)

Current baseline results (from notebook 04):
- PTS: 5.293 MAE (need 0.203 improvement)
- REB: 2.080 MAE (need 0.110 improvement)
- AST: 1.539 MAE (basically there!)
"""

import pandas as pd
import numpy as np
import json
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
import xgboost as xgb
import lightgbm as lgb
import warnings
warnings.filterwarnings('ignore')

# Set plotting style
sns.set_style('whitegrid')
plt.rcParams['figure.figsize'] = (12, 6)

# Report the library versions next to the results so a run can be tied to
# the environment that produced it.
print("✓ Imports complete")
print("Package versions:")
print(f"  - XGBoost: {xgb.__version__}")
print(f"  - LightGBM: {lgb.__version__}")

✓ Imports complete
Package versions:
  - XGBoost: 2.1.2
  - LightGBM: 4.5.0


In [3]:
# Directory that holds every CSV/JSON file read by this cell
DATA_DIR = '../data/processed'
# Target column names; the lower-cased name is also the file-name suffix
TARGET_COLUMNS = ('PTS', 'REB', 'AST')


def load_split(split):
  """Read the feature matrix and the three target series for one split.

  Args:
    split: Split name as it appears in the file names
      ('train', 'val' or 'test').

  Returns:
    Tuple ``(X, y_pts, y_reb, y_ast)``. ``X`` is the DataFrame read from
    ``X_<split>.csv``; each ``y`` is the target-named column of
    ``y_<split>_<target>.csv``.

  Raises:
    ValueError: If a target file has a different number of rows than the
      feature file, which would silently misalign features and labels.
  """
  features = pd.read_csv(f'{DATA_DIR}/X_{split}.csv')
  targets = []
  for column in TARGET_COLUMNS:
    series = pd.read_csv(f'{DATA_DIR}/y_{split}_{column.lower()}.csv')[column]
    if len(series) != len(features):
      raise ValueError(
        f"{split} split: {column} has {len(series)} rows "
        f"but X has {len(features)}"
      )
    targets.append(series)
  return (features, *targets)


# Load training data
X_train, y_train_pts, y_train_reb, y_train_ast = load_split('train')

# Load validation data
X_val, y_val_pts, y_val_reb, y_val_ast = load_split('val')

# Load test data (we'll save this for final evaluation)
X_test, y_test_pts, y_test_reb, y_test_ast = load_split('test')

# Load feature names
with open(f'{DATA_DIR}/feature_names.json', 'r') as f:
  feature_names = json.load(f)

print("✓ Data loaded successfully")
print("\n📊 Dataset shapes:")
print(f"  Train: {X_train.shape[0]:,} games × {X_train.shape[1]} features")
print(f"  Val:   {X_val.shape[0]:,} games × {X_val.shape[1]} features")
print(f"  Test:  {X_test.shape[0]:,} games × {X_test.shape[1]} features")

# Reference numbers quoted from notebook 04; they are not recomputed here
print("\n🎯 Current best results (from notebook 04):")
print("  PTS: 5.293 MAE → Goal: ≤5.09 (Tier 1), ≤4.50 (Tier 2)")
print("  REB: 2.080 MAE → Goal: ≤1.97 (Tier 1), ≤2.00 (Tier 2)")
print("  AST: 1.539 MAE → Goal: ≤1.51 (Tier 1), ≤1.50 (Tier 2)")

✓ Data loaded successfully

📊 Dataset shapes:
  Train: 38,315 games × 45 features
  Val:   14,020 games × 45 features
  Test:  14,074 games × 45 features

🎯 Current best results (from notebook 04):
  PTS: 5.293 MAE → Goal: ≤5.09 (Tier 1), ≤4.50 (Tier 2)
  REB: 2.080 MAE → Goal: ≤1.97 (Tier 1), ≤2.00 (Tier 2)
  AST: 1.539 MAE → Goal: ≤1.51 (Tier 1), ≤1.50 (Tier 2)


In [4]:
print("=" * 70)
print("TRAINING DEFAULT XGBOOST MODELS")
print("=" * 70)

# Fit-and-score helper shared by the per-target training runs below
def train_and_evaluate_tree(model, model_name, X_train, y_train, X_val, y_val):
  """Fit ``model`` on the training split and score it on both splits.

  Args:
    model: Estimator exposing ``fit(X, y)`` and ``predict(X)``; it is
      fitted in place.
    model_name: Label stored under the ``'model'`` key of the metrics.
    X_train, y_train: Training features and target.
    X_val, y_val: Validation features and target.

  Returns:
    Tuple ``(metrics, model)``: ``metrics`` is a dict with the keys
    ``model``, ``train_mae``, ``val_mae``, ``train_r2`` and ``val_r2``;
    ``model`` is the same estimator object, now fitted.
  """
  model.fit(X_train, y_train)

  # (truth, prediction) pairs, one per split, reused by both metrics
  train_pair = (y_train, model.predict(X_train))
  val_pair = (y_val, model.predict(X_val))

  metrics = dict(
      model=model_name,
      train_mae=mean_absolute_error(*train_pair),
      val_mae=mean_absolute_error(*val_pair),
      train_r2=r2_score(*train_pair),
      val_r2=r2_score(*val_pair),
  )
  return metrics, model

# Train XGBoost for each target with default params
def _new_default_xgb():
  """Return an unfitted XGBRegressor that keeps XGBoost's default hyperparameters."""
  return xgb.XGBRegressor(
    random_state=42,
    n_jobs=-1,
    tree_method='hist'  # Fast histogram-based algorithm
  )

# One metrics dict per target, appended in PTS, REB, AST order
results_xgb_default = []

print("\n1. Training XGBoost for PTS...")
xgb_pts = _new_default_xgb()
metrics_pts, model_pts = train_and_evaluate_tree(xgb_pts, 'XGBoost_default', X_train, y_train_pts, X_val, y_val_pts)
results_xgb_default.append({**metrics_pts, 'target': 'PTS'})
print(f"   Val MAE: {metrics_pts['val_mae']:.3f}")

print("\n2. Training XGBoost for REB...")
xgb_reb = _new_default_xgb()
metrics_reb, model_reb = train_and_evaluate_tree(xgb_reb, 'XGBoost_default', X_train, y_train_reb, X_val, y_val_reb)
results_xgb_default.append({**metrics_reb, 'target': 'REB'})
print(f"   Val MAE: {metrics_reb['val_mae']:.3f}")

print("\n3. Training XGBoost for AST...")
xgb_ast = _new_default_xgb()
metrics_ast, model_ast = train_and_evaluate_tree(xgb_ast, 'XGBoost_default', X_train, y_train_ast, X_val, y_val_ast)
results_xgb_default.append({**metrics_ast, 'target': 'AST'})
print(f"   Val MAE: {metrics_ast['val_mae']:.3f}")

# Summary
print("\n" + "="*70)
print("DEFAULT XGBOOST RESULTS")
print("="*70)
results_df = pd.DataFrame(results_xgb_default)
print(results_df[['target', 'val_mae', 'val_r2']].to_string(index=False))

# Compare to baseline. The XGBoost columns come from results_df so the table
# always reflects the runs above; the linear-model MAEs and Tier 1 goals are
# the reference values quoted at the top of the notebook.
baseline_comparison = pd.DataFrame({
  'Target': results_df['target'].tolist(),
  'Linear Model': [5.293, 2.080, 1.539],
  'XGBoost Default': results_df['val_mae'].tolist(),
  'Tier 1 Goal': [5.09, 1.97, 1.51]
})
baseline_comparison['Beat Linear?'] = baseline_comparison['XGBoost Default'] < baseline_comparison['Linear Model']
baseline_comparison['Hit Tier 1?'] = baseline_comparison['XGBoost Default'] <= baseline_comparison['Tier 1 Goal']

print("\n" + "="*70)
print("COMPARISON TO LINEAR MODELS")
print("="*70)
print(baseline_comparison.to_string(index=False))

TRAINING DEFAULT XGBOOST MODELS

1. Training XGBoost for PTS...
   Val MAE: 5.478

2. Training XGBoost for REB...
   Val MAE: 2.172

3. Training XGBoost for AST...
   Val MAE: 1.612

DEFAULT XGBOOST RESULTS
target  val_mae   val_r2
   PTS 5.477806 0.418834
   REB 2.172228 0.362597
   AST 1.612178 0.450105

COMPARISON TO LINEAR MODELS
Target  Linear Model  XGBoost Default  Tier 1 Goal  Beat Linear?  Hit Tier 1?
   PTS         5.293         5.477806         5.09         False        False
   REB         2.080         2.172228         1.97         False        False
   AST         1.539         1.612178         1.51         False        False


In [5]:
print("="*70)
print("DIAGNOSING TRAIN VS VAL PERFORMANCE")
print("="*70)

# Gap thresholds (in MAE units) used to label each model's fit
SEVERE_GAP = 1.0
MODERATE_GAP = 0.5
TIGHT_GAP = 0.1

# Check train MAE for each model. Built directly from the results list so it
# does not depend on how many targets were trained or in which order.
diagnosis = pd.DataFrame({
  'Target': [entry['target'] for entry in results_xgb_default],
  'Train MAE': [entry['train_mae'] for entry in results_xgb_default],
  'Val MAE': [entry['val_mae'] for entry in results_xgb_default]
})
diagnosis['Gap (Val - Train)'] = diagnosis['Val MAE'] - diagnosis['Train MAE']
diagnosis['Overfitting?'] = diagnosis['Gap (Val - Train)'] > MODERATE_GAP

print(diagnosis.to_string(index=False))

print("\n" + "="*70)
print("DIAGNOSIS")
print("="*70)

# Only two columns are needed, so iterate over them directly
for target, gap in zip(diagnosis['Target'], diagnosis['Gap (Val - Train)']):
  if gap > SEVERE_GAP:
      print(f"  {target}: SEVERE overfitting (gap={gap:.3f}) → Need regularization")
  elif gap > MODERATE_GAP:
      print(f"  {target}: Moderate overfitting (gap={gap:.3f}) → Tune max_depth, min_child_weight")
  elif gap < TIGHT_GAP:
      # Heuristic: train and val errors nearly equal is read as underfitting
      print(f"  {target}: Underfitting (gap={gap:.3f}) → Need more complexity")
  else:
      print(f"  {target}: Good fit (gap={gap:.3f})")

print("\n💡 NEXT STEP: Hyperparameter tuning with GridSearchCV")
print("   Focus on: learning_rate, max_depth, n_estimators, subsample")

DIAGNOSING TRAIN VS VAL PERFORMANCE
Target  Train MAE  Val MAE  Gap (Val - Train)  Overfitting?
   PTS   4.131616 5.477806           1.346191          True
   REB   1.664972 2.172228           0.507256          True
   AST   1.206033 1.612178           0.406146         False

DIAGNOSIS
  PTS: SEVERE overfitting (gap=1.346) → Need regularization
  REB: Moderate overfitting (gap=0.507) → Tune max_depth, min_child_weight
  AST: Good fit (gap=0.406)

💡 NEXT STEP: Hyperparameter tuning with GridSearchCV
   Focus on: learning_rate, max_depth, n_estimators, subsample


In [6]:
print("="*70)
print("HYPERPARAMETER TUNING FOR PTS (RandomizedSearchCV)")
print("="*70)
print("This will take ~5-10 minutes...")

# Search budget, defined once so the progress messages below cannot drift
# from what RandomizedSearchCV actually runs
N_SEARCH_ITER = 50
N_CV_FOLDS = 3

# Reference points for the final comparison
LINEAR_MAE_PTS = 5.293  # linear model validation MAE quoted from notebook 04
TIER1_GOAL_PTS = 5.09

# Define parameter grid focused on reducing overfitting
param_distributions = {
  'max_depth': [3, 4, 5, 6],
  'learning_rate': [0.01, 0.05, 0.1],
  'n_estimators': [100, 200, 300],
  'min_child_weight': [1, 3, 5],
  'subsample': [0.7, 0.8, 0.9, 1.0],
  'colsample_bytree': [0.7, 0.8, 0.9, 1.0],
  'reg_alpha': [0, 0.1, 1.0],  # L1 regularization
  'reg_lambda': [1, 2, 5]      # L2 regularization
}

# Use RandomizedSearchCV (faster than GridSearchCV).
# The estimator is single-threaded on purpose: the search already runs its
# fits in parallel (n_jobs=-1), and also giving every fit all cores makes the
# two layers compete for the same CPUs.
xgb_model = xgb.XGBRegressor(
  random_state=42,
  n_jobs=1,
  tree_method='hist'
)

random_search = RandomizedSearchCV(
  xgb_model,
  param_distributions=param_distributions,
  n_iter=N_SEARCH_ITER,  # Number of random combinations to try
  scoring='neg_mean_absolute_error',
  cv=N_CV_FOLDS,
  random_state=42,
  n_jobs=-1,
  verbose=1
)

print("\nStarting hyperparameter search...")
print(f"  Total combinations to try: {N_SEARCH_ITER}")
print(f"  Cross-validation folds: {N_CV_FOLDS}")
print("  Scoring metric: MAE")

random_search.fit(X_train, y_train_pts)

print("\n✓ Search complete!")
print("\nBest parameters:")
for param, value in random_search.best_params_.items():
  print(f"  {param}: {value}")

# Evaluate best model (refit on the full training set by the search)
best_xgb_pts = random_search.best_estimator_
y_pred_train = best_xgb_pts.predict(X_train)
y_pred_val = best_xgb_pts.predict(X_val)

train_mae = mean_absolute_error(y_train_pts, y_pred_train)
val_mae = mean_absolute_error(y_val_pts, y_pred_val)

print("\n📊 Best XGBoost for PTS:")
print(f"  Train MAE: {train_mae:.3f}")
print(f"  Val MAE:   {val_mae:.3f}")
print(f"  Gap:       {val_mae - train_mae:.3f}")

# The default-XGBoost number is read from the earlier run (metrics_pts)
# rather than retyped, so it cannot go stale.
print("\n🎯 Comparison:")
print(f"  Linear Model:     {LINEAR_MAE_PTS:.3f} MAE")
print(f"  XGBoost Default:  {metrics_pts['val_mae']:.3f} MAE")
print(f"  XGBoost Tuned:    {val_mae:.3f} MAE")
print(f"  Tier 1 Goal:      {TIER1_GOAL_PTS:.2f} MAE")

if val_mae <= TIER1_GOAL_PTS:
  print("  ✅ HIT TIER 1 GOAL!")
elif val_mae < LINEAR_MAE_PTS:
  print("  ✅ Beat linear model!")
else:
  print("  ❌ Still need improvement")

HYPERPARAMETER TUNING FOR PTS (RandomizedSearchCV)
This will take ~5-10 minutes...

Starting hyperparameter search...
  Total combinations to try: 50
  Cross-validation folds: 3
  Scoring metric: MAE
Fitting 3 folds for each of 50 candidates, totalling 150 fits

✓ Search complete!

Best parameters:
  subsample: 1.0
  reg_lambda: 2
  reg_alpha: 0
  n_estimators: 100
  min_child_weight: 1
  max_depth: 3
  learning_rate: 0.05
  colsample_bytree: 0.8

📊 Best XGBoost for PTS:
  Train MAE: 5.202
  Val MAE:   5.305
  Gap:       0.103

🎯 Comparison:
  Linear Model:     5.293 MAE
  XGBoost Default:  5.478 MAE
  XGBoost Tuned:    5.305 MAE
  Tier 1 Goal:      5.09 MAE
  ❌ Still need improvement


In [7]:
print("="*70)
print("HYPERPARAMETER TUNING FOR REB")
print("="*70)

# Reference points for the final comparison
LINEAR_MAE_REB = 2.080  # linear model validation MAE quoted from notebook 04
TIER1_GOAL_REB = 1.97

# Same parameter grid
param_distributions = {
  'max_depth': [3, 4, 5, 6],
  'learning_rate': [0.01, 0.05, 0.1],
  'n_estimators': [100, 200, 300],
  'min_child_weight': [1, 3, 5],
  'subsample': [0.7, 0.8, 0.9, 1.0],
  'colsample_bytree': [0.7, 0.8, 0.9, 1.0],
  'reg_alpha': [0, 0.1, 1.0],
  'reg_lambda': [1, 2, 5]
}

# Single-threaded estimator: the search below already parallelises across
# fits (n_jobs=-1); giving every fit all cores too would make the two layers
# compete for the same CPUs.
xgb_model = xgb.XGBRegressor(random_state=42, n_jobs=1, tree_method='hist')

random_search_reb = RandomizedSearchCV(
  xgb_model,
  param_distributions=param_distributions,
  n_iter=50,
  scoring='neg_mean_absolute_error',
  cv=3,
  random_state=42,
  n_jobs=-1,
  verbose=1
)

print("\nStarting search for REB...")
random_search_reb.fit(X_train, y_train_reb)

print("\n✓ Search complete!")
print("\nBest parameters:")
for param, value in random_search_reb.best_params_.items():
  print(f"  {param}: {value}")

# Evaluate (best_estimator_ is refit on the full training set by the search)
best_xgb_reb = random_search_reb.best_estimator_
y_pred_train = best_xgb_reb.predict(X_train)
y_pred_val = best_xgb_reb.predict(X_val)

train_mae = mean_absolute_error(y_train_reb, y_pred_train)
val_mae = mean_absolute_error(y_val_reb, y_pred_val)

print("\n📊 Best XGBoost for REB:")
print(f"  Train MAE: {train_mae:.3f}")
print(f"  Val MAE:   {val_mae:.3f}")
print(f"  Gap:       {val_mae - train_mae:.3f}")

# The default-XGBoost number is read from the earlier run (metrics_reb)
# rather than retyped, so it cannot go stale.
print("\n🎯 Comparison:")
print(f"  Linear Model:     {LINEAR_MAE_REB:.3f} MAE")
print(f"  XGBoost Default:  {metrics_reb['val_mae']:.3f} MAE")
print(f"  XGBoost Tuned:    {val_mae:.3f} MAE")
print(f"  Tier 1 Goal:      {TIER1_GOAL_REB:.2f} MAE")

if val_mae <= TIER1_GOAL_REB:
  print("  ✅ HIT TIER 1 GOAL!")
elif val_mae < LINEAR_MAE_REB:
  print("  ✅ Beat linear model!")
else:
  print("  ❌ Still need improvement")

HYPERPARAMETER TUNING FOR REB

Starting search for REB...
Fitting 3 folds for each of 50 candidates, totalling 150 fits

✓ Search complete!

Best parameters:
  subsample: 1.0
  reg_lambda: 2
  reg_alpha: 0.1
  n_estimators: 100
  min_child_weight: 3
  max_depth: 3
  learning_rate: 0.05
  colsample_bytree: 0.8

📊 Best XGBoost for REB:
  Train MAE: 2.083
  Val MAE:   2.085
  Gap:       0.001

🎯 Comparison:
  Linear Model:     2.080 MAE
  XGBoost Default:  2.172 MAE
  XGBoost Tuned:    2.085 MAE
  Tier 1 Goal:      1.97 MAE
  ❌ Still need improvement


In [8]:
print("="*70)
print("HYPERPARAMETER TUNING FOR AST")
print("="*70)

# Reference points for the final comparison
LINEAR_MAE_AST = 1.539  # linear model validation MAE quoted from notebook 04
TIER1_GOAL_AST = 1.51

# Same search space as the PTS and REB searches
param_distributions = {
  'max_depth': [3, 4, 5, 6],
  'learning_rate': [0.01, 0.05, 0.1],
  'n_estimators': [100, 200, 300],
  'min_child_weight': [1, 3, 5],
  'subsample': [0.7, 0.8, 0.9, 1.0],
  'colsample_bytree': [0.7, 0.8, 0.9, 1.0],
  'reg_alpha': [0, 0.1, 1.0],
  'reg_lambda': [1, 2, 5]
}

# Single-threaded estimator: the search below already parallelises across
# fits (n_jobs=-1); giving every fit all cores too would make the two layers
# compete for the same CPUs.
xgb_model = xgb.XGBRegressor(random_state=42, n_jobs=1, tree_method='hist')

random_search_ast = RandomizedSearchCV(
  xgb_model,
  param_distributions=param_distributions,
  n_iter=50,
  scoring='neg_mean_absolute_error',
  cv=3,
  random_state=42,
  n_jobs=-1,
  verbose=1
)

print("\nStarting search for AST...")
random_search_ast.fit(X_train, y_train_ast)

print("\n✓ Search complete!")
print("\nBest parameters:")
for param, value in random_search_ast.best_params_.items():
  print(f"  {param}: {value}")

# Evaluate (best_estimator_ is refit on the full training set by the search)
best_xgb_ast = random_search_ast.best_estimator_
y_pred_train = best_xgb_ast.predict(X_train)
y_pred_val = best_xgb_ast.predict(X_val)

train_mae = mean_absolute_error(y_train_ast, y_pred_train)
val_mae = mean_absolute_error(y_val_ast, y_pred_val)

print("\n📊 Best XGBoost for AST:")
print(f"  Train MAE: {train_mae:.3f}")
print(f"  Val MAE:   {val_mae:.3f}")
print(f"  Gap:       {val_mae - train_mae:.3f}")

# The default-XGBoost number is read from the earlier run (metrics_ast)
# rather than retyped, so it cannot go stale.
print("\n🎯 Comparison:")
print(f"  Linear Model:     {LINEAR_MAE_AST:.3f} MAE")
print(f"  XGBoost Default:  {metrics_ast['val_mae']:.3f} MAE")
print(f"  XGBoost Tuned:    {val_mae:.3f} MAE")
print(f"  Tier 1 Goal:      {TIER1_GOAL_AST:.2f} MAE")

if val_mae <= TIER1_GOAL_AST:
  print("  ✅ HIT TIER 1 GOAL!")
elif val_mae < LINEAR_MAE_AST:
  print("  ✅ Beat linear model!")
else:
  print("  ❌ Still need improvement")

HYPERPARAMETER TUNING FOR AST

Starting search for AST...
Fitting 3 folds for each of 50 candidates, totalling 150 fits

✓ Search complete!

Best parameters:
  subsample: 1.0
  reg_lambda: 2
  reg_alpha: 0
  n_estimators: 100
  min_child_weight: 1
  max_depth: 3
  learning_rate: 0.05
  colsample_bytree: 0.8

📊 Best XGBoost for AST:
  Train MAE: 1.520
  Val MAE:   1.543
  Gap:       0.023

🎯 Comparison:
  Linear Model:     1.539 MAE
  XGBoost Default:  1.612 MAE
  XGBoost Tuned:    1.543 MAE
  Tier 1 Goal:      1.51 MAE
  ❌ Still need improvement


In [9]:
print("="*70)
print("TRYING LIGHTGBM (ALTERNATIVE TREE ALGORITHM)")
print("="*70)
print("LightGBM uses leaf-wise growth vs XGBoost's level-wise")

# LightGBM parameter grid
param_distributions_lgb = {
  'num_leaves': [15, 31, 63],  # LightGBM uses leaves instead of depth
  'learning_rate': [0.01, 0.05, 0.1],
  'n_estimators': [100, 200, 300],
  'min_child_samples': [20, 30, 50],
  'subsample': [0.7, 0.8, 0.9, 1.0],
  'colsample_bytree': [0.7, 0.8, 0.9, 1.0],
  'reg_alpha': [0, 0.1, 1.0],
  'reg_lambda': [1, 2, 5]
}


def _new_lgbm():
  """Return an unfitted LGBMRegressor configured for the random search.

  ``subsample_freq=1`` turns row bagging on: LightGBM ignores ``subsample``
  while the bagging frequency is 0 (its default), which would make the
  ``subsample`` entries of the search grid a no-op. ``n_jobs=1`` keeps each
  fit single-threaded because the search already parallelises across fits.
  """
  return lgb.LGBMRegressor(random_state=42, n_jobs=1, subsample_freq=1, verbose=-1)


def _search_lgbm(estimator, y_train):
  """Run the shared randomized search for one target and return the fitted search.

  Args:
    estimator: Unfitted LightGBM regressor to tune.
    y_train: Training target aligned with ``X_train``.

  Returns:
    The fitted ``RandomizedSearchCV`` object.
  """
  search = RandomizedSearchCV(
    estimator,
    param_distributions=param_distributions_lgb,
    n_iter=50,
    scoring='neg_mean_absolute_error',
    cv=3,
    random_state=42,
    n_jobs=-1,
    verbose=1
  )
  search.fit(X_train, y_train)
  return search


print("\n1. Tuning LightGBM for PTS...")
lgb_model = _new_lgbm()
random_search_lgb_pts = _search_lgbm(lgb_model, y_train_pts)

best_lgb_pts = random_search_lgb_pts.best_estimator_
y_pred_val_pts = best_lgb_pts.predict(X_val)
val_mae_pts = mean_absolute_error(y_val_pts, y_pred_val_pts)

print(f"\n   LightGBM PTS Val MAE: {val_mae_pts:.3f}")
print(f"   Best params: {random_search_lgb_pts.best_params_}")

print("\n2. Tuning LightGBM for REB...")
random_search_lgb_reb = _search_lgbm(_new_lgbm(), y_train_reb)

best_lgb_reb = random_search_lgb_reb.best_estimator_
y_pred_val_reb = best_lgb_reb.predict(X_val)
val_mae_reb = mean_absolute_error(y_val_reb, y_pred_val_reb)

print(f"\n   LightGBM REB Val MAE: {val_mae_reb:.3f}")

print("\n3. Tuning LightGBM for AST...")
random_search_lgb_ast = _search_lgbm(_new_lgbm(), y_train_ast)

best_lgb_ast = random_search_lgb_ast.best_estimator_
y_pred_val_ast = best_lgb_ast.predict(X_val)
val_mae_ast = mean_absolute_error(y_val_ast, y_pred_val_ast)

print(f"\n   LightGBM AST Val MAE: {val_mae_ast:.3f}")

print("\n" + "="*70)
print("LIGHTGBM SUMMARY")
print("="*70)
print(f"  PTS: {val_mae_pts:.3f} (Goal: ≤5.09)")
print(f"  REB: {val_mae_reb:.3f} (Goal: ≤1.97)")
print(f"  AST: {val_mae_ast:.3f} (Goal: ≤1.51)")

TRYING LIGHTGBM (ALTERNATIVE TREE ALGORITHM)
LightGBM uses leaf-wise growth vs XGBoost's level-wise

1. Tuning LightGBM for PTS...
Fitting 3 folds for each of 50 candidates, totalling 150 fits

   LightGBM PTS Val MAE: 5.299
   Best params: {'subsample': 1.0, 'reg_lambda': 2, 'reg_alpha': 0.1, 'num_leaves': 15, 'n_estimators': 100, 'min_child_samples': 50, 'learning_rate': 0.05, 'colsample_bytree': 0.8}

2. Tuning LightGBM for REB...
Fitting 3 folds for each of 50 candidates, totalling 150 fits

   LightGBM REB Val MAE: 2.086

3. Tuning LightGBM for AST...
Fitting 3 folds for each of 50 candidates, totalling 150 fits

   LightGBM AST Val MAE: 1.542

LIGHTGBM SUMMARY
  PTS: 5.299 (Goal: ≤5.09)
  REB: 2.086 (Goal: ≤1.97)
  AST: 1.542 (Goal: ≤1.51)


In [11]:
print("="*70)
print("COMPREHENSIVE MODEL COMPARISON")
print("="*70)


def _val_maes(models):
  """Validation MAE, rounded to 3 decimals, for (PTS, REB, AST) models in that order."""
  val_targets = (y_val_pts, y_val_reb, y_val_ast)
  return [
    round(float(mean_absolute_error(y_true, model.predict(X_val))), 3)
    for model, y_true in zip(models, val_targets)
  ]


# Linear / Ridge numbers are the reference values carried over from the
# baseline notebook. The tuned tree-model rows are recomputed from the fitted
# estimators so the table cannot drift from the searches run above.
mae_rows = [
  [5.293, 2.081, 1.539],                                # Linear Regression
  [5.296, 2.080, 1.539],                                # Ridge (best)
  _val_maes((best_xgb_pts, best_xgb_reb, best_xgb_ast)),  # XGBoost (tuned)
  _val_maes((best_lgb_pts, best_lgb_reb, best_lgb_ast)),  # LightGBM (tuned)
]

# Create comparison table
comparison = pd.DataFrame({
  'Model': ['Linear Regression', 'Ridge (best)', 'XGBoost (tuned)', 'LightGBM (tuned)'],
  'PTS MAE': [row[0] for row in mae_rows],
  'REB MAE': [row[1] for row in mae_rows],
  'AST MAE': [row[2] for row in mae_rows]
})

print(comparison.to_string(index=False))

print("\n" + "="*70)
print("TIER 1 GOALS (10% improvement over rolling avg baseline)")
print("="*70)
print("  PTS: ≤5.09 MAE  (current best: 5.293, need -0.203)")
print("  REB: ≤1.97 MAE  (current best: 2.080, need -0.110)")
print("  AST: ≤1.51 MAE  (current best: 1.539, need -0.029) ⭐ SO CLOSE!")

print("\n" + "="*70)
print("KEY FINDINGS")
print("="*70)
print("✅ All models perform essentially the same (~5.29 PTS, ~2.08 REB, ~1.54 AST)")
print("✅ Tree models chose shallow configs (max_depth=3, num_leaves=15)")
print("✅ Conclusion: Relationships are mostly LINEAR with current 45 features")
print("\n⚠️  HITTING A PLATEAU - Need new information to break through!")

COMPREHENSIVE MODEL COMPARISON
            Model  PTS MAE  REB MAE  AST MAE
Linear Regression    5.293    2.081    1.539
     Ridge (best)    5.296    2.080    1.539
  XGBoost (tuned)    5.305    2.085    1.543
 LightGBM (tuned)    5.299    2.086    1.542

TIER 1 GOALS (10% improvement over rolling avg baseline)
  PTS: ≤5.09 MAE  (current best: 5.293, need -0.203)
  REB: ≤1.97 MAE  (current best: 2.080, need -0.110)
  AST: ≤1.51 MAE  (current best: 1.539, need -0.029) ⭐ SO CLOSE!

KEY FINDINGS
✅ All models perform essentially the same (~5.29 PTS, ~2.08 REB, ~1.54 AST)
✅ Tree models chose shallow configs (max_depth=3, num_leaves=15)
✅ Conclusion: Relationships are mostly LINEAR with current 45 features

⚠️  HITTING A PLATEAU - Need new information to break through!


Exception ignored in: <function ResourceTracker.__del__ at 0x10b3853a0>
Traceback (most recent call last):
  File "/opt/homebrew/Cellar/python@3.13/3.13.7/Frameworks/Python.framework/Versions/3.13/lib/python3.13/multiprocessing/resource_tracker.py", line 84, in __del__
  File "/opt/homebrew/Cellar/python@3.13/3.13.7/Frameworks/Python.framework/Versions/3.13/lib/python3.13/multiprocessing/resource_tracker.py", line 93, in _stop
  File "/opt/homebrew/Cellar/python@3.13/3.13.7/Frameworks/Python.framework/Versions/3.13/lib/python3.13/multiprocessing/resource_tracker.py", line 118, in _stop_locked
ChildProcessError: [Errno 10] No child processes
Exception ignored in: <function ResourceTracker.__del__ at 0x10b7853a0>
Traceback (most recent call last):
  File "/opt/homebrew/Cellar/python@3.13/3.13.7/Frameworks/Python.framework/Versions/3.13/lib/python3.13/multiprocessing/resource_tracker.py", line 84, in __del__
  File "/opt/homebrew/Cellar/python@3.13/3.13.7/Frameworks/Python.framework/Versi