# NBA Injury Prediction - Exploratory Analysis

This notebook provides exploratory data analysis and interactive model testing.

In [None]:
import sys
sys.path.append('../src')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from data_loader import NBADataLoader
from preprocessing import NBAPreprocessor
from train import InjuryPredictor
from evaluate import ModelEvaluator
from explainability import SHAPAnalyzer

# Notebook-wide plot defaults: seaborn's 'whitegrid' theme, then a 12 x 6
# default figure size (applied after the theme, as before).
sns.set_style('whitegrid')
plt.rc('figure', figsize=(12, 6))

## 1. Load Data

In [None]:
# Load data
# The loader is given the relative path '../data/raw'; the result of
# merge_datasets() is kept as `df` and reused by the cells below.
# NOTE(review): presumably merge_datasets() joins several raw files into one
# table - confirm against data_loader.
loader = NBADataLoader('../data/raw')
df = loader.merge_datasets()

# Report the dimensions, then leave df.head() as the cell's last expression
# so the notebook renders a preview of the first rows.
print(f"Dataset shape: {df.shape}")
df.head()

## 2. Exploratory Data Analysis

In [None]:
# Per-column null counts; `missing` keeps the full table for every column.
missing = df.isnull().sum()

# Show only the columns that actually contain nulls, largest count first.
# Left as the final expression so the notebook displays it.
(
    missing
    .loc[missing > 0]
    .sort_values(ascending=False)
)

In [None]:
# Distribution of injury labels
# The counts are computed once and reused for both the printed table and the
# bar chart. Note that value_counts() leaves out missing labels by default,
# so rows with a null 'injury' value are not represented here.
if 'injury' in df.columns:
    injury_counts = df['injury'].value_counts()
    print(injury_counts)
    injury_counts.plot(kind='bar', title='Injury Distribution')
    plt.xlabel('Injury Status')
    plt.ylabel('Count')
    plt.show()
else:
    # Make the skip visible instead of leaving the cell with no output.
    print("Column 'injury' not found in df; skipping injury distribution plot.")

## 3. Feature Engineering

In [None]:
# Apply feature engineering
# The preprocessor is configured with years 2010-2018 for training
# (range(2010, 2019) stops before 2019) and 2019-2020 for testing.
preprocessor = NBAPreprocessor(
    train_years=list(range(2010, 2019)),
    test_years=[2019, 2020]
)

# NOTE(review): `df` is overwritten with the engineered result, so re-running
# this cell passes the already-engineered frame back into
# engineer_features(). Re-run the data-loading cell first unless
# engineer_features() is known to be safe to apply twice.
df = preprocessor.engineer_features(df)
print(f"Features created. New shape: {df.shape}")
# Last expression of the cell: the notebook renders a preview of the result.
df.head()

## 4. Load Trained Model

In [None]:
# Load trained model
# NOTE(review): `model` is whatever load_model() returns - confirm it returns
# the fitted model rather than only storing it on `predictor`.
# NOTE(review): the .pkl extension suggests a pickle-based file; unpickling
# can execute arbitrary code, so only load model files from a trusted source.
predictor = InjuryPredictor()
model = predictor.load_model('../models/xgboost_injury.pkl')
# Reached whenever load_model() returns without raising.
print("Model loaded successfully!")

## 5. Make Predictions

In [None]:
# Prepare test data
# Split the engineered frame, then build the feature matrix and labels for
# the test portion. `train_df` is kept for the SHAP cell below.
# NOTE(review): presumably the split follows the train_years/test_years given
# to the preprocessor - confirm in preprocessing.
train_df, test_df = preprocessor.create_train_test_split(df)
X_test, y_test = preprocessor.prepare_features(test_df)

# Evaluate
# NOTE(review): threshold=0.2 is presumably the probability cut-off for the
# positive (injury) class - confirm in ModelEvaluator.
# The result is stored in `metrics` but not displayed by this cell; any
# output shown here comes from evaluate() itself.
evaluator = ModelEvaluator(model, threshold=0.2)
metrics = evaluator.evaluate(X_test, y_test, feature_names=preprocessor.feature_cols)

## 6. SHAP Analysis

In [None]:
# SHAP analysis
# Depends on `train_df` and `X_test` from the "Make Predictions" cell, and on
# `model` from the "Load Trained Model" cell; run those first.
# `y_train` is unpacked but not used in the cells shown here.
X_train, y_train = preprocessor.prepare_features(train_df)
# NOTE(review): X_train is presumably the background/reference data for the
# explainer - confirm in explainability.
shap_analyzer = SHAPAnalyzer(model, X_train, feature_names=preprocessor.feature_cols)
# Explanations are computed for the test features and kept in `shap_values`.
shap_values = shap_analyzer.analyze(X_test)