# NBA Game Prediction - Model Deployment and Monitoring

This notebook focuses on deploying the model and setting up monitoring. We'll:
1. Create a prediction pipeline
2. Set up model monitoring
3. Create a simple API for predictions
4. Document the deployment process

## 1. Import Libraries
Import all necessary libraries for model deployment and monitoring.

In [None]:
import pandas as pd
import numpy as np
import joblib
from datetime import datetime
import json
import os
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
import seaborn as sns

# Set display options: show every column, cap printed rows at 100
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 100)

# Set plot style
# Matplotlib 3.6 renamed the bundled seaborn style sheets to 'seaborn-v0_8*'
# and 3.8 removed the old names, so use whichever name this install provides.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')
sns.set_palette('husl')

## 2. Load Model and Create Prediction Pipeline
Load the best model and create a prediction pipeline.

In [None]:
class NBAPredictionPipeline:
    """Wrap a trained classifier with input validation and prediction logging.

    The model is loaded with joblib and the ordered list of expected feature
    names is read from the ``selected_features`` column of a CSV file. Every
    prediction made through :meth:`predict` is appended to
    ``predictions_log``, which can be written out with
    :meth:`save_predictions_log`.
    """

    def __init__(self, model_path, feature_columns_path):
        """Load the model and the feature list.

        Args:
            model_path: Path to a joblib-serialised model. The model must
                expose ``predict`` and ``predict_proba``.
            feature_columns_path: Path to a CSV file containing a
                ``selected_features`` column.
        """
        # NOTE(review): joblib.load unpickles arbitrary objects; only load
        # model files from a trusted location.
        self.model = joblib.load(model_path)
        self.feature_columns = pd.read_csv(feature_columns_path)['selected_features'].tolist()
        self.predictions_log = []

    def preprocess_input(self, data):
        """Return ``data`` restricted to the model's features, in model order.

        Args:
            data: DataFrame holding at least every column in
                ``self.feature_columns``; extra columns are dropped.

        Returns:
            A DataFrame with exactly ``self.feature_columns`` as columns.

        Raises:
            ValueError: If any required feature is absent. All missing
                features are listed so the caller can fix them in one pass.
        """
        missing = [feature for feature in self.feature_columns if feature not in data.columns]
        if missing:
            # With a single missing feature the message is unchanged from the
            # historical one-at-a-time wording.
            raise ValueError(f'Missing required feature: {", ".join(missing)}')

        # Select only required features
        return data[self.feature_columns]

    def predict(self, data):
        """Predict for every row of ``data`` and record each prediction.

        Args:
            data: DataFrame accepted by :meth:`preprocess_input`.

        Returns:
            Tuple ``(predictions, probabilities)`` where ``predictions`` is
            the output of ``model.predict`` and ``probabilities`` is column 1
            of ``model.predict_proba``.

        Raises:
            ValueError: If required features are missing from ``data``.
        """
        X = self.preprocess_input(data)

        predictions = self.model.predict(X)
        # Column 1 is the probability of the model's second class
        # (presumably the positive / "win" class -- confirm against training).
        probabilities = self.model.predict_proba(X)[:, 1]

        # One timestamp per batch: all rows of a request share it.
        timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        for i, (pred, prob) in enumerate(zip(predictions, probabilities)):
            self.predictions_log.append({
                'timestamp': timestamp,
                'prediction': int(pred),
                'probability': float(prob),
                'input_data': X.iloc[i].to_dict()
            })

        return predictions, probabilities

    def save_predictions_log(self, path):
        """Write the accumulated prediction log to ``path`` as indented JSON.

        The file is overwritten on every call.
        """
        # Explicit encoding keeps the output independent of the platform default.
        with open(path, 'w', encoding='utf-8') as f:
            json.dump(self.predictions_log, f, indent=2)

# Build the shared pipeline instance from the saved model artifact and the
# CSV listing the selected feature names (paths are relative to the notebook).
pipeline = NBAPredictionPipeline(
    '../models/best_model.joblib',
    '../data/processed/selected_features.csv',
)

## 3. Model Monitoring Setup
Set up monitoring for model performance and data drift.

In [None]:
class ModelMonitor:
    """Accumulate model performance metrics and per-feature statistics.

    Each ``update_*`` call appends a timestamped snapshot, so the stored
    lists form a time series that the ``plot_*`` methods visualise and
    :meth:`save_monitoring_data` writes to CSV.
    """

    def __init__(self, pipeline):
        """Create an empty monitor attached to ``pipeline``."""
        self.pipeline = pipeline
        # List of dicts: timestamp, accuracy, mean_probability, std_probability
        self.performance_metrics = []
        # Maps feature name -> list of dicts: timestamp, mean, std
        self.feature_stats = {}

    def update_metrics(self, y_true, y_pred, y_prob):
        """Append an accuracy / probability-spread snapshot.

        Args:
            y_true: Ground-truth labels.
            y_pred: Predicted labels, aligned with ``y_true``.
            y_prob: Predicted probabilities for the same samples.
        """
        timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        metrics = {
            'timestamp': timestamp,
            'accuracy': accuracy_score(y_true, y_pred),
            'mean_probability': np.mean(y_prob),
            'std_probability': np.std(y_prob)
        }
        self.performance_metrics.append(metrics)

    def update_feature_stats(self, X):
        """Append the mean and standard deviation of each numeric column of ``X``.

        Non-numeric columns (for example identifiers sent along with a
        request) are skipped, because mean/std are undefined for them and
        would otherwise abort the whole update.

        Args:
            X: DataFrame of feature values for one batch.
        """
        timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        numeric = X.select_dtypes(include=['number', 'bool'])
        for feature in numeric.columns:
            # NOTE: pandas' sample std (ddof=1) is NaN for a single-row batch.
            self.feature_stats.setdefault(feature, []).append({
                'timestamp': timestamp,
                'mean': float(numeric[feature].mean()),
                'std': float(numeric[feature].std())
            })

    def plot_performance_trends(self):
        """Plot recorded accuracy over time; report and return if none exists."""
        # Guard mirrors plot_feature_drift: an empty DataFrame has no
        # 'timestamp' column and indexing it would raise KeyError.
        if not self.performance_metrics:
            print('No performance metrics available')
            return

        metrics_df = pd.DataFrame(self.performance_metrics)

        plt.figure(figsize=(12, 6))
        plt.plot(metrics_df['timestamp'], metrics_df['accuracy'], label='Accuracy')
        plt.title('Model Accuracy Over Time')
        plt.xlabel('Time')
        plt.ylabel('Accuracy')
        plt.xticks(rotation=45)
        plt.legend()
        plt.tight_layout()
        plt.show()

    def plot_feature_drift(self, feature):
        """Plot the recorded mean of ``feature`` over time with a +/-1 std band.

        Prints a message and returns when nothing was recorded for ``feature``.
        """
        if feature not in self.feature_stats:
            print(f'No data available for feature: {feature}')
            return

        stats_df = pd.DataFrame(self.feature_stats[feature])

        plt.figure(figsize=(12, 6))
        plt.plot(stats_df['timestamp'], stats_df['mean'], label='Mean')
        plt.fill_between(
            stats_df['timestamp'],
            stats_df['mean'] - stats_df['std'],
            stats_df['mean'] + stats_df['std'],
            alpha=0.2
        )
        plt.title(f'{feature} Distribution Over Time')
        plt.xlabel('Time')
        plt.ylabel('Value')
        plt.xticks(rotation=45)
        plt.legend()
        plt.tight_layout()
        plt.show()

    def save_monitoring_data(self, base_path):
        """Write the collected data as CSV files under ``base_path``.

        Produces ``performance_metrics.csv`` plus one
        ``feature_stats_<feature>.csv`` per tracked feature. The directory is
        created if it does not exist yet.
        """
        if base_path:
            os.makedirs(base_path, exist_ok=True)

        # Save performance metrics
        pd.DataFrame(self.performance_metrics).to_csv(
            f'{base_path}/performance_metrics.csv', index=False
        )

        # Save feature statistics
        for feature, stats in self.feature_stats.items():
            pd.DataFrame(stats).to_csv(
                f'{base_path}/feature_stats_{feature}.csv', index=False
            )

# Create the monitor that tracks metrics and feature statistics for the pipeline
monitor = ModelMonitor(pipeline=pipeline)

## 4. Create Simple API
Create a simple API for making predictions.

In [None]:
from flask import Flask, request, jsonify

app = Flask(__name__)


@app.route('/predict', methods=['POST'])
def predict():
    """Handle ``POST /predict``: run the pipeline on the posted JSON data.

    The body is passed to ``pd.DataFrame``, so it should map feature names to
    lists of values (one entry per game).

    Returns:
        JSON ``{'predictions': [...], 'probabilities': [...]}`` on success,
        or ``{'error': <message>}`` with HTTP status 400 on failure.
    """
    # silent=True returns None for a missing or malformed JSON body instead of
    # raising, so the client gets an explicit message rather than a confusing
    # "Missing required feature" error from an empty frame.
    data = request.get_json(silent=True)
    if data is None:
        return jsonify({'error': 'Request body must be valid JSON'}), 400

    try:
        input_df = pd.DataFrame(data)

        # Make predictions
        predictions, probabilities = pipeline.predict(input_df)
    except Exception as e:
        app.logger.exception('Prediction request failed')
        return jsonify({'error': str(e)}), 400

    # Update monitoring with the model's features only. This is best effort:
    # a monitoring problem must not discard a prediction that already succeeded.
    try:
        monitor.update_feature_stats(input_df[pipeline.feature_columns])
    except Exception:
        app.logger.exception('Feature monitoring update failed')

    # Prepare response
    response = {
        'predictions': predictions.tolist(),
        'probabilities': probabilities.tolist()
    }

    return jsonify(response)

if __name__ == '__main__':
    # The Werkzeug debugger lets anyone who can reach the server execute
    # arbitrary code, so debug mode is opt-in (FLASK_DEBUG=1) and off by default.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').strip().lower() in ('1', 'true', 'yes')
    app.run(debug=debug_enabled, port=5000)

## 5. Save Deployment Configuration
Save the deployment configuration and documentation.

In [None]:
# Create deployment directory
os.makedirs('../deployment', exist_ok=True)

# Save API code
# str(app) is only the object's repr ("<Flask '__main__'>"), not source code,
# so the deployable script is written from an explicit template instead.
# The template is standalone: it loads the model and feature list itself and
# serves /predict. NOTE(review): the notebook's prediction logging and
# ModelMonitor are not part of the generated file; port them if the deployed
# API needs monitoring.
APP_SOURCE = '''"""NBA game prediction API."""
import os

import joblib
import pandas as pd
from flask import Flask, jsonify, request

# Resolve artifacts relative to this file so the API can start from any directory.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
MODEL_PATH = os.path.join(BASE_DIR, "..", "models", "best_model.joblib")
FEATURES_PATH = os.path.join(BASE_DIR, "..", "data", "processed", "selected_features.csv")

model = joblib.load(MODEL_PATH)
feature_columns = pd.read_csv(FEATURES_PATH)["selected_features"].tolist()

app = Flask(__name__)


@app.route("/predict", methods=["POST"])
def predict():
    """Return predictions and probabilities for the posted game data."""
    try:
        input_df = pd.DataFrame(request.get_json())
        missing = [name for name in feature_columns if name not in input_df.columns]
        if missing:
            raise ValueError("Missing required feature: " + ", ".join(missing))
        features = input_df[feature_columns]
        response = {
            "predictions": model.predict(features).tolist(),
            "probabilities": model.predict_proba(features)[:, 1].tolist(),
        }
        return jsonify(response)
    except Exception as e:
        return jsonify({"error": str(e)}), 400


if __name__ == "__main__":
    app.run(port=5000)
'''

with open('../deployment/app.py', 'w', encoding='utf-8') as f:
    f.write(APP_SOURCE)

# Pinned runtime dependencies of the prediction API, one per line
requirements = [
    'flask==2.0.1',
    'pandas==1.3.3',
    'numpy==1.21.2',
    'scikit-learn==0.24.2',
    'joblib==1.0.1'
]

# Emit the pins newline-separated, with no trailing newline
with open('../deployment/requirements.txt', 'w') as requirements_file:
    print(*requirements, sep='\n', end='', file=requirements_file)

# README shipped next to the API: setup steps, request format, maintenance notes
readme = """# NBA Game Prediction API

This API provides predictions for NBA game outcomes based on team statistics and performance metrics.

## Setup
1. Install requirements: `pip install -r requirements.txt`
2. Run the API: `python app.py`

## Usage
Send POST requests to `/predict` with game data in JSON format.

Example request:
```json
{
    "NET_RATING": [5.2],
    "OFF_RATING": [110.5],
    "DEF_RATING": [105.3],
    "PACE": [100.2],
    "WIN_STREAK": [2],
    "ROLL_WIN_PCT_5": [0.6],
    "ROLL_WIN_PCT_10": [0.55],
    "REST_DAYS": [2],
    "IS_BACK_TO_BACK": [0]
}
```

## Monitoring
The API includes built-in monitoring for:
- Model performance metrics
- Feature drift detection
- Prediction logging

## Maintenance
Regular maintenance tasks:
1. Monitor model performance
2. Check for data drift
3. Retrain model if necessary
4. Update feature statistics
"""

# Write the README verbatim into the deployment directory
with open('../deployment/README.md', 'w') as readme_file:
    print(readme, end='', file=readme_file)

# Confirm where the generated files were written
print('Deployment files have been saved to ../deployment/')