# 📡 Model Monitoring & Alerting

**Author**: Data Science Master System  
**Difficulty**: ⭐⭐⭐⭐ Advanced  
**Time**: 45 minutes  
**Prerequisites**: 28_cloud_deployment

## Learning Objectives
- Data drift detection
- Model performance monitoring
- Alert systems
- Observability dashboards

In [None]:
import numpy as np
import pandas as pd
from scipy import stats
from datetime import datetime, timedelta

# Seed NumPy's global RNG so the randomly generated demo data is reproducible.
np.random.seed(42)

## 1. Data Drift Detection

In [None]:
class DriftDetector:
    """Detect distribution drift between reference data and new data.

    Each feature is compared with a two-sample Kolmogorov-Smirnov test; a
    feature is flagged as drifted when the p-value is below ``threshold``.

    Args:
        reference_data: Baseline data, indexable by feature name. ``check_all``
            additionally requires a ``columns`` attribute (e.g. a DataFrame).
        threshold: Significance level for the KS test (default 0.05).
    """

    # Passed explicitly to the result frame so that an empty result still
    # carries the expected columns.
    _RESULT_COLUMNS = ['feature', 'drift_detected', 'p_value', 'statistic']

    def __init__(self, reference_data, threshold=0.05):
        self.reference = reference_data
        self.threshold = threshold

    @staticmethod
    def _is_numeric(series):
        """Return True for numeric, non-boolean columns.

        ``np.issubdtype`` raises ``TypeError`` on pandas extension dtypes
        (categorical, nullable integers, ...); the pandas helpers accept them.
        """
        dtype = series.dtype
        return (pd.api.types.is_numeric_dtype(dtype)
                and not pd.api.types.is_bool_dtype(dtype))

    @staticmethod
    def _clean(values):
        """Drop missing values and return a plain float array for the KS test."""
        return pd.Series(values).dropna().to_numpy(dtype=float)

    def detect_drift(self, new_data, feature):
        """Run a KS test for a single feature.

        Missing values are dropped from both samples before testing. If either
        sample is empty afterwards, the statistic and p-value are NaN and no
        drift is reported.

        Args:
            new_data: Data to compare against the reference, indexable by
                feature name.
            feature: Name of the feature to test.

        Returns:
            dict with keys ``feature``, ``drift_detected``, ``p_value`` and
            ``statistic``.
        """
        ref_sample = self._clean(self.reference[feature])
        new_sample = self._clean(new_data[feature])

        if ref_sample.size == 0 or new_sample.size == 0:
            # Nothing to compare; NaN < threshold is False, so no drift is flagged.
            stat = p_value = float('nan')
        else:
            stat, p_value = stats.ks_2samp(ref_sample, new_sample)

        return {
            'feature': feature,
            'drift_detected': p_value < self.threshold,
            'p_value': p_value,
            'statistic': stat
        }

    def check_all(self, new_data):
        """Test every numeric feature shared by the reference and ``new_data``.

        Columns missing from ``new_data`` or non-numeric on either side are
        skipped.

        Returns:
            pandas.DataFrame with one row per tested feature and the columns
            ``feature``, ``drift_detected``, ``p_value``, ``statistic``
            (present even when no feature was tested).
        """
        results = [
            self.detect_drift(new_data, col)
            for col in self.reference.columns
            if col in new_data.columns
            and self._is_numeric(self.reference[col])
            and self._is_numeric(new_data[col])
        ]
        return pd.DataFrame(results, columns=self._RESULT_COLUMNS)

# Demo: compare a reference sample against a deliberately shifted one.
reference = pd.DataFrame({
    'age': np.random.normal(35, 10, 1000),
    'income': np.random.normal(50000, 15000, 1000),
})
# Drifted on purpose: both means are shifted (age 35 -> 40, income 50000 -> 55000).
new_data = pd.DataFrame({
    'age': np.random.normal(40, 10, 500),
    'income': np.random.normal(55000, 15000, 500),
})

detector = DriftDetector(reference)
drift_results = detector.check_all(new_data)
print("📊 Drift Detection:")
display(drift_results)  # display() is provided by the IPython/Jupyter environment

## 2. Performance Monitoring

In [None]:
class PerformanceMonitor:
    """Track prediction accuracy over the most recent logged samples.

    Args:
        window_size: Number of most recent samples used by ``get_metrics``.
            Must be at least 1.

    Raises:
        ValueError: If ``window_size`` is smaller than 1.
    """

    def __init__(self, window_size=100):
        # A window of 0 would make the slice ``[-0:]`` return the whole
        # history rather than nothing, so reject it up front.
        if window_size < 1:
            raise ValueError(f"window_size must be >= 1, got {window_size}")
        self.predictions = []
        self.actuals = []
        self.window = window_size

    def log(self, prediction, actual):
        """Record one prediction together with its observed outcome."""
        self.predictions.append(prediction)
        self.actuals.append(actual)

    def get_metrics(self):
        """Compute accuracy over the last ``window`` logged samples.

        Returns:
            dict with ``accuracy`` (fraction of matching pairs, NaN when
            nothing has been logged yet), ``sample_size`` and an ISO-format
            ``timestamp`` of when the metrics were computed.
        """
        recent_pred = self.predictions[-self.window:]
        recent_actual = self.actuals[-self.window:]
        sample_size = len(recent_pred)

        if sample_size:
            hits = sum(p == a for p, a in zip(recent_pred, recent_actual))
            accuracy = hits / sample_size
        else:
            # No data yet: report NaN instead of raising ZeroDivisionError.
            accuracy = float('nan')

        return {
            'accuracy': accuracy,
            'sample_size': sample_size,
            'timestamp': datetime.now().isoformat()
        }

# Demo: simulate 150 predictions whose outcome agrees with the prediction
# about 70% of the time, then report metrics.
monitor = PerformanceMonitor()
for _ in range(150):
    pred = np.random.choice([0, 1])
    # The actual label matches the prediction with probability 0.7.
    actual = np.random.choice([0, 1], p=[0.3, 0.7] if pred == 1 else [0.7, 0.3])
    monitor.log(pred, actual)

print("📈 Performance Metrics:")
print(monitor.get_metrics())

## 3. Alert System

In [None]:
class AlertManager:
    """Compare metric values against thresholds and record alerts.

    ``thresholds`` maps a metric name to its limit. ``accuracy`` alerts when
    the value falls below its threshold; ``latency_ms`` and ``error_rate``
    alert when the value exceeds it. Other metric names are ignored.
    Triggered alerts are appended to ``alerts`` as dicts with ``message`` and
    ``time`` keys.
    """

    def __init__(self):
        self.alerts = []
        self.thresholds = {
            'accuracy': 0.85,
            'latency_ms': 200,
            'error_rate': 0.05
        }

    def check(self, metric, value):
        """Trigger an alert if ``value`` breaches the threshold for ``metric``.

        Args:
            metric: Metric name; looked up in ``self.thresholds``.
            value: Current value of the metric.
        """
        threshold = self.thresholds.get(metric)
        # Compare against None explicitly: a threshold of 0 is a valid limit
        # and must not be treated as "no threshold configured".
        if threshold is None:
            return

        if metric == 'accuracy' and value < threshold:
            self._trigger(f"⚠️ Low accuracy: {value:.2%} < {threshold:.2%}")
        elif metric in ('latency_ms', 'error_rate') and value > threshold:
            self._trigger(f"🚨 High {metric}: {value} > {threshold}")

    def _trigger(self, message):
        """Record the alert with a timestamp and print it."""
        alert = {'message': message, 'time': datetime.now().isoformat()}
        self.alerts.append(alert)
        print(f"ALERT: {message}")
        # In production: send Slack/email/PagerDuty
        # In production: send Slack/email/PagerDuty

# Demo: both readings breach the default thresholds (0.78 < 0.85 accuracy,
# 250 > 200 ms latency), so each call raises an alert.
alerts = AlertManager()
alerts.check(metric='accuracy', value=0.78)
alerts.check(metric='latency_ms', value=250)

## 4. Prometheus Metrics

In [None]:
# Example Prometheus instrumentation for a prediction API. It is kept as a
# string and only printed, so prometheus_client does not need to be installed.
prometheus_config = '''
from prometheus_client import Counter, Histogram, Gauge, start_http_server

# Define metrics
predictions = Counter('model_predictions_total', 'Total predictions', ['class'])
latency = Histogram('prediction_latency_seconds', 'Prediction latency')
accuracy = Gauge('model_accuracy', 'Current model accuracy')

# Use in API
@app.post("/predict")
@latency.time()
def predict(data):
    result = model.predict(data)
    # 'class' is a reserved word, so the label value is passed positionally
    predictions.labels(str(result)).inc()
    return result

# Export metrics
start_http_server(9090)  # Prometheus scrapes this
'''
print("📊 Prometheus Integration:")
print(prometheus_config)

## 🎯 Key Takeaways
- Drift detection prevents silent failures
- Real-time monitoring catches issues
- Alerts enable fast response
- Dashboards provide visibility

**🎉 Congratulations!** You've completed the entire curriculum!