In [1]:
# ============================================================================
# DEPLOYMENT PREPARATION NOTEBOOK
# ============================================================================

import joblib
import json
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

# ============================================================================
# SECTION 1: LOAD MODEL ARTIFACTS
# ============================================================================

print("="*80)
print("LOADING MODEL ARTIFACTS")
print("="*80)

# Specify model version (use latest)
MODEL_VERSION = "20251107_0700"  # Update with your version

# Load model
model = joblib.load(f'../models/rf_tuned_final_v{MODEL_VERSION}.pkl')
print("âœ… Model loaded")

# Load preprocessor
preprocessor = joblib.load(f'../models/preprocessor_v{MODEL_VERSION}.pkl')
print("âœ… Preprocessor loaded")

# Load config
with open(f'../models/model_config_v{MODEL_VERSION}.json', 'r') as f:
    config = json.load(f)
print("âœ… Config loaded")

# Load feature names
with open(f'../models/feature_names_v{MODEL_VERSION}.json', 'r') as f:
    feature_config = json.load(f)
cat_features = feature_config['categorical']
num_features = feature_config['numerical']
print("âœ… Features loaded")

THRESHOLD = config['threshold']
print(f"\nâœ… Model ready for deployment")
print(f"   Version: {MODEL_VERSION}")
print(f"   Threshold: {THRESHOLD}")

# ============================================================================
# SECTION 2: CREATE PREDICTION PIPELINE CLASS
# ============================================================================

class OutageRiskPredictor:
    """
    Production-ready outage risk predictor.
    
    Usage:
    ------
    predictor = OutageRiskPredictor()
    predictor.load_model(model_version='20250108_1430')
    predictions = predictor.predict(new_data)
    """
    
    def __init__(self):
        self.model = None
        self.preprocessor = None
        self.config = None
        self.feature_names = None
        self.threshold = None
        
    def load_model(self, model_version=None, models_dir='../models'):
        """Load model artifacts"""
        if model_version is None:
            # Auto-detect latest version
            import glob
            model_files = glob.glob(f'{models_dir}/model_config_v*.json')
            if not model_files:
                raise FileNotFoundError("No model found")
            latest_config = max(model_files)
            model_version = latest_config.split('_v')[1].replace('.json', '')
        
        # Load artifacts
        self.model = joblib.load(f'{models_dir}/rf_tuned_final_v{model_version}.pkl')
        self.preprocessor = joblib.load(f'{models_dir}/preprocessor_v{model_version}.pkl')
        
        with open(f'{models_dir}/model_config_v{model_version}.json', 'r') as f:
            self.config = json.load(f)
        
        with open(f'{models_dir}/feature_names_v{model_version}.json', 'r') as f:
            feature_config = json.load(f)
            self.feature_names = feature_config['all_features']
        
        self.threshold = self.config['threshold']
        
        print(f"âœ… Model v{model_version} loaded")
        print(f"   Threshold: {self.threshold}")
        print(f"   Features: {len(self.feature_names)}")
        
        return self
    
    def predict(self, X, return_probabilities=True):
        """
        Make predictions on new data.
        
        Parameters:
        -----------
        X : pd.DataFrame
            Input features (must have all required features)
        return_probabilities : bool
            If True, return probabilities; else return binary predictions
            
        Returns:
        --------
        pd.DataFrame with columns:
            - risk_probability (0-1)
            - risk_level (Low/Medium/High)
            - is_high_risk (binary at threshold)
        """
        if self.model is None:
            raise ValueError("Model not loaded. Call load_model() first.")
        
        # Validate features
        missing = set(self.feature_names) - set(X.columns)
        if missing:
            raise ValueError(f"Missing features: {missing}")
        
        # Select features in correct order
        X_selected = X[self.feature_names]
        
        # Predict probabilities
        probabilities = self.model.predict_proba(X_selected)[:, 1]
        
        # Apply threshold
        predictions = (probabilities >= self.threshold).astype(int)
        
        # Risk levels
        risk_levels = pd.cut(
            probabilities,
            bins=[0, 0.3, 0.6, 1.0],
            labels=['Low', 'Medium', 'High']
        )
        
        # Create results dataframe
        results = pd.DataFrame({
            'risk_probability': probabilities,
            'risk_level': risk_levels,
            'is_high_risk': predictions
        }, index=X.index)
        
        return results
    
    def predict_batch(self, X, batch_size=10000):
        """Predict in batches for large datasets"""
        results = []
        for i in range(0, len(X), batch_size):
            batch = X.iloc[i:i+batch_size]
            batch_results = self.predict(batch)
            results.append(batch_results)
        
        return pd.concat(results)
    
    def explain_prediction(self, X, top_n=5):
        """
        Get top contributing features for each prediction.
        Uses feature values as proxy for importance.
        
        For production: Integrate SHAP for true explanations.
        """
        # This is simplified - in production use SHAP
        feature_values = X[self.feature_names].values
        feature_importance = np.abs(feature_values)
        
        explanations = []
        for i in range(len(X)):
            top_indices = np.argsort(feature_importance[i])[-top_n:][::-1]
            top_features = [
                {
                    'feature': self.feature_names[idx],
                    'value': float(feature_values[i][idx])
                }
                for idx in top_indices
            ]
            explanations.append(top_features)
        
        return explanations

# Initialize predictor
predictor = OutageRiskPredictor()
predictor.load_model(model_version=MODEL_VERSION)

# ============================================================================
# SECTION 3: TEST PREDICTION PIPELINE
# ============================================================================

print("\n" + "="*80)
print("TESTING PREDICTION PIPELINE")
print("="*80)

# Load test data
test_data = pd.read_csv(f'../models/test_sample_v{MODEL_VERSION}.csv')
X_test_sample = test_data[feature_config['all_features']]
y_test_sample = test_data['y_outage_next_48h']

# Make predictions
predictions = predictor.predict(X_test_sample)

# Evaluate
from sklearn.metrics import classification_report, accuracy_score

print("\nðŸ“Š Test Sample Performance:")
print(f"Accuracy: {accuracy_score(y_test_sample, predictions['is_high_risk']):.4f}")
print("\nClassification Report:")
print(classification_report(y_test_sample, predictions['is_high_risk']))

# Show sample predictions
print("\nðŸ“‹ Sample Predictions:")
sample_df = pd.concat([
    test_data[['county']].head(10),
    predictions.head(10)
], axis=1)
print(sample_df.to_string())

# ============================================================================
# SECTION 4: CREATE ACTION RECOMMENDER
# ============================================================================

class ActionRecommender:
    """Convert risk predictions to actionable recommendations"""
    
    def __init__(self):
        self.actions = {
            'SME': [
                {
                    'priority': 1,
                    'action': 'Reschedule heavy machinery to off-peak hours (2-5 PM)',
                    'cost': 'Low',
                    'impact': 'Avoid production halt',
                    'triggers': ['high_risk', 'peak_hours']
                },
                {
                    'priority': 2,
                    'action': 'Pre-charge backup batteries to full capacity',
                    'cost': 'Low',
                    'impact': 'Ensure 4-6 hour continuity',
                    'triggers': ['high_risk']
                },
                {
                    'priority': 3,
                    'action': 'Test and fuel backup generator',
                    'cost': 'Medium',
                    'impact': 'Extended backup capability',
                    'triggers': ['high_risk', 'extended_window']
                }
            ],
            'Telecom': [
                {
                    'priority': 1,
                    'action': 'Switch critical towers to battery mode at 5:30 PM',
                    'cost': 'Low',
                    'impact': 'Prevent service interruption',
                    'triggers': ['high_risk', 'planned_outage']
                },
                {
                    'priority': 2,
                    'action': 'Deploy mobile genset to high-traffic tower',
                    'cost': 'High',
                    'impact': 'Maintain coverage in critical area',
                    'triggers': ['high_risk', 'extended_window']
                },
                {
                    'priority': 3,
                    'action': 'Alert field response team for rapid deployment',
                    'cost': 'Low',
                    'impact': 'Faster restoration if outage occurs',
                    'triggers': ['high_risk']
                }
            ],
            'Household': [
                {
                    'priority': 1,
                    'action': 'Charge all phones and power banks',
                    'cost': 'None',
                    'impact': 'Stay connected during outage',
                    'triggers': ['high_risk']
                },
                {
                    'priority': 2,
                    'action': 'Delay washing machine/dishwasher to after 10 PM',
                    'cost': 'None',
                    'impact': 'Reduce grid stress, avoid interruption',
                    'triggers': ['high_risk', 'peak_hours']
                },
                {
                    'priority': 3,
                    'action': 'Prepare flashlights/candles, store drinking water',
                    'cost': 'Low',
                    'impact': 'Emergency preparedness',
                    'triggers': ['high_risk', 'extended_window']
                }
            ]
        }
    
    def recommend(self, risk_df, persona='SME', county=None):
        """
        Generate top 3 actions for high-risk periods.
        
        Parameters:
        -----------
        risk_df : pd.DataFrame
            Output from OutageRiskPredictor.predict()
        persona : str
            One of ['SME', 'Telecom', 'Household']
        county : str, optional
            Filter recommendations for specific county
            
        Returns:
        --------
        pd.DataFrame with recommendations
        """
        # Filter high-risk periods
        high_risk = risk_df[risk_df['is_high_risk'] == 1].copy()
        
        if len(high_risk) == 0:
            return pd.DataFrame({
                'message': ['No high-risk periods detected. Continue normal operations.']
            })
        
        # Get actions for persona
        actions = self.actions[persona]
        
        # For each high-risk period, assign top 3 actions
        recommendations = []
        for idx, row in high_risk.iterrows():
            rec = {
                'index': idx,
                'risk_probability': row['risk_probability'],
                'risk_level': row['risk_level']
            }
            
            # Add top 3 actions
            for i, action in enumerate(actions[:3], 1):
                rec[f'action_{i}'] = action['action']
                rec[f'action_{i}_cost'] = action['cost']
                rec[f'action_{i}_impact'] = action['impact']
            
            recommendations.append(rec)
        
        return pd.DataFrame(recommendations)

# Test recommender
recommender = ActionRecommender()

print("\n" + "="*80)
print("TESTING ACTION RECOMMENDER")
print("="*80)

# Get recommendations for sample
recommendations_sme = recommender.recommend(predictions.head(20), persona='SME')
print("\nðŸ“‹ Sample Recommendations (SME):")
print(recommendations_sme[['index', 'risk_probability', 'action_1', 'action_2']].head())

# ============================================================================
# SECTION 5: CREATE TIERED ALERT SYSTEM
# ============================================================================

def create_tiered_alerts(predictions, budget_tiers=None):
    """
    Create tiered alert system based on risk probability.
    
    Parameters:
    -----------
    predictions : pd.DataFrame
        Output from predictor.predict()
    budget_tiers : dict
        Alert budgets per tier, e.g. {'critical': 0.05, 'high': 0.10, 'medium': 0.20}
        
    Returns:
    --------
    pd.DataFrame with alert tiers assigned
    """
    if budget_tiers is None:
        budget_tiers = {
            'critical': 0.05,  # Top 5% - Emergency response
            'high': 0.10,      # Top 10% - Proactive measures
            'medium': 0.20,    # Top 20% - Monitoring
        }
    
    results = predictions.copy()
    
    # Calculate percentile thresholds
    critical_threshold = np.quantile(results['risk_probability'], 1 - budget_tiers['critical'])
    high_threshold = np.quantile(results['risk_probability'], 1 - budget_tiers['high'])
    medium_threshold = np.quantile(results['risk_probability'], 1 - budget_tiers['medium'])
    
    # Assign tiers
    results['alert_tier'] = 'low'
    results.loc[results['risk_probability'] >= medium_threshold, 'alert_tier'] = 'medium'
    results.loc[results['risk_probability'] >= high_threshold, 'alert_tier'] = 'high'
    results.loc[results['risk_probability'] >= critical_threshold, 'alert_tier'] = 'critical'
    
    # Recommended actions
    results['recommended_action'] = results['alert_tier'].map({
        'critical': 'IMMEDIATE ACTION - Deploy emergency resources',
        'high': 'PROACTIVE - Implement backup plans',
        'medium': 'MONITOR - Increase vigilance',
        'low': 'NORMAL - Continue operations'
    })
    
    return results

# Test tiered alerts
alerts = create_tiered_alerts(predictions)

print("\n" + "="*80)
print("TIERED ALERT SYSTEM")
print("="*80)
print("\nAlert Distribution:")
print(alerts['alert_tier'].value_counts())
print("\nSample Critical Alerts:")
print(alerts[alerts['alert_tier'] == 'critical'][['risk_probability', 'alert_tier', 'recommended_action']].head())

# ============================================================================
# SECTION 6: SAVE DEPLOYMENT ARTIFACTS
# ============================================================================

# Save predictor class
import cloudpickle

with open('../scripts/predictor.pkl', 'wb') as f:
    cloudpickle.dump(predictor, f)
print("\nâœ… Predictor saved to ../scripts/predictor.pkl")

# Save recommender class
with open('../scripts/recommender.pkl', 'wb') as f:
    cloudpickle.dump(recommender, f)
print("âœ… Recommender saved to ../scripts/recommender.pkl")

print("\n" + "="*80)
print("DEPLOYMENT PREPARATION COMPLETE")
print("="*80)
print("\nâœ… Ready for deployment!")
print("\nNext steps:")
print("1. Create API (FastAPI)")
print("2. Create Dashboard (Streamlit)")
print("3. Set up data pipeline")
print("4. Deploy to production")

LOADING MODEL ARTIFACTS
âœ… Model loaded
âœ… Preprocessor loaded
âœ… Config loaded
âœ… Features loaded

âœ… Model ready for deployment
   Version: 20251107_0700
   Threshold: 0.3
âœ… Model v20251107_0700 loaded
   Threshold: 0.3
   Features: 57

TESTING PREDICTION PIPELINE

ðŸ“Š Test Sample Performance:
Accuracy: 1.0000

Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      1000

    accuracy                           1.00      1000
   macro avg       1.00      1.00      1.00      1000
weighted avg       1.00      1.00      1.00      1000


ðŸ“‹ Sample Predictions:
    county  risk_probability risk_level  is_high_risk
0  samburu          0.055635        Low             0
1  samburu          0.056211        Low             0
2  samburu          0.056211        Low             0
3  samburu          0.056211        Low             0
4  samburu          0.056211        Low             0
5  samburu          0.056211   

KeyError: "None of [Index(['index', 'risk_probability', 'action_1', 'action_2'], dtype='object')] are in the [columns]"