# 🚀 Deployment Guide: Production Infrastructure & Scaling

This notebook provides comprehensive guidance for deploying KubeSentiment to production environments, covering infrastructure setup, scaling strategies, and operational best practices.

## 🎯 Learning Objectives

By the end of this notebook, you will:
1. Understand deployment strategies and environments
2. Learn infrastructure provisioning with Terraform
3. Master Kubernetes deployment with Helm
4. Implement scaling and high availability
5. Configure monitoring and alerting in production
6. Understand backup, recovery, and disaster recovery
7. Optimize costs and performance

## 🏗️ Deployment Architecture

### Environment Strategy

```
┌─────────────────┐    ┌─────────────────┐    ┌─────────────────┐
│   Development   │ -> │    Staging      │ -> │  Production     │
│                 │    │                 │    │                 │
│ • Local testing │    │ • Integration   │    │ • Live traffic  │
│ • Fast feedback │    │ • Load testing  │    │ • High avail.   │
│ • Cost: Low     │    │ • User accept.  │    │ • Auto-scaling  │
└─────────────────┘    └─────────────────┘    └─────────────────┘
```

### Infrastructure Components

- **Kubernetes Cluster**: Container orchestration
- **Load Balancer**: Traffic distribution
- **Monitoring Stack**: Prometheus + Grafana
- **Logging**: ELK stack or Loki
- **CI/CD Pipeline**: GitHub Actions
- **Secret Management**: Kubernetes secrets or external vault
- **Database**: For metadata and caching (optional)

In [None]:
# Setup and imports
import os
import json
import yaml
from pathlib import Path
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
# Suppress all warnings for the rest of the notebook session.
warnings.filterwarnings('ignore')

# Plot styling: matplotlib's default style with seaborn's "husl" palette.
plt.style.use('default')
sns.set_palette("husl")

# Project locations, expressed relative to the notebook's parent directory.
PROJECT_ROOT = Path("..")
INFRA_DIR = PROJECT_ROOT.joinpath("infrastructure")
HELM_DIR = PROJECT_ROOT.joinpath("helm")
DOCKER_DIR = PROJECT_ROOT  # Docker files are looked up directly under the project root

# Confirm the setup and show the resolved absolute locations.
print(
    "✅ Libraries imported successfully!",
    f"📁 Infrastructure directory: {INFRA_DIR.absolute()}",
    f"⚓️ Helm directory: {HELM_DIR.absolute()}",
    f"🐳 Docker directory: {DOCKER_DIR.absolute()}",
    sep="\n",
)

## 🏗️ Infrastructure as Code

Let's examine the Terraform infrastructure configuration.

In [None]:
# Analyze infrastructure configuration
def _read_text_stats(path):
    """Read *path* as UTF-8 text and return ``(content, stats)``.

    ``stats`` maps ``size`` to the character count and ``lines`` to the number
    of newline-separated segments (a trailing newline therefore adds one).
    """
    with open(path, 'r', encoding='utf-8') as f:
        content = f.read()
    return content, {"size": len(content), "lines": len(content.split('\n'))}


def analyze_infrastructure():
    """Summarise the Terraform, Helm and Docker files of the project.

    Reads the module-level paths ``INFRA_DIR``, ``HELM_DIR`` and
    ``PROJECT_ROOT``. Files are visited in sorted order so the result is
    deterministic across filesystems.

    Returns:
        dict with three keys:

        * ``terraform_files`` - one entry per ``*.tf`` file found recursively
          under ``INFRA_DIR``: ``file`` (path relative to ``PROJECT_ROOT``),
          ``size``, ``lines`` and the number of ``resource "``,
          ``variable "`` and ``output "`` occurrences. A file that cannot be
          read yields ``{"file": ..., "error": ...}`` instead.
        * ``helm_files`` - one entry per readable file under ``HELM_DIR``:
          ``file``, ``size``, ``lines`` and ``type`` (the file suffix).
          Unreadable files are skipped.
        * ``docker_files`` - one entry (``file``, ``size``, ``lines``) for
          each of ``Dockerfile``, ``docker-compose.yml`` and
          ``.dockerignore`` that exists under ``PROJECT_ROOT`` and is
          readable.
    """
    # Failures expected from unreadable or non-text files; anything else is a
    # programming error and should surface instead of being swallowed.
    read_errors = (OSError, UnicodeDecodeError)

    infra_analysis = {'terraform_files': []}

    # Check Terraform files
    for tf_file in sorted(INFRA_DIR.rglob("*.tf")):
        relative_name = str(tf_file.relative_to(PROJECT_ROOT))
        try:
            content, stats = _read_text_stats(tf_file)
        except read_errors as e:
            infra_analysis['terraform_files'].append({
                "file": relative_name,
                "error": str(e)
            })
            continue

        infra_analysis['terraform_files'].append({
            "file": relative_name,
            **stats,
            "resources": content.count('resource "'),
            "variables": content.count('variable "'),
            "outputs": content.count('output "')
        })

    # Check Helm charts
    helm_analysis = []
    for helm_file in sorted(HELM_DIR.rglob("*")):
        if not helm_file.is_file():
            continue
        try:
            _, stats = _read_text_stats(helm_file)
        except read_errors:
            # Best effort: every file below the chart directory is visited,
            # so anything that is not readable text is simply left out.
            continue

        helm_analysis.append({
            "file": str(helm_file.relative_to(PROJECT_ROOT)),
            **stats,
            "type": helm_file.suffix
        })

    infra_analysis['helm_files'] = helm_analysis

    # Check Docker files
    docker_analysis = []
    for docker_file in ('Dockerfile', 'docker-compose.yml', '.dockerignore'):
        file_path = PROJECT_ROOT / docker_file
        if not file_path.exists():
            continue
        try:
            _, stats = _read_text_stats(file_path)
        except read_errors:
            # Best effort, as for the Helm files above.
            continue

        docker_analysis.append({"file": docker_file, **stats})

    infra_analysis['docker_files'] = docker_analysis

    return infra_analysis

# Analyze infrastructure and print a textual report of what was found.
print("🏗️ Infrastructure Analysis:")
print("=" * 50)

infra_analysis = analyze_infrastructure()

# Display Terraform analysis
if infra_analysis.get('terraform_files'):
    print("\n📄 Terraform Files:")
    for tf_file in infra_analysis['terraform_files']:
        print(f"   📝 {tf_file['file']}")
        # Files that could not be read carry only "file" and "error", so the
        # count fields must not be accessed for them.
        if 'error' in tf_file:
            print(f"      ❌ Could not read file: {tf_file['error']}")
            continue
        print(f"      Lines: {tf_file['lines']}, Resources: {tf_file['resources']}, Variables: {tf_file['variables']}")

# Display Helm analysis
if infra_analysis.get('helm_files'):
    print("\n⚓️ Helm Chart Files:")
    # Count files per suffix; files without a suffix are grouped together.
    file_types = {}
    for helm_file in infra_analysis['helm_files']:
        file_type = helm_file['type'] or 'no extension'
        file_types[file_type] = file_types.get(file_type, 0) + 1

    for file_type, count in file_types.items():
        print(f"   {file_type}: {count} files")

    # Show key templates (any file whose relative path mentions "templates")
    templates = [f for f in infra_analysis['helm_files'] if 'templates' in f['file']]
    if templates:
        print("\n📋 Key Templates:")
        for template in templates[:5]:  # Show first 5
            template_name = Path(template['file']).name
            print(f"   • {template_name}")

# Display Docker analysis
if infra_analysis.get('docker_files'):
    print("\n🐳 Docker Files:")
    for docker_file in infra_analysis['docker_files']:
        print(f"   📦 {docker_file['file']}: {docker_file['lines']} lines")
# Three side-by-side panels: Terraform resources, Helm line share, Docker size.
fig, axes = plt.subplots(1, 3, figsize=(15, 5))

# Panel 1 - number of Terraform resources per file.
if infra_analysis.get('terraform_files'):
    tf_data = pd.DataFrame(infra_analysis['terraform_files'])
    # The column is absent when no Terraform file could be read.
    if 'resources' in tf_data.columns:
        tf_positions = range(len(tf_data))
        tf_labels = [Path(f['file']).name for f in infra_analysis['terraform_files']]
        axes[0].bar(tf_positions, tf_data['resources'], color='skyblue')
        axes[0].set_xticks(tf_positions)
        axes[0].set_xticklabels(tf_labels, rotation=45, ha='right')
        axes[0].set_title('Terraform Resources by File')
        axes[0].set_ylabel('Number of Resources')

# Panel 2 - share of Helm chart lines per file suffix.
if infra_analysis.get('helm_files'):
    helm_data = pd.DataFrame(infra_analysis['helm_files'])
    if 'lines' in helm_data.columns:
        file_types = helm_data.groupby('type')['lines'].sum()
        axes[1].pie(file_types.values, labels=file_types.index, autopct='%1.1f%%')
        axes[1].set_title('Helm Chart Complexity by File Type')

# Panel 3 - line count of each Docker-related file.
if infra_analysis.get('docker_files'):
    docker_data = pd.DataFrame(infra_analysis['docker_files'])
    if 'lines' in docker_data.columns:
        docker_positions = range(len(docker_data))
        axes[2].bar(docker_positions, docker_data['lines'], color='lightgreen')
        axes[2].set_xticks(docker_positions)
        axes[2].set_xticklabels(docker_data['file'], rotation=45, ha='right')
        axes[2].set_title('Docker File Complexity')
        axes[2].set_ylabel('Lines of Code')

plt.tight_layout()
plt.show()

# Per-environment deployment profile: replica count, per-pod resources,
# scaling mode and a coarse cost estimate.
deployment_strategies = {
    env_name: {
        "replicas": replica_count,
        "resources": {"cpu": cpu, "memory": memory},
        "scaling": scaling_mode,
        "cost_estimate": cost,
    }
    for env_name, replica_count, cpu, memory, scaling_mode, cost in (
        ("development", 1, "250m", "512Mi", "manual", "low"),
        ("staging", 2, "500m", "1Gi", "manual", "medium"),
        ("production", 3, "1000m", "2Gi", "hpa", "high"),
    )
}

print("\n🏢 Deployment Strategy by Environment:")
print("=" * 50)

# One short section per environment, in declaration order.
for env, config in deployment_strategies.items():
    print("\n".join([
        f"\n🌍 {env.upper()} Environment:",
        f"   🔢 Replicas: {config['replicas']}",
        f"   💾 CPU: {config['resources']['cpu']}, Memory: {config['resources']['memory']}",
        f"   📈 Scaling: {config['scaling']}",
        f"   💰 Cost: {config['cost_estimate']}",
    ]))

# Closing summary of the cell.
print("\n".join([
    "\n✅ Infrastructure Analysis Complete!",
    "\n💡 Infrastructure as Code Benefits:",
    "   • Reproducible deployments",
    "   • Environment consistency",
    "   • Version-controlled infrastructure",
    "   • Automated provisioning",
]))

## ☸️ Kubernetes Deployment

Let's examine the Kubernetes deployment configuration and Helm charts.

In [None]:
# Analyze Kubernetes deployment
def analyze_kubernetes_deployment():
    """Collect Helm values and template summaries for the ``mlops-sentiment`` chart.

    Reads from ``HELM_DIR / "mlops-sentiment"``.

    Returns:
        dict with two keys:

        * ``values_configs`` - maps each existing values file name
          (``values.yaml``, ``values-dev.yaml``, ``values-prod.yaml``) to its
          parsed YAML content, or to ``{"error": ...}`` when loading failed.
        * ``templates`` - one summary per non-empty ``templates/*.yaml`` file.
          Every summary has ``file`` and ``kind``; Deployments add
          ``replicas``, ``strategy`` and ``containers``, Services add ``type``
          and ``ports``, Ingresses add ``rules``. A file that fails to load or
          inspect yields ``{"file": ..., "error": ...}``.
    """
    chart_dir = HELM_DIR / "mlops-sentiment"

    # --- Helm values files -------------------------------------------------
    values_configs = {}
    for values_name in ('values.yaml', 'values-dev.yaml', 'values-prod.yaml'):
        values_path = chart_dir / values_name
        if not values_path.exists():
            continue
        try:
            with open(values_path, 'r') as handle:
                values_configs[values_name] = yaml.safe_load(handle)
        except Exception as exc:
            values_configs[values_name] = {"error": str(exc)}

    # --- Chart templates ---------------------------------------------------
    # NOTE(review): Helm templates usually embed Go template directives, which
    # presumably are not valid plain YAML; such files end up as error entries
    # here - confirm this is the intended behaviour.
    templates_analysis = []
    for template_path in (chart_dir / "templates").glob("*.yaml"):
        try:
            with open(template_path, 'r') as handle:
                manifest = yaml.safe_load(handle)

            # Empty documents produce no entry at all.
            if not manifest:
                continue

            kind = manifest.get('kind')
            if kind == 'Deployment':
                spec = manifest.get('spec', {})
                details = {
                    "replicas": spec.get('replicas', 'templated'),
                    "strategy": spec.get('strategy', {}).get('type', 'RollingUpdate'),
                    "containers": len(spec.get('template', {}).get('spec', {}).get('containers', []))
                }
            elif kind == 'Service':
                spec = manifest.get('spec', {})
                details = {
                    "type": spec.get('type', 'ClusterIP'),
                    "ports": len(spec.get('ports', []))
                }
            elif kind == 'Ingress':
                spec = manifest.get('spec', {})
                details = {"rules": len(spec.get('rules', []))}
            else:
                # Other resource kinds are recorded by name only.
                kind = manifest.get('kind', 'Unknown')
                details = {}

            templates_analysis.append({"file": template_path.name, "kind": kind, **details})

        except Exception as exc:
            templates_analysis.append({
                "file": template_path.name,
                "error": str(exc)
            })

    return {
        'values_configs': values_configs,
        'templates': templates_analysis,
    }

# Analyze Kubernetes deployment and print a textual report.
print("☸️ Kubernetes Deployment Analysis:")
print("=" * 50)

k8s_analysis = analyze_kubernetes_deployment()

# Display values configurations
if k8s_analysis.get('values_configs'):
    print("\n⚙️ Helm Values Configurations:")
    for config_file, config_data in k8s_analysis['values_configs'].items():
        print(f"\n📄 {config_file}:")

        # A values file may parse to something other than a mapping (an empty
        # file parses to None), so it cannot be queried with .get().
        if not isinstance(config_data, dict):
            print(f"   ❌ Error loading config: expected a mapping, got {type(config_data).__name__}")
            continue
        if 'error' in config_data:
            print(f"   ❌ Error loading config: {config_data['error']}")
            continue

        # Show key configuration items
        if 'replicaCount' in config_data:
            print(f"   🔢 Replica Count: {config_data['replicaCount']}")
        if 'image' in config_data:
            print(f"   🐳 Image: {config_data['image'].get('repository', 'N/A')}:{config_data['image'].get('tag', 'N/A')}")
        if 'resources' in config_data:
            resources = config_data['resources']
            if 'limits' in resources:
                limits = resources['limits']
                print(f"   💾 Resource Limits: CPU {limits.get('cpu', 'N/A')}, Memory {limits.get('memory', 'N/A')}")
        if 'autoscaling' in config_data and config_data['autoscaling'].get('enabled'):
            hpa = config_data['autoscaling']
            print(f"   📈 HPA: Min {hpa.get('minReplicas', 'N/A')}, Max {hpa.get('maxReplicas', 'N/A')}")

# Display templates analysis
if k8s_analysis.get('templates'):
    print("\n📋 Kubernetes Templates:")
    templates_df = pd.DataFrame(k8s_analysis['templates'])

    # Group by kind (the column is missing when every template failed to load)
    if 'kind' in templates_df.columns:
        kind_counts = templates_df['kind'].value_counts()
        print("Resource Types:")
        for kind, count in kind_counts.items():
            print(f"   • {kind}: {count} template(s)")

    # Show detailed template info
    print("\n📄 Template Details:")
    for template in k8s_analysis['templates']:
        print(f"   📝 {template['file']} ({template.get('kind', 'Unknown')})")

        # Surface load failures instead of showing a bare "(Unknown)" entry.
        if 'error' in template:
            print(f"      ❌ Error: {template['error']}")
        elif template.get('kind') == 'Deployment':
            print(f"      Replicas: {template.get('replicas', 'N/A')}")
            print(f"      Strategy: {template.get('strategy', 'N/A')}")
            print(f"      Containers: {template.get('containers', 'N/A')}")
        elif template.get('kind') == 'Service':
            print(f"      Type: {template.get('type', 'N/A')}")
            print(f"      Ports: {template.get('ports', 'N/A')}")
        elif template.get('kind') == 'Ingress':
            print(f"      Rules: {template.get('rules', 'N/A')}")

# Create deployment architecture visualization (2x2 grid of panels).
fig, axes = plt.subplots(2, 2, figsize=(14, 10))
fig.suptitle('Kubernetes Deployment Architecture', fontsize=16, fontweight='bold')

# Environment configurations comparison (top-left), taken from the values
# files that loaded successfully.
if k8s_analysis.get('values_configs'):
    env_configs = {}
    for config_file, config_data in k8s_analysis['values_configs'].items():
        if isinstance(config_data, dict) and 'error' not in config_data:
            # "values-dev.yaml" -> "dev"; the base "values.yaml" -> "default"
            env_name = config_file.replace('values-', '').replace('.yaml', '')
            if env_name == 'values':
                env_name = 'default'
            env_configs[env_name] = {
                'replicas': config_data.get('replicaCount', 1),
                'cpu_limit': config_data.get('resources', {}).get('limits', {}).get('cpu', '1000m'),
                'memory_limit': config_data.get('resources', {}).get('limits', {}).get('memory', '1Gi')
            }

    if env_configs:
        env_names = list(env_configs.keys())
        replicas = [env_configs[env]['replicas'] for env in env_names]

        axes[0, 0].bar(env_names, replicas, color=['lightblue', 'lightgreen', 'red'])
        axes[0, 0].set_title('Replica Count by Environment')
        axes[0, 0].set_ylabel('Number of Replicas')
        axes[0, 0].grid(True, alpha=0.3)

# Resource allocation (top-right) - illustrative, hard-coded values.
resource_data = {
    'CPU Limits': ['250m', '500m', '1000m', '2000m'],
    'Memory Limits': ['512Mi', '1Gi', '2Gi', '4Gi'],
    'Environments': ['dev', 'staging', 'prod-small', 'prod-large']
}

# The same limits as plain numbers (CPU in cores, memory in GiB) for plotting.
cpu_cores = [0.25, 0.5, 1.0, 2.0]
memory_gb = [0.5, 1.0, 2.0, 4.0]

axes[0, 1].plot(resource_data['Environments'], cpu_cores, marker='o', linewidth=2, label='CPU')
axes[0, 1].plot(resource_data['Environments'], memory_gb, marker='s', linewidth=2, label='Memory')
axes[0, 1].set_title('Resource Allocation by Environment')
axes[0, 1].set_ylabel('Resources')
axes[0, 1].legend()
axes[0, 1].grid(True, alpha=0.3)

# Scaling comparison (bottom-left) - illustrative, hard-coded values.
scaling_data = {
    'Environment': ['Development', 'Staging', 'Production'],
    'Min Replicas': [1, 2, 3],
    'Max Replicas': [1, 3, 10],
    'Target CPU %': [70, 70, 70]
}

# Plain Python positions: numpy is not imported in this notebook, so the
# grouped-bar offsets are computed with list comprehensions.
x = list(range(len(scaling_data['Environment'])))
width = 0.35

axes[1, 0].bar([pos - width / 2 for pos in x], scaling_data['Min Replicas'], width, label='Min Replicas', alpha=0.8)
axes[1, 0].bar([pos + width / 2 for pos in x], scaling_data['Max Replicas'], width, label='Max Replicas', alpha=0.8)
axes[1, 0].set_title('Horizontal Pod Autoscaling Configuration')
axes[1, 0].set_xticks(x)
axes[1, 0].set_xticklabels(scaling_data['Environment'])
axes[1, 0].set_ylabel('Number of Replicas')
axes[1, 0].legend()
axes[1, 0].grid(True, alpha=0.3)

# Service mesh features (bottom-right) - illustrative, hard-coded scores.
service_features = {
    'Load Balancing': 95,
    'Circuit Breaking': 90,
    'Traffic Splitting': 85,
    'Observability': 95,
    'Security': 90,
    'Multi-cluster': 80
}

features = list(service_features.keys())
scores = list(service_features.values())

bars = axes[1, 1].barh(range(len(features)), scores, color='lightcoral', alpha=0.7)
axes[1, 1].set_yticks(range(len(features)))
axes[1, 1].set_yticklabels(features)
axes[1, 1].set_xlabel('Maturity Score (%)')
axes[1, 1].set_title('Service Mesh Capabilities')
axes[1, 1].set_xlim(0, 100)
axes[1, 1].grid(True, alpha=0.3)

# Add value labels just to the right of each horizontal bar
for bar, score in zip(bars, scores):
    width = bar.get_width()
    axes[1, 1].text(width + 1, bar.get_y() + bar.get_height()/2,
                    f'{score}%', ha='left', va='center')

plt.tight_layout()
plt.show()

print("\n✅ Kubernetes Deployment Analysis Complete!")
print("\n💡 Kubernetes Benefits:")
print("   • Automated scaling and self-healing")
print("   • Declarative configuration")
print("   • Service discovery and load balancing")
print("   • Rolling updates with zero downtime")
print("   • Resource optimization and cost efficiency")

## 📊 Scaling Strategies

Let's analyze scaling strategies and performance optimization.

In [None]:
# Scaling analysis and recommendations
def analyze_scaling_strategies():
    """Return a static catalogue of scaling strategies.

    Returns:
        dict keyed by strategy name (``horizontal_pod_autoscaling``,
        ``cluster_autoscaling``, ``predictive_scaling``,
        ``event_driven_scaling``). Each value is a dict with the keys
        ``description``, ``metrics``, ``advantages``, ``disadvantages``,
        ``use_case`` and ``configuration``, in that order. A fresh structure
        is built on every call.
    """

    def describe(description, metrics, advantages, disadvantages, use_case, configuration):
        # Assemble one catalogue entry with a fixed key order.
        return {
            "description": description,
            "metrics": metrics,
            "advantages": advantages,
            "disadvantages": disadvantages,
            "use_case": use_case,
            "configuration": configuration,
        }

    hpa = describe(
        "Automatically scale pods based on CPU/memory usage",
        ["cpu", "memory"],
        [
            "Automatic scaling based on resource usage",
            "Handles traffic spikes automatically",
            "Cost-effective - scales down during low traffic",
        ],
        [
            "Cold start latency for new pods",
            "May not handle all traffic patterns optimally",
            "Requires careful resource limit tuning",
        ],
        "Variable traffic patterns",
        {
            "min_replicas": 3,
            "max_replicas": 10,
            "target_cpu_utilization": 70,
            "target_memory_utilization": 80,
        },
    )

    cluster = describe(
        "Automatically scale the number of nodes in the cluster",
        ["node_cpu", "node_memory", "pending_pods"],
        [
            "Handles pod scaling limits",
            "Optimizes cluster resource utilization",
            "Reduces infrastructure costs during low usage",
        ],
        [
            "Slower scaling compared to pod autoscaling",
            "More complex configuration",
            "May cause service disruptions during node changes",
        ],
        "Highly variable or unpredictable workloads",
        {
            "min_nodes": 3,
            "max_nodes": 20,
            "scale_down_delay": "10m",
            "scale_up_delay": "1m",
        },
    )

    predictive = describe(
        "Scale based on predicted traffic patterns",
        ["historical_data", "time_patterns", "external_signals"],
        [
            "Proactive scaling before traffic spikes",
            "Better user experience with reduced latency",
            "Can incorporate business metrics",
        ],
        [
            "Requires historical data and ML models",
            "More complex to implement",
            "May over-provision resources",
        ],
        "Predictable traffic patterns with known seasonality",
        {
            "prediction_window": "1h",
            "scaling_buffer": 20,
            "min_prediction_confidence": 0.8,
        },
    )

    event_driven = describe(
        "Scale based on external events or custom metrics",
        ["queue_depth", "api_calls", "business_metrics"],
        [
            "Responds to business logic rather than just resources",
            "Can handle complex scaling scenarios",
            "Integrates with external systems",
        ],
        [
            "Requires custom development",
            "May be overkill for simple applications",
            "Debugging can be complex",
        ],
        "Complex applications with specific scaling requirements",
        {
            "custom_metrics": ["sentiment_requests_per_minute"],
            "scaling_policies": ["step_scaling", "target_tracking"],
            "cooldown_period": "5m",
        },
    )

    return {
        "horizontal_pod_autoscaling": hpa,
        "cluster_autoscaling": cluster,
        "predictive_scaling": predictive,
        "event_driven_scaling": event_driven,
    }

# Performance optimization recommendations
def get_performance_optimization():
    """Return a static catalogue of performance optimization areas.

    Returns:
        dict keyed by area (``model_optimization``, ``caching_strategies``,
        ``infrastructure_optimization``, ``api_optimization``). Each value is
        a dict with ``techniques`` (list of str), ``expected_improvement``
        and ``complexity``, in that order. A fresh structure is built on
        every call.
    """
    # (area, techniques, expected improvement, complexity)
    catalogue = (
        (
            "model_optimization",
            [
                "Use ONNX Runtime for faster inference",
                "Implement model quantization",
                "Use model distillation for smaller models",
                "Implement model versioning and A/B testing",
            ],
            "2-3x faster inference",
            "Medium",
        ),
        (
            "caching_strategies",
            [
                "Implement LRU cache for predictions",
                "Use Redis for distributed caching",
                "Cache model artifacts in memory",
                "Implement response compression",
            ],
            "10-50% faster response times",
            "Low",
        ),
        (
            "infrastructure_optimization",
            [
                "Use GPU instances for inference",
                "Implement horizontal pod autoscaling",
                "Use spot instances for cost optimization",
                "Implement multi-region deployment",
            ],
            "3-5x throughput improvement",
            "High",
        ),
        (
            "api_optimization",
            [
                "Implement async request processing",
                "Use connection pooling",
                "Implement rate limiting",
                "Optimize JSON serialization",
            ],
            "20-40% reduced latency",
            "Medium",
        ),
    )

    return {
        area: {
            "techniques": techniques,
            "expected_improvement": improvement,
            "complexity": level,
        }
        for area, techniques, improvement, level in catalogue
    }

# Print a condensed view of every scaling strategy.
print("📊 Scaling Strategies Analysis:")
print("=" * 60)

scaling_strategies = analyze_scaling_strategies()

for strategy_name, strategy_info in scaling_strategies.items():
    report = [
        f"\n🔄 {strategy_name.replace('_', ' ').title()}:",
        f"   📝 {strategy_info['description']}",
        f"   🎯 Use Case: {strategy_info['use_case']}",
        f"   ✅ Advantages:",
    ]
    # Only the first two advantages are shown to keep the output short.
    report.extend(f"      • {advantage}" for advantage in strategy_info['advantages'][:2])
    report.append(f"   ⚙️ Key Configuration:")
    # Likewise, only the first three configuration settings.
    report.extend(
        f"      • {key}: {value}"
        for key, value in list(strategy_info['configuration'].items())[:3]
    )
    print("\n".join(report))

# Print a condensed view of every performance optimization area.
print("\n⚡ Performance Optimization Recommendations:")
print("=" * 50)

optimizations = get_performance_optimization()

for opt_name, opt_info in optimizations.items():
    report = [
        f"\n🔧 {opt_name.replace('_', ' ').title()}:",
        f"   📈 Expected Improvement: {opt_info['expected_improvement']}",
        f"   🎚️ Complexity: {opt_info['complexity']}",
        f"   🛠️ Key Techniques:",
    ]
    # Only the first three techniques are shown.
    report.extend(f"      • {technique}" for technique in opt_info['techniques'][:3])
    print("\n".join(report))

# Create scaling strategy comparison (2x2 grid, one bar chart per criterion).
fig, axes = plt.subplots(2, 2, figsize=(14, 10))
fig.suptitle('Scaling Strategy Comparison', fontsize=16, fontweight='bold')

# Hard-coded, illustrative scores; each list is aligned with `strategies`.
# NOTE(review): cluster autoscaling gets the highest speed score here, yet its
# own entry lists "Slower scaling compared to pod autoscaling" as a
# disadvantage - confirm the intended direction of this scale.
strategies = list(scaling_strategies.keys())
scaling_speed = [3, 8, 2, 1]  # Relative scaling speed (1-10 scale)
cost_efficiency = [8, 6, 7, 5]  # Cost efficiency (1-10 scale)
complexity = [3, 7, 9, 8]  # Implementation complexity (1-10 scale)
reliability = [9, 8, 6, 7]  # Reliability (1-10 scale)

# Bars are drawn at explicit numeric positions and the tick positions are
# fixed before the labels are set, so every label is tied to its own bar.
positions = range(len(strategies))
panels = [
    (axes[0, 0], scaling_speed, 'Scaling Speed', 'Speed Score (1-10)', 'skyblue'),
    (axes[0, 1], cost_efficiency, 'Cost Efficiency', 'Efficiency Score (1-10)', 'lightgreen'),
    (axes[1, 0], complexity, 'Implementation Complexity', 'Complexity Score (1-10)', 'orange'),
    (axes[1, 1], reliability, 'Reliability', 'Reliability Score (1-10)', 'lightcoral'),
]

for ax, panel_scores, title, ylabel, color in panels:
    ax.bar(positions, panel_scores, color=color, alpha=0.7)
    ax.set_title(title)
    ax.set_ylabel(ylabel)
    ax.set_xticks(positions)
    ax.set_xticklabels(strategies, rotation=45, ha='right')
    ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

# Recommended scaling strategy per organisation profile.
scaling_recommendations = {
    profile: {
        "strategy": strategy,
        "justification": justification,
        "expected_cost_savings": savings,
    }
    for profile, strategy, justification, savings in (
        (
            "small_startup",
            "horizontal_pod_autoscaling",
            "Simple, cost-effective, handles basic scaling needs",
            "20-30%",
        ),
        (
            "growing_business",
            "cluster_autoscaling + hpa",
            "Handles variable traffic, optimizes infrastructure costs",
            "30-40%",
        ),
        (
            "enterprise_large_scale",
            "predictive_scaling + cluster_autoscaling",
            "Proactive scaling for predictable patterns, maximum efficiency",
            "40-50%",
        ),
        (
            "specialized_use_case",
            "event_driven_scaling",
            "Custom business logic integration, maximum flexibility",
            "35-45%",
        ),
    )
}

print("\n🎯 Scaling Strategy Recommendations:")
print("=" * 50)

# One short section per profile, in declaration order.
for use_case, recommendation in scaling_recommendations.items():
    print("\n".join([
        f"\n🏢 {use_case.replace('_', ' ').title()}:",
        f"   🔄 Recommended Strategy: {recommendation['strategy']}",
        f"   💡 Justification: {recommendation['justification']}",
        f"   💰 Expected Cost Savings: {recommendation['expected_cost_savings']}",
    ]))

# Closing summary of the cell.
print("\n".join([
    "\n✅ Scaling Analysis Complete!",
    "\n💡 Scaling Best Practices:",
    "   • Start with horizontal pod autoscaling",
    "   • Monitor resource utilization continuously",
    "   • Implement gradual rollout strategies",
    "   • Use multiple metrics for scaling decisions",
    "   • Test scaling behavior under load",
    "   • Monitor cost implications of scaling decisions",
]))

## 🔒 Production Readiness Checklist

Let's create a comprehensive production readiness checklist.

In [None]:
# Production readiness checklist
def create_production_readiness_checklist():
    """Build the production readiness checklist as a nested dictionary.

    Returns:
        dict: Maps a section key (e.g. ``"security"``) to a dict with a
        display name under ``"category"`` and an ``"items"`` dict. Each item
        maps its key to ``"description"``, ``"priority"``, ``"status"`` and
        ``"verification"`` strings.
    """
    # Shorthand for the repeated priority / status labels.
    critical, high, medium = "Critical", "High", "Medium"
    required, recommended, optional = "Required", "Recommended", "Optional"

    def entry(description, priority, status, verification):
        # A single checklist item; key order matches the displayed order.
        return {
            "description": description,
            "priority": priority,
            "status": status,
            "verification": verification,
        }

    def section(title, **items):
        # A checklist section; keyword order is preserved as item order.
        return {"category": title, "items": items}

    return {
        "infrastructure": section(
            "Infrastructure",
            kubernetes_cluster=entry(
                "Production-ready Kubernetes cluster with proper networking",
                critical,
                required,
                "kubectl cluster-info && kubectl get nodes",
            ),
            monitoring_stack=entry(
                "Prometheus + Grafana monitoring stack deployed",
                critical,
                required,
                "kubectl get pods -n monitoring",
            ),
            load_balancer=entry(
                "Load balancer configured for external access",
                critical,
                required,
                "kubectl get svc -n ingress",
            ),
            backup_system=entry(
                "Automated backup system for persistent data",
                high,
                recommended,
                "Check backup job schedules and test restores",
            ),
        ),
        "security": section(
            "Security",
            secret_management=entry(
                "Secrets stored securely (not in code/config)",
                critical,
                required,
                "kubectl get secrets && check for hardcoded secrets",
            ),
            network_policies=entry(
                "Kubernetes network policies restrict pod communication",
                high,
                required,
                "kubectl get networkpolicies",
            ),
            rbac_configured=entry(
                "Role-based access control properly configured",
                high,
                required,
                "kubectl get clusterroles,clusterrolebindings",
            ),
            security_scanning=entry(
                "Regular security scanning of containers and dependencies",
                high,
                required,
                "Check CI/CD security scan results",
            ),
        ),
        "reliability": section(
            "Reliability",
            health_checks=entry(
                "Liveness and readiness probes configured",
                critical,
                required,
                "kubectl describe deployment | grep -A5 Probes",
            ),
            resource_limits=entry(
                "CPU and memory limits set for all containers",
                critical,
                required,
                "kubectl get pods -o=jsonpath='{.items[*].spec.containers[*].resources}'",
            ),
            autoscaling=entry(
                "Horizontal Pod Autoscaler configured",
                high,
                required,
                "kubectl get hpa",
            ),
            disaster_recovery=entry(
                "Disaster recovery plan documented and tested",
                high,
                recommended,
                "Check DR documentation and test results",
            ),
        ),
        "performance": section(
            "Performance",
            load_testing=entry(
                "Load testing completed with acceptable performance",
                high,
                required,
                "Check load test results and performance metrics",
            ),
            caching_strategy=entry(
                "Caching strategy implemented and tested",
                medium,
                recommended,
                "Check cache hit rates and performance improvement",
            ),
            optimization_applied=entry(
                "Performance optimizations applied (ONNX, etc.)",
                medium,
                recommended,
                "Compare performance before/after optimization",
            ),
        ),
        "observability": section(
            "Observability",
            structured_logging=entry(
                "Structured JSON logging with correlation IDs",
                high,
                required,
                "Check log format and correlation ID usage",
            ),
            alerting_rules=entry(
                "Comprehensive alerting rules configured",
                high,
                required,
                "kubectl get prometheusrules && check alert definitions",
            ),
            dashboards_created=entry(
                "Grafana dashboards created and populated",
                medium,
                recommended,
                "Access Grafana dashboards and verify metrics display",
            ),
            tracing_enabled=entry(
                "Distributed tracing enabled for request tracking",
                medium,
                optional,
                "Check OpenTelemetry configuration",
            ),
        ),
        "operations": section(
            "Operations",
            ci_cd_pipeline=entry(
                "Complete CI/CD pipeline with automated deployment",
                critical,
                required,
                "Check GitHub Actions workflow and deployment history",
            ),
            rollback_plan=entry(
                "Rollback plan documented and tested",
                critical,
                required,
                "Check rollback documentation and test results",
            ),
            documentation=entry(
                "Complete operational documentation available",
                high,
                required,
                "Check docs/ directory and runbooks",
            ),
            on_call_rotation=entry(
                "On-call rotation and incident response documented",
                high,
                recommended,
                "Check incident response documentation",
            ),
        ),
    }

# Display production readiness checklist
print("✅ Production Readiness Checklist:")
print("=" * 60)

checklist = create_production_readiness_checklist()

# Icon lookup tables. Any priority/status not listed here falls back to the
# lowest tier (🟢 / ℹ️) via the .get() defaults used in the loop below.
priority_icons = {"Critical": "🔴", "High": "🟠"}
status_icons = {"Required": "✅", "Recommended": "⚠️"}

# Print every item grouped under its category heading.
for category_info in checklist.values():
    print(f"\n📋 {category_info['category']}:")
    print("-" * 40)

    for item_name, item_info in category_info['items'].items():
        priority_icon = priority_icons.get(item_info['priority'], "🟢")
        status_icon = status_icons.get(item_info['status'], "ℹ️")

        print(f"   {priority_icon}{status_icon} {item_name.replace('_', ' ').title()}")
        print(f"      📝 {item_info['description']}")
        print(f"      🔍 Verification: {item_info['verification']}")

# Tally the summary figures over the flattened item list.
all_items = [
    item_info
    for category_info in checklist.values()
    for item_info in category_info['items'].values()
]
total_items = len(all_items)
critical_items = sum(1 for item_info in all_items if item_info['priority'] == 'Critical')
required_items = sum(1 for item_info in all_items if item_info['status'] == 'Required')
# Counted explicitly: deriving this as "total - required" would wrongly fold
# items with any other status (e.g. "Optional") into the recommended figure.
recommended_items = sum(1 for item_info in all_items if item_info['status'] == 'Recommended')
# Whatever is neither Required nor Recommended is reported as optional,
# mirroring the ℹ️ fallback icon used in the listing above.
optional_items = total_items - required_items - recommended_items

print("\n📊 Checklist Summary:")
print(f"   📋 Total Items: {total_items}")
print(f"   🔴 Critical Items: {critical_items}")
print(f"   ✅ Required Items: {required_items}")
print(f"   ⚠️ Recommended Items: {recommended_items}")
print(f"   ℹ️ Optional Items: {optional_items}")

# Create checklist visualization: a grouped bar chart with one group per
# checklist category and one bar per priority level.
fig, ax = plt.subplots(figsize=(12, 8))

width = 0.25

# (priority label, bar colour, horizontal offset from the category tick)
priority_series = [
    ('Critical', 'red', -width),
    ('High', 'orange', 0.0),
    ('Medium', 'green', width),
]

# Count items by category and priority
categories = [category_info['category'] for category_info in checklist.values()]
counts_by_priority = {
    priority: [
        sum(1 for item in category_info['items'].values() if item['priority'] == priority)
        for category_info in checklist.values()
    ]
    for priority, _color, _offset in priority_series
}
critical_counts = counts_by_priority['Critical']
high_counts = counts_by_priority['High']
medium_counts = counts_by_priority['Medium']

# Plain integer tick positions. Offsets are applied per series below, so the
# cell does not depend on NumPy having been imported by an earlier cell.
x = list(range(len(categories)))

for priority, color, offset in priority_series:
    counts = counts_by_priority[priority]
    ax.bar([pos + offset for pos in x], counts, width, label=priority, color=color, alpha=0.7)

    # Add value labels on bars; zero-height bars are left unlabelled.
    for pos, count in zip(x, counts):
        if count > 0:
            ax.text(pos + offset, count + 0.1, str(count), ha='center', va='bottom')

ax.set_xlabel('Category')
ax.set_ylabel('Number of Items')
ax.set_title('Production Readiness Checklist by Category and Priority')
# Ticks are fixed before the labels so each label maps to a known position.
ax.set_xticks(x)
ax.set_xticklabels(categories, rotation=45, ha='right')
ax.legend()
ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

# Closing messages for the guide, emitted one per line in order: completion
# banner, deployment best practices, and pointers to further reading.
closing_messages = (
    "\n🚀 Deployment Checklist Complete!",
    "\n💡 Production Deployment Best Practices:",
    "   • Use this checklist before going live",
    "   • Automate verification where possible",
    "   • Have rollback plans for all changes",
    "   • Monitor closely after deployment",
    "   • Document all production procedures",
    "   • Regular security and performance audits",
    "\n🎯 Remember: Production readiness is about confidence, not perfection!",
    "\n🏁 KubeSentiment Deployment Guide Complete!",
    "\n📚 Additional Resources:",
    "   • Kubernetes documentation: https://kubernetes.io/docs/",
    "   • Helm charts: https://helm.sh/docs/",
    "   • Terraform: https://www.terraform.io/docs/",
    "   • Production readiness: https://landing.google.com/sre/books/",
    "\n🎉 Ready to deploy KubeSentiment to production!",
)

for message in closing_messages:
    print(message)