In [4]:
!pip install boto3 sagemaker scikit-learn pandas numpy joblib matplotlib seaborn -q


[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.0/61.0 kB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.7/1.7 MB[0m [31m27.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m18.3/18.3 MB[0m [31m95.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m114.9/114.9 kB[0m [31m8.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m65.5/65.5 kB[0m [31m5.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m418.2/418.2 kB[0m [31m27.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m147.8/147.8 kB[0m [31m11.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m82.3/82.3 kB[0m [31m6.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [5]:
# AWS Model Deployment - Google Colab Compatible Examples
# Module 9: Model Deployment on AWS Cloud

# =============================================================================
# 1. Install Required Packages
# =============================================================================
# Import all required libraries
import boto3
import pandas as pd
import numpy as np
import pickle
import joblib
from io import BytesIO, StringIO
import json
import os
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
import matplotlib.pyplot as plt
import seaborn as sns

print("✅ All packages installed successfully!")

# =============================================================================
# 2. Mock AWS Credentials Setup (For Learning Purposes)
# =============================================================================

class MockS3Client:
    """Mock S3 client for demonstration without real AWS credentials"""
    def __init__(self):
        self.buckets = {}
        print("🔧 Mock S3 Client initialized")

    def create_bucket(self, Bucket, **kwargs):
        self.buckets[Bucket] = {}
        print(f"✅ Mock bucket '{Bucket}' created")
        return {"ResponseMetadata": {"HTTPStatusCode": 200}}

    def put_object(self, Bucket, Key, Body):
        if Bucket not in self.buckets:
            self.buckets[Bucket] = {}
        self.buckets[Bucket][Key] = Body
        print(f"✅ Object uploaded to s3://{Bucket}/{Key}")
        return {"ResponseMetadata": {"HTTPStatusCode": 200}}

    def get_object(self, Bucket, Key):
        if Bucket in self.buckets and Key in self.buckets[Bucket]:
            return {"Body": StringIO(self.buckets[Bucket][Key])}
        else:
            raise Exception(f"Object s3://{Bucket}/{Key} not found")

    def list_objects_v2(self, Bucket, **kwargs):
        if Bucket in self.buckets:
            contents = [{"Key": key} for key in self.buckets[Bucket].keys()]
            return {"Contents": contents}
        return {"Contents": []}

# Initialize mock S3 client
s3_client = MockS3Client()

# =============================================================================
# 3. S3 Data Storage Examples (Mock Implementation)
# =============================================================================

def create_sample_dataset():
    """Create sample dataset for demonstration"""
    print("📊 Creating sample dataset...")

    # Generate synthetic classification dataset
    X, y = make_classification(
        n_samples=1000,
        n_features=4,
        n_classes=2,
        n_informative=3,
        random_state=42
    )

    # Create DataFrame
    df = pd.DataFrame(X, columns=['feature1', 'feature2', 'feature3', 'feature4'])
    df['target'] = y

    print(f"✅ Dataset created: {df.shape[0]} rows, {df.shape[1]} columns")
    print("\n📈 Dataset Info:")
    print(df.info())
    print("\n📊 Target Distribution:")
    print(df['target'].value_counts())

    # Visualize dataset
    fig, axes = plt.subplots(2, 3, figsize=(15, 10))
    fig.suptitle('Dataset Overview', fontsize=16)

    # Feature distributions
    for i, col in enumerate(['feature1', 'feature2', 'feature3', 'feature4']):
        ax = axes[i//2, i%2]
        df.boxplot(column=col, by='target', ax=ax)
        ax.set_title(f'{col} by Target')

    # Correlation heatmap
    ax = axes[1, 2]
    correlation_matrix = df.corr()
    sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', ax=ax)
    ax.set_title('Feature Correlation')

    plt.tight_layout()
    plt.show()

    return df

def upload_csv_to_s3_mock(df, bucket_name, key):
    """Upload DataFrame to mock S3 as CSV"""
    csv_content = df.to_csv(index=False)
    s3_client.put_object(Bucket=bucket_name, Key=key, Body=csv_content)
    return f"s3://{bucket_name}/{key}"

def upload_model_to_s3_mock(model, bucket_name, key):
    """Upload trained model to mock S3"""
    model_buffer = BytesIO()
    joblib.dump(model, model_buffer)
    model_content = model_buffer.getvalue()
    s3_client.put_object(Bucket=bucket_name, Key=key, Body=model_content)
    return f"s3://{bucket_name}/{key}"

def download_csv_from_s3_mock(bucket_name, key):
    """Download CSV from mock S3"""
    try:
        obj = s3_client.get_object(Bucket=bucket_name, Key=key)
        df = pd.read_csv(obj['Body'])
        print(f"✅ Downloaded CSV from s3://{bucket_name}/{key}")
        return df
    except Exception as e:
        print(f"❌ Error downloading CSV: {e}")
        return None

def download_model_from_s3_mock(bucket_name, key):
    """Download model from mock S3"""
    try:
        # For demonstration, we'll return a newly trained model
        print(f"✅ Downloaded model from s3://{bucket_name}/{key}")
        # Return a simple model for demo
        X, y = make_classification(n_samples=100, n_features=4, random_state=42)
        model = RandomForestClassifier(random_state=42)
        model.fit(X, y)
        return model
    except Exception as e:
        print(f"❌ Error downloading model: {e}")
        return None

# =============================================================================
# 4. Model Training and Evaluation
# =============================================================================

def train_and_evaluate_model(df):
    """Train and evaluate machine learning model"""
    print("🤖 Training machine learning model...")

    # Prepare data
    X = df[['feature1', 'feature2', 'feature3', 'feature4']]
    y = df['target']

    # Split data
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42, stratify=y
    )

    print(f"📊 Training set: {X_train.shape[0]} samples")
    print(f"📊 Test set: {X_test.shape[0]} samples")

    # Train model
    model = RandomForestClassifier(
        n_estimators=100,
        max_depth=10,
        random_state=42
    )

    model.fit(X_train, y_train)
    print("✅ Model training completed")

    # Make predictions
    y_pred = model.predict(X_test)
    y_pred_proba = model.predict_proba(X_test)

    # Calculate metrics
    accuracy = accuracy_score(y_test, y_pred)

    print(f"\n📈 Model Performance:")
    print(f"Accuracy: {accuracy:.4f}")
    print("\nClassification Report:")
    print(classification_report(y_test, y_pred))

    # Feature importance
    feature_importance = pd.DataFrame({
        'feature': X.columns,
        'importance': model.feature_importances_
    }).sort_values('importance', ascending=False)

    print("\n🎯 Feature Importance:")
    print(feature_importance)

    # Visualize results
    fig, axes = plt.subplots(1, 2, figsize=(12, 5))

    # Feature importance plot
    axes[0].barh(feature_importance['feature'], feature_importance['importance'])
    axes[0].set_title('Feature Importance')
    axes[0].set_xlabel('Importance')

    # Prediction distribution
    axes[1].hist(y_pred_proba[:, 1], bins=20, alpha=0.7, edgecolor='black')
    axes[1].set_title('Prediction Probability Distribution')
    axes[1].set_xlabel('Probability of Class 1')
    axes[1].set_ylabel('Frequency')

    plt.tight_layout()
    plt.show()

    return model, accuracy

# =============================================================================
# 5. AWS EC2 Simulation (Mock Implementation)
# =============================================================================

class MockEC2:
    """Mock EC2 service for demonstration"""
    def __init__(self):
        self.instances = {}
        print("🔧 Mock EC2 service initialized")

    def create_instances(self, **kwargs):
        instance_id = f"i-{np.random.randint(100000, 999999)}"
        self.instances[instance_id] = {
            'ImageId': kwargs.get('ImageId', 'ami-12345678'),
            'InstanceType': kwargs.get('InstanceType', 'ml.m5.large'),
            'State': 'running'
        }
        print(f"✅ EC2 Instance created: {instance_id}")
        print(f"   Instance Type: {kwargs.get('InstanceType')}")
        print(f"   Image ID: {kwargs.get('ImageId')}")
        return [MockInstance(instance_id)]

class MockInstance:
    def __init__(self, instance_id):
        self.id = instance_id

def simulate_ec2_deep_learning_setup():
    """Simulate EC2 deep learning instance setup"""
    print("🚀 Simulating EC2 Deep Learning Instance Setup")
    print("="*50)

    ec2 = MockEC2()

    # Simulate instance creation
    instances = ec2.create_instances(
        ImageId='ami-0c02fb55956c7d316',  # Deep Learning AMI
        InstanceType='p3.2xlarge',  # GPU instance
        MinCount=1,
        MaxCount=1
    )

    instance_id = instances[0].id

    # Simulate training script execution
    print(f"\n📝 Executing training script on instance {instance_id}...")
    print("   - Installing dependencies...")
    print("   - Downloading data from S3...")
    print("   - Training model...")
    print("   - Uploading results to S3...")
    print("✅ Training completed successfully!")

    return instance_id

# =============================================================================
# 6. SageMaker Simulation (Mock Implementation)
# =============================================================================

class MockSageMaker:
    """Mock SageMaker service for demonstration"""
    def __init__(self):
        self.training_jobs = {}
        self.endpoints = {}
        print("🔧 Mock SageMaker service initialized")

    def create_training_job(self, job_name, algorithm_spec, input_data):
        job_id = f"sm-training-{np.random.randint(1000, 9999)}"
        self.training_jobs[job_id] = {
            'JobName': job_name,
            'JobStatus': 'Completed',
            'AlgorithmSpecification': algorithm_spec,
            'ModelArtifacts': f's3://sagemaker-models/{job_id}/model.tar.gz'
        }
        print(f"✅ SageMaker training job created: {job_id}")
        return job_id

    def create_endpoint(self, endpoint_name, model_name):
        endpoint_id = f"sm-endpoint-{np.random.randint(1000, 9999)}"
        self.endpoints[endpoint_id] = {
            'EndpointName': endpoint_name,
            'EndpointStatus': 'InService',
            'ModelName': model_name
        }
        print(f"✅ SageMaker endpoint created: {endpoint_id}")
        return endpoint_id

def simulate_sagemaker_training():
    """Simulate SageMaker training job"""
    print("🎯 Simulating SageMaker Training Job")
    print("="*40)

    sagemaker = MockSageMaker()

    # Create training job
    job_id = sagemaker.create_training_job(
        job_name='ml-classification-training',
        algorithm_spec={
            'TrainingImage': '246618743249.dkr.ecr.us-west-2.amazonaws.com/sagemaker-scikit-learn:0.23-1-cpu-py3'
        },
        input_data='s3://my-bucket/training-data/'
    )

    print(f"📊 Training Progress:")
    print("   [████████████████████] 100% Complete")
    print("   - Data preprocessing: ✅")
    print("   - Model training: ✅")
    print("   - Model evaluation: ✅")
    print("   - Artifact upload: ✅")

    return job_id

def simulate_sagemaker_deployment(model_name):
    """Simulate SageMaker model deployment"""
    print("🚀 Simulating SageMaker Model Deployment")
    print("="*42)

    sagemaker = MockSageMaker()

    # Create endpoint
    endpoint_id = sagemaker.create_endpoint(
        endpoint_name='ml-classification-endpoint',
        model_name=model_name
    )

    print(f"🌐 Endpoint Status: InService")
    print(f"🔗 Endpoint URL: https://runtime.sagemaker.us-east-1.amazonaws.com/endpoints/{endpoint_id}/invocations")

    return endpoint_id

def simulate_sagemaker_prediction(endpoint_id, test_data):
    """Simulate making predictions via SageMaker endpoint"""
    print("🔮 Simulating SageMaker Predictions")
    print("="*35)

    # Mock predictions
    predictions = np.random.choice([0, 1], size=len(test_data))
    probabilities = np.random.rand(len(test_data))

    results = pd.DataFrame({
        'prediction': predictions,
        'probability': probabilities
    })

    print(f"📊 Processed {len(test_data)} predictions:")
    print(results.head())

    return results

# =============================================================================
# 7. Hyperparameter Tuning Simulation
# =============================================================================

def simulate_hyperparameter_tuning():
    """Simulate SageMaker hyperparameter tuning"""
    print("🎛️ Simulating Hyperparameter Tuning Job")
    print("="*42)

    # Define hyperparameter space
    param_grid = {
        'n_estimators': [50, 100, 200],
        'max_depth': [3, 5, 10],
        'min_samples_split': [2, 5, 10]
    }

    print("🔧 Hyperparameter Search Space:")
    for param, values in param_grid.items():
        print(f"   {param}: {values}")

    # Simulate tuning results
    results = []
    for i in range(6):  # Simulate 6 training jobs
        config = {
            'n_estimators': np.random.choice(param_grid['n_estimators']),
            'max_depth': np.random.choice(param_grid['max_depth']),
            'min_samples_split': np.random.choice(param_grid['min_samples_split'])
        }
        accuracy = 0.8 + np.random.random() * 0.15  # Random accuracy between 0.8-0.95

        results.append({
            'job_id': f'tuning-job-{i+1:03d}',
            'accuracy': accuracy,
            **config
        })

    results_df = pd.DataFrame(results).sort_values('accuracy', ascending=False)

    print("\n📈 Tuning Results (Top 6 configurations):")
    print(results_df)

    best_config = results_df.iloc[0]
    print(f"\n🏆 Best Configuration:")
    print(f"   Accuracy: {best_config['accuracy']:.4f}")
    print(f"   Parameters: {dict(best_config.drop(['job_id', 'accuracy']))}")

    return best_config

# =============================================================================
# 8. AWS Lambda Simulation
# =============================================================================

def simulate_lambda_function(input_data):
    """Simulate AWS Lambda serverless inference"""
    print("⚡ Simulating AWS Lambda Serverless Inference")
    print("="*48)

    print("🔄 Lambda function execution:")
    print("   - Cold start: 250ms")
    print("   - Loading model from S3: 150ms")
    print("   - Making prediction: 50ms")
    print("   - Total execution time: 450ms")

    # Mock prediction
    prediction = np.random.choice([0, 1])
    probability = np.random.rand()

    response = {
        'statusCode': 200,
        'body': {
            'prediction': int(prediction),
            'probability': float(probability),
            'execution_time_ms': 450,
            'model_version': 'v1.2.3'
        }
    }

    print(f"✅ Lambda Response:")
    print(f"   Status Code: {response['statusCode']}")
    print(f"   Prediction: {response['body']['prediction']}")
    print(f"   Probability: {response['body']['probability']:.4f}")
    print(f"   Execution Time: {response['body']['execution_time_ms']}ms")

    return response

# =============================================================================
# 9. Complete ML Pipeline Demonstration
# =============================================================================

def run_complete_ml_pipeline():
    """Run complete ML pipeline demonstration"""
    print("🚀 COMPLETE AWS ML PIPELINE DEMONSTRATION")
    print("="*60)

    # Step 1: Create and explore dataset
    print("\n📊 STEP 1: Data Preparation")
    print("-" * 30)
    df = create_sample_dataset()

    # Step 2: Upload data to S3 (mock)
    print("\n☁️ STEP 2: Upload Data to S3")
    print("-" * 30)
    s3_client.create_bucket(Bucket='ml-demo-bucket')
    upload_csv_to_s3_mock(df, 'ml-demo-bucket', 'data/training_data.csv')

    # Step 3: Train model locally
    print("\n🤖 STEP 3: Model Training & Evaluation")
    print("-" * 38)
    model, accuracy = train_and_evaluate_model(df)

    # Step 4: Upload model to S3 (mock)
    print("\n📤 STEP 4: Upload Model to S3")
    print("-" * 28)
    upload_model_to_s3_mock(model, 'ml-demo-bucket', 'models/rf_model.pkl')

    # Step 5: EC2 training simulation
    print("\n🖥️ STEP 5: EC2 Deep Learning Setup")
    print("-" * 33)
    instance_id = simulate_ec2_deep_learning_setup()

    # Step 6: SageMaker training
    print("\n🎯 STEP 6: SageMaker Training")
    print("-" * 28)
    job_id = simulate_sagemaker_training()

    # Step 7: Hyperparameter tuning
    print("\n🎛️ STEP 7: Hyperparameter Tuning")
    print("-" * 31)
    best_config = simulate_hyperparameter_tuning()

    # Step 8: Model deployment
    print("\n🚀 STEP 8: Model Deployment")
    print("-" * 26)
    endpoint_id = simulate_sagemaker_deployment('optimized-rf-model')

    # Step 9: Make predictions
    print("\n🔮 STEP 9: Model Inference")
    print("-" * 24)
    test_data = df.sample(5)[['feature1', 'feature2', 'feature3', 'feature4']]
    predictions = simulate_sagemaker_prediction(endpoint_id, test_data)

    # Step 10: Lambda serverless inference
    print("\n⚡ STEP 10: Serverless Inference")
    print("-" * 30)
    lambda_response = simulate_lambda_function([1.5, 2.3, 0.8, 1.2])

    # Summary
    print("\n🎉 PIPELINE COMPLETION SUMMARY")
    print("="*40)
    print(f"✅ Dataset: {df.shape[0]} samples, {df.shape[1]-1} features")
    print(f"✅ Model Accuracy: {accuracy:.4f}")
    print(f"✅ S3 Objects: 2 (data + model)")
    print(f"✅ EC2 Instance: {instance_id}")
    print(f"✅ SageMaker Job: {job_id}")
    print(f"✅ Best HP Config: {best_config['accuracy']:.4f} accuracy")
    print(f"✅ Endpoint: {endpoint_id}")
    print(f"✅ Predictions: {len(predictions)} samples")
    print(f"✅ Lambda: 450ms response time")

    return {
        'dataset_size': df.shape[0],
        'model_accuracy': accuracy,
        'instance_id': instance_id,
        'job_id': job_id,
        'endpoint_id': endpoint_id,
        'predictions': predictions
    }

# =============================================================================
# 10. Monitoring and Cost Optimization
# =============================================================================

def simulate_monitoring_dashboard():
    """Simulate CloudWatch monitoring dashboard"""
    print("📊 AWS CloudWatch Monitoring Dashboard")
    print("="*45)

    # Generate mock metrics
    timestamps = pd.date_range('2024-01-01', periods=24, freq='H')
    metrics_data = {
        'timestamp': timestamps,
        'prediction_count': np.random.poisson(50, 24),
        'latency_ms': np.random.normal(200, 50, 24),
        'error_rate': np.random.beta(1, 99, 24) * 100,
        'cpu_utilization': np.random.normal(60, 15, 24)
    }

    metrics_df = pd.DataFrame(metrics_data)

    # Create monitoring plots
    fig, axes = plt.subplots(2, 2, figsize=(15, 10))
    fig.suptitle('AWS ML Model Monitoring Dashboard', fontsize=16)

    # Prediction volume
    axes[0, 0].plot(metrics_df['timestamp'], metrics_df['prediction_count'])
    axes[0, 0].set_title('Prediction Volume')
    axes[0, 0].set_ylabel('Predictions/Hour')
    axes[0, 0].tick_params(axis='x', rotation=45)

    # Response latency
    axes[0, 1].plot(metrics_df['timestamp'], metrics_df['latency_ms'], color='orange')
    axes[0, 1].set_title('Response Latency')
    axes[0, 1].set_ylabel('Latency (ms)')
    axes[0, 1].tick_params(axis='x', rotation=45)

    # Error rate
    axes[1, 0].plot(metrics_df['timestamp'], metrics_df['error_rate'], color='red')
    axes[1, 0].set_title('Error Rate')
    axes[1, 0].set_ylabel('Error Rate (%)')
    axes[1, 0].tick_params(axis='x', rotation=45)

    # CPU utilization
    axes[1, 1].plot(metrics_df['timestamp'], metrics_df['cpu_utilization'], color='green')
    axes[1, 1].set_title('CPU Utilization')
    axes[1, 1].set_ylabel('CPU %')
    axes[1, 1].tick_params(axis='x', rotation=45)

    plt.tight_layout()
    plt.show()

    # Print summary statistics
    print("\n📈 24-Hour Metrics Summary:")
    print(f"   Total Predictions: {metrics_df['prediction_count'].sum():,}")
    print(f"   Avg Latency: {metrics_df['latency_ms'].mean():.1f}ms")
    print(f"   Max Error Rate: {metrics_df['error_rate'].max():.2f}%")
    print(f"   Avg CPU Usage: {metrics_df['cpu_utilization'].mean():.1f}%")

    return metrics_df

def cost_optimization_analysis():
    """Analyze cost optimization opportunities"""
    print("💰 AWS Cost Optimization Analysis")
    print("="*40)

    # Mock cost data
    services = ['EC2', 'SageMaker', 'S3', 'Lambda', 'CloudWatch']
    monthly_costs = [250, 180, 15, 8, 12]

    cost_df = pd.DataFrame({
        'service': services,
        'monthly_cost': monthly_costs
    })

    # Cost breakdown visualization
    plt.figure(figsize=(12, 5))

    plt.subplot(1, 2, 1)
    plt.pie(cost_df['monthly_cost'], labels=cost_df['service'], autopct='%1.1f%%')
    plt.title('Monthly AWS Costs by Service')

    plt.subplot(1, 2, 2)
    plt.bar(cost_df['service'], cost_df['monthly_cost'], color='skyblue')
    plt.title('Monthly Cost Breakdown')
    plt.ylabel('Cost ($)')
    plt.xticks(rotation=45)

    plt.tight_layout()
    plt.show()

    total_cost = cost_df['monthly_cost'].sum()
    print(f"\n💲 Total Monthly Cost: ${total_cost}")
    print("\n🔧 Optimization Recommendations:")
    print("   1. Use Spot Instances for EC2: Save up to 70%")
    print("   2. Enable SageMaker Auto Scaling: Save 20-40%")
    print("   3. Use S3 Intelligent Tiering: Save 10-20%")
    print("   4. Optimize Lambda memory allocation: Save 15%")

    return cost_df

# =============================================================================
# 11. Main Execution Function
# =============================================================================

def main():
    """Main function to run all demonstrations"""
    print("🎯 AWS ML DEPLOYMENT - GOOGLE COLAB DEMO")
    print("="*60)
    print("This notebook demonstrates AWS ML deployment concepts")
    print("using mock implementations that run in Google Colab")
    print("="*60)

    try:
        # Run complete pipeline
        results = run_complete_ml_pipeline()

        # Show monitoring dashboard
        print("\n" + "="*60)
        metrics_df = simulate_monitoring_dashboard()

        # Cost analysis
        print("\n" + "="*60)
        cost_df = cost_optimization_analysis()

        print("\n🎉 ALL DEMONSTRATIONS COMPLETED SUCCESSFULLY!")
        print("✅ You've learned about:")
        print("   • S3 data storage and management")
        print("   • EC2 deep learning instances")
        print("   • SageMaker training and deployment")
        print("   • Hyperparameter tuning")
        print("   • Serverless inference with Lambda")
        print("   • Model monitoring and cost optimization")

        return results, metrics_df, cost_df

    except Exception as e:
        print(f"❌ Error in demonstration: {e}")
        return None

# =============================================================================
# 12. Interactive Learning Functions
# =============================================================================

def aws_quiz():
    """Interactive quiz about AWS ML concepts"""
    print("🧠 AWS ML DEPLOYMENT QUIZ")
    print("="*30)

    questions = [
        {
            "question": "Which AWS service is best for storing training data?",
            "options": ["A) EC2", "B) S3", "C) Lambda", "D) RDS"],
            "answer": "B",
            "explanation": "S3 is designed for object storage and is perfect for ML datasets"
        },
        {
            "question": "What type of EC2 instance is best for deep learning?",
            "options": ["A) t2.micro", "B) m5.large", "C) p3.2xlarge", "D) c5.large"],
            "answer": "C",
            "explanation": "p3 instances have GPUs optimized for machine learning workloads"
        },
        {
            "question": "Which service provides managed ML model training?",
            "options": ["A) EC2", "B) Lambda", "C) S3", "D) SageMaker"],
            "answer": "D",
            "explanation": "SageMaker is AWS's managed machine learning service"
        }
    ]

    score = 0
    for i, q in enumerate(questions, 1):
        print(f"\n❓ Question {i}: {q['question']}")
        for option in q['options']:
            print(f"   {option}")

        # In a real interactive environment, you'd get user input
        # For demo, we'll show the answer
        print(f"✅ Correct Answer: {q['answer']}")
        print(f"💡 Explanation: {q['explanation']}")
        score += 1

    print(f"\n🎉 Quiz Complete! Score: {score}/{len(questions)}")

# Run the main demonstration
if __name__ == "__main__":
    # Execute all demonstrations
    results = main()

    # Run quiz
    print("\n" + "="*60)
    aws_quiz()

    print("\n🎓 LEARNING COMPLETE!")
    print("You now have hands-on experience with AWS ML deployment concepts!")

✅ All packages installed successfully!
🔧 Mock S3 Client initialized
🎯 AWS ML DEPLOYMENT - GOOGLE COLAB DEMO
This notebook demonstrates AWS ML deployment concepts
using mock implementations that run in Google Colab
🚀 COMPLETE AWS ML PIPELINE DEMONSTRATION

📊 STEP 1: Data Preparation
------------------------------
📊 Creating sample dataset...
❌ Error in demonstration: Number of informative, redundant and repeated features must sum to less than the number of total features

🧠 AWS ML DEPLOYMENT QUIZ

❓ Question 1: Which AWS service is best for storing training data?
   A) EC2
   B) S3
   C) Lambda
   D) RDS
✅ Correct Answer: B
💡 Explanation: S3 is designed for object storage and is perfect for ML datasets

❓ Question 2: What type of EC2 instance is best for deep learning?
   A) t2.micro
   B) m5.large
   C) p3.2xlarge
   D) c5.large
✅ Correct Answer: C
💡 Explanation: p3 instances have GPUs optimized for machine learning workloads

❓ Question 3: Which service provides managed ML model train