In [1]:
#!/usr/bin/env python
# coding: utf-8
"""
Validation and Dashboard for Traffic Accident Predictions
Part 1: Validate outputs
Part 2: Create interactive web dashboard
"""

import pandas as pd
import numpy as np
from pathlib import Path
import json
import warnings
warnings.filterwarnings('ignore')

In [2]:
# ============================================
# PART 1: VALIDATION OF OUTPUTS
# ============================================

# Section banner: a 70-character rule above and below the title.
print("=" * 70, "VALIDATING MODEL OUTPUTS", "=" * 70, sep="\n")

# Artifact directories, given as paths relative to the working directory.
DIR_MODELS = Path("models")
DIR_GOLD = Path("gold_local")

# 1. Check if prediction files exist
def _load_csv_dir(directory):
    """Load every ``*.csv`` file in *directory* into one DataFrame.

    Returns ``None`` when *directory* is not a directory or holds no CSV
    files. Files are read in sorted name order and concatenated, so output
    split across several part files is validated as a whole rather than
    through whichever file the glob happens to yield first.
    """
    if not directory.is_dir():
        return None
    csv_files = sorted(directory.glob("*.csv"))
    if not csv_files:
        return None
    return pd.concat([pd.read_csv(p) for p in csv_files], ignore_index=True)


def _check_expected_files(models_dir, results):
    """Record in *results* which expected artifacts exist under *models_dir*."""
    expected_files = {
        "predictions": models_dir / "predictions_best_model",
        "future_predictions": models_dir / "future_predictions_2024_2025",
        "validation_report": models_dir / "validation_report.json",
        "quality_checks": models_dir / "quality_checks.json",
        "final_report": models_dir / "final_report.txt",
    }

    print("\n1. CHECKING OUTPUT FILES:")
    print("-" * 40)
    for name, path in expected_files.items():
        detail = ""
        if not path.exists():
            status = "❌ Missing"
            results["warnings"].append(f"Missing: {name}")
        elif path.is_dir():
            n_csv = len(list(path.glob("*.csv")))
            if n_csv:
                status = "✅ Found"
                detail = f" ({n_csv} files)"
            else:
                status = "❌ Empty directory"
                # Without this the summary would still claim that all
                # validations passed although no data was produced.
                results["warnings"].append(f"Empty directory: {name}")
        else:
            status = "✅ Found"
        results["files_check"][name] = status
        print(f"  {name}: {status}{detail}")


def _validate_predictions(models_dir, results):
    """Print accuracy statistics for the back-test predictions and record them."""
    df_pred = _load_csv_dir(models_dir / "predictions_best_model")
    if df_pred is None:
        return

    print("\n2. VALIDATING PREDICTIONS:")
    print("-" * 40)
    print(f"  Total predictions: {len(df_pred)}")

    has_region = 'region' in df_pred.columns
    if 'year' in df_pred.columns:
        print(f"  Years covered: {df_pred['year'].min()} - {df_pred['year'].max()}")
    else:
        results["warnings"].append("Predictions file has no 'year' column")
    if has_region:
        print(f"  Regions: {df_pred['region'].nunique()}")
    else:
        results["warnings"].append("Predictions file has no 'region' column")

    actual_col = 'siniestros_total__total'
    if 'prediction' not in df_pred.columns or actual_col not in df_pred.columns:
        return

    actual = df_pred[actual_col]
    predicted = df_pred['prediction']

    # A zero actual value makes the percentage error undefined (division by
    # zero -> inf), which would turn the mean into inf. Such rows get NaN and
    # are left out of the percentage statistics, but are reported.
    zero_actual = int((actual == 0).sum())
    df_pred['error_pct'] = (predicted - actual).abs() / actual.where(actual != 0) * 100
    valid_errors = df_pred['error_pct'].dropna()
    n_valid = len(valid_errors)
    within_20 = valid_errors <= 20
    within_50 = valid_errors <= 50

    print("\n  Prediction Quality:")
    print(f"    - Mean error: {valid_errors.mean():.1f}%")
    print(f"    - Median error: {valid_errors.median():.1f}%")
    print(f"    - Within 20% error: {within_20.sum()} / {n_valid} ({within_20.mean()*100:.1f}%)")
    print(f"    - Within 50% error: {within_50.sum()} / {n_valid} ({within_50.mean()*100:.1f}%)")

    if zero_actual:
        results["warnings"].append(
            f"Found {zero_actual} rows with zero actual value (excluded from % error)"
        )
        print(f"\n  ⚠️  Warning: {zero_actual} rows have a zero actual value; % error is undefined for them")

    # Predictions more than five times the observed value.
    extreme_over = df_pred[predicted > actual * 5]
    if not extreme_over.empty:
        results["warnings"].append(f"Found {len(extreme_over)} predictions >5x actual")
        print(f"\n  ⚠️  Warning: {len(extreme_over)} predictions are >5x the actual value")
        if has_region:
            print(f"     Regions affected: {extreme_over['region'].unique()[:5].tolist()}")

    # Lima is reported separately (the original notebook treats it as an
    # outlier region).
    if has_region:
        lima_pred = df_pred[df_pred['region'] == 'LIMA']
        if not lima_pred.empty:
            print("\n  Lima Analysis (outlier region):")
            print(f"    - Mean actual: {lima_pred[actual_col].mean():.0f}")
            print(f"    - Mean predicted: {lima_pred['prediction'].mean():.0f}")
            print(f"    - Mean error: {lima_pred['error_pct'].mean():.1f}%")

            if lima_pred['error_pct'].mean() > 100:
                results["warnings"].append("Lima predictions have >100% average error")

    results["data_quality"]["prediction_accuracy"] = {
        "mean_error_pct": float(valid_errors.mean()),
        "within_20pct": float(within_20.mean() * 100),
        "within_50pct": float(within_50.mean() * 100),
    }
    results["prediction_sanity"]["extreme_over_5x"] = len(extreme_over)
    results["prediction_sanity"]["zero_actual_rows"] = zero_actual


def _validate_future_predictions(models_dir, results):
    """Print the range of the forecast values and flag non-positive ones."""
    df_future = _load_csv_dir(models_dir / "future_predictions_2024_2025")
    if df_future is None:
        return

    print("\n3. VALIDATING FUTURE PREDICTIONS:")
    print("-" * 40)
    if 'year' in df_future.columns:
        print(f"  Predictions for {df_future['year'].nunique()} years")
    if 'region' in df_future.columns:
        print(f"  Regions covered: {df_future['region'].nunique()}")

    if 'predicted_accidents' not in df_future.columns:
        return

    forecast = df_future['predicted_accidents']
    print("\n  Range of predictions:")
    print(f"    - Min: {forecast.min():.0f}")
    print(f"    - Max: {forecast.max():.0f}")
    print(f"    - Mean: {forecast.mean():.0f}")

    # An accident count must be positive.
    invalid_count = int((forecast <= 0).sum())
    results["prediction_sanity"]["invalid_future_predictions"] = invalid_count
    if invalid_count:
        results["warnings"].append(f"Found {invalid_count} invalid future predictions (<=0)")
        print(f"  ⚠️  Warning: {invalid_count} predictions are <= 0")


def _print_model_performance(models_dir, results):
    """Print RMSE/MAE per model and the best model from the JSON report."""
    report_path = models_dir / "validation_report.json"
    if not report_path.exists():
        return

    try:
        with open(report_path, 'r', encoding='utf-8') as f:
            val_report = json.load(f)
    except json.JSONDecodeError as exc:
        # A corrupt report should be reported, not abort the whole validation.
        results["warnings"].append(f"Could not parse validation_report.json: {exc}")
        return

    print("\n4. MODEL PERFORMANCE SUMMARY:")
    print("-" * 40)
    if 'model_performance' in val_report:
        for model, metrics in val_report['model_performance'].items():
            print(f"  {model}:")
            if isinstance(metrics, dict):
                print(f"    - RMSE: {metrics.get('RMSE', 'N/A')}")
                print(f"    - MAE: {metrics.get('MAE', 'N/A')}")

    if 'best_model' in val_report:
        best = val_report['best_model']
        best_name = best.get('name', 'Unknown') if isinstance(best, dict) else best
        print(f"\n  Best Model: {best_name}")


def validate_outputs(models_dir=None):
    """Validate all expected outputs from the pipeline.

    Checks that the expected artifacts exist, computes percentage-error
    statistics for the back-test predictions, sanity-checks the future
    predictions, prints the model metrics stored in the validation report,
    and finally prints a summary of all warnings raised along the way.

    Args:
        models_dir: Directory holding the model outputs. Defaults to the
            module-level ``DIR_MODELS`` when omitted.

    Returns:
        dict with the keys ``files_check`` (status string per artifact),
        ``data_quality`` (accuracy metrics), ``prediction_sanity`` (counts of
        suspicious rows) and ``warnings`` (list of human-readable messages).
    """
    models_dir = DIR_MODELS if models_dir is None else Path(models_dir)

    validation_results = {
        "files_check": {},
        "data_quality": {},
        "prediction_sanity": {},
        "warnings": []
    }

    _check_expected_files(models_dir, validation_results)
    _validate_predictions(models_dir, validation_results)
    _validate_future_predictions(models_dir, validation_results)
    _print_model_performance(models_dir, validation_results)

    # Summary
    print("\n" + "="*70)
    print("VALIDATION SUMMARY")
    print("="*70)

    if validation_results["warnings"]:
        print("\n⚠️  WARNINGS:")
        for warning in validation_results["warnings"]:
            print(f"  - {warning}")
    else:
        print("\n✅ All validations passed successfully!")

    return validation_results

# Run the validation once at notebook level. The returned dict (file checks,
# data-quality metrics, sanity checks and warnings) is stored in
# `validation_results`.
validation_results = validate_outputs()


VALIDATING MODEL OUTPUTS

1. CHECKING OUTPUT FILES:
----------------------------------------
  predictions: ✅ Found (1 files)
  future_predictions: ✅ Found (1 files)
  validation_report: ✅ Found
  quality_checks: ✅ Found
  final_report: ✅ Found

2. VALIDATING PREDICTIONS:
----------------------------------------
  Total predictions: 54
  Years covered: 2022 - 2023
  Regions: 27

  Prediction Quality:
    - Mean error: 35.1%
    - Median error: 3.9%
    - Within 20% error: 49 / 54 (90.7%)
    - Within 50% error: 53 / 54 (98.1%)

     Regions affected: ['HUANCAVELICA']

  Lima Analysis (outlier region):
    - Mean actual: 43053
    - Mean predicted: 35365
    - Mean error: 17.8%

3. VALIDATING FUTURE PREDICTIONS:
----------------------------------------
  Predictions for 2 years
  Regions covered: 28

  Range of predictions:
    - Min: 275
    - Max: 84952
    - Mean: 9895

4. MODEL PERFORMANCE SUMMARY:
----------------------------------------
  Poisson_GLM:
    - RMSE: 57338.31580930549

In [3]:

# ============================================
# PART 2: WEB DASHBOARD
# ============================================

# Section banner: blank line, then a 70-character rule above and below the title.
print("\n" + "=" * 70, "CREATING WEB DASHBOARD", "=" * 70, sep="\n")

# Create HTML dashboard with interactive visualizations
html_content = """
<!DOCTYPE html>
<html lang="es">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Dashboard - Predicción de Siniestros de Tránsito Perú</title>
    <script src="https://cdn.plot.ly/plotly-latest.min.js"></script>
    <script src="https://code.jquery.com/jquery-3.6.0.min.js"></script>
    <style>
        * {
            margin: 0;
            padding: 0;
            box-sizing: border-box;
        }
        
        body {
            font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif;
            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
            min-height: 100vh;
            padding: 20px;
        }
        
        .container {
            max-width: 1400px;
            margin: 0 auto;
        }
        
        .header {
            text-align: center;
            color: white;
            margin-bottom: 30px;
            padding: 30px;
            background: rgba(255,255,255,0.1);
            border-radius: 20px;
            backdrop-filter: blur(10px);
        }
        
        h1 {
            font-size: 2.5em;
            margin-bottom: 10px;
            text-shadow: 2px 2px 4px rgba(0,0,0,0.2);
        }
        
        .subtitle {
            font-size: 1.2em;
            opacity: 0.9;
        }
        
        .dashboard-grid {
            display: grid;
            grid-template-columns: repeat(auto-fit, minmax(400px, 1fr));
            gap: 20px;
            margin-bottom: 30px;
        }
        
        .card {
            background: white;
            border-radius: 15px;
            padding: 20px;
            box-shadow: 0 10px 30px rgba(0,0,0,0.2);
            transition: transform 0.3s ease, box-shadow 0.3s ease;
        }
        
        .card:hover {
            transform: translateY(-5px);
            box-shadow: 0 15px 40px rgba(0,0,0,0.3);
        }
        
        .card h2 {
            color: #333;
            margin-bottom: 15px;
            border-bottom: 3px solid #667eea;
            padding-bottom: 10px;
        }
        
        .metric-grid {
            display: grid;
            grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
            gap: 15px;
            margin-bottom: 30px;
        }
        
        .metric-card {
            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
            color: white;
            padding: 20px;
            border-radius: 10px;
            text-align: center;
            box-shadow: 0 5px 15px rgba(0,0,0,0.2);
        }
        
        .metric-value {
            font-size: 2.5em;
            font-weight: bold;
            margin: 10px 0;
        }
        
        .metric-label {
            font-size: 0.9em;
            opacity: 0.9;
            text-transform: uppercase;
            letter-spacing: 1px;
        }
        
        .chart-container {
            min-height: 400px;
            margin: 20px 0;
        }
        
        .controls {
            background: rgba(255,255,255,0.95);
            padding: 20px;
            border-radius: 10px;
            margin-bottom: 20px;
            display: flex;
            gap: 20px;
            flex-wrap: wrap;
            align-items: center;
        }
        
        .control-group {
            display: flex;
            flex-direction: column;
            gap: 5px;
        }
        
        label {
            font-weight: 600;
            color: #555;
            font-size: 0.9em;
        }
        
        select, input {
            padding: 8px 12px;
            border: 2px solid #e0e0e0;
            border-radius: 5px;
            font-size: 1em;
            transition: border-color 0.3s;
        }
        
        select:focus, input:focus {
            outline: none;
            border-color: #667eea;
        }
        
        button {
            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
            color: white;
            border: none;
            padding: 10px 20px;
            border-radius: 5px;
            cursor: pointer;
            font-weight: 600;
            transition: transform 0.2s;
        }
        
        button:hover {
            transform: scale(1.05);
        }
        
        .alert {
            padding: 15px;
            border-radius: 10px;
            margin-bottom: 20px;
        }
        
        .alert-warning {
            background: #fff3cd;
            border-left: 5px solid #ffc107;
            color: #856404;
        }
        
        .alert-success {
            background: #d4edda;
            border-left: 5px solid #28a745;
            color: #155724;
        }
        
        .table-container {
            overflow-x: auto;
            margin: 20px 0;
        }
        
        table {
            width: 100%;
            border-collapse: collapse;
        }
        
        th, td {
            padding: 12px;
            text-align: left;
            border-bottom: 1px solid #e0e0e0;
        }
        
        th {
            background: #f8f9fa;
            font-weight: 600;
            color: #555;
        }
        
        tr:hover {
            background: #f8f9fa;
        }
        
        .loading {
            text-align: center;
            padding: 40px;
            color: #999;
        }
        
        .spinner {
            border: 4px solid #f3f3f3;
            border-top: 4px solid #667eea;
            border-radius: 50%;
            width: 40px;
            height: 40px;
            animation: spin 1s linear infinite;
            margin: 20px auto;
        }
        
        @keyframes spin {
            0% { transform: rotate(0deg); }
            100% { transform: rotate(360deg); }
        }
    </style>
</head>
<body>
    <div class="container">
        <div class="header">
            <h1>🚦 Dashboard de Predicción de Siniestros</h1>
            <div class="subtitle">Sistema de ML para Prevención de Accidentes de Tránsito - Perú</div>
        </div>
        
        <div class="metric-grid" id="metrics">
            <div class="metric-card">
                <div class="metric-label">Precisión del Modelo</div>
                <div class="metric-value" id="accuracy">--</div>
            </div>
            <div class="metric-card">
                <div class="metric-label">Regiones Analizadas</div>
                <div class="metric-value" id="regions">25</div>
            </div>
            <div class="metric-card">
                <div class="metric-label">Años de Datos</div>
                <div class="metric-value" id="years">16</div>
            </div>
            <div class="metric-card">
                <div class="metric-label">Predicciones 2024</div>
                <div class="metric-value" id="pred2024">--</div>
            </div>
        </div>
        
        <div class="controls">
            <div class="control-group">
                <label for="yearSelect">Año:</label>
                <select id="yearSelect">
                    <option value="2022">2022</option>
                    <option value="2023">2023</option>
                    <option value="2024">2024 (Predicción)</option>
                    <option value="2025">2025 (Predicción)</option>
                </select>
            </div>
            <div class="control-group">
                <label for="regionSelect">Región:</label>
                <select id="regionSelect">
                    <option value="all">Todas las Regiones</option>
                    <option value="LIMA">Lima</option>
                    <option value="AREQUIPA">Arequipa</option>
                    <option value="CUSCO">Cusco</option>
                    <option value="PIURA">Piura</option>
                    <option value="LA LIBERTAD">La Libertad</option>
                </select>
            </div>
            <button onclick="updateCharts()">Actualizar Visualizaciones</button>
        </div>
        
        <div class="dashboard-grid">
            <div class="card">
                <h2>Tendencia Temporal</h2>
                <div id="timeSeriesChart" class="chart-container"></div>
            </div>
            
            <div class="card">
                <h2>Comparación por Región</h2>
                <div id="regionBarChart" class="chart-container"></div>
            </div>
            
            <div class="card">
                <h2>Precisión del Modelo</h2>
                <div id="errorChart" class="chart-container"></div>
            </div>
            
            <div class="card">
                <h2>Mapa de Calor Regional</h2>
                <div id="heatmapChart" class="chart-container"></div>
            </div>
        </div>
        
        <div class="card">
            <h2>Predicciones Detalladas</h2>
            <div class="table-container">
                <table id="predictionsTable">
                    <thead>
                        <tr>
                            <th>Región</th>
                            <th>Año</th>
                            <th>Valor Real</th>
                            <th>Predicción</th>
                            <th>Error (%)</th>
                            <th>Estado</th>
                        </tr>
                    </thead>
                    <tbody id="predictionsBody">
                        <tr>
                            <td colspan="6" class="loading">Cargando datos...</td>
                        </tr>
                    </tbody>
                </table>
            </div>
        </div>
        
        <div id="alerts"></div>
    </div>
    
    <script>
        // Sample data - Replace with actual data from CSV files
        let predictionData = [];
        let futureData = [];
        
        // Bootstrap the dashboard once the DOM is ready: fill the in-memory
        // datasets first, then render every chart and the table from them.
        $(function () {
            loadData();
            updateCharts();
        });
        
        function loadData() {
            // Demo placeholder values; the real figures would come from the
            // model's CSV outputs.
            // Columns: region, year, actual, predicted, error (%)
            const historyRows = [
                ['LIMA', 2022, 41111, 38500, 6.3],
                ['LIMA', 2023, 44995, 42000, 6.7],
                ['AREQUIPA', 2022, 5017, 4800, 4.3],
                ['AREQUIPA', 2023, 5194, 5000, 3.7],
                ['CUSCO', 2022, 3365, 3200, 4.9],
                ['CUSCO', 2023, 3234, 3300, 2.0],
                ['PIURA', 2022, 3947, 3800, 3.7],
                ['PIURA', 2023, 3367, 3400, 1.0]
            ];
            predictionData = historyRows.map(([region, year, actual, predicted, error]) =>
                ({region, year, actual, predicted, error}));

            // Columns: region, year, predicted
            const forecastRows = [
                ['LIMA', 2024, 46000],
                ['LIMA', 2025, 47000],
                ['AREQUIPA', 2024, 5300],
                ['AREQUIPA', 2025, 5400],
                ['CUSCO', 2024, 3400],
                ['CUSCO', 2025, 3500],
                ['PIURA', 2024, 3500],
                ['PIURA', 2025, 3600]
            ];
            futureData = forecastRows.map(([region, year, predicted]) =>
                ({region, year, predicted}));

            // Refresh the headline metric cards from the freshly loaded data
            updateMetrics();
        }
        
        function updateMetrics() {
            // Headline "accuracy" = 100 minus the mean of the per-row error percentages.
            // With no rows the mean would be 0/0 = NaN and render as "NaN%", so the
            // card keeps its "--" placeholder instead.
            if (predictionData.length > 0) {
                const totalError = predictionData.reduce((sum, d) => sum + d.error, 0);
                const accuracy = 100 - totalError / predictionData.length;
                $('#accuracy').text(accuracy.toFixed(1) + '%');
            } else {
                $('#accuracy').text('--');
            }

            // Sum of the predicted values over all regions for 2024; show the
            // placeholder rather than a misleading "0" when there are no 2024 rows.
            const rows2024 = futureData.filter(d => d.year === 2024);
            if (rows2024.length > 0) {
                const total2024 = rows2024.reduce((sum, d) => sum + d.predicted, 0);
                $('#pred2024').text(total2024.toLocaleString());
            } else {
                $('#pred2024').text('--');
            }
        }
        
        function updateCharts() {
            // Read the current filter selections (both are strings)
            const year = $('#yearSelect').val();
            const region = $('#regionSelect').val();

            // Redraw every view: the time series follows the region filter, the bar
            // chart follows the year filter, the error chart and heatmap take no
            // filter, and the table receives both.
            createTimeSeriesChart(region);
            createRegionBarChart(year);
            createErrorChart();
            createHeatmap();
            updatePredictionsTable(region, year);
        }
        
        function createTimeSeriesChart(region) {
            // 'all' keeps every row; otherwise keep only the selected region
            const rows = predictionData.filter(d => region === 'all' || d.region === region);
            const regionNames = Array.from(new Set(rows.map(d => d.region)));

            // Build one line trace for a region from the given value field
            const makeTrace = (points, field, label, line) => ({
                x: points.map(d => d.year),
                y: points.map(d => d[field]),
                name: label,
                type: 'scatter',
                mode: 'lines+markers',
                line: line
            });

            // Two traces per region: solid for observed values, dotted for predictions
            const traces = [].concat(...regionNames.map(name => {
                const points = rows.filter(d => d.region === name);
                return [
                    makeTrace(points, 'actual', name + ' (Real)', {width: 2}),
                    makeTrace(points, 'predicted', name + ' (Predicción)', {dash: 'dot', width: 2})
                ];
            }));

            Plotly.newPlot('timeSeriesChart', traces, {
                title: 'Tendencia de Siniestros',
                xaxis: {title: 'Año'},
                yaxis: {title: 'Número de Siniestros'},
                hovermode: 'x unified'
            });
        }
        
        function createRegionBarChart(year) {
            // `year` arrives as the <select> value (a string); always parse base 10
            const yearNum = parseInt(year, 10);

            // Use the observed rows when the year has them, otherwise fall back to the
            // forecast rows. This is driven by the data instead of matching the
            // hard-coded strings '2024'/'2025'.
            const historical = predictionData.filter(d => d.year === yearNum);
            const isForecast = historical.length === 0;
            const data = isForecast
                ? futureData.filter(d => d.year === yearNum)
                : historical;

            const trace = {
                x: data.map(d => d.region),
                // Historical rows carry both fields; the previous `d.predicted || d.actual`
                // therefore always plotted the prediction and never the observed count.
                y: data.map(d => isForecast ? d.predicted : d.actual),
                type: 'bar',
                marker: {
                    color: 'rgba(102, 126, 234, 0.8)',
                    line: {color: 'rgba(102, 126, 234, 1)', width: 2}
                }
            };

            const layout = {
                title: `Siniestros por Región - ${year}`,
                xaxis: {title: 'Región'},
                yaxis: {title: 'Número de Siniestros'}
            };

            Plotly.newPlot('regionBarChart', [trace], layout);
        }
        
        function createErrorChart() {
            // Group the error percentages by region, keeping first-seen region order
            const errorsByRegion = new Map();
            predictionData.forEach(d => {
                if (!errorsByRegion.has(d.region)) {
                    errorsByRegion.set(d.region, []);
                }
                errorsByRegion.get(d.region).push(d.error);
            });

            const regionNames = Array.from(errorsByRegion.keys());
            const meanErrors = regionNames.map(name => {
                const values = errorsByRegion.get(name);
                return values.reduce((sum, v) => sum + v, 0) / values.length;
            });

            // Traffic-light colouring: below 5% green, below 10% orange, otherwise red
            const barColor = mean => {
                if (mean < 5) {
                    return 'green';
                }
                if (mean < 10) {
                    return 'orange';
                }
                return 'red';
            };

            const trace = {
                x: regionNames,
                y: meanErrors,
                type: 'bar',
                marker: {color: meanErrors.map(barColor)}
            };

            Plotly.newPlot('errorChart', [trace], {
                title: 'Error Promedio por Región (%)',
                xaxis: {title: 'Región'},
                yaxis: {title: 'Error (%)'}
            });
        }
        
        function createHeatmap() {
            // Axes: one row per region, one column per year, in first-seen order
            const regions = [...new Set(predictionData.map(d => d.region))];
            const years = [...new Set(predictionData.map(d => d.year))];

            // z[i][j] = observed count for regions[i] in years[j].
            // A missing region/year pair becomes null so Plotly draws a gap; the
            // previous fallback of 0 displayed it as "zero accidents".
            const z = regions.map(r =>
                years.map(y => {
                    const cell = predictionData.find(d => d.region === r && d.year === y);
                    return cell ? cell.actual : null;
                })
            );

            const trace = {
                x: years,
                y: regions,
                z: z,
                type: 'heatmap',
                colorscale: 'Viridis'
            };

            const layout = {
                title: 'Mapa de Calor - Siniestros por Región y Año',
                xaxis: {title: 'Año'},
                yaxis: {title: 'Región'}
            };

            Plotly.newPlot('heatmapChart', [trace], layout);
        }
        
        function updatePredictionsTable(region, year) {
            const tbody = $('#predictionsBody');
            tbody.empty();

            // `year` arrives as the <select> value (a string); always parse base 10
            const yearNum = parseInt(year, 10);
            const matches = d => (region === 'all' || d.region === region) && d.year === yearNum;

            // Show the back-test rows for the selected year; when the year has none
            // (a forecast year) show the forecast rows instead. Previously a forecast
            // year skipped the year filter and listed every historical row.
            let tableData = predictionData.filter(matches);
            if (tableData.length === 0) {
                tableData = futureData.filter(matches);
            }

            if (tableData.length === 0) {
                tbody.append('<tr><td colspan="6" class="loading">No hay datos para mostrar</td></tr>');
                return;
            }

            tableData.forEach(d => {
                // Forecast rows carry neither an observed value nor an error
                const hasActual = typeof d.actual === 'number';
                const hasError = typeof d.error === 'number';

                let status = 'Predicción';
                if (hasError) {
                    status = d.error < 10 ? '✅ Bueno' : d.error < 20 ? '⚠️ Regular' : '❌ Revisar';
                }

                const cells = [
                    d.region,
                    d.year,
                    hasActual ? d.actual.toLocaleString() : '—',
                    d.predicted.toLocaleString(),
                    hasError ? d.error.toFixed(1) + '%' : '—',
                    status
                ];

                // Build the cells with .text() so values are inserted as text rather
                // than parsed as HTML.
                const row = $('<tr>');
                cells.forEach(value => row.append($('<td>').text(value)));
                tbody.append(row);
            });
        }
    </script>
</body>
</html>
"""

# Write the dashboard page into the current working directory as UTF-8.
dashboard_path = Path("dashboard.html")
dashboard_path.write_text(html_content, encoding='utf-8')

print(f"Dashboard saved to: {dashboard_path.absolute()}")



CREATING WEB DASHBOARD
Dashboard saved to: /Users/enmanuelcuadros/Downloads/tesis-prevencion-siniestros-transito/notebooks/dashboard.html


In [4]:

# ============================================
# PART 3: PYTHON WEB SERVER
# ============================================

# Create a simple Python server script.
# The template is a RAW string: in a normal string the "\n" in the shutdown
# message would be expanded here, putting a real line break inside the quoted
# string of the generated file and making serve_dashboard.py a SyntaxError.
# The shebang starts right after the opening quotes so that it is the first
# line of the generated file.
server_script = r'''#!/usr/bin/env python3
"""
Simple web server for the dashboard
Run this script and open http://localhost:8000 in your browser
"""

import http.server
import socketserver
import webbrowser
from pathlib import Path

PORT = 8000
DIRECTORY = Path.cwd()


class MyHTTPRequestHandler(http.server.SimpleHTTPRequestHandler):
    def __init__(self, *args, **kwargs):
        # str(): the handler only accepts path-like objects from Python 3.9 on
        super().__init__(*args, directory=str(DIRECTORY), **kwargs)

    def end_headers(self):
        # Add CORS headers
        self.send_header('Access-Control-Allow-Origin', '*')
        self.send_header('Access-Control-Allow-Methods', 'GET, POST, OPTIONS')
        super().end_headers()


def run_server():
    # Allow restarting right away without "Address already in use"
    socketserver.TCPServer.allow_reuse_address = True
    with socketserver.TCPServer(("", PORT), MyHTTPRequestHandler) as httpd:
        print(f"Server running at http://localhost:{PORT}/")
        print(f"Open http://localhost:{PORT}/dashboard.html in your browser")
        print("Press Ctrl+C to stop the server")

        # Auto-open browser
        webbrowser.open(f'http://localhost:{PORT}/dashboard.html')

        try:
            httpd.serve_forever()
        except KeyboardInterrupt:
            print("\nServer stopped.")


if __name__ == "__main__":
    run_server()
'''

# Save server script
server_path = Path("serve_dashboard.py")
with open(server_path, 'w', encoding='utf-8') as f:
    f.write(server_script)

print(f"Server script saved to: {server_path.absolute()}")


Server script saved to: /Users/enmanuelcuadros/Downloads/tesis-prevencion-siniestros-transito/notebooks/serve_dashboard.py


In [5]:

# ============================================
# PART 4: DATA LOADER FOR DASHBOARD
# ============================================

# Create a data preparation script that converts CSVs to JSON for the dashboard.
#
# Notes on the generated script:
# - The template is a raw string that starts with the shebang, so the shebang is
#   on line 1 and no escape sequence can be altered when the file is written.
# - Every CSV part file in an output directory is read (in sorted order), not
#   just whichever file the glob happens to return first.
# - NaN/Infinity are converted to null before dumping: strict JSON has no such
#   tokens, and the browser would otherwise reject the whole file.
data_loader_script = r'''#!/usr/bin/env python3
"""
Prepare data from model outputs for dashboard visualization
"""

import json
import math
from pathlib import Path

import pandas as pd


def _read_csv_dir(directory):
    """Return all CSV files of *directory* concatenated, or None if there are none."""
    if not directory.exists():
        return None
    frames = []
    for csv_file in sorted(directory.glob("*.csv")):
        try:
            frames.append(pd.read_csv(csv_file))
        except pd.errors.EmptyDataError:
            # Zero-byte part files carry no rows; skip them
            continue
    if not frames:
        return None
    return pd.concat(frames, ignore_index=True)


def _json_safe(value):
    """Recursively replace NaN/Infinity floats with None (strict JSON has neither)."""
    if isinstance(value, dict):
        return {key: _json_safe(item) for key, item in value.items()}
    if isinstance(value, (list, tuple)):
        return [_json_safe(item) for item in value]
    if isinstance(value, float) and not math.isfinite(value):
        return None
    return value


def prepare_dashboard_data():
    """Load and prepare data from model outputs.

    Reads the prediction CSVs under ./models, derives an error percentage and
    summary metrics, writes everything to dashboard_data.json and returns the
    same (JSON-safe) dictionary.
    """
    
    DIR_MODELS = Path("models")
    output_data = {
        "predictions": [],
        "future": [],
        "metrics": {},
        "regions": []
    }
    
    # Load prediction data
    df = _read_csv_dir(DIR_MODELS / "predictions_best_model")
    if df is not None:
        # Rename columns for consistency
        column_mapping = {
            'siniestros_total__total': 'actual',
            'prediction': 'predicted',
            'absolute_error': 'abs_error',
            'relative_error': 'rel_error'
        }
        df = df.rename(columns=column_mapping)
        
        # Error percentage: reuse rel_error when present, otherwise derive it
        if 'rel_error' in df.columns:
            df['error_pct'] = df['rel_error'] * 100
        elif 'actual' in df.columns and 'predicted' in df.columns:
            # A relative error is undefined when the actual value is 0;
            # leave it missing instead of producing an infinite value
            nonzero_actual = df['actual'].where(df['actual'] != 0)
            df['error_pct'] = (df['predicted'] - df['actual']).abs() / nonzero_actual * 100
        else:
            df['error_pct'] = 0
        
        # Convert to records
        output_data["predictions"] = df.to_dict('records')
        if 'region' in df.columns:
            output_data["regions"] = df['region'].unique().tolist()
        
        # Calculate metrics (rows with an undefined error are ignored by mean/median)
        if output_data["predictions"]:
            output_data["metrics"] = {
                "total_regions": len(output_data["regions"]),
                "mean_error": df['error_pct'].mean(),
                "median_error": df['error_pct'].median(),
                "accuracy": 100 - df['error_pct'].mean(),
                "within_20pct": (df['error_pct'] <= 20).mean() * 100,
                "within_50pct": (df['error_pct'] <= 50).mean() * 100
            }
    
    # Load future predictions
    df_future = _read_csv_dir(DIR_MODELS / "future_predictions_2024_2025")
    if df_future is not None:
        output_data["future"] = df_future.to_dict('records')
    
    # Save to JSON; allow_nan=False guarantees the file is valid strict JSON
    output_data = _json_safe(output_data)
    with open("dashboard_data.json", 'w', encoding='utf-8') as f:
        json.dump(output_data, f, indent=2, allow_nan=False)
    
    print(f"Dashboard data prepared: {len(output_data['predictions'])} predictions")
    print(f"Regions: {len(output_data['regions'])}")
    accuracy = output_data['metrics'].get('accuracy', 0)
    accuracy_text = f"{accuracy:.1f}%" if accuracy is not None else "n/a"
    print(f"Mean accuracy: {accuracy_text}")
    
    return output_data

if __name__ == "__main__":
    prepare_dashboard_data()
'''

# Save data loader script
loader_path = Path("prepare_dashboard_data.py")
with open(loader_path, 'w', encoding='utf-8') as f:
    f.write(data_loader_script)

print(f"Data loader script saved to: {loader_path.absolute()}")


Data loader script saved to: /Users/enmanuelcuadros/Downloads/tesis-prevencion-siniestros-transito/notebooks/prepare_dashboard_data.py


In [6]:

# ============================================
# PART 5: ENHANCED DASHBOARD WITH DATA LOADING
# ============================================

# Create enhanced dashboard that loads actual data.
#
# The page reads dashboard_data.json (written by prepare_dashboard_data.py) and
# falls back to a small built-in sample when the file cannot be loaded.
# Design points of the generated page:
# - Plotly is pinned to a fixed release; the "plotly-latest" CDN alias is frozen
#   at an old 1.x build and no longer follows new releases.
# - Missing metrics are rendered as "--" instead of hard-coded sample numbers,
#   so invented figures are never shown as if they were real results.
# - A visible notice is shown whenever the sample data is being displayed.
# - Table cells are filled with .text(), so values coming from the CSV files
#   cannot inject markup into the page.
enhanced_dashboard = '''
<!DOCTYPE html>
<html lang="es">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Dashboard Avanzado - Predicción de Siniestros</title>
    <script src="https://cdn.plot.ly/plotly-2.35.2.min.js"></script>
    <script src="https://code.jquery.com/jquery-3.6.0.min.js"></script>
    <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.1.3/dist/css/bootstrap.min.css" rel="stylesheet">
    <style>
        body {
            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
            min-height: 100vh;
            font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
        }
        .main-header {
            background: rgba(255,255,255,0.95);
            border-radius: 15px;
            padding: 30px;
            margin: 20px;
            box-shadow: 0 10px 40px rgba(0,0,0,0.2);
        }
        .metric-card {
            background: white;
            border-radius: 10px;
            padding: 20px;
            text-align: center;
            box-shadow: 0 5px 20px rgba(0,0,0,0.1);
            transition: transform 0.3s;
        }
        .metric-card:hover {
            transform: translateY(-5px);
        }
        .chart-card {
            background: white;
            border-radius: 10px;
            padding: 20px;
            margin: 20px;
            box-shadow: 0 5px 20px rgba(0,0,0,0.1);
        }
        .status-good { color: #28a745; }
        .status-warning { color: #ffc107; }
        .status-danger { color: #dc3545; }
    </style>
</head>
<body>
    <div class="container-fluid">
        <div class="main-header">
            <h1 class="text-center mb-4">🚦 Dashboard de Predicción de Siniestros de Tránsito</h1>
            <p class="text-center text-muted">Sistema ML para Prevención de Accidentes - Perú</p>
            <div id="dataNotice" class="alert alert-warning text-center d-none" role="alert"></div>
            
            <div class="row mt-4">
                <div class="col-md-3">
                    <div class="metric-card">
                        <h5>Precisión del Modelo</h5>
                        <h2 id="accuracy" class="status-good">--</h2>
                    </div>
                </div>
                <div class="col-md-3">
                    <div class="metric-card">
                        <h5>Regiones Analizadas</h5>
                        <h2 id="regions">--</h2>
                    </div>
                </div>
                <div class="col-md-3">
                    <div class="metric-card">
                        <h5>Error Promedio</h5>
                        <h2 id="error">--</h2>
                    </div>
                </div>
                <div class="col-md-3">
                    <div class="metric-card">
                        <h5>Predicciones 2024</h5>
                        <h2 id="future">--</h2>
                    </div>
                </div>
            </div>
        </div>
        
        <div class="row">
            <div class="col-md-6">
                <div class="chart-card">
                    <h3>Comparación Real vs Predicción</h3>
                    <div id="comparisonChart"></div>
                </div>
            </div>
            <div class="col-md-6">
                <div class="chart-card">
                    <h3>Error por Región</h3>
                    <div id="errorChart"></div>
                </div>
            </div>
        </div>
        
        <div class="row">
            <div class="col-12">
                <div class="chart-card">
                    <h3>Tendencia Temporal y Proyecciones</h3>
                    <div id="timelineChart"></div>
                </div>
            </div>
        </div>
        
        <div class="row">
            <div class="col-12">
                <div class="chart-card">
                    <h3>Tabla de Predicciones</h3>
                    <div class="table-responsive">
                        <table class="table table-hover">
                            <thead>
                                <tr>
                                    <th>Región</th>
                                    <th>Año</th>
                                    <th>Valor Real</th>
                                    <th>Predicción</th>
                                    <th>Error (%)</th>
                                    <th>Estado</th>
                                </tr>
                            </thead>
                            <tbody id="predictionsTable">
                            </tbody>
                        </table>
                    </div>
                </div>
            </div>
        </div>
    </div>
    
    <script>
    // Load data from JSON file or use sample data
    let dashboardData = null;
    
    // Built-in sample, shown only when dashboard_data.json cannot be loaded
    const SAMPLE_DATA = {
        predictions: [
            {region: "LIMA", year: 2022, actual: 41111, predicted: 38500, error_pct: 6.3},
            {region: "LIMA", year: 2023, actual: 44995, predicted: 42000, error_pct: 6.7},
            {region: "AREQUIPA", year: 2022, actual: 5017, predicted: 4800, error_pct: 4.3},
            {region: "CUSCO", year: 2022, actual: 3365, predicted: 3200, error_pct: 4.9}
        ],
        future: [
            {region: "LIMA", year: 2024, predicted_accidents: 46000},
            {region: "LIMA", year: 2025, predicted_accidents: 47000}
        ],
        metrics: {
            accuracy: 93.5,
            mean_error: 6.5,
            total_regions: 25
        }
    };
    
    // Visual classification of an error percentage
    const ERROR_COLORS = {good: "green", warning: "orange", danger: "red", unknown: "gray"};
    const STATUS_BADGES = {
        good: {css: "status-good", label: "✅ Bueno"},
        warning: {css: "status-warning", label: "⚠️ Regular"},
        danger: {css: "status-danger", label: "❌ Revisar"},
        unknown: {css: "text-muted", label: "Sin dato"}
    };
    
    function isNumber(value) {
        return typeof value === "number" && isFinite(value);
    }
    
    // Guarantee the shape the rendering functions rely on, whatever the file contains
    function normalizeData(data) {
        const source = data || {};
        return {
            predictions: Array.isArray(source.predictions) ? source.predictions : [],
            future: Array.isArray(source.future) ? source.future : [],
            metrics: source.metrics || {}
        };
    }
    
    // A missing error (null in the JSON) must not be classified as a good result
    function errorLevel(errorPct) {
        if (!isNumber(errorPct)) return "unknown";
        if (errorPct < 10) return "good";
        if (errorPct < 20) return "warning";
        return "danger";
    }
    
    function formatPercent(value) {
        return isNumber(value) ? value.toFixed(1) + "%" : "--";
    }
    
    function byYear(a, b) {
        return Number(a.year) - Number(b.year);
    }
    
    // Try to load actual data
    $.getJSON("dashboard_data.json")
        .done(function(data) {
            dashboardData = normalizeData(data);
            initializeDashboard();
        })
        .fail(function() {
            // Use sample data if the file is missing or unreadable, and say so on the page
            console.log("Using sample data");
            $("#dataNotice")
                .text("No se pudo cargar dashboard_data.json: se muestran datos de ejemplo.")
                .removeClass("d-none");
            dashboardData = normalizeData(SAMPLE_DATA);
            initializeDashboard();
        });
    
    function initializeDashboard() {
        updateMetrics();
        createComparisonChart();
        createErrorChart();
        createTimelineChart();
        updateTable();
    }
    
    function updateMetrics() {
        const metrics = dashboardData.metrics;
        $("#accuracy").text(formatPercent(metrics.accuracy));
        $("#regions").text(isNumber(metrics.total_regions) ? metrics.total_regions : "--");
        $("#error").text(formatPercent(metrics.mean_error));
        
        // Number(): the year may arrive as a string depending on how the CSV was parsed
        const total2024 = dashboardData.future
            .filter(d => Number(d.year) === 2024)
            .reduce((sum, d) => sum + (d.predicted_accidents || d.predicted || 0), 0);
        $("#future").text(total2024.toLocaleString());
    }
    
    function createComparisonChart() {
        const trace1 = {
            x: dashboardData.predictions.map(d => d.region),
            y: dashboardData.predictions.map(d => d.actual),
            name: "Valor Real",
            type: "bar"
        };
        
        const trace2 = {
            x: dashboardData.predictions.map(d => d.region),
            y: dashboardData.predictions.map(d => d.predicted),
            name: "Predicción",
            type: "bar"
        };
        
        const layout = {
            barmode: "group",
            xaxis: {title: "Región"},
            yaxis: {title: "Número de Siniestros"}
        };
        
        Plotly.newPlot("comparisonChart", [trace1, trace2], layout);
    }
    
    function createErrorChart() {
        const trace = {
            x: dashboardData.predictions.map(d => d.region),
            y: dashboardData.predictions.map(d => d.error_pct),
            type: "bar",
            marker: {
                color: dashboardData.predictions.map(d => ERROR_COLORS[errorLevel(d.error_pct)])
            }
        };
        
        const layout = {
            xaxis: {title: "Región"},
            yaxis: {title: "Error (%)"}
        };
        
        Plotly.newPlot("errorChart", [trace], layout);
    }
    
    function createTimelineChart() {
        const regions = [...new Set(dashboardData.predictions.map(d => d.region))];
        const traces = [];
        
        regions.slice(0, 5).forEach(region => {
            // Sort by year so the lines run left to right instead of zig-zagging
            const regionData = dashboardData.predictions.filter(d => d.region === region).sort(byYear);
            const futureData = dashboardData.future.filter(d => d.region === region).sort(byYear);
            
            traces.push({
                x: regionData.map(d => d.year),
                y: regionData.map(d => d.actual),
                name: region + " (Real)",
                type: "scatter",
                mode: "lines+markers"
            });
            
            traces.push({
                x: [...regionData.map(d => d.year), ...futureData.map(d => d.year)],
                y: [...regionData.map(d => d.predicted), ...futureData.map(d => d.predicted_accidents || d.predicted)],
                name: region + " (Predicción)",
                type: "scatter",
                mode: "lines+markers",
                line: {dash: "dot"}
            });
        });
        
        const layout = {
            xaxis: {title: "Año"},
            yaxis: {title: "Número de Siniestros"},
            hovermode: "x unified"
        };
        
        Plotly.newPlot("timelineChart", traces, layout);
    }
    
    function updateTable() {
        const tbody = $("#predictionsTable");
        tbody.empty();
        
        dashboardData.predictions.forEach(d => {
            const badge = STATUS_BADGES[errorLevel(d.error_pct)];
            const cells = [
                d.region,
                d.year,
                (d.actual || 0).toLocaleString(),
                (d.predicted || 0).toLocaleString(),
                formatPercent(d.error_pct)
            ];
            
            const row = $("<tr>");
            // .text() escapes the value, so CSV content cannot inject markup
            cells.forEach(value => {
                row.append($("<td>").text(value == null ? "" : String(value)));
            });
            row.append($("<td>").append($("<span>").addClass(badge.css).text(badge.label)));
            tbody.append(row);
        });
    }
    </script>
</body>
</html>
'''

# Save enhanced dashboard
enhanced_path = Path("dashboard_enhanced.html")
with open(enhanced_path, 'w', encoding='utf-8') as f:
    f.write(enhanced_dashboard)

print(f"Enhanced dashboard saved to: {enhanced_path.absolute()}")


Enhanced dashboard saved to: /Users/enmanuelcuadros/Downloads/tesis-prevencion-siniestros-transito/notebooks/dashboard_enhanced.html


In [7]:

# ============================================
# INSTRUCTIONS
# ============================================

# Section banner: a blank line, a 70-character rule, the title, a closing rule.
rule = "=" * 70
print(f"\n{rule}", "INSTRUCCIONES PARA USAR EL DASHBOARD", rule, sep="\n")

# Step-by-step usage guide (user-facing text, intentionally kept in Spanish).
usage_guide = """
1. VALIDAR DATOS (ya ejecutado):
   - Los resultados de validación están arriba
   - Revisa si hay warnings o errores

2. PREPARAR DATOS PARA EL DASHBOARD:
   python prepare_dashboard_data.py
   
3. INICIAR EL SERVIDOR WEB:
   python serve_dashboard.py
   
4. ABRIR EN EL NAVEGADOR:
   http://localhost:8000/dashboard_enhanced.html
   
5. OPCIONES ALTERNATIVAS:
   - dashboard.html: Versión básica con datos de ejemplo
   - dashboard_enhanced.html: Versión completa con tus datos reales
   
ARCHIVOS GENERADOS:
- dashboard.html: Dashboard básico
- dashboard_enhanced.html: Dashboard avanzado
- prepare_dashboard_data.py: Script para preparar datos
- serve_dashboard.py: Servidor web local
- dashboard_data.json: Datos en formato JSON (se genera al ejecutar prepare_dashboard_data.py)

NOTA: Si los archivos CSV no existen o están en otra ubicación,
actualiza las rutas en prepare_dashboard_data.py
"""
print(usage_guide)

# Final confirmation line, preceded by a blank line.
closing_message = "\n✅ Proceso completado. Sigue las instrucciones arriba para ver el dashboard."
print(closing_message)


INSTRUCCIONES PARA USAR EL DASHBOARD

1. VALIDAR DATOS (ya ejecutado):
   - Los resultados de validación están arriba

2. PREPARAR DATOS PARA EL DASHBOARD:
   python prepare_dashboard_data.py

3. INICIAR EL SERVIDOR WEB:
   python serve_dashboard.py

4. ABRIR EN EL NAVEGADOR:
   http://localhost:8000/dashboard_enhanced.html

5. OPCIONES ALTERNATIVAS:
   - dashboard.html: Versión básica con datos de ejemplo
   - dashboard_enhanced.html: Versión completa con tus datos reales

ARCHIVOS GENERADOS:
- dashboard.html: Dashboard básico
- dashboard_enhanced.html: Dashboard avanzado
- prepare_dashboard_data.py: Script para preparar datos
- serve_dashboard.py: Servidor web local
- dashboard_data.json: Datos en formato JSON (se genera al ejecutar prepare_dashboard_data.py)

NOTA: Si los archivos CSV no existen o están en otra ubicación,
actualiza las rutas en prepare_dashboard_data.py


✅ Proceso completado. Sigue las instrucciones arriba para ver el dashboard.
