# DRAGONWIND Scenario Comparison - Part 3: Advanced Analytics and Exports

This notebook continues from the scenario_comparison_part2 notebook, where we ran simulations for different scenarios.
Now we'll perform advanced analytics, compare carbon emissions, financial impacts, and storage/EV deployment across scenarios, and export the results.

In [None]:
import sys
import os
from pathlib import Path
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import datetime
import json
import warnings
warnings.filterwarnings('ignore')

# Make the project importable from this notebook. The root is taken to be two
# directory levels above the current working directory (assumes the notebook is
# launched from its own folder - adjust if it is run from elsewhere).
project_root = Path.cwd().parent.parent
root_entry = str(project_root)
if root_entry not in sys.path:
    # Prepend so the project's own `src` package is found first
    sys.path.insert(0, root_entry)

# Import DRAGONWIND modules
from src.config.loader import load_config
from src.core.simulation_engine import SimulationEngine
from src.scenarios.scenario_manager import Scenario, ScenarioManager
from src.analytics.plotter import create_figure
from src.utils.exporters import export_to_excel, export_to_csv, export_to_json

# Shared look for every figure in this notebook: start from the seaborn
# white-grid style sheet, then override the default figure and font sizes.
plt.style.use('seaborn-v0_8-whitegrid')
plt.rcParams.update({
    'figure.figsize': (12, 8),
    'font.size': 12,
})

# This notebook expects a `scenario_results` dictionary to already exist in the
# kernel (it is produced by part 2). Probe for it; when it is missing, fall back
# to a fixed list of scenario names so the remaining cells can still be read.
try:
    scenario_results
except NameError:
    print("Scenario results not found. Please run scenario_comparison_part2 notebook first.")
    # Placeholder names only - no simulation data is created here
    scenario_names = [
        "baseline",
        "accelerated_growth",
        "grid_bottleneck",
        "manufacturing_constraint",
        "storage_integration",
    ]
    print("Created placeholder scenario names for demonstration purposes.")
else:
    scenario_names = list(scenario_results.keys())
    print(f"Found results for scenarios: {scenario_names}")

## Compare Carbon Emissions Across Scenarios

Let's analyze how different scenarios affect carbon emissions reduction.

In [None]:
# Extract and plot carbon emissions for each scenario


def emissions_reduction_report(results_by_scenario, baseline_name="baseline"):
    """Format the cumulative emissions reduction of every scenario.

    A scenario's reduction is the sum of its ``baseline_emissions`` column
    minus the sum of its ``emissions`` column. Scenarios whose ``carbon``
    entry is missing or ``None`` are skipped.

    Args:
        results_by_scenario: Mapping of scenario name to that scenario's
            results mapping; the ``carbon`` entry must provide
            ``baseline_emissions`` and ``emissions`` columns.
        baseline_name: Name of the scenario the others are compared against.

    Returns:
        One formatted line per scenario with carbon data, in mapping order.
        Lines for non-baseline scenarios end with a signed "vs baseline"
        difference whenever the baseline scenario has carbon data, no matter
        where the baseline sits in the mapping.

    Raises:
        KeyError: If a carbon table lacks one of the two columns.
    """
    reductions = {}
    for name, results in results_by_scenario.items():
        if 'carbon' in results and results['carbon'] is not None:
            carbon = results['carbon']
            reductions[name] = carbon['baseline_emissions'].sum() - carbon['emissions'].sum()

    # Resolve the baseline only after the full pass: comparing while iterating
    # breaks (None arithmetic) when the baseline is not the first scenario.
    baseline_reduction = reductions.get(baseline_name)

    lines = []
    for name, reduction in reductions.items():
        line = f"{name.ljust(25)}: {reduction:,.2f}"
        if name != baseline_name and baseline_reduction is not None:
            line += f" ({reduction - baseline_reduction:+,.2f} vs baseline)"
        lines.append(line)
    return lines


try:
    # Touch scenario_results before opening a figure: if part 2 was not run the
    # NameError is raised here and no blank figure is left behind.
    carbon_tables = {
        scenario_name: results['carbon']
        for scenario_name, results in scenario_results.items()
        if 'carbon' in results and results['carbon'] is not None
    }

    plt.figure(figsize=(14, 8))

    for scenario_name, carbon_table in carbon_tables.items():
        # The baseline is drawn solid, thicker and with markers so it stands out
        is_baseline = scenario_name == "baseline"
        carbon_df = carbon_table.reset_index()
        plt.plot(
            carbon_df['year'],
            carbon_df['emissions'],
            linewidth=2.5 if is_baseline else 2.0,
            linestyle='-' if is_baseline else '--',
            marker='o' if is_baseline else None,
            markersize=5,
            label=scenario_name.replace('_', ' ').title(),
        )

    plt.title('Carbon Emissions by Scenario (2025-2050)', fontsize=16)
    plt.xlabel('Year', fontsize=14)
    plt.ylabel('Emissions (Million Tons CO2)', fontsize=14)
    plt.grid(True)
    plt.legend(fontsize=12)
    plt.tight_layout()
    plt.show()

    # Calculate cumulative emissions reductions
    print("Cumulative Emissions Reduction by 2050 (Million Tons CO2):")
    for report_line in emissions_reduction_report(scenario_results):
        print(report_line)
except (NameError, KeyError):
    print("Sample emissions data not available. Placeholder for demonstration.")
    # We would insert placeholder data visualization here

## Compare Financial Requirements Across Scenarios

Let's analyze how different scenarios affect investment requirements.

In [None]:
# Extract and plot total investment for each scenario


def cumulative_investment_report(results_by_scenario, baseline_name="baseline"):
    """Format the cumulative investment of every scenario.

    A scenario's cumulative investment is the sum of the ``total_investment``
    column of its ``finance`` table. Scenarios whose ``finance`` entry is
    missing or ``None`` are skipped.

    Args:
        results_by_scenario: Mapping of scenario name to that scenario's
            results mapping; the ``finance`` entry must provide a
            ``total_investment`` column.
        baseline_name: Name of the scenario the others are compared against.

    Returns:
        One formatted line per scenario with finance data, in mapping order.
        Lines for non-baseline scenarios end with a signed "vs baseline"
        difference whenever the baseline scenario has finance data, no matter
        where the baseline sits in the mapping.

    Raises:
        KeyError: If a finance table lacks the ``total_investment`` column.
    """
    totals = {}
    for name, results in results_by_scenario.items():
        if 'finance' in results and results['finance'] is not None:
            totals[name] = results['finance']['total_investment'].sum()

    # Resolve the baseline only after the full pass: comparing while iterating
    # breaks (None arithmetic) when the baseline is not the first scenario.
    baseline_total = totals.get(baseline_name)

    lines = []
    for name, total in totals.items():
        line = f"{name.ljust(25)}: {total:,.2f}"
        if name != baseline_name and baseline_total is not None:
            line += f" ({total - baseline_total:+,.2f} vs baseline)"
        lines.append(line)
    return lines


try:
    # Touch scenario_results before opening a figure: if part 2 was not run the
    # NameError is raised here and no blank figure is left behind.
    finance_tables = {
        scenario_name: results['finance']
        for scenario_name, results in scenario_results.items()
        if 'finance' in results and results['finance'] is not None
    }

    plt.figure(figsize=(14, 8))

    for scenario_name, finance_table in finance_tables.items():
        # The baseline is drawn solid, thicker and with markers so it stands out
        is_baseline = scenario_name == "baseline"
        finance_df = finance_table.reset_index()
        plt.plot(
            finance_df['year'],
            finance_df['total_investment'],
            linewidth=2.5 if is_baseline else 2.0,
            linestyle='-' if is_baseline else '--',
            marker='o' if is_baseline else None,
            markersize=5,
            label=scenario_name.replace('_', ' ').title(),
        )

    plt.title('Annual Investment Requirements by Scenario (2025-2050)', fontsize=16)
    plt.xlabel('Year', fontsize=14)
    plt.ylabel('Annual Investment (Billion USD)', fontsize=14)
    plt.grid(True)
    plt.legend(fontsize=12)
    plt.tight_layout()
    plt.show()

    # Calculate cumulative investment requirements
    print("Cumulative Investment Requirements (2025-2050) (Billion USD):")
    for report_line in cumulative_investment_report(scenario_results):
        print(report_line)
except (NameError, KeyError):
    print("Sample finance data not available. Placeholder for demonstration.")
    # We would insert placeholder data visualization here

## Compare Storage and EV Integration Across Scenarios

Let's examine how battery storage and electric vehicle deployments differ across scenarios.

In [None]:
# Create a figure with two subplots - one for BESS, one for EVs


def scenario_line_style(scenario_name):
    """Return the ``plot`` keyword arguments for one scenario's line.

    The scenario named "baseline" is drawn as a thicker solid line with
    circle markers; every other scenario is a thinner dashed line without
    markers. The legend label is the scenario name with underscores replaced
    by spaces, in title case.

    Args:
        scenario_name: Scenario key as used in ``scenario_results``.

    Returns:
        Dict with ``linewidth``, ``linestyle``, ``marker``, ``markersize`` and
        ``label`` entries, ready to be unpacked into ``Axes.plot``.
    """
    is_baseline = scenario_name == "baseline"
    return {
        "linewidth": 2.5 if is_baseline else 2.0,
        "linestyle": '-' if is_baseline else '--',
        "marker": 'o' if is_baseline else None,
        "markersize": 5,
        "label": scenario_name.replace('_', ' ').title(),
    }


try:
    # Touch scenario_results before creating the figure: if part 2 was not run
    # the NameError is raised here and no pair of blank axes is left behind.
    scenario_items = list(scenario_results.items())

    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(14, 14))

    # BESS deployment comparison
    for scenario_name, results in scenario_items:
        if 'bess' in results and results['bess'] is not None:
            bess_df = results['bess'].reset_index()
            ax1.plot(
                bess_df['year'],
                bess_df['storage_capacity'],
                **scenario_line_style(scenario_name),
            )

    ax1.set_title('Battery Energy Storage Capacity by Scenario (2025-2050)', fontsize=16)
    ax1.set_xlabel('Year', fontsize=14)
    ax1.set_ylabel('Storage Capacity (GWh)', fontsize=14)
    ax1.grid(True)
    ax1.legend(fontsize=12)

    # EV adoption comparison
    for scenario_name, results in scenario_items:
        if 'ev' in results and results['ev'] is not None:
            ev_df = results['ev'].reset_index()
            ax2.plot(
                ev_df['year'],
                ev_df['ev_fleet_size'] / 1_000_000,  # Convert to millions
                **scenario_line_style(scenario_name),
            )

    ax2.set_title('Electric Vehicle Fleet Size by Scenario (2025-2050)', fontsize=16)
    ax2.set_xlabel('Year', fontsize=14)
    ax2.set_ylabel('EV Fleet Size (Million Vehicles)', fontsize=14)
    ax2.grid(True)
    ax2.legend(fontsize=12)

    plt.tight_layout()
    plt.show()
except (NameError, KeyError):
    print("Sample BESS and EV data not available. Placeholder for demonstration.")
    # We would insert placeholder data visualization here

## Create Comprehensive Scenario Comparison Dashboard

Let's create a comprehensive dashboard to compare key metrics across all scenarios.

In [None]:
# Create a multi-metric comparison summary
try:
    # Metric key -> column header shown in the final comparison table
    metrics = {
        "renewable_capacity_2050": "Total Renewable Capacity by 2050 (GW)",
        "grid_capacity_2050": "Grid Capacity by 2050 (GW)",
        "max_curtailment": "Maximum Curtailment Rate (%)",
        "carbon_reduction": "Cumulative Carbon Reduction (Million Tons)",
        "total_investment": "Total Investment Required (Billion USD)",
        "storage_capacity_2050": "Battery Storage by 2050 (GWh)",
        "ev_fleet_2050": "EV Fleet Size by 2050 (Million Vehicles)"
    }

    # Metric key -> (results module, how to derive the value from that module's
    # table). The "*_2050" metrics read the last row of their table.
    metric_sources = {
        "renewable_capacity_2050": ('renewable', lambda table: table.iloc[-1]['total_capacity']),
        "grid_capacity_2050": ('grid', lambda table: table.iloc[-1]['grid_capacity']),
        # Rate is scaled by 100 to be shown as a percentage
        "max_curtailment": ('grid', lambda table: table['curtailment_rate'].max() * 100),
        "carbon_reduction": (
            'carbon',
            lambda table: table['baseline_emissions'].sum() - table['emissions'].sum(),
        ),
        "total_investment": ('finance', lambda table: table['total_investment'].sum()),
        "storage_capacity_2050": ('bess', lambda table: table.iloc[-1]['storage_capacity']),
        # Convert to millions
        "ev_fleet_2050": ('ev', lambda table: table.iloc[-1]['ev_fleet_size'] / 1_000_000),
    }

    # One list per metric plus the scenario labels; each scenario appends one
    # entry to every list, so the lists stay aligned row by row
    comparison_data = {metric: [] for metric in metrics}
    comparison_data["scenario"] = []

    for scenario_name, results in scenario_results.items():
        comparison_data["scenario"].append(scenario_name.replace("_", " ").title())

        for metric, (module, derive) in metric_sources.items():
            table = results[module] if module in results else None
            # A module that is absent (or None) contributes an empty cell
            comparison_data[metric].append(None if table is None else derive(table))

    # Assemble the table, indexed by scenario label
    comparison_df = pd.DataFrame(comparison_data).set_index("scenario")

    # Swap the metric keys for their display headers
    comparison_df.columns = [metrics[key] for key in comparison_df.columns]

    # Display the comparison table
    display(comparison_df)

    # Heatmap for visual comparison
    plt.figure(figsize=(16, 8))

    # Scale each column by its own maximum so metrics with different units
    # share one colour scale; columns whose maximum is not positive are left as-is
    normalized_df = comparison_df.copy()
    for header in normalized_df.columns:
        column_peak = normalized_df[header].max()
        if column_peak > 0:
            normalized_df[header] = normalized_df[header] / column_peak

    sns.heatmap(normalized_df, annot=True, cmap="YlGnBu", fmt=".2f", linewidths=.5)
    plt.title("Normalized Scenario Comparison (1.0 = Maximum Value)", fontsize=16)
    plt.tight_layout()
    plt.show()
except (NameError, KeyError):
    print("Sample data not available. Placeholder for demonstration.")
    # We would insert placeholder data visualization here

## Export Scenario Results

Let's export the scenario results to Excel, CSV, and JSON formats.

In [None]:
# Each run exports into its own timestamped folder under <project_root>/exports
timestamp = f"{datetime.datetime.now():%Y%m%d_%H%M%S}"
export_dir = project_root.joinpath("exports", f"scenario_comparison_{timestamp}")
export_dir.mkdir(parents=True, exist_ok=True)
print(f"Created export directory: {export_dir}")

# Export results for each scenario to Excel
try:
    for scenario_name, results in scenario_results.items():
        # Keep only the DataFrame outputs, keyed by module name (passed to
        # export_to_excel as one mapping). When 'year' is an index level it is
        # reset into a regular column.
        dataframes = {
            module: (data.reset_index() if 'year' in data.index.names else data)
            for module, data in results.items()
            if isinstance(data, pd.DataFrame)
        }

        # One workbook per scenario
        excel_file = export_dir / f"{scenario_name}.xlsx"
        export_to_excel(dataframes, excel_file)
        print(f"Exported {scenario_name} results to Excel: {excel_file}")

        # One CSV per scenario/module pair for easier import into other tools
        for module, data in dataframes.items():
            csv_file = export_dir / f"{scenario_name}_{module}.csv"
            export_to_csv(data, csv_file)
            print(f"Exported {scenario_name} {module} results to CSV: {csv_file}")

    # The summary table only exists if the dashboard cell ran successfully
    if 'comparison_df' in locals():
        comparison_excel = export_dir / "scenario_comparison_summary.xlsx"
        comparison_csv = export_dir / "scenario_comparison_summary.csv"
        json_file = export_dir / "scenario_comparison_summary.json"

        # Excel
        with pd.ExcelWriter(comparison_excel) as writer:
            comparison_df.to_excel(writer, sheet_name="Scenario Comparison")
        print(f"Exported scenario comparison summary to Excel: {comparison_excel}")

        # CSV
        comparison_df.to_csv(comparison_csv)
        print(f"Exported scenario comparison summary to CSV: {comparison_csv}")

        # JSON for web dashboard integration: one record per scenario, with the
        # scenario label restored as a regular field
        summary_records = comparison_df.reset_index().to_dict(orient="records")
        export_to_json(summary_records, json_file)
        print(f"Exported scenario comparison summary to JSON: {json_file}")
except (NameError, KeyError):
    print("Sample data not available for export. Placeholder for demonstration.")
    # Leave a marker file behind so the export folder is not empty
    placeholder_file = export_dir / "placeholder_export.txt"
    placeholder_file.write_text("This is a placeholder file to demonstrate the export functionality.")
    print(f"Created placeholder export file in {export_dir}")

## Conclusion

In this three-part notebook series, we've demonstrated how to:

1. Define different scenarios for China's renewable energy transition
2. Run simulations for each scenario using the DRAGONWIND platform
3. Compare results across scenarios for key metrics
4. Create visualizations to highlight differences between scenarios
5. Export results to various formats for further analysis

This approach allows policy makers and analysts to explore different pathways and understand the implications of various policy and technology choices on China's renewable energy transition and carbon reduction goals.