# Quick Start Demo - Solar Energy Analysis

This notebook provides a quick demonstration using **mock solar data** so you can try the analysis immediately without needing your own solar system data.

**Features:**
- 🚀 **Ready to run** - uses included mock data
- 📊 **3 months** of realistic solar production and consumption data  
- 🌤️ **Realistic patterns** - seasonal variations, weather effects, daily curves
- 📈 **Full analysis** - production trends, consumption patterns, grid interaction

**Note:** This uses mock data generated to mimic a real 10kW residential solar system. For real data analysis, see the other notebooks in this folder.

In [None]:
# Import libraries
import sys
sys.path.append('../src')

from core.data_manager import SolarDataManager
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path

# Plot configuration: matplotlib's default style, seaborn's "husl" palette,
# and a 12x8 default figure size.
plt.style.use('default')
sns.set_palette("husl")
plt.rcParams.update({'figure.figsize': (12, 8)})

# Banner announcing that this run works on mock data; the trailing empty
# string produces the blank line that separates it from later output.
banner_lines = (
    "🚀 Quick Start Demo - Solar Energy Analysis",
    "=" * 50,
    "Using mock data for demonstration purposes",
    "",
)
for banner_line in banner_lines:
    print(banner_line)

## 1. Load Mock Data

We'll locate the mock CSV file (a location-specific one if it exists, otherwise the generic one) and initialise the hybrid data manager with it and a stub API client.

In [None]:
# Setup paths for mock data and location
from core.location_loader import create_notebook_location

# Try to get location from .env file for more accurate mock data
location_from_env = create_notebook_location()
print(f"✅ Location configured: {location_from_env.location_name}")

# Look for location-specific mock data.
# The slug is the text before the first comma, lower-cased, with spaces turned
# into underscores and dots removed (e.g. "St. Louis, MO" -> "st_louis").
city_name = location_from_env.location_name.split(',')[0].lower().replace(' ', '_').replace('.', '')
location_data_path = f"../data/raw/mock_solar_data_{city_name}.csv"

if Path(location_data_path).exists():
    mock_csv_path = location_data_path
    print(f"📊 Using location-specific mock data: {mock_csv_path}")
else:
    mock_csv_path = "../data/raw/mock_solar_data.csv"
    print(f"📊 Location-specific data not found, using generic: {mock_csv_path}")
    # Suggest the same slug used for the file lookup above, so the hint stays a
    # single shell argument and matches the file name this cell searches for.
    # NOTE(review): assumes generate_mock_data.py accepts this slug — confirm.
    print(f"💡 Generate location data with: uv run python scripts/generate_mock_data.py {city_name}")

# Stop early with a clear message if even the fallback file is missing.
if not Path(mock_csv_path).exists():
    print("❌ Mock data not found!")
    print("Please run: uv run python scripts/generate_mock_data.py")
    raise FileNotFoundError(f"Mock data file not found: {mock_csv_path}")

# Create mock API client for demo
class MockEnphaseClient:
    """Stub API client for the demo; it never returns any data."""

    def get_energy_lifetime(self, start_date=None, end_date=None):
        """Return an empty DataFrame; both date arguments are ignored."""
        no_api_data = pd.DataFrame()
        return no_api_data


mock_client = MockEnphaseClient()

# Initialize data manager with mock data
data_manager = SolarDataManager(
    csv_path=mock_csv_path,
    enphase_client=mock_client,
    cache_dir="../data/processed"
)

print(f"✅ Data manager initialized with location-aware data for {location_from_env.location_name}")

# Pick the hemisphere letter from the sign of each coordinate and print the
# magnitude, instead of always appending "N"/"W" to a signed number.
# NOTE(review): assumes latitude/longitude are signed decimal degrees with
# north and east positive — confirm against core.location_loader.
lat_hemisphere = 'N' if location_from_env.latitude >= 0 else 'S'
lon_hemisphere = 'E' if location_from_env.longitude >= 0 else 'W'
print(
    f"🌍 Location details: {abs(location_from_env.latitude):.3f}°{lat_hemisphere}, "
    f"{abs(location_from_env.longitude):.3f}°{lon_hemisphere}, {location_from_env.timezone_str}"
)

## 2. Load and Examine Mock Data

In [None]:
# Load the mock CSV data
csv_data = data_manager.load_csv_data()

# Spacing between consecutive records (the dataset uses 15-minute intervals).
SAMPLE_INTERVAL = pd.Timedelta(minutes=15)

first_timestamp = csv_data.index.min()
last_timestamp = csv_data.index.max()
time_span = last_timestamp - first_timestamp

print("📊 Dataset Information:")
print(f"  Shape: {csv_data.shape}")
print(f"  Columns: {list(csv_data.columns)}")
print(f"  Date range: {first_timestamp.date()} to {last_timestamp.date()}")
print(f"  Duration: {time_span.days} days")
print(f"  Total records: {len(csv_data):,} (15-minute intervals)")

# A gap-free series covering `time_span` holds span / interval + 1 samples
# (both endpoints count). Using whole days * 96 instead would leave out the
# last day's records, report more than 100%, and divide by zero for data
# spanning less than one day.
expected_records = int(time_span / SAMPLE_INTERVAL) + 1
completeness_pct = len(csv_data) / expected_records * 100

print("\n🔍 Data Quality:")
print(f"  Missing values: {csv_data.isnull().sum().sum()}")
print(f"  Data completeness: {completeness_pct:.1f}%")

# Display sample data
print("\n📋 Sample Data (first 5 rows):")
print(csv_data.head())

# Show some midday data to see production (rows stamped within the 12:00 hour)
midday_sample = csv_data[csv_data.index.hour == 12].head(8)
print("\n☀️ Sample Midday Production:")
print(midday_sample[['Production (kWh)', 'Consumption (kWh)', 'Export (kWh)']])

## 3. Daily Summary Analysis

In [None]:
# Daily totals from the data manager, preferring the CSV source
daily_data = data_manager.get_daily_production(source_priority="csv_first")

# Short handles for the two series used throughout this cell
daily_production = daily_data['Production (kWh)']
daily_consumption = daily_data['Consumption (kWh)']
day_count = len(daily_data)

print("📅 Daily Summary:")
print(f"  Daily data shape: {daily_data.shape}")
print(f"  Date range: {daily_data.index.min().date()} to {daily_data.index.max().date()}")
print(f"  Total days: {day_count}")

print("\n⚡ Production Summary:")
print(f"  Average daily production: {daily_production.mean():.1f} kWh")
print(f"  Peak daily production: {daily_production.max():.1f} kWh")
print(f"  Minimum daily production: {daily_production.min():.1f} kWh")
print(f"  Total production: {daily_production.sum():,.0f} kWh")

print("\n🏠 Consumption Summary:")
print(f"  Average daily consumption: {daily_consumption.mean():.1f} kWh")
print(f"  Peak daily consumption: {daily_consumption.max():.1f} kWh")
print(f"  Total consumption: {daily_consumption.sum():,.0f} kWh")

# Energy balance: per-day production minus consumption, the number of days
# where that difference is positive, and the total exported energy
net_energy = daily_production - daily_consumption
surplus_days = net_energy.gt(0).sum()
total_export = daily_data['Export (kWh)'].sum()

surplus_pct = surplus_days / day_count * 100
# Share of production that was not exported
self_consumption_pct = (1 - total_export / daily_production.sum()) * 100

print("\n🌐 Grid Interaction:")
print(f"  Days with energy surplus: {surplus_days} out of {day_count} ({surplus_pct:.1f}%)")
print(f"  Total energy exported: {total_export:,.0f} kWh")
print(f"  Net energy balance: {net_energy.sum():,.0f} kWh")
print(f"  Self-consumption rate: {self_consumption_pct:.1f}%")

## 4. Visualize Daily Patterns

In [None]:
# Four-panel overview of the daily data: time series, production histogram,
# net balance bars, and a consumption-vs-production scatter.
fig, axes = plt.subplots(2, 2, figsize=(16, 12))
ax_trend, ax_hist, ax_net, ax_scatter = axes.ravel()

production_kwh = daily_data['Production (kWh)']
consumption_kwh = daily_data['Consumption (kWh)']

# Panel 1 (top-left): production and consumption over time
for series, series_label in ((production_kwh, 'Production'), (consumption_kwh, 'Consumption')):
    ax_trend.plot(daily_data.index, series, alpha=0.7, linewidth=1.5, label=series_label)
ax_trend.set(title='Daily Production vs Consumption Over Time', ylabel='Energy (kWh)')
ax_trend.legend()
ax_trend.grid(True, alpha=0.3)

# Panel 2 (top-right): distribution of daily production with the mean marked
mean_production = production_kwh.mean()
ax_hist.hist(production_kwh, bins=25, alpha=0.7, edgecolor='black', color='orange')
ax_hist.axvline(mean_production, color='red', linestyle='--',
                label=f"Mean: {mean_production:.1f} kWh")
ax_hist.set(title='Daily Production Distribution',
            xlabel='Daily Production (kWh)',
            ylabel='Frequency')
ax_hist.legend()
ax_hist.grid(True, alpha=0.3)

# Panel 3 (bottom-left): net energy per day, red for deficit and green otherwise
net_daily = production_kwh - consumption_kwh
colors = net_daily.lt(0).map({True: 'red', False: 'green'}).tolist()
ax_net.bar(range(len(net_daily)), net_daily, color=colors, alpha=0.6)
ax_net.axhline(y=0, color='black', linestyle='-', alpha=0.8)
ax_net.set(title='Daily Net Energy Balance\n(Green = Surplus, Red = Deficit)',
           xlabel='Day',
           ylabel='Net Energy (kWh)')
ax_net.grid(True, alpha=0.3)

# Panel 4 (bottom-right): consumption against production, with a diagonal
# marking the points where the two are equal
ax_scatter.scatter(production_kwh, consumption_kwh, alpha=0.6, s=30)
max_val = max(production_kwh.max(), consumption_kwh.max())
ax_scatter.plot([0, max_val], [0, max_val], 'r--', alpha=0.7, label='Perfect Balance')
ax_scatter.set(title='Daily Consumption vs Production',
               xlabel='Production (kWh)',
               ylabel='Consumption (kWh)')
ax_scatter.legend()
ax_scatter.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

## 5. Analyze Typical Day Patterns

In [None]:
# Analyze average hourly patterns

# Each record covers 15 minutes, so four records make up one hour; multiplying
# a mean per-record value by this converts it to average energy per hour.
INTERVALS_PER_HOUR = 4

# Group by the hour taken from the timestamp index instead of writing an
# 'hour' column into csv_data: the shared frame stays unchanged, so earlier
# cells show the same columns no matter how often this cell is re-run.
hour_of_day = pd.Index(csv_data.index.hour, name='hour')
energy_columns = ['Production (kWh)', 'Consumption (kWh)', 'Export (kWh)', 'Import (kWh)']
hourly_patterns = csv_data.groupby(hour_of_day)[energy_columns].mean()

# Same table expressed as energy per hour rather than per 15-minute record
hourly_energy = hourly_patterns * INTERVALS_PER_HOUR

# Create hourly pattern visualization
fig, axes = plt.subplots(1, 2, figsize=(16, 6))

# Hourly production and consumption
hours = hourly_patterns.index
axes[0].plot(hours, hourly_energy['Production (kWh)'], label='Production', linewidth=2, marker='o')
axes[0].plot(hours, hourly_energy['Consumption (kWh)'], label='Consumption', linewidth=2, marker='s')
axes[0].set_title('Average Hourly Energy Patterns')
axes[0].set_xlabel('Hour of Day')
axes[0].set_ylabel('Average Hourly Energy (kWh)')
axes[0].legend()
axes[0].grid(True, alpha=0.3)
axes[0].set_xticks(range(0, 24, 2))

# Grid import/export patterns: export drawn above zero, import below
axes[1].fill_between(hours, 0, hourly_energy['Export (kWh)'],
                    alpha=0.6, label='Export to Grid', color='green')
axes[1].fill_between(hours, 0, -hourly_energy['Import (kWh)'],
                    alpha=0.6, label='Import from Grid', color='red')
axes[1].axhline(y=0, color='black', linestyle='-', alpha=0.8)
axes[1].set_title('Grid Interaction Patterns')
axes[1].set_xlabel('Hour of Day')
axes[1].set_ylabel('Grid Energy Flow (kWh)\n(+ Export, - Import)')
axes[1].legend()
axes[1].grid(True, alpha=0.3)
axes[1].set_xticks(range(0, 24, 2))

plt.tight_layout()
plt.show()

# Print key insights
peak_production_hour = hourly_patterns['Production (kWh)'].idxmax()
peak_consumption_hour = hourly_patterns['Consumption (kWh)'].idxmax()
max_export_hour = hourly_patterns['Export (kWh)'].idxmax()

print("\n🔍 Key Insights:")
print(f"  Peak production time: {peak_production_hour}:00 ({hourly_energy.loc[peak_production_hour, 'Production (kWh)']:.1f} kWh/hour)")
print(f"  Peak consumption time: {peak_consumption_hour}:00 ({hourly_energy.loc[peak_consumption_hour, 'Consumption (kWh)']:.1f} kWh/hour)")
print(f"  Peak export time: {max_export_hour}:00 ({hourly_energy.loc[max_export_hour, 'Export (kWh)']:.1f} kWh/hour)")

# Solar production hours: hours whose mean per-record production exceeds 0.01 kWh
production_hours = hourly_patterns[hourly_patterns['Production (kWh)'] > 0.01]
print(f"  Solar production hours: {production_hours.index.min()}:00 to {production_hours.index.max()}:00")
print(f"  Daily solar production window: {len(production_hours)} hours")

## 6. Basic Financial Analysis

In [None]:
# Location-based financial analysis with mock data
from core.location_loader import get_location_electricity_rates
import os

# National average rate in $/kWh, used only for the rate comparison at the
# end of this cell.
NATIONAL_AVG_RATE = 0.1622

# Get location-specific electricity rates
nrel_api_key = os.environ.get('NREL_API_KEY')
electricity_rates = get_location_electricity_rates(location_from_env, nrel_api_key)

# The '*_per_kwh' entries are used directly as $/kWh; no unit conversion
# happens here. The 'residential_rate' / 'feed_in_tariff' entries are the
# values printed in ¢/kWh below.
ELECTRICITY_RATE = electricity_rates['annual_cost_per_kwh']  # $/kWh
FEED_IN_TARIFF = electricity_rates['feed_in_rate_per_kwh']  # $/kWh for exported energy

print(f"🔌 Location-Based Electricity Rates for {location_from_env.location_name}:")
print(f"  Residential rate: {electricity_rates['residential_rate']:.2f}¢/kWh (${ELECTRICITY_RATE:.4f}/kWh)")
print(f"  Feed-in tariff: {electricity_rates['feed_in_tariff']:.2f}¢/kWh (${FEED_IN_TARIFF:.4f}/kWh)")
print(f"  Rate source: {electricity_rates['source']}")
if electricity_rates['national_comparison']['is_above_average']:
    print(f"  📈 {electricity_rates['national_comparison']['vs_national_avg']:.2f}¢ above national average")
else:
    print(f"  📉 {abs(electricity_rates['national_comparison']['vs_national_avg']):.2f}¢ below national average")

# Calculate savings and earnings
total_production = daily_data['Production (kWh)'].sum()
total_consumption = daily_data['Consumption (kWh)'].sum()
total_export = daily_data['Export (kWh)'].sum()
total_import = daily_data['Import (kWh)'].sum()

# Self-consumed solar (production - export)
self_consumed = total_production - total_export

# Financial calculations using location-specific rates
savings_from_self_consumption = self_consumed * ELECTRICITY_RATE
earnings_from_export = total_export * FEED_IN_TARIFF
cost_of_imported_energy = total_import * ELECTRICITY_RATE

total_solar_benefit = savings_from_self_consumption + earnings_from_export
net_electricity_cost = cost_of_imported_energy - earnings_from_export

# Project to annual figures by linear scaling of the observed period
days_in_period = len(daily_data)
annual_multiplier = 365 / days_in_period

print(f"\n💰 Location-Specific Financial Analysis ({days_in_period} days of mock data):")
print(f"  Using {location_from_env.location_name} electricity rates")
print()
print("📊 Energy Summary:")
print(f"  Total solar production: {total_production:.0f} kWh")
print(f"  Self-consumed solar: {self_consumed:.0f} kWh ({self_consumed/total_production*100:.1f}%)")
print(f"  Exported to grid: {total_export:.0f} kWh ({total_export/total_production*100:.1f}%)")
print(f"  Imported from grid: {total_import:.0f} kWh")
print()
print("💵 Financial Benefits (period):")
print(f"  Savings from self-consumption: ${savings_from_self_consumption:.0f}")
print(f"  Earnings from exports: ${earnings_from_export:.0f}")
print(f"  Total solar benefit: ${total_solar_benefit:.0f}")
print(f"  Cost of imported energy: ${cost_of_imported_energy:.0f}")
print(f"  Net electricity cost: ${net_electricity_cost:.0f}")
print()
print("📈 Projected Annual Figures:")
print(f"  Annual solar production: ~{total_production * annual_multiplier:.0f} kWh")
print(f"  Annual solar benefit: ~${total_solar_benefit * annual_multiplier:.0f}")
print(f"  Annual electricity cost: ~${net_electricity_cost * annual_multiplier:.0f}")
print()
print("📉 Without Solar (estimated):")
# Baseline: all consumption bought from the grid at the residential rate
without_solar_cost = total_consumption * ELECTRICITY_RATE
print(f"  Total electricity cost: ${without_solar_cost:.0f}")
print(f"  Annual electricity cost: ~${without_solar_cost * annual_multiplier:.0f}")
solar_savings = without_solar_cost - net_electricity_cost
print(f"  Solar savings vs no solar: ${solar_savings:.0f} ({(solar_savings/without_solar_cost)*100:.1f}% reduction)")
print()
print("💡 Rate Impact Analysis:")
# Compare with national average: rescale the whole benefit by the ratio of the
# national rate to the local rate. This is an approximation, since the export
# earnings are scaled by the same ratio even though they are priced at the
# feed-in tariff.
national_avg_benefit = total_solar_benefit * (NATIONAL_AVG_RATE / ELECTRICITY_RATE)
rate_advantage = total_solar_benefit - national_avg_benefit
if rate_advantage > 0:
    print(f"  Your local rates make solar ${abs(rate_advantage):.0f} MORE valuable than national average")
else:
    print(f"  Your local rates make solar ${abs(rate_advantage):.0f} LESS valuable than national average")

## 7. Summary and Next Steps

In [None]:
# Closing report: dataset overview, headline performance numbers, data source
# availability, and pointers to follow-up material.
summary = data_manager.get_data_summary()

print("🎯 QUICK START DEMO SUMMARY")
print("=" * 40)

# --- Dataset overview ---
print("\n📊 Dataset Overview:")
print("  Source: Mock solar data (demonstration)")
csv_summary = summary['csv']
print(f"  Records: {csv_summary['records']:,} (15-minute intervals)")
print(f"  Duration: {days_in_period} days")
print("  System type: Simulated 10kW residential solar")

# --- System performance ---
print("\n⚡ System Performance:")
production_series = daily_data['Production (kWh)']
print(f"  Average daily production: {production_series.mean():.1f} kWh")
print(f"  Best production day: {production_series.max():.1f} kWh")
# Share of consumption that did not come from grid imports
self_sufficiency_pct = (1 - total_import / total_consumption) * 100
print(f"  Energy self-sufficiency: {self_sufficiency_pct:.1f}%")
# Self-consumed solar energy as a share of total consumption
solar_contribution_pct = (self_consumed / total_consumption) * 100
print(f"  Solar contribution: {solar_contribution_pct:.1f}% of consumption")

# --- Data sources ---
print("\n🌐 Data Sources:")
print(f"  CSV Data: {csv_summary['available']} ({csv_summary['records']:,} records)")
api_summary = summary['api']
print(f"  API Data: {api_summary['available']} (Mock client: {api_summary['is_mock']})")

print("\n✅ Demo Complete!")

# --- Next steps (numbered in the order listed) ---
print("\n🚀 Next Steps:")
next_steps = (
    "Try the other notebooks with your own solar data",
    "Set up real Enphase API credentials for live data",
    "Explore seasonal analysis: 01b_seasonal_and_temporal_analysis.ipynb",
    "Build ML models: 01c_baseline_ml_models.ipynb",
    "Generate your own mock data: uv run python scripts/generate_mock_data.py",
)
for step_number, step_text in enumerate(next_steps, start=1):
    print(f"  {step_number}. {step_text}")

# --- Documentation pointers ---
print("\n📚 Documentation:")
documentation_notes = (
    "README.md: Setup instructions and API configuration",
    "All notebooks work with both mock and real data",
    "Mock data simulates realistic solar system behavior",
)
for note in documentation_notes:
    print(f"  - {note}")

print("\n🎉 Thank you for trying the Solar Energy Analysis project!")