# Device Usage Plot Generator

This notebook generates visualizations for specific device activations detected in the experiment analysis.

## What This Notebook Does
- Parses a list of dates/times from the experiment report
- Generates a 12-hour window plot for each activation, centered on the midpoint of the event
- Shows original power, remaining power, segregated power (short/medium/long duration), and detected events for all 3 phases

## Where to Get the Data
1. Open the **House Report** (house_X.html) from the experiment analysis
2. Go to the **Device Detection** section (Central AC, Regular AC, or Boiler)
3. Click the **"Show Copyable Dates"** button below the table
4. Copy the text from the textarea that appears
5. Paste it in the `DEVICE_DATES` variable below

## Supported Date Formats
The notebook supports both formats (can be mixed in the same list):

| Format | Example |
|--------|---------|
| DD/MM/YYYY HH:MM-HH:MM | `10/01/2024 08:30-14:15` |
| YYYY-MM-DD HH:MM-HH:MM | `2022-11-11 08:41-09:28` |

### Example Input
```
10/01/2024 08:30-14:15, 2022-11-11 08:41-09:28, 15/01/2024 10:00-16:45
```

## 1. Setup

In [None]:
# Install the third-party packages this notebook imports (pandas, plotly).
# Run this cell first when using in Google Colab.
!pip install pandas plotly

In [None]:
# Mount Google Drive (for Colab)
# This allows access to files in your Google Drive under /content/drive
try:
    from google.colab import drive
except ImportError:
    # The google.colab package could not be imported, so we assume a
    # non-Colab environment and skip the mount.
    print("Not running in Colab - skipping drive mount")
else:
    # Deliberately outside the try body: an ImportError raised while mounting
    # must not be misreported as "not running in Colab".
    drive.mount('/content/drive')
    print("✓ Google Drive mounted successfully")

In [None]:
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from datetime import datetime, timedelta
from pathlib import Path
import os
import re

## 2. Configuration

### Set Your Parameters Here

In [None]:
# ============================================
# CONFIGURATION - EDIT THESE VALUES
# ============================================

# House ID (from the report). Kept as a string: it is interpolated into
# directory and file names such as house_<ID>/ and summarized_<ID>_MM_YYYY.pkl.
HOUSE_ID = "10"

# Path to experiment output directory: the folder that contains run_<N>/
# (or house_<ID>/run_<N>/ in the older layout).
# For Colab: mount Google Drive and set path like "/content/drive/MyDrive/experiments/..."
# For local: use relative or absolute path
EXPERIMENT_DIR = "../experiment_pipeline/OUTPUT/experiments/your_experiment_name"

# Run number (usually 0 for first iteration); selects the run_<N> sub-directory
RUN_NUMBER = 0

# ============================================
# PASTE DEVICE DATES HERE
# Copy from "Show Copyable Dates" button in report
# ============================================
# Entries may be separated by commas and/or newlines.
# Supported formats (can be mixed):
#   - DD/MM/YYYY HH:MM-HH:MM (e.g., "10/01/2024 08:30-14:15")
#   - YYYY-MM-DD HH:MM-HH:MM (e.g., "2022-11-11 08:41-09:28")
DEVICE_DATES = """
10/01/2024 08:30-14:15, 2022-11-11 08:41-09:28, 15/01/2024 10:00-16:45
"""

# Device name (used in plot titles and in the name of the plot output folder)
DEVICE_NAME = "Central AC"  # Options: "Central AC", "Regular AC", "Boiler"

## 3. Helper Functions

In [None]:
def parse_device_dates(dates_string):
    """
    Parse device dates from the copied text.

    Entries are separated by commas and/or newlines. Two formats are
    supported and may be mixed in the same input:
    - "DD/MM/YYYY HH:MM-HH:MM" (e.g., "10/01/2024 08:30-14:15")
    - "YYYY-MM-DD HH:MM-HH:MM" (e.g., "2022-11-11 08:41-09:28")

    Single-digit day, month and hour values (e.g. "1/2/2024 8:30-9:00") are
    accepted as well. Entries that match neither format, or that name an
    impossible date/time (e.g. "31/02/2024" or "25:00"), are reported with a
    warning and skipped instead of aborting the whole parse.

    Args:
        dates_string: Text copied from the report. Empty or None yields [].

    Returns list of dicts with:
        - date: datetime object (midnight of the ON date)
        - on_time: start time string, as written in the input
        - off_time: end time string, as written in the input
        - on_dt: ON datetime
        - off_dt: OFF datetime (moved to the next day for overnight events)
        - center_time: datetime for plot centering (midpoint of event)
        - label: "DD/MM/YYYY HH:MM-HH:MM" text used in titles and log output
    """
    if not dates_string:
        return []

    # Not anchored at the end on purpose: trailing text after the time range
    # has always been tolerated.
    time_range = r'\s+(\d{1,2}:\d{2})-(\d{1,2}:\d{2})'
    supported_formats = (
        (re.compile(r'(\d{4}-\d{1,2}-\d{1,2})' + time_range), '%Y-%m-%d'),
        (re.compile(r'(\d{1,2}/\d{1,2}/\d{4})' + time_range), '%d/%m/%Y'),
    )

    # Split on commas and newlines alike; drop empty fragments.
    entries = [piece.strip() for piece in re.split(r'[,\n]', dates_string)]
    entries = [entry for entry in entries if entry]

    activations = []
    for entry in entries:
        for pattern, date_format in supported_formats:
            match = pattern.match(entry)
            if match:
                break
        else:
            print(f"Warning: could not parse date entry {entry!r} - skipping")
            continue

        date_str, on_time, off_time = match.groups()
        try:
            # strptime validates ranges (day 31 in February, hour 25, ...).
            date_obj = datetime.strptime(date_str, date_format)
            on_clock = datetime.strptime(on_time, '%H:%M')
            off_clock = datetime.strptime(off_time, '%H:%M')
        except ValueError as exc:
            print(f"Warning: invalid date/time in entry {entry!r} ({exc}) - skipping")
            continue

        on_dt = date_obj.replace(hour=on_clock.hour, minute=on_clock.minute)
        off_dt = date_obj.replace(hour=off_clock.hour, minute=off_clock.minute)

        # Handle overnight events: an OFF time earlier than the ON time
        # means the device was switched off on the following day.
        if off_dt < on_dt:
            off_dt += timedelta(days=1)

        # Midpoint of the event, used to center the plot window.
        center = on_dt + (off_dt - on_dt) / 2

        activations.append({
            'date': date_obj,
            'on_time': on_time,
            'off_time': off_time,
            'on_dt': on_dt,
            'off_dt': off_dt,
            'center_time': center,
            'label': f"{date_obj.strftime('%d/%m/%Y')} {on_time}-{off_time}",
        })

    return activations


def get_required_months(activations, hours_buffer=6):
    """
    Extract unique months needed for the given activations.
    Returns set of (month, year) tuples.

    Every calendar month touched by the window
    [center_time - hours_buffer, center_time + hours_buffer] is included,
    including windows that cross a month or year boundary.

    Args:
        activations: List of activation dicts from parse_device_dates
            (only the 'center_time' key is read)
        hours_buffer: Hours before/after event to include (default 6 for 12-hour window)
    """
    required_months = set()
    buffer = timedelta(hours=hours_buffer)

    for act in activations:
        # Get the time range we'll need for this activation
        start_time = act['center_time'] - buffer
        end_time = act['center_time'] + buffer

        # A negative buffer produces an inverted (empty) window.
        if start_time > end_time:
            continue

        # Walk month by month on (year, month) pairs rather than datetimes.
        # Stepping a datetime to "day 1 of next month" keeps the start's
        # time-of-day, which can overshoot an end_time early on the 1st
        # and silently drop that month.
        year, month = start_time.year, start_time.month
        last = (end_time.year, end_time.month)
        while (year, month) <= last:
            required_months.add((month, year))
            month += 1
            if month > 12:
                year, month = year + 1, 1

    return required_months


def load_matches_for_month(experiment_dir, house_id, run_number, month, year):
    """
    Collect every event stored for one house and one calendar month.

    Two pickles under <experiment_dir>/run_<run_number>/house_<house_id>/
    are read when present:
    - matches/matches_<house_id>_<MM>_<YYYY>.pkl: taken as-is
    - on_off/on_off_<house_id>_<MM>_<YYYY>.pkl: only rows with matched == 0

    A file that fails to load is reported with a warning and ignored.

    Returns:
        DataFrame with the rows of both sources concatenated and the known
        time columns converted to datetimes, or None if nothing was loaded.
    """
    import pickle
    import sys
    import numpy as np

    # Fix numpy compatibility: alias numpy._core to numpy.core when this
    # numpy does not expose a _core attribute.
    loaded_numpy = sys.modules.get('numpy', None)
    if not hasattr(loaded_numpy, '_core'):
        sys.modules['numpy._core'] = np.core
        sys.modules['numpy._core.numeric'] = np.core.numeric

    house_dir = Path(experiment_dir) / f"run_{run_number}" / f"house_{house_id}"
    file_suffix = f"{house_id}_{month:02d}_{year}.pkl"
    frames = []

    # Matched events
    match_file = house_dir / "matches" / f"matches_{file_suffix}"
    if match_file.exists():
        try:
            with open(match_file, 'rb') as handle:
                frames.append(pickle.load(handle))
        except Exception as e:
            print(f"Warning: Could not load {match_file}: {e}")

    # Individual ON/OFF events that were never paired up
    on_off_file = house_dir / "on_off" / f"on_off_{file_suffix}"
    if on_off_file.exists():
        try:
            with open(on_off_file, 'rb') as handle:
                on_off_events = pickle.load(handle)
            not_matched = on_off_events[on_off_events['matched'] == 0].copy()
            if not not_matched.empty:
                frames.append(not_matched)
        except Exception as e:
            print(f"Warning: Could not load {on_off_file}: {e}")

    if not frames:
        return None

    all_events = pd.concat(frames, ignore_index=True)
    # Normalise whichever time columns are present to datetimes
    for time_col in ('on_start', 'on_end', 'off_start', 'off_end', 'start', 'end'):
        if time_col in all_events.columns:
            all_events[time_col] = pd.to_datetime(all_events[time_col], errors='coerce')
    return all_events


def _load_summarized_months(directory, house_id, required_months=None):
    """
    Load and concatenate the monthly summarized pickles found in *directory*.

    Returns the concatenated DataFrame sorted by 'timestamp', or None when
    the directory does not exist or no file could be loaded from it.
    """
    import pickle

    if not directory.exists():
        return None

    # Find all pickle files (format: summarized_HOUSEID_MM_YYYY.pkl)
    prefix = f'summarized_{house_id}_'
    all_pickle_files = sorted(
        name for name in os.listdir(directory)
        if name.endswith('.pkl') and name.startswith(prefix)
    )

    # Filter to only required months if specified
    if required_months:
        pickle_files = []
        for pkl_file in all_pickle_files:
            # Parse month and year from filename: summarized_1001_02_2021.pkl
            parts = pkl_file.replace('.pkl', '').split('_')
            if len(parts) < 4:
                continue
            try:
                month, year = int(parts[-2]), int(parts[-1])
            except ValueError:
                continue
            if (month, year) in required_months:
                pickle_files.append(pkl_file)
        print(f"Loading {len(pickle_files)} out of {len(all_pickle_files)} months (only required months)")
    else:
        pickle_files = all_pickle_files
        print(f"Loading all {len(pickle_files)} months")

    dfs = []
    for pkl_file in pickle_files:
        print(f"  Loading {pkl_file}...")
        try:
            # NOTE: pickle.load can execute arbitrary code; only point this
            # notebook at experiment output you trust.
            with open(directory / pkl_file, 'rb') as f:
                dfs.append(pickle.load(f))
        except Exception as e:
            # Best effort: one unreadable month must not block the others.
            print(f"  Warning: Could not load {pkl_file}: {e}")

    if not dfs:
        return None

    df = pd.concat(dfs, ignore_index=True)
    df['timestamp'] = pd.to_datetime(df['timestamp'], format='mixed', dayfirst=True)
    df = df.sort_values('timestamp').reset_index(drop=True)
    print(f"✓ Loaded total of {len(df)} rows from {len(dfs)} months")
    return df


def load_summarized_data(experiment_dir, house_id, run_number=0, required_months=None):
    """
    Load summarized data from monthly pickle files in summarized directory.
    Only loads months specified in required_months for efficiency.

    Two directory layouts are tried, in this order; the first one that
    yields data wins:
    1. <experiment_dir>/run_<N>/house_<id>/summarized/
    2. <experiment_dir>/house_<id>/run_<N>/house_<id>/summarized/  (old)

    Args:
        experiment_dir: Path to experiment directory
        house_id: House ID
        run_number: Run number (default 0)
        required_months: Set of (month, year) tuples to load. If None (or
            empty), loads all.

    Returns:
        DataFrame with all monthly data concatenated, sorted by 'timestamp'

    Raises:
        FileNotFoundError: If neither layout yields any loadable data.
    """
    import sys
    import numpy as np

    # Fix numpy compatibility for pickle files created with newer numpy
    if not hasattr(sys.modules.get('numpy', None), '_core'):
        sys.modules['numpy._core'] = np.core
        sys.modules['numpy._core.numeric'] = np.core.numeric

    exp_path = Path(experiment_dir)
    new_path = exp_path / f"run_{run_number}" / f"house_{house_id}" / "summarized"
    old_path = exp_path / f"house_{house_id}" / f"run_{run_number}" / f"house_{house_id}" / "summarized"

    for message, candidate in (("Trying path", new_path), ("Trying old structure", old_path)):
        print(f"{message}: {candidate}")
        df = _load_summarized_months(candidate, house_id, required_months)
        if df is not None:
            return df

    raise FileNotFoundError(
        f"Could not find summarized directory with pickle files for house {house_id}\n"
        f"Tried:\n  - {new_path}\n  - {old_path}"
    )


def filter_data_by_window(df, center_time, hours_before=6, hours_after=6):
    """
    Return a copy of the rows whose 'timestamp' lies in the closed interval
    [center_time - hours_before, center_time + hours_after].

    center_time is expected to be the midpoint of the event.

    Raises:
        ValueError: If no row falls inside the window.
    """
    window_start = center_time - timedelta(hours=hours_before)
    window_end = center_time + timedelta(hours=hours_after)
    print(f"Filtering data: {window_start} to {window_end}")

    # Both window edges are inclusive
    inside_window = df['timestamp'].between(window_start, window_end)
    window_df = df[inside_window].copy()

    if window_df.empty:
        raise ValueError(f"No data found for window {window_start} to {window_end}")

    print(f"Found {len(window_df)} rows in time window")
    return window_df

## 4. Plotting Functions

In [None]:
def calculate_y_axis_range(df, phases):
    """
    Return the [lower, upper] y-axis range shared across all phases.

    The lower bound is always 0. The upper bound is the largest value in
    the original_<phase> columns that exist, plus 10% headroom; it falls
    back to 100 when no positive maximum is found.
    """
    lower = 0  # Start from 0 for power data
    peak = 0

    # Only the original_* columns determine the range
    for column in (f'original_{phase}' for phase in phases):
        if column not in df.columns:
            continue
        column_peak = df[column].max()
        if pd.isna(column_peak):
            continue
        peak = max(peak, column_peak)

    # 10% padding at the top, or a default range when there is no data
    upper = peak * 1.1 if peak > 0 else 100

    print(f"Y-axis range calculated: {lower} to {upper}")
    return [lower, upper]


def _format_hover_value(value, unit):
    """Format *value* as a whole number followed by *unit*; "N/A" if missing or not numeric."""
    try:
        if pd.isna(value):
            return "N/A"
        return f"{value:.0f}{unit}"
    except (TypeError, ValueError):
        return "N/A"


def _prepare_events(matches_df):
    """Return a copy of *matches_df* with its time columns as datetimes, or None if there are no events."""
    if matches_df is None or matches_df.empty:
        return None
    events = matches_df.copy()
    for col in ('on_start', 'on_end', 'off_start', 'off_end', 'start', 'end'):
        if col in events.columns:
            events[col] = pd.to_datetime(events[col], errors='coerce')
    return events


def _events_for_phase(events, phase, window_start, window_end):
    """Return the rows of *events* for *phase* whose on_start or start lies inside the window."""
    phase_events = events[events['phase'] == phase]
    # Matched events carry 'on_start', unmatched ones carry 'start'.
    # Missing timestamps (NaT) never fall inside the window.
    in_window = [
        phase_events[col].between(window_start, window_end)
        for col in ('on_start', 'start')
        if col in phase_events.columns
    ]
    if not in_window:
        # Neither time column exists: nothing to filter on.
        return phase_events
    mask = in_window[0]
    for extra in in_window[1:]:
        mask = mask | extra
    return phase_events[mask]


def create_device_plot(df, activation_info, device_name, house_id, matches_df=None):
    """
    Create a 4-row x 3-column plot showing power data for all phases.
    Event window (ON-OFF) is shown as shaded background in all rows.

    Rows:
    1. Original power
    2. Remaining power (after segregation)
    3. Segregated power (short, medium, long duration)
    4. Events/Matches detected (matched events as connecting lines, unmatched as individual markers)

    Columns: w1, w2, w3 phases

    Args:
        df: Windowed power data with a 'timestamp' column and, per phase,
            optional original_<phase>, remaining_<phase> and
            <short|medium|long>_duration_<phase> columns.
        activation_info: Activation dict; 'on_dt', 'off_dt' and 'label' are read.
        device_name: Device name shown in the title.
        house_id: House ID shown in the title.
        matches_df: Optional events DataFrame. It needs a 'phase' column;
            rows with on_start and off_start are drawn as matches, rows with
            start and magnitude as unmatched events.

    Returns:
        The plotly Figure.
    """
    phases = ['w1', 'w2', 'w3']
    n_rows = 4
    n_cols = len(phases)

    # Calculate shared Y-axis range from original power data
    y_range = calculate_y_axis_range(df, phases)

    # Color scheme matching pipeline visualization
    COLORS = {
        'original': 'black',
        'remaining': 'blue',
        'short': 'red',
        'medium': 'orange',
        'long': 'purple',
        'matched': 'green',
        'unmatched_on': 'red',
        'unmatched_off': 'blue',
    }

    fig = make_subplots(
        rows=n_rows, cols=n_cols,
        shared_xaxes=True, shared_yaxes=False,
        subplot_titles=[f"Phase {phase}" for phase in phases],
        vertical_spacing=0.06,
        horizontal_spacing=0.05,
        row_heights=[0.25] * n_rows
    )

    # Get ON/OFF times for shaded background
    on_dt = activation_info['on_dt']
    off_dt = activation_info['off_dt']

    print(f"Event window: {on_dt} to {off_dt}")

    # Event preparation does not depend on the phase, so do it once.
    events = _prepare_events(matches_df)
    has_phase_column = events is not None and 'phase' in events.columns
    window_start = df['timestamp'].min()
    window_end = df['timestamp'].max()

    # Add data traces and shaded backgrounds for each phase
    for col_idx, phase in enumerate(phases, start=1):
        # Legend entries are emitted only for the first column; the other
        # columns join the same legend group.
        first_column = col_idx == 1

        # Add shaded background for event window in all rows
        for row_idx in range(1, n_rows + 1):
            # Subplot axes are numbered row by row: y, y2, y3, ...
            axis_num = (row_idx - 1) * n_cols + col_idx
            yref = 'y domain' if axis_num == 1 else f'y{axis_num} domain'

            fig.add_shape(
                type="rect",
                x0=on_dt, x1=off_dt,
                y0=0, y1=1,
                yref=yref,
                fillcolor="rgba(255, 200, 200, 0.3)",
                layer="below",
                line_width=0,
                row=row_idx, col=col_idx
            )

        # Row 1: Original data / Row 2: After segregation (remaining)
        for row_idx, kind, legend_name in ((1, 'original', 'Original'),
                                           (2, 'remaining', 'Remaining')):
            column = f'{kind}_{phase}'
            if column in df.columns:
                fig.add_trace(
                    go.Scatter(
                        x=df['timestamp'], y=df[column],
                        mode='lines', line=dict(color=COLORS[kind], width=1),
                        name=legend_name if first_column else None,
                        showlegend=first_column,
                        legendgroup=kind
                    ),
                    row=row_idx, col=col_idx
                )

        # Row 3: Segregated by duration (if columns exist)
        traces_added = 0
        for duration_type in ['short', 'medium', 'long']:
            col_name = f'{duration_type}_duration_{phase}'
            if col_name not in df.columns:
                continue
            y_data = df[col_name].fillna(0)
            if y_data.sum() > 0:  # Only plot if there's actual data
                fig.add_trace(
                    go.Scatter(
                        x=df['timestamp'], y=y_data,
                        mode='lines', line=dict(color=COLORS[duration_type], width=1),
                        name=f'{duration_type.capitalize()} duration' if first_column else None,
                        showlegend=first_column,
                        legendgroup=duration_type
                    ),
                    row=3, col=col_idx
                )
                traces_added += 1

        if first_column and traces_added == 0:
            print("⚠ Warning: No traces added to Row 3 (Segregation Data)")

        # Row 4: Events - both matched (connecting lines) and unmatched (individual markers)
        matched_count = 0
        unmatched_count = 0

        if has_phase_column:
            phase_events = _events_for_phase(events, phase, window_start, window_end)
            print(f"Events in window for {phase}: {len(phase_events)}")

            for _, event in phase_events.iterrows():
                if pd.notna(event.get('on_start')) and pd.notna(event.get('off_start')):
                    # Matched event: line from the ON point to the OFF point
                    on_mag = event.get('on_magnitude', 0)
                    off_mag = event.get('off_magnitude', 0)

                    fig.add_trace(
                        go.Scatter(
                            x=[event['on_start'], event['off_start']],
                            y=[on_mag, off_mag],
                            mode='lines+markers',
                            line=dict(color=COLORS['matched'], width=2, dash='solid'),
                            marker=dict(size=8, color=['green', 'red']),
                            showlegend=False,
                            hovertemplate=(
                                f"<b>Match</b><br>"
                                f"ON:  {event['on_start']}<br>"
                                f"OFF: {event['off_start']}<br>"
                                # A missing duration used to crash on ':.0f'
                                f"Duration: {_format_hover_value(event.get('duration'), ' min')}<br>"
                                f"ON mag:  {_format_hover_value(on_mag, 'W')}<br>"
                                f"OFF mag: {_format_hover_value(off_mag, 'W')}<br>"
                                f"<extra></extra>"
                            )
                        ),
                        row=4, col=col_idx
                    )
                    matched_count += 1
                elif pd.notna(event.get('start')) and pd.notna(event.get('magnitude')):
                    # Unmatched individual event
                    event_type = event.get('event', 'unknown')
                    color = COLORS['unmatched_on'] if event_type == 'on' else COLORS['unmatched_off']

                    # Without an end time, draw the event as a vertical line
                    event_end = event.get('end')
                    if pd.isna(event_end):
                        event_end = event['start']

                    fig.add_trace(
                        go.Scatter(
                            x=[event['start'], event_end],
                            y=[0, event['magnitude']],
                            mode='lines+markers',
                            line=dict(dash='dash', color=color, width=1),
                            marker=dict(size=6),
                            showlegend=False,
                            hovertemplate=(
                                # str() guards against a non-string event type (e.g. NaN)
                                f"<b>Unmatched {str(event_type).upper()}</b><br>"
                                f"Start: {event['start']}<br>"
                                f"End: {event_end}<br>"
                                f"Magnitude: {_format_hover_value(event['magnitude'], 'W')}<br>"
                                f"<extra></extra>"
                            )
                        ),
                        row=4, col=col_idx
                    )
                    unmatched_count += 1
        elif events is not None and first_column:
            print("⚠ Events df missing phase column")

        if first_column:
            events_added = matched_count + unmatched_count
            print(f"  Added {events_added} events to Row 4 ({matched_count} matched, {unmatched_count} unmatched)")

    # Row titles on y-axis
    row_titles = ["Original Data", "After Segregation", "Segregation Data", "Events"]
    for row_idx, title in enumerate(row_titles, start=1):
        fig.update_yaxes(title_text=title, row=row_idx, col=1)

    # Apply shared Y-axis range to all rows and columns
    # NOTE(review): the range starts at 0, so negative magnitudes in the
    # Events row would be drawn outside the visible area - confirm the sign
    # convention of off_magnitude / magnitude.
    for row_idx in range(1, n_rows + 1):
        for col_idx in range(1, n_cols + 1):
            fig.update_yaxes(range=y_range, row=row_idx, col=col_idx)

    # Update layout
    fig.update_layout(
        title=f"{device_name} - House {house_id}<br><sub>{activation_info['label']}</sub>",
        hovermode="x unified",
        showlegend=True,
        legend=dict(
            orientation="h",
            yanchor="bottom",
            y=1.02,
            xanchor="right",
            x=1
        ),
        height=900
    )

    return fig

## 5. Load Data and Parse Dates

In [None]:
# Turn the pasted DEVICE_DATES text into activation records and list them
activations = parse_device_dates(DEVICE_DATES)

summary_lines = [f"Found {len(activations)} activations:"]
summary_lines.extend(
    f"  {position}. {activation['label']}"
    for position, activation in enumerate(activations, start=1)
)
print("\n".join(summary_lines))

In [None]:
# Load the house data - only for required months.
# Reset the results first so the later cells (which test `df is not None`)
# never pick up stale data from an earlier run if loading fails here.
df = None
all_matches = None

try:
    # First, get the required months from the activations
    required_months = get_required_months(activations)
    # Tuples are (month, year); sort on (year, month) for chronological order
    months_in_order = sorted(required_months, key=lambda month_year: (month_year[1], month_year[0]))
    print(f"\nRequired months: {months_in_order}")
    print(f"Total: {len(required_months)} months\n")

    # Load only the required months
    df = load_summarized_data(EXPERIMENT_DIR, HOUSE_ID, RUN_NUMBER, required_months=required_months)
    print(f"\nLoaded data for house {HOUSE_ID}")
    print(f"Date range: {df['timestamp'].min()} to {df['timestamp'].max()}")
    print(f"Total rows: {len(df)}")
    print(f"\nAvailable columns: {list(df.columns)}")

    # Check for segregation columns
    seg_cols = [col for col in df.columns if 'duration' in col]
    if seg_cols:
        print(f"✓ Found segregation columns: {seg_cols}")
    else:
        print("⚠ No segregation columns found (short/medium/long_duration)")

    # Load matches for the required months
    print("\n--- Loading matches/events ---")
    matches_dfs = []
    for month, year in months_in_order:
        matches_df = load_matches_for_month(EXPERIMENT_DIR, HOUSE_ID, RUN_NUMBER, month, year)
        if matches_df is not None:
            print(f"✓ Loaded matches for {month}/{year}: {len(matches_df)} events")
            matches_dfs.append(matches_df)
        else:
            print(f"  No matches found for {month}/{year}")

    if matches_dfs:
        all_matches = pd.concat(matches_dfs, ignore_index=True)
        print(f"\n✓ Total matches loaded: {len(all_matches)}")
        print(f"Matches columns: {list(all_matches.columns)}")
    else:
        all_matches = None
        print("\n⚠ No matches data found")

except FileNotFoundError as e:
    print(f"Error: {e}")
    print("\nPlease check:")
    print("1. EXPERIMENT_DIR path is correct")
    print("2. HOUSE_ID exists in the experiment")
    print("3. RUN_NUMBER is valid")
    df = None
    all_matches = None

In [None]:
# Sanity check: does every summarized monthly file contain segregated data?
import pickle
import sys
import numpy as np

# Alias numpy._core to numpy.core when this numpy does not expose _core
if not hasattr(sys.modules.get('numpy', None), '_core'):
    sys.modules['numpy._core'] = np.core
    sys.modules['numpy._core.numeric'] = np.core.numeric

exp_path = Path(EXPERIMENT_DIR)
summarized_dir = exp_path / f"run_{RUN_NUMBER}" / f"house_{HOUSE_ID}" / "summarized"

if not summarized_dir.exists():
    print(f"Summarized directory not found: {summarized_dir}")
else:
    phases = ['w1', 'w2', 'w3']
    dur_types = ['short', 'medium', 'long']
    months_ok = 0
    months_empty = 0
    total_kwh = 0.0

    for pkl_path in sorted(summarized_dir.glob(f"summarized_{HOUSE_ID}_*.pkl")):
        with open(pkl_path, 'rb') as handle:
            month_df = pickle.load(handle)

        # Sum of every *duration* column, with missing values counted as 0
        duration_cols = [name for name in month_df.columns if 'duration' in name]
        segregated_sum = sum(month_df[name].fillna(0).sum() for name in duration_cols)

        if segregated_sum > 0:
            months_ok += 1
            # NOTE(review): dividing by 60000 looks like a W-per-minute to kWh
            # conversion, which assumes 1-minute samples - confirm.
            total_kwh += segregated_sum / 60000
        else:
            months_empty += 1
            print(f"  EMPTY: {pkl_path.stem}")

    print(f"\nHouse {HOUSE_ID}: {months_ok} months with data, {months_empty} empty, {total_kwh:.1f} kWh total")
    if months_empty != 0:
        print(f"WARNING: {months_empty} months have no segregated data!")
    else:
        print("All months have segregated data")

## 6. Generate Plots

In [None]:
# Generate plots for all activations
if df is not None:
    for i, act in enumerate(activations, 1):
        print(f"\n{'='*60}")
        print(f"Generating plot {i}/{len(activations)}: {act['label']}")
        print(f"{'='*60}")
        
        try:
            filtered_df = filter_data_by_window(df, act['center_time'])
            
            # Debug: show segregation data sums per phase in this window
            phases = ['w1', 'w2', 'w3']
            print(f"\n  Segregation data in filtered window ({len(filtered_df)} rows):")
            for phase in phases:
                phase_sums = []
                for dur in ['short', 'medium', 'long']:
                    col = f'{dur}_duration_{phase}'
                    if col in filtered_df.columns:
                        s = filtered_df[col].fillna(0).sum()
                        phase_sums.append(f"{dur}={s:.0f}")
                    else:
                        phase_sums.append(f"{dur}=NO COL")
                total = sum(filtered_df[f'{d}_duration_{phase}'].fillna(0).sum() 
                           for d in ['short', 'medium', 'long'] 
                           if f'{d}_duration_{phase}' in filtered_df.columns)
                print(f"    {phase}: {', '.join(phase_sums)}  (total={total:.0f}W)")
            
            fig = create_device_plot(filtered_df, act, DEVICE_NAME, HOUSE_ID, matches_df=all_matches)
            fig.show()
        except ValueError as e:
            print(f"  Skipping - {e}")
else:
    print("No data loaded. Please fix the errors above.")

In [None]:
# Create output directory: ./plots/<device name, lower-cased, spaces as underscores>_<house id>
OUTPUT_DIR = f"./plots/{DEVICE_NAME.replace(' ', '_').lower()}_{HOUSE_ID}"
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Install kaleido for image export (only needed once)
try:
    import kaleido
except ImportError:
    print("Installing kaleido for image export...")
    import subprocess
    import sys
    # Go through the running interpreter so the package lands in this
    # kernel's environment; a bare `pip` may belong to another Python.
    subprocess.check_call([sys.executable, '-m', 'pip', 'install', '-q', 'kaleido'])
    import kaleido

if df is not None:
    # Save all plots as HTML and PNG files
    saved_count = 0
    for act in activations:
        try:
            filtered_df = filter_data_by_window(df, act['center_time'])
            fig = create_device_plot(filtered_df, act, DEVICE_NAME, HOUSE_ID, matches_df=all_matches)

            # Create filename from the ON date and ON time
            date_str = act['date'].strftime('%Y%m%d')
            time_str = act['on_time'].replace(':', '')
            base_filename = f"{OUTPUT_DIR}/plot_{date_str}_{time_str}"

            # Save as HTML (interactive, download to view)
            html_file = f"{base_filename}.html"
            fig.write_html(html_file)
            print(f"✓ HTML: {html_file}")

            # Save as PNG (static image, view directly in Drive)
            png_file = f"{base_filename}.png"
            fig.write_image(png_file, width=1400, height=900)
            print(f"✓ PNG:  {png_file}")

            saved_count += 1
        except ValueError as e:
            print(f"Skipped {act['label']}: {e}")

    # Report the real count: skipped activations produce no files
    print(f"\n📁 Saved {saved_count}/{len(activations)} plots to: {OUTPUT_DIR}")
    print(f"  • HTML files - Download to view interactive plots")
    print(f"  • PNG files  - View directly in Google Drive")
else:
    print("No data loaded. Cannot save plots.")