# Device Activations Visualization - Dynamic Threshold

This notebook visualizes device activations from the **unified JSON format** created by the dynamic threshold pipeline.

## What This Notebook Does
- Loads device activations from `device_activations_{house_id}.json`
- Shows all detected devices across multiple threshold runs
- Displays per-minute device power consumption
- Creates interactive timeline plots

## Quick Start
1. Set `EXPERIMENT_DIR` to the experiment output directory that contains `device_activations_{house_id}.json`
2. Set `HOUSE_ID`
3. Run all cells

## Filtering Options
You can filter activations by:
- Device type (e.g., "boiler", "central_ac", "regular_ac")
- Threshold/iteration (0=2000W, 1=1500W, 2=1100W, 3=800W)
- Date range
- Match type (matched, unmatched_on, unmatched_off)
- Phase (w1, w2, w3)

## 1. Setup

In [None]:
# Install required packages
# Run this cell first when using in Google Colab
!pip install pandas plotly

In [None]:
# Mount Google Drive (for Colab)
# Only the import is guarded: an ImportError means we are not in Colab.
# Errors raised while mounting are left to surface instead of being
# misreported as "not running in Colab".
try:
    from google.colab import drive
except ImportError:
    print("Not running in Colab - skipping drive mount")
else:
    drive.mount('/content/drive')
    print("✓ Google Drive mounted successfully")

In [None]:
import json
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from datetime import datetime, timedelta
from pathlib import Path
import os

## 2. Configuration

In [None]:
# ============================================
# CONFIGURATION - EDIT THESE VALUES
# ============================================

# House ID; used to build the input file name device_activations_{HOUSE_ID}.json
# and the plot titles / output folder name.
HOUSE_ID = "305"

# Path to experiment output directory containing device_activations_{house_id}.json
# For Colab: mount Google Drive and set path like "/content/drive/MyDrive/experiments/exp010_20260215_120000"
# For local: use relative or absolute path
EXPERIMENT_DIR = "../experiment_pipeline/OUTPUT/experiments/exp010_20260215_120000"

# Filtering options (set to None to show all).
# Each non-None value is compared for equality against the column of the same name.
FILTER_DEVICE_TYPE = None  # e.g., "boiler", "central_ac", "regular_ac", or None for all
FILTER_ITERATION = None    # e.g., 0, 1, 2, 3, or None for all iterations (also indexes threshold_schedule)
FILTER_PHASE = None        # e.g., "w1", "w2", "w3", or None for all phases
FILTER_MATCH_TYPE = None   # e.g., "matched", "unmatched_on", "unmatched_off", or None for all

# Date range filter (set to None to show all).
# Strings are parsed with pd.to_datetime and compared against on_start / off_start.
FILTER_START_DATE = None  # e.g., "2024-01-01" or None
FILTER_END_DATE = None    # e.g., "2024-01-31" or None

# Display options
# After filtering, only the first MAX_ACTIVATIONS_TO_PLOT rows are kept for plotting.
MAX_ACTIVATIONS_TO_PLOT = 20  # Maximum number of activations to plot (to avoid slow rendering)

## 3. Load Activations JSON

In [None]:
# Locate the unified activations file for the configured house
json_path = Path(EXPERIMENT_DIR) / f"device_activations_{HOUSE_ID}.json"

# Fail early with actionable hints instead of a bare "No such file" error
if not json_path.exists():
    raise FileNotFoundError(
        f"Device activations JSON not found: {json_path}\n\n"
        f"Please check:\n"
        f"1. EXPERIMENT_DIR is correct\n"
        f"2. The experiment was run with the dynamic threshold pipeline\n"
        f"3. The JSON file was generated (should be created automatically)"
    )

# Read as UTF-8 explicitly: without `encoding`, open() uses the platform's
# locale encoding, which can mis-decode non-ASCII content on some systems.
with open(json_path, 'r', encoding='utf-8') as f:
    activations_data = json.load(f)

# Header fields of the unified format
print(f"✓ Loaded device activations for house {activations_data['house_id']}")
print(f"  Generated at: {activations_data['generated_at']}")
print(f"  Threshold schedule: {activations_data['threshold_schedule']}")
print(f"  Total activations: {activations_data['total_activations']}")
print(f"    - Matched: {activations_data['total_matched']}")
print(f"    - Unmatched ON: {activations_data['total_unmatched_on']}")
print(f"    - Unmatched OFF: {activations_data['total_unmatched_off']}")

# Convert to DataFrame for easier filtering (one row per activation record)
activations_df = pd.DataFrame(activations_data['activations'])

# Convert timestamp strings to datetime; unparseable or missing values become NaT.
# The column check keeps this safe when a column is absent (e.g. empty activations list).
for col in ['on_start', 'on_end', 'off_start', 'off_end']:
    if col in activations_df.columns:
        activations_df[col] = pd.to_datetime(activations_df[col], errors='coerce')

print(f"\n✓ Converted to DataFrame: {len(activations_df)} rows")
print(f"\nColumns: {list(activations_df.columns)}")

## 4. Explore the Data

In [None]:
# Summary statistics
print("=" * 60)
print("ACTIVATIONS SUMMARY")
print("=" * 60)

# iteration index -> threshold value, taken from the loaded schedule
threshold_map = dict(enumerate(activations_data['threshold_schedule']))

if activations_df.empty:
    # An empty 'activations' list produces a DataFrame without columns, so the
    # column lookups below would raise KeyError.
    print("\nNo activations found - nothing to summarize")
else:
    # By match type
    print("\nBy match type:")
    print(activations_df['match_type'].value_counts())

    # By device type (for matched events)
    matched = activations_df[activations_df['match_type'] == 'matched']
    if not matched.empty:
        print("\nBy device type (matched only):")
        print(matched['device_type'].value_counts())

    # By iteration/threshold; iterations missing from the schedule show 'N/A'
    print("\nBy iteration (threshold):")
    for iteration, count in activations_df['iteration'].value_counts().sort_index().items():
        threshold = threshold_map.get(iteration, 'N/A')
        print(f"  Iteration {iteration} ({threshold}W): {count} activations")

    # By phase
    print("\nBy phase:")
    print(activations_df['phase'].value_counts())

    # Date range over every known ON/OFF start timestamp (NaT values excluded)
    print("\nDate range:")
    all_dates = pd.concat([activations_df['on_start'].dropna(), activations_df['off_start'].dropna()])
    if not all_dates.empty:
        print(f"  {all_dates.min()} to {all_dates.max()}")

## 5. Filter Activations

In [None]:
# Apply filters. Each filter narrows `filtered_df` further; a filter set to
# None in the configuration cell is skipped.
filtered_df = activations_df.copy()

if FILTER_DEVICE_TYPE is not None:
    filtered_df = filtered_df[filtered_df['device_type'] == FILTER_DEVICE_TYPE]
    print(f"Filtered by device_type = {FILTER_DEVICE_TYPE}: {len(filtered_df)} activations")

if FILTER_ITERATION is not None:
    filtered_df = filtered_df[filtered_df['iteration'] == FILTER_ITERATION]
    # Fall back to 'N/A' (as the summary cell does) instead of raising
    # IndexError when the iteration is not part of the schedule.
    threshold = dict(enumerate(activations_data['threshold_schedule'])).get(FILTER_ITERATION, 'N/A')
    print(f"Filtered by iteration = {FILTER_ITERATION} ({threshold}W): {len(filtered_df)} activations")

if FILTER_PHASE is not None:
    filtered_df = filtered_df[filtered_df['phase'] == FILTER_PHASE]
    print(f"Filtered by phase = {FILTER_PHASE}: {len(filtered_df)} activations")

if FILTER_MATCH_TYPE is not None:
    filtered_df = filtered_df[filtered_df['match_type'] == FILTER_MATCH_TYPE]
    print(f"Filtered by match_type = {FILTER_MATCH_TYPE}: {len(filtered_df)} activations")

if FILTER_START_DATE is not None:
    start_dt = pd.to_datetime(FILTER_START_DATE)
    # Reference time per activation: on_start for matched/unmatched_on,
    # off_start for unmatched_off (which has no on_start).
    # Rows with neither timestamp compare as False and are dropped.
    event_time = filtered_df['on_start'].fillna(filtered_df['off_start'])
    filtered_df = filtered_df[event_time >= start_dt]
    print(f"Filtered by start_date >= {FILTER_START_DATE}: {len(filtered_df)} activations")

if FILTER_END_DATE is not None:
    end_dt = pd.to_datetime(FILTER_END_DATE)
    event_time = filtered_df['on_start'].fillna(filtered_df['off_start'])
    if end_dt == end_dt.normalize():
        # A date without a time component parses to midnight at the START of
        # that day; treat it as inclusive so events during the end date are kept.
        mask = event_time < end_dt + pd.Timedelta(days=1)
    else:
        mask = event_time <= end_dt
    filtered_df = filtered_df[mask]
    print(f"Filtered by start_date <= {FILTER_END_DATE}: {len(filtered_df)} activations")

# Limit to avoid rendering too many plots (keeps the first rows in file order)
if len(filtered_df) > MAX_ACTIVATIONS_TO_PLOT:
    print(f"\n⚠ Warning: {len(filtered_df)} activations found, showing first {MAX_ACTIVATIONS_TO_PLOT}")
    print(f"  (Adjust MAX_ACTIVATIONS_TO_PLOT to show more)")
    filtered_df = filtered_df.head(MAX_ACTIVATIONS_TO_PLOT)

print(f"\n✓ Final count: {len(filtered_df)} activations to display")

# Show sample
if not filtered_df.empty:
    print("\nSample (first 5):")
    display_cols = ['phase', 'on_start', 'off_start', 'on_magnitude', 'duration', 'device_type', 'match_type', 'iteration']
    # Only show columns that are actually present, so a missing optional
    # field does not abort the cell with KeyError.
    display_cols = [col for col in display_cols if col in filtered_df.columns]
    print(filtered_df[display_cols].head())

## 6. Visualization Functions

In [None]:
def plot_single_activation(activation, index=None):
    """
    Build a Plotly figure for a single device activation.

    The figure contains up to three traces:
    - ON event: a line from 0 at ``on_start`` to ``on_magnitude`` at ``on_end``
    - OFF event: a line from 0 at ``off_start`` to ``off_magnitude`` at ``off_end``
    - Device power: the ``values`` list as a filled line, one point per minute
      starting at the activation start (if ``values`` is available)

    Args:
        activation: One activation record supporting ``[]`` and ``.get``
            (a DataFrame row or a dict) with the keys ``match_type``,
            ``phase``, ``iteration``, ``device_type``, ``duration``, the
            ``on_*`` / ``off_*`` timestamps, magnitudes and durations, and
            optionally ``values``.
        index: Optional ordinal shown as a ``[index]`` prefix in the title.

    Returns:
        A ``plotly.graph_objects.Figure``.

    Note:
        Reads the module-level ``activations_data['threshold_schedule']`` to
        translate the iteration number into a threshold for the title.
    """
    match_type = activation['match_type']
    phase = activation['phase']
    iteration = activation['iteration']

    # The iteration may arrive as a float (pandas upcasts integer columns that
    # contain missing values) or be outside the schedule; show 'N/A' then.
    try:
        threshold = activations_data['threshold_schedule'][int(iteration)]
    except (TypeError, ValueError, IndexError):
        threshold = 'N/A'

    # Build title
    if match_type == 'matched':
        device_type = activation['device_type']
        # NaN (missing value) is truthy, so a plain `or 'Unknown'` would let it
        # through and fail on .upper(); accept only non-empty strings.
        if not (isinstance(device_type, str) and device_type):
            device_type = 'Unknown'
        duration = activation['duration']
        title = f"{device_type.upper()} - {phase.upper()} - Iteration {iteration} ({threshold}W)<br>"
        title += f"<sub>{activation['on_start']} to {activation['off_start']} ({duration:.0f} min)</sub>"
    elif match_type == 'unmatched_on':
        title = f"UNMATCHED ON - {phase.upper()} - Iteration {iteration} ({threshold}W)<br>"
        title += f"<sub>{activation['on_start']} ({activation['on_duration']:.0f} min)</sub>"
    else:  # unmatched_off
        title = f"UNMATCHED OFF - {phase.upper()} - Iteration {iteration} ({threshold}W)<br>"
        title += f"<sub>{activation['off_start']} ({activation['off_duration']:.0f} min)</sub>"

    if index is not None:
        title = f"[{index}] " + title

    # Create figure
    fig = go.Figure()

    # ON event: only drawn when both its start time and magnitude are known
    if pd.notna(activation['on_start']) and pd.notna(activation['on_magnitude']):
        fig.add_trace(go.Scatter(
            x=[activation['on_start'], activation['on_end']],
            y=[0, activation['on_magnitude']],
            mode='lines+markers',
            name='ON Event',
            line=dict(color='green', width=3),
            marker=dict(size=10, color='green'),
            hovertemplate=(
                f"<b>ON Event</b><br>"
                f"Start: {activation['on_start']}<br>"
                f"End: {activation['on_end']}<br>"
                f"Magnitude: {activation['on_magnitude']:.0f}W<br>"
                f"Duration: {activation['on_duration']:.0f} min<br>"
                f"<extra></extra>"
            )
        ))

    # OFF event: same representation, drawn in red
    if pd.notna(activation['off_start']) and pd.notna(activation['off_magnitude']):
        fig.add_trace(go.Scatter(
            x=[activation['off_start'], activation['off_end']],
            y=[0, activation['off_magnitude']],
            mode='lines+markers',
            name='OFF Event',
            line=dict(color='red', width=3),
            marker=dict(size=10, color='red'),
            hovertemplate=(
                f"<b>OFF Event</b><br>"
                f"Start: {activation['off_start']}<br>"
                f"End: {activation['off_end']}<br>"
                f"Magnitude: {activation['off_magnitude']:.0f}W<br>"
                f"Duration: {activation['off_duration']:.0f} min<br>"
                f"<extra></extra>"
            )
        ))

    # Plot per-minute device power consumption if available.
    # `values` may be None or NaN (missing cell) or absent entirely, so require
    # a real sequence before calling len() on it.
    power_values = activation.get('values')
    series_start = (
        activation['on_start'] if pd.notna(activation['on_start'])
        else activation['off_start']
    )
    has_values = pd.api.types.is_list_like(power_values) and len(power_values) > 0

    # Without a valid start timestamp the samples cannot be placed on the time axis
    if has_values and pd.notna(series_start):
        # One timestamp per sample at 1-minute spacing from the activation start.
        # NOTE(review): assumes `values` is sampled per minute beginning at
        # on_start (off_start for unmatched OFF events) - confirm with the pipeline.
        timestamps = pd.date_range(start=series_start, periods=len(power_values), freq='1min')

        # Replace None with 0
        values = [v if v is not None else 0 for v in power_values]

        fig.add_trace(go.Scatter(
            x=timestamps,
            y=values,
            mode='lines',
            name='Device Power',
            line=dict(color='blue', width=1.5),
            fill='tozeroy',
            fillcolor='rgba(0, 100, 255, 0.2)',
            hovertemplate='<b>Device Power</b><br>Time: %{x}<br>Power: %{y:.0f}W<extra></extra>'
        ))

    # Layout
    fig.update_layout(
        title=title,
        xaxis_title='Time',
        yaxis_title='Power (W)',
        hovermode='x unified',
        height=400,
        showlegend=True
    )

    return fig


def plot_timeline_overview(activations_df, threshold_schedule):
    """
    Create a timeline overview showing all activations.

    Each activation is drawn as a horizontal bar from its start to its end
    time, coloured by device type, in one subplot row per phase (w1, w2, w3).

    Args:
        activations_df: DataFrame of activations with the columns
            ``on_start``, ``on_end``, ``off_start``, ``off_end``, ``phase``,
            ``match_type``, ``iteration`` and optionally ``device_type`` /
            ``duration``. It is not modified; a copy is used internally.
        threshold_schedule: Sequence of thresholds indexed by iteration
            number; used to show the threshold in each bar's hover text.

    Returns:
        A ``plotly.graph_objects.Figure``, or ``None`` (after printing a
        message) when there is nothing plottable.

    Note:
        Reads the module-level ``HOUSE_ID`` for the figure title.
    """
    if activations_df.empty:
        print("No activations to plot")
        return None

    # Prepare data
    df = activations_df.copy()

    # Start: on_start, falling back to off_start (unmatched OFF events).
    # End: off_end, falling back to on_end, then to the start itself.
    df['start'] = df['on_start'].fillna(df['off_start'])
    df['end'] = df['off_end'].fillna(df['on_end']).fillna(df['start'])

    # Drop rows with no valid times
    df = df[df['start'].notna() & df['end'].notna()]

    if df.empty:
        print("No valid activations to plot")
        return None

    # Sort by start time
    df = df.sort_values('start')

    # Color mapping; device types not listed here are drawn in gray
    device_colors = {
        'boiler': 'red',
        'central_ac': 'blue',
        'regular_ac': 'green',
        'other': 'gray',
    }

    # iteration index -> threshold value for the hover text
    threshold_map = dict(enumerate(threshold_schedule or []))

    # Create one subplot row per phase
    phases = ['w1', 'w2', 'w3']
    fig = make_subplots(
        rows=len(phases), cols=1,
        shared_xaxes=True,
        vertical_spacing=0.05,
        subplot_titles=[f"Phase {p}" for p in phases]
    )

    for row_idx, phase in enumerate(phases, start=1):
        phase_df = df[df['phase'] == phase]

        for idx, activation in phase_df.iterrows():
            # Missing device types (None or NaN) are grouped under 'other';
            # NaN is truthy, so an `or 'other'` fallback alone would miss it.
            device_type = activation.get('device_type')
            if not (isinstance(device_type, str) and device_type):
                device_type = 'other'
            color = device_colors.get(device_type, 'gray')

            # Label
            if activation['match_type'] == 'matched':
                label = f"{device_type} (iter {activation['iteration']})"
            else:
                label = f"{activation['match_type']} (iter {activation['iteration']})"

            threshold = threshold_map.get(activation['iteration'], 'N/A')

            # Add horizontal bar; y is the row's index label, which gives
            # every activation its own lane within the subplot
            fig.add_trace(
                go.Scatter(
                    x=[activation['start'], activation['end']],
                    y=[idx, idx],
                    mode='lines+markers',
                    line=dict(color=color, width=8),
                    marker=dict(size=6, color=color),
                    name=label,
                    showlegend=False,
                    hovertemplate=(
                        f"<b>{label}</b><br>"
                        f"Start: {activation['start']}<br>"
                        f"End: {activation['end']}<br>"
                        f"Duration: {activation.get('duration', 'N/A')} min<br>"
                        f"Threshold: {threshold}W<br>"
                        f"<extra></extra>"
                    )
                ),
                row=row_idx, col=1
            )

    fig.update_layout(
        title=f"Device Activations Timeline - House {HOUSE_ID}",
        height=800,
        hovermode='closest'
    )
    # With shared x-axes only the bottom subplot shows tick labels, so the
    # axis title belongs there; `xaxis_title` in the layout would attach it
    # to the top subplot's axis instead.
    fig.update_xaxes(title_text='Time', row=len(phases), col=1)

    return fig

## 7. Timeline Overview

In [None]:
# Render the per-phase timeline for the currently filtered activations
if filtered_df.empty:
    print("No activations to display")
else:
    timeline_fig = plot_timeline_overview(filtered_df, activations_data['threshold_schedule'])
    # The helper returns None when no row has usable timestamps
    if timeline_fig:
        timeline_fig.show()

## 8. Individual Activation Plots

In [None]:
# Show one figure per filtered activation, numbered from 1 in table order
if filtered_df.empty:
    print("No activations to display")
else:
    print(f"Plotting {len(filtered_df)} activations...\n")

    # iterrows() yields (index label, row); only the row is needed here
    for idx, (_, activation) in enumerate(filtered_df.iterrows(), start=1):
        fig = plot_single_activation(activation, index=idx)
        fig.show()

## 9. Export to Files (Optional)

In [None]:
# Create output directory
OUTPUT_DIR = f"./plots/activations_{HOUSE_ID}"
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Install kaleido for image export (only needed once)
try:
    import kaleido
except ImportError:
    print("Installing kaleido for image export...")
    import subprocess
    import sys
    # Run pip through the kernel's own interpreter so the package lands in the
    # environment this notebook imports from (a bare `pip` on PATH may belong
    # to a different Python installation).
    subprocess.check_call([sys.executable, '-m', 'pip', 'install', '-q', 'kaleido'])
    import kaleido

# Save timeline overview
if not filtered_df.empty:
    timeline_fig = plot_timeline_overview(filtered_df, activations_data['threshold_schedule'])
    if timeline_fig:
        timeline_fig.write_html(f"{OUTPUT_DIR}/timeline_overview.html")
        timeline_fig.write_image(f"{OUTPUT_DIR}/timeline_overview.png", width=1400, height=800)
        print(f"✓ Saved timeline overview")

    # Save individual plots (HTML for interactivity, PNG for quick viewing)
    for idx, (_, activation) in enumerate(filtered_df.iterrows(), start=1):
        fig = plot_single_activation(activation, index=idx)

        # Create filename from ordinal, match type, phase and start time
        match_type = activation['match_type']
        start_time = activation['on_start'] if pd.notna(activation['on_start']) else activation['off_start']
        # NaT does not support strftime; use a placeholder so one record
        # without timestamps does not abort the whole export.
        date_str = start_time.strftime('%Y%m%d_%H%M') if pd.notna(start_time) else 'unknown_time'
        filename = f"{OUTPUT_DIR}/activation_{idx:03d}_{match_type}_{activation['phase']}_{date_str}"

        fig.write_html(f"{filename}.html")
        fig.write_image(f"{filename}.png", width=1200, height=400)

    print(f"\n✓ Saved {len(filtered_df)} activation plots to {OUTPUT_DIR}")
    print(f"  • HTML files - Download to view interactive plots")
    print(f"  • PNG files  - View directly in Google Drive")
else:
    print("No activations to save")