# Device Usage Plot Generator

This notebook generates visualizations for specific device activations detected in the experiment analysis.

## What This Notebook Does
- Parses a list of dates/times from the experiment report
- Generates a 12-hour window plot for each activation (6 hours before and 6 hours after the device's ON time)
- Shows original power, remaining power, and segmented power for all 3 phases

## Where to Get the Data
1. Open the **House Report** (house_X.html) from the experiment analysis
2. Go to the **Device Detection** section (Central AC, Regular AC, or Boiler)
3. Click the **"Show Copyable Dates"** button below the table
4. Copy the text from the textarea that appears
5. Paste it in the `DEVICE_DATES` variable below

## Supported Date Formats
The notebook supports the following two formats, which can be mixed in the same list:

| Format | Example |
|--------|---------|
| DD/MM/YYYY HH:MM-HH:MM | `10/01/2024 08:30-14:15` |
| YYYY-MM-DD HH:MM-HH:MM | `2022-11-11 08:41-09:28` |

### Example Input
```
10/01/2024 08:30-14:15, 2022-11-11 08:41-09:28, 15/01/2024 10:00-16:45
```

## 1. Setup

In [None]:
# Install required packages (uncomment if running in Colab)
# !pip install pandas plotly

In [None]:
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from datetime import datetime, timedelta
from pathlib import Path
import os
import re

## 2. Configuration

### Set Your Parameters Here

In [None]:
# ============================================
# CONFIGURATION - EDIT THESE VALUES
# ============================================

# House ID (from the report)
# It is interpolated into directory and file names (house_{id}, summarized_{id}_*.csv)
HOUSE_ID = "10"

# Path to experiment output directory
# For Colab: mount Google Drive and set path like "/content/drive/MyDrive/experiments/..."
# For local: use relative or absolute path
EXPERIMENT_DIR = "../experiment_pipeline/OUTPUT/experiments/your_experiment_name"

# Run number (usually 0 for first iteration); selects the run_{N} folder
RUN_NUMBER = 0

# ============================================
# PASTE DEVICE DATES HERE
# Copy from "Show Copyable Dates" button in report
# ============================================
# Supported formats (can be mixed):
#   - DD/MM/YYYY HH:MM-HH:MM (e.g., "10/01/2024 08:30-14:15")
#   - YYYY-MM-DD HH:MM-HH:MM (e.g., "2022-11-11 08:41-09:28")
# Entries may be separated by commas and/or newlines.
DEVICE_DATES = """
10/01/2024 08:30-14:15, 2022-11-11 08:41-09:28, 15/01/2024 10:00-16:45
"""

# Device name (used in plot titles and in the output folder name)
DEVICE_NAME = "Central AC"  # Options: "Central AC", "Regular AC", "Boiler"

## 3. Helper Functions

In [None]:
def parse_device_dates(dates_string):
    """
    Parse device dates from the copied text.

    Entries are separated by commas and/or newlines. Two formats are
    supported and may be mixed in the same string:
    - "DD/MM/YYYY HH:MM-HH:MM" (e.g., "10/01/2024 08:30-14:15")
    - "YYYY-MM-DD HH:MM-HH:MM" (e.g., "2022-11-11 08:41-09:28")

    Entries that match neither format, or that match a format but contain
    an impossible date or time (e.g. "31/02/2024" or "25:00"), are skipped
    with a warning instead of aborting the whole parse.

    Returns list of dicts with:
        - date: datetime object (midnight of the activation date)
        - on_time: start time string ("HH:MM")
        - off_time: end time string ("HH:MM")
        - center_time: datetime of the ON time, used for plot centering
        - label: "DD/MM/YYYY HH:MM-HH:MM" string for titles and messages
    """
    import warnings  # local import: the function does not rely on another cell

    # (regex, strptime date format) pairs, tried in order. The patterns are
    # mutually exclusive, so the first match decides the date format.
    supported_formats = (
        (r'(\d{4}-\d{2}-\d{2})\s+(\d{2}:\d{2})-(\d{2}:\d{2})', '%Y-%m-%d'),
        (r'(\d{2}/\d{2}/\d{4})\s+(\d{2}:\d{2})-(\d{2}:\d{2})', '%d/%m/%Y'),
    )

    # Split by comma or newline, dropping empty pieces
    parts = [p.strip() for p in re.split(r'[,\n]', dates_string) if p.strip()]

    activations = []
    for part in parts:
        for pattern, date_format in supported_formats:
            match = re.match(pattern, part)
            if match:
                break
        else:
            warnings.warn(f"Skipping unrecognized device date entry: {part!r}")
            continue

        date_str, on_time, off_time = match.groups()
        try:
            date_obj = datetime.strptime(date_str, date_format)
            on_clock = datetime.strptime(on_time, '%H:%M')
            # off_time is only validated here; it is stored as a string
            datetime.strptime(off_time, '%H:%M')
        except ValueError:
            warnings.warn(f"Skipping device date entry with invalid date/time: {part!r}")
            continue

        center = date_obj.replace(hour=on_clock.hour, minute=on_clock.minute)
        label = f"{date_obj.strftime('%d/%m/%Y')} {on_time}-{off_time}"

        activations.append({
            'date': date_obj,
            'on_time': on_time,
            'off_time': off_time,
            'center_time': center,
            'label': label
        })

    return activations


def load_summarized_data(experiment_dir, house_id, run_number=0):
    """
    Load the summarized CSV data of one house as a single DataFrame.

    Locations are probed in this order; the first one that yields data wins:
    1. <experiment>/run_N/house_X/summarized/summarized_X_*.csv  (new layout)
    2. <experiment>/house_X/run_N/house_X/summarized/summarized_X_*.csv
    3. <experiment>/house_X/run_N/house_X/summarized_X.csv

    The 'timestamp' column is converted to datetimes (mixed formats,
    day-first). Raises FileNotFoundError when no location has data.
    """
    root = Path(experiment_dir)
    house_folder = f"house_{house_id}"
    run_folder = f"run_{run_number}"

    def _parse_timestamps(frame):
        # Convert the timestamp column in place and hand the frame back
        frame['timestamp'] = pd.to_datetime(frame['timestamp'], format='mixed', dayfirst=True)
        return frame

    def _load_split_files(folder):
        # Concatenate every summarized_<id>_*.csv in folder (sorted by name);
        # None means the folder is missing or holds no matching file.
        if not folder.exists():
            return None
        csv_files = sorted(folder.glob(f"summarized_{house_id}_*.csv"))
        if not csv_files:
            return None
        combined = pd.concat([pd.read_csv(csv_file) for csv_file in csv_files], ignore_index=True)
        return _parse_timestamps(combined)

    # New structure: experiment/run_N/house_{id}/summarized/
    loaded = _load_split_files(root / run_folder / house_folder / "summarized")
    if loaded is not None:
        return loaded

    # Old structure: experiment/house_{id}/run_N/house_{id}/
    legacy_dir = root / house_folder / run_folder / house_folder
    if legacy_dir.exists():
        loaded = _load_split_files(legacy_dir / "summarized")
        if loaded is not None:
            return loaded

        # Last resort: one un-split file directly inside the legacy folder
        single_file = legacy_dir / f"summarized_{house_id}.csv"
        if single_file.exists():
            return _parse_timestamps(pd.read_csv(single_file))

    raise FileNotFoundError(f"Could not find summarized data for house {house_id}")


def filter_data_by_window(df, center_time, hours_before=6, hours_after=6):
    """
    Return a copy of the rows whose 'timestamp' lies inside the window
    [center_time - hours_before, center_time + hours_after], both ends
    included.

    Raises ValueError when no row falls inside the window.
    """
    start = center_time - timedelta(hours=hours_before)
    end = center_time + timedelta(hours=hours_after)

    # between() is inclusive on both sides, i.e. (>= start) & (<= end)
    inside_window = df['timestamp'].between(start, end)
    window_rows = df.loc[inside_window].copy()

    if len(window_rows) == 0:
        raise ValueError(f"No data found for window {start} to {end}")

    return window_rows

## 4. Plotting Functions

In [None]:
def calculate_y_axis_range(df, phases):
    """Calculate shared y-axis range across all phases.

    For every phase, the original / remaining / short / medium / long
    duration columns that exist in ``df`` are scanned and the overall
    ``[min, max]`` is returned.

    Columns that are missing, or whose min/max is NaN (empty or all-NaN
    column), are ignored. If nothing usable is found, ``[0.0, 1.0]`` is
    returned so callers never receive ``[inf, -inf]`` as an axis range.
    """
    series_prefixes = ('original', 'remaining',
                       'short_duration', 'medium_duration', 'long_duration')

    y_min = float('inf')
    y_max = float('-inf')

    for phase in phases:
        for prefix in series_prefixes:
            col = f'{prefix}_{phase}'
            if col not in df.columns:
                continue
            col_min, col_max = df[col].min(), df[col].max()
            if pd.isna(col_min) or pd.isna(col_max):
                continue
            y_min = min(y_min, col_min)
            y_max = max(y_max, col_max)

    # Accumulators untouched -> no usable data; fall back to a finite range
    if y_min > y_max:
        return [0.0, 1.0]

    return [y_min, y_max]


def create_device_plot(df, activation_info, device_name, house_id):
    """
    Create a 4-row x 3-column plot showing power data for all phases.

    Rows:
    1. Original power
    2. Remaining power (after segregation)
    3. Segregated power (short, medium, long duration)
    4. Event markers (dashed ON/OFF vertical lines)

    Columns: w1, w2, w3 phases

    Args:
        df: DataFrame with a 'timestamp' column and, per phase, any of the
            'original_{phase}', 'remaining_{phase}' and
            '{short,medium,long}_duration_{phase}' columns. Columns that
            are absent are simply not drawn.
        activation_info: dict with 'center_time' (datetime of the ON time),
            'date' (datetime), 'off_time' ("HH:MM" string) and 'label'.
        device_name: device name shown in the figure title.
        house_id: house identifier shown in the figure title.

    Returns:
        The figure created by make_subplots with all traces added.
    """
    phases = ['w1', 'w2', 'w3']

    # Color scheme matching project
    COLORS = {
        'original': 'black',
        'remaining': 'blue',
        'short': 'green',
        'medium': 'orange',
        'long': 'purple',
    }

    # (subplot row, column-name prefix, COLORS key) for every data series,
    # listed in the order the traces are added for each phase.
    series_layout = [
        (1, 'original', 'original'),
        (2, 'remaining', 'remaining'),
        (3, 'short_duration', 'short'),
        (3, 'medium_duration', 'medium'),
        (3, 'long_duration', 'long'),
    ]

    fig = make_subplots(
        rows=4, cols=len(phases),
        shared_xaxes=True, shared_yaxes=True,
        subplot_titles=[f"Phase {phase}" for phase in phases]
    )

    # Calculate shared y-axis range
    y_range = calculate_y_axis_range(df, phases)

    # Calculate ON/OFF times for vertical lines
    on_dt = activation_info['center_time']
    off_hour, off_min = map(int, activation_info['off_time'].split(':'))
    off_dt = activation_info['date'].replace(hour=off_hour, minute=off_min)

    # Handle overnight events: an OFF time before the ON time belongs to the next day
    if off_dt < on_dt:
        off_dt += timedelta(days=1)

    # (legend name, timestamp, line color) of the row-4 event markers
    markers = [('ON', on_dt, 'green'), ('OFF', off_dt, 'red')]

    for col_idx, phase in enumerate(phases, start=1):
        # Rows 1-3: one line per available power series
        for row_idx, prefix, color_key in series_layout:
            column = f'{prefix}_{phase}'
            if column not in df.columns:
                continue
            fig.add_trace(
                go.Scatter(
                    x=df['timestamp'], y=df[column],
                    mode='lines', line=dict(color=COLORS[color_key]),
                    showlegend=False
                ),
                row=row_idx, col=col_idx
            )

        # Row 4: Event markers (ON/OFF vertical lines); legend entry only once
        for marker_name, marker_dt, marker_color in markers:
            fig.add_trace(
                go.Scatter(
                    x=[marker_dt, marker_dt], y=[0, y_range[1]],
                    mode='lines', line=dict(color=marker_color, dash='dash', width=2),
                    name=marker_name, showlegend=(col_idx == 1)
                ),
                row=4, col=col_idx
            )

    # Add legend entries (empty traces that only carry a name and a color)
    legend_items = [
        ('Original', COLORS['original']),
        ('Remaining', COLORS['remaining']),
        ('Short duration', COLORS['short']),
        ('Medium duration', COLORS['medium']),
        ('Long duration', COLORS['long']),
    ]
    for name, color in legend_items:
        fig.add_trace(go.Scatter(
            x=[None], y=[None], mode='lines',
            name=name, line=dict(color=color), showlegend=True
        ))

    # Row titles on y-axis
    row_titles = ["Original Data", "After Segregation", "Segregation Data", "Event Markers"]
    for row_idx, title in enumerate(row_titles, start=1):
        fig.update_yaxes(title_text=title, row=row_idx, col=1)

    fig.update_layout(
        title=f"{device_name} - House {house_id}<br><sub>{activation_info['label']}</sub>",
        hovermode="x unified",
        showlegend=True
    )

    # Apply the shared range to every subplot. Setting layout.yaxis_range
    # alone only affects the first y-axis (row 1), leaving rows 2-4 autoscaled.
    fig.update_yaxes(range=y_range)

    return fig

## 5. Load Data and Parse Dates

In [None]:
# Parse the device dates (entries that match no supported format are not included)
activations = parse_device_dates(DEVICE_DATES)

# Echo what was parsed; the 1-based numbers match PLOT_INDEX in Option B
print(f"Found {len(activations)} activations:")
for i, act in enumerate(activations, 1):
    print(f"  {i}. {act['label']}")

In [None]:
# Load the house data
# NOTE: `df` is only assigned when loading succeeds. If the except branch runs,
# fix the configuration and re-run this cell before running the plotting cells.
try:
    df = load_summarized_data(EXPERIMENT_DIR, HOUSE_ID, RUN_NUMBER)
    print(f"Loaded data for house {HOUSE_ID}")
    print(f"Date range: {df['timestamp'].min()} to {df['timestamp'].max()}")
    print(f"Columns: {list(df.columns)}")
except FileNotFoundError as e:
    # Raised by load_summarized_data when none of the known locations has data
    print(f"Error: {e}")
    print("\nPlease check:")
    print("1. EXPERIMENT_DIR path is correct")
    print("2. HOUSE_ID exists in the experiment")
    print("3. RUN_NUMBER is valid")

## 6. Generate Plots

### Option A: Generate All Plots

In [None]:
# Generate plots for all activations
for i, act in enumerate(activations, 1):
    print(f"\nGenerating plot {i}/{len(activations)}: {act['label']}")
    
    try:
        # Default window: 6 hours before and 6 hours after the ON time
        filtered_df = filter_data_by_window(df, act['center_time'])
        fig = create_device_plot(filtered_df, act, DEVICE_NAME, HOUSE_ID)
        fig.show()
    except ValueError as e:
        # filter_data_by_window raises ValueError when the window holds no data;
        # skip this activation and continue with the next one
        print(f"  Skipping - {e}")

### Option B: Generate Single Plot by Index

In [None]:
# Change this to select which activation to plot (1-based index,
# matching the numbered list printed after parsing the dates)
PLOT_INDEX = 1

if 1 <= PLOT_INDEX <= len(activations):
    # Convert the 1-based selection to a 0-based list index
    act = activations[PLOT_INDEX - 1]
    print(f"Generating plot for: {act['label']}")
    
    try:
        # Default window: 6 hours before and 6 hours after the ON time
        filtered_df = filter_data_by_window(df, act['center_time'])
        fig = create_device_plot(filtered_df, act, DEVICE_NAME, HOUSE_ID)
        fig.show()
    except ValueError as e:
        # filter_data_by_window raises ValueError when the window holds no data
        print(f"Error: {e}")
else:
    print(f"Invalid index. Choose 1-{len(activations)}")

## 7. Save Plots to Files

In [None]:
# Create output directory (one folder per device/house, e.g. ./plots/central_ac_10)
OUTPUT_DIR = f"./plots/{DEVICE_NAME.replace(' ', '_').lower()}_{HOUSE_ID}"
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Save all plots as HTML files, counting successes so the summary is accurate
saved_count = 0
for act in activations:
    try:
        filtered_df = filter_data_by_window(df, act['center_time'])
        fig = create_device_plot(filtered_df, act, DEVICE_NAME, HOUSE_ID)

        # Create filename from the activation date and ON time
        date_str = act['date'].strftime('%Y%m%d')
        time_str = act['on_time'].replace(':', '')
        filename = f"{OUTPUT_DIR}/plot_{date_str}_{time_str}.html"

        fig.write_html(filename)
    except ValueError as e:
        # e.g. no data in the 12-hour window around this activation
        print(f"Skipped {act['label']}: {e}")
    else:
        saved_count += 1
        print(f"Saved: {filename}")

# Report the real number of files written rather than claiming all were saved
print(f"\nSaved {saved_count}/{len(activations)} plots to: {OUTPUT_DIR}")