# PMU Disturbance Analysis - Temporal Analysis

This notebook performs comprehensive temporal analysis including:
1. Time series decomposition (STL)
2. Anomaly detection (3 methods: Z-score, IQR, Isolation Forest)
3. Inter-arrival time analysis
4. Change point detection
5. Cyclical patterns (hourly, daily, monthly)
6. Rolling statistics and trend analysis
7. ACF/PACF analysis

**Input**: `outputs/data/cleaned_data.parquet` (from Notebook 01)

**Output**: `outputs/data/temporal_results.csv`, visualizations, insights

In [None]:
# Import libraries
import sys
sys.path.append('..')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objects as go
from pathlib import Path
import warnings
warnings.filterwarnings('ignore')

# Import project modules
from src import temporal, visualizations as viz
import config

# Apply the plotting defaults defined in the project config
sns.set_style(config.PLOT_SETTINGS['style'])
plt.rcParams.update({
    'figure.figsize': config.DEFAULT_FIGSIZE,
    'font.size': config.PLOT_SETTINGS['font_size'],
})

print("Libraries loaded successfully!")

## 1. Load Data

In [None]:
# Load cleaned data from Notebook 01
merged_df = pd.read_parquet(config.CLEANED_DATA)
print(f"Loaded {len(merged_df):,} records")
print(f"Columns: {list(merged_df.columns)}")

# Identify datetime column (adjust based on your actual data).
# 'datetimetz' is requested alongside 'datetime64' because the latter only
# selects timezone-naive columns; without it a tz-aware timestamp column would
# be missed and the cell would fall through to the hard-coded default below.
datetime_cols = merged_df.select_dtypes(
    include=['datetime64', 'datetimetz']
).columns.tolist()
if len(datetime_cols) > 0:
    # Candidates keep the frame's column order, so the first one wins
    datetime_col = datetime_cols[0]
    print(f"\nUsing datetime column: {datetime_col}")
else:
    print("\nWARNING: No datetime column found. Please specify manually.")
    datetime_col = 'DateTime'  # Adjust this to match your data

## 2. Time Series Aggregation

In [None]:
# Build the daily series (freq='D') that the rest of the notebook analyses
daily_counts = temporal.aggregate_disturbances_by_time(merged_df, datetime_col=datetime_col, freq='D')

# Headline figures for the daily series, one per output line
print(
    "Daily time series:",
    f"  Date range: {daily_counts.index.min()} to {daily_counts.index.max()}",
    f"  Total days: {len(daily_counts)}",
    f"  Mean daily disturbances: {daily_counts.mean():.2f}",
    f"  Std daily disturbances: {daily_counts.std():.2f}",
    f"  Max daily disturbances: {daily_counts.max()}",
    sep="\n",
)

# Line chart of the unprocessed daily counts
fig, ax = plt.subplots(figsize=(14, 6))
ax.plot(daily_counts.index, daily_counts.values, color='steelblue', linewidth=1.5)
ax.set(xlabel='Date', ylabel='Daily Disturbance Count')
ax.set_title('PMU Disturbances Over Time', fontsize=14, fontweight='bold')
ax.grid(alpha=0.3)
plt.tight_layout()
viz.save_figure(fig, '02_01_daily_time_series')
plt.show()

## 3. Time Series Decomposition (STL)

In [None]:
# Decompose the daily series using a seasonal period of 7 observations
decomposition = temporal.decompose_time_series(daily_counts, period=7)

# Static rendering, saved with save_figure's default options
fig = viz.plot_time_series_decomposition(decomposition, title='STL Decomposition of Daily Disturbances', interactive=False)
viz.save_figure(fig, '02_02_stl_decomposition')
plt.show()

# Interactive rendering of the same decomposition, saved under the same base
# name with static=False / interactive=True
fig_interactive = viz.plot_time_series_decomposition(decomposition, title='STL Decomposition of Daily Disturbances (Interactive)', interactive=True)
viz.save_figure(fig_interactive, '02_02_stl_decomposition', static=False, interactive=True)
fig_interactive.show()

## 4. Anomaly Detection

In [None]:
# Run the three anomaly detectors on the same daily series
anomalies_zscore = temporal.detect_anomalies_zscore(daily_counts, threshold=3.0)
anomalies_iqr = temporal.detect_anomalies_iqr(daily_counts, multiplier=1.5)
anomalies_iforest = temporal.detect_anomalies_isolation_forest(daily_counts, contamination=0.1)

# Per-detector totals (.sum() of each detector's result)
print(
    "Anomaly Detection Results:",
    f"  Z-score (threshold=3.0): {anomalies_zscore.sum()} anomalies detected",
    f"  IQR (multiplier=1.5): {anomalies_iqr.sum()} anomalies detected",
    f"  Isolation Forest (contamination=0.1): {anomalies_iforest.sum()} anomalies detected",
    sep="\n",
)

# Display name -> detector result, shared by both plots below
anomalies_dict = {
    'Z-score': anomalies_zscore,
    'IQR': anomalies_iqr,
    'Isolation Forest': anomalies_iforest,
}

# Static figure
fig = viz.plot_anomalies(daily_counts, anomalies_dict, title='Anomaly Detection: Daily Disturbances', interactive=False)
viz.save_figure(fig, '02_03_anomaly_detection')
plt.show()

# Interactive figure, saved under the same base name
fig_interactive = viz.plot_anomalies(daily_counts, anomalies_dict, title='Anomaly Detection: Daily Disturbances (Interactive)', interactive=True)
viz.save_figure(fig_interactive, '02_03_anomaly_detection', static=False, interactive=True)
fig_interactive.show()

## 5. Inter-Arrival Time Analysis

In [None]:
# Compute inter-arrival times, passing 'SectionID' as the grouping key
# (presumably gaps are measured within each section - confirm in src.temporal)
df_with_intervals = temporal.calculate_inter_arrival_times(merged_df, datetime_col=datetime_col, group_by='SectionID')

# Keep only rows that have an interval value
inter_arrival = df_with_intervals['inter_arrival_hours'].dropna()

print(
    "Inter-Arrival Time Statistics:",
    f"  Mean: {inter_arrival.mean():.2f} hours",
    f"  Median: {inter_arrival.median():.2f} hours",
    f"  Std: {inter_arrival.std():.2f} hours",
    f"  Min: {inter_arrival.min():.2f} hours",
    f"  Max: {inter_arrival.max():.2f} hours",
    sep="\n",
)

# Poisson-process test; the helper reports failure through an 'error' key
poisson_test = temporal.test_poisson_process(inter_arrival, alpha=0.05)
print("\nPoisson Process Test:")
if 'error' in poisson_test:
    print(f"  Error: {poisson_test['error']}")
else:
    print(f"  Conclusion: {poisson_test['conclusion']}")
    print(f"  P-value: {poisson_test['p_value']:.4f}")
    print(f"  Mean inter-arrival: {poisson_test['mean_inter_arrival']:.2f} hours")

# Two-panel figure: histogram on the left, box plot on the right
fig, axes = plt.subplots(1, 2, figsize=(14, 5))
hist_ax, box_ax = axes

hist_ax.hist(inter_arrival, bins=50, edgecolor='black', alpha=0.7)
hist_ax.set(xlabel='Inter-Arrival Time (hours)', ylabel='Frequency', title='Distribution of Inter-Arrival Times')
hist_ax.grid(alpha=0.3)

box_ax.boxplot(inter_arrival, vert=True)
box_ax.set(ylabel='Inter-Arrival Time (hours)', title='Inter-Arrival Time Box Plot')
box_ax.grid(axis='y', alpha=0.3)

plt.tight_layout()
viz.save_figure(fig, '02_04_inter_arrival_times')
plt.show()

## 6. Change Point Detection

In [None]:
# Detect change points in the daily series.
# The helper returns integer positions into daily_counts. Positions at or past
# the end of the series cannot be mapped to a date (NOTE(review): presumably a
# ruptures-style trailing breakpoint equal to the series length - confirm in
# src.temporal). They are dropped here, once, so that the printed count, the
# plot title, the drawn markers and any later use of `change_points` all agree.
detected_positions = temporal.detect_change_points(
    daily_counts,
    model='rbf',
    min_size=7,
    jump=5,
    pen=3
)
change_points = [cp for cp in detected_positions if cp < len(daily_counts)]
change_dates = [daily_counts.index[cp] for cp in change_points]

print("Change Point Detection:")
print(f"  Number of change points detected: {len(change_points)}")
if change_dates:
    print(f"  Change point dates: {change_dates}")

# Plot time series with change points
fig, ax = plt.subplots(figsize=(14, 6))
ax.plot(daily_counts.index, daily_counts.values, linewidth=1.5, color='steelblue', label='Daily Counts')

# Mark change points. Only the first line carries a legend label so the legend
# shows a single "Change Point" entry; labels starting with an underscore are
# skipped by matplotlib's automatic legend.
for position, change_date in enumerate(change_dates):
    ax.axvline(
        change_date,
        color='red',
        linestyle='--',
        alpha=0.7,
        linewidth=2,
        label='Change Point' if position == 0 else '_nolegend_'
    )

ax.set_xlabel('Date')
ax.set_ylabel('Daily Disturbance Count')
ax.set_title(f'Change Point Detection ({len(change_points)} points detected)', fontsize=14, fontweight='bold')
ax.legend()
ax.grid(alpha=0.3)
plt.tight_layout()
viz.save_figure(fig, '02_05_change_points')
plt.show()

## 7. Rolling Statistics

In [None]:
# Rolling statistics over the window sizes listed in config.ROLLING_WINDOWS
rolling_stats = temporal.calculate_rolling_statistics(daily_counts, windows=config.ROLLING_WINDOWS)

# Preview the first rows of the result
print("Rolling Statistics Calculated:", rolling_stats.head(), sep="\n")

# Static figure
fig = viz.plot_rolling_statistics(rolling_stats, title='Rolling Statistics: Daily Disturbances', interactive=False)
viz.save_figure(fig, '02_06_rolling_statistics')
plt.show()

# Interactive figure, saved under the same base name
fig_interactive = viz.plot_rolling_statistics(rolling_stats, title='Rolling Statistics: Daily Disturbances (Interactive)', interactive=True)
viz.save_figure(fig_interactive, '02_06_rolling_statistics', static=False, interactive=True)
fig_interactive.show()

## 8. Cyclical Patterns

In [None]:
# Compute the cyclical pattern tables from the event-level data
patterns = temporal.extract_cyclical_patterns(merged_df, datetime_col=datetime_col)

# Text summary: five largest hourly values, then the 'daily' and
# 'weekend_vs_weekday' entries as returned
print("Cyclical Pattern Analysis:")
print("\nHourly Pattern (top 5 hours):", patterns['hourly'].sort_values(ascending=False).head(), sep="\n")
print("\nDaily Pattern:", patterns['daily'], sep="\n")
print("\nWeekend vs Weekday:", patterns['weekend_vs_weekday'], sep="\n")

# Static figure
fig = viz.plot_cyclical_patterns(patterns, title='Cyclical Patterns in PMU Disturbances', interactive=False)
viz.save_figure(fig, '02_07_cyclical_patterns')
plt.show()

# Interactive figure, saved under the same base name
fig_interactive = viz.plot_cyclical_patterns(patterns, title='Cyclical Patterns in PMU Disturbances (Interactive)', interactive=True)
viz.save_figure(fig_interactive, '02_07_cyclical_patterns', static=False, interactive=True)
fig_interactive.show()

## 9. Calendar Heatmap

In [None]:
# Calendar heatmap built from the event-level data; this figure is saved in
# interactive form only (static=False)
fig = viz.plot_calendar_heatmap(merged_df, datetime_col=datetime_col, title='PMU Disturbance Calendar Heatmap')
viz.save_figure(fig, '02_08_calendar_heatmap', static=False, interactive=True)
fig.show()

## 10. ACF and PACF Analysis

In [None]:
# Autocorrelation and partial autocorrelation of the daily series, requesting 40 lags
acf_values, pacf_values = temporal.calculate_acf_pacf(daily_counts, nlags=40)

# Plot both functions and save the figure
fig = viz.plot_acf_pacf(acf_values, pacf_values, title='Autocorrelation and Partial Autocorrelation Functions')
viz.save_figure(fig, '02_09_acf_pacf')
plt.show()

# Print the first five entries of each result for quick reference
print(
    "ACF and PACF Analysis:",
    f"  First 5 ACF values: {acf_values[:5]}",
    f"  First 5 PACF values: {pacf_values[:5]}",
    sep="\n",
)

## 11. Save Results

In [None]:
# Gather the headline numbers from the earlier cells, keyed by row label.
# Dict insertion order fixes the row order of the table.
summary_by_analysis = {
    'Time Series Stats': f"Mean: {daily_counts.mean():.2f}, Std: {daily_counts.std():.2f}",
    'Anomalies (Z-score)': anomalies_zscore.sum(),
    'Anomalies (IQR)': anomalies_iqr.sum(),
    'Anomalies (IForest)': anomalies_iforest.sum(),
    'Change Points': len(change_points),
    'Mean Inter-Arrival (hrs)': f"{inter_arrival.mean():.2f}",
}

# Two-column table: one row per analysis
temporal_results = pd.DataFrame({
    'Analysis': list(summary_by_analysis.keys()),
    'Value': list(summary_by_analysis.values()),
})

# Write the table to the path configured as config.TEMPORAL_RESULTS, without the index
temporal_results.to_csv(config.TEMPORAL_RESULTS, index=False)
print(f"\nTemporal analysis results saved to: {config.TEMPORAL_RESULTS}")
print("\nResults Summary:")
display(temporal_results)

## Summary

This notebook has:
- ✅ Decomposed time series into trend, seasonal, and residual components
- ✅ Detected anomalies using 3 different methods
- ✅ Analyzed inter-arrival times and tested for Poisson process
- ✅ Identified change points in disturbance rates
- ✅ Examined cyclical patterns (hourly, daily, monthly)
- ✅ Calculated rolling statistics for trend analysis
- ✅ Performed ACF/PACF analysis
- ✅ Generated 9 visualizations (static, interactive, or both, depending on the plot)

**Key Findings**: Review the outputs above for temporal insights

**Next Steps**: Proceed to Notebook 03 (Causality & Pattern Mining)