# PMU Disturbance Analysis - Operational & Maintenance Insights

This notebook analyzes PMU age, service patterns, and related operational data to look for links between equipment age or type and disturbance frequency.

In [None]:
import sys
sys.path.append('..')
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import warnings
warnings.filterwarnings('ignore')

from src import visualizations as viz
import config

# Apply the seaborn style named in config.PLOT_SETTINGS['style'].
sns.set_style(config.PLOT_SETTINGS['style'])
print("Libraries loaded!")

## 1. Load Data

In [None]:
# Load the PMU table (pmu_data.csv under OUTPUT_DIR/data) and the cleaned
# dataset referenced by config.CLEANED_DATA.
pmu_df = pd.read_csv(Path(config.OUTPUT_DIR) / 'data' / 'pmu_data.csv')
merged_df = pd.read_parquet(config.CLEANED_DATA)

# Parse the service dates. errors='coerce' turns unparseable values into NaT
# instead of raising, so later cells must tolerate missing dates.
date_cols = ['InService', 'OutService']
for col in date_cols:
    if col in pmu_df.columns:
        pmu_df[col] = pd.to_datetime(pmu_df[col], errors='coerce')

# Use the first datetime-like column of merged_df. 'datetime64' on its own
# only matches timezone-naive columns, so 'datetimetz' is listed as well;
# otherwise a tz-aware timestamp column would be overlooked and the
# hard-coded 'DateTime' fallback would be used instead.
datetime_cols = merged_df.select_dtypes(
    include=['datetime64', 'datetimetz']
).columns.tolist()
datetime_col = datetime_cols[0] if datetime_cols else 'DateTime'
print(f"Data loaded. Using datetime column: {datetime_col}")

## 2. PMU Age Analysis

In [None]:
# Derive each PMU's age in years from its InService date and summarize it.
current_date = pd.Timestamp.now()
if 'InService' not in pmu_df.columns:
    print("InService column not found")
else:
    # Whole elapsed days divided by the mean length of a year.
    elapsed = current_date - pmu_df['InService']
    pmu_df['Age_Years'] = elapsed.dt.days / 365.25

    ages = pmu_df['Age_Years']
    print("PMU Age Statistics:")
    print(f"  Mean age: {ages.mean():.2f} years")
    print(f"  Median age: {ages.median():.2f} years")
    print(f"  Min age: {ages.min():.2f} years")
    print(f"  Max age: {ages.max():.2f} years")

In [None]:
# Correlate age with disturbances.
# Count rows of merged_df per SectionID and attach the count to every PMU;
# PMUs with no matching rows get a count of 0 (left join + fillna).
disturbance_counts = merged_df.groupby('SectionID').size().reset_index(name='DisturbanceCount')
pmu_with_counts = pmu_df.merge(disturbance_counts, on='SectionID', how='left')
pmu_with_counts['DisturbanceCount'] = pmu_with_counts['DisturbanceCount'].fillna(0)

# Plot age vs disturbances (only possible when the age column was computed).
if 'Age_Years' in pmu_with_counts.columns:
    fig, ax = plt.subplots(figsize=(12, 6))
    ax.scatter(pmu_with_counts['Age_Years'], pmu_with_counts['DisturbanceCount'], alpha=0.6)
    ax.set_xlabel('PMU Age (years)')
    ax.set_ylabel('Disturbance Count')
    ax.set_title('PMU Age vs Disturbance Frequency', fontsize=14, fontweight='bold')
    ax.grid(alpha=0.3)

    # Add a linear trendline over the PMUs whose age is known.
    # np.polyfit raises on empty input and yields a rank-deficient,
    # meaningless fit when every age is identical, so require at least two
    # distinct ages before fitting.
    known_age = pmu_with_counts.dropna(subset=['Age_Years'])
    if known_age['Age_Years'].nunique() >= 2:
        z = np.polyfit(known_age['Age_Years'], known_age['DisturbanceCount'], 1)
        p = np.poly1d(z)
        sorted_ages = known_age['Age_Years'].sort_values()
        ax.plot(sorted_ages, p(sorted_ages), "r--", linewidth=2, label='Trend')
        # The legend is only drawn when there is a labelled trendline to show.
        ax.legend()
    else:
        print("Not enough PMUs with a known age to fit a trendline")

    plt.tight_layout()
    viz.save_figure(fig, '06_01_age_vs_disturbances')
    plt.show()

## 3. Bathtub Curve Analysis

In [None]:
# Create age bins for the bathtub curve and plot the average disturbance
# count per bin.
if 'Age_Years' in pmu_with_counts.columns:
    # include_lowest=True keeps PMUs whose age is exactly 0 (in service for
    # less than a full day) in the '0-2' bin; without it the first interval
    # is open on the left and those rows would be dropped as NaN.
    pmu_with_counts['Age_Bin'] = pd.cut(
        pmu_with_counts['Age_Years'],
        bins=[0, 2, 5, 10, 15, 100],
        labels=['0-2', '2-5', '5-10', '10-15', '15+'],
        include_lowest=True,
    )

    # Calculate the average disturbance count and PMU count per age group.
    # observed=False is spelled out so that empty age groups stay on the
    # curve (as gaps) regardless of the pandas version's default for
    # categorical groupers.
    bathtub_data = pmu_with_counts.groupby('Age_Bin', observed=False).agg({
        'DisturbanceCount': 'mean',
        'SectionID': 'count'
    }).rename(columns={'DisturbanceCount': 'Avg_Disturbances', 'SectionID': 'PMU_Count'})

    print("Failure Rate by Age Group (Bathtub Curve):")
    display(bathtub_data)

    # Plot the bathtub curve, one point per age group in bin order.
    fig, ax = plt.subplots(figsize=(12, 6))
    ax.plot(bathtub_data.index.astype(str), bathtub_data['Avg_Disturbances'],
            marker='o', linewidth=2, markersize=10, color='darkred')
    ax.set_xlabel('PMU Age Group (years)')
    ax.set_ylabel('Average Disturbances per PMU')
    ax.set_title('Bathtub Curve: Failure Rate vs PMU Age', fontsize=14, fontweight='bold')
    ax.grid(alpha=0.3)

    plt.tight_layout()
    viz.save_figure(fig, '06_02_bathtub_curve')
    plt.show()

## 4. PMU Type Comparison

In [None]:
# Find candidate type columns: any pmu_df column whose name contains "type"
# (case-insensitive). The first match is used for the comparison.
type_cols = [name for name in pmu_df.columns if 'type' in name.lower()]
if not type_cols:
    print("No PMU type column found")
else:
    type_col = type_cols[0]

    # Summarize disturbance counts per PMU type, plus the number of PMUs
    # (non-null SectionID values) in each type.
    type_comparison = pmu_with_counts.groupby(type_col).agg(
        Mean=('DisturbanceCount', 'mean'),
        Median=('DisturbanceCount', 'median'),
        Std=('DisturbanceCount', 'std'),
        Total=('DisturbanceCount', 'sum'),
        PMU_Count=('SectionID', 'count'),
    )

    print(f"Reliability Comparison by {type_col}:")
    display(type_comparison)

    # Box plot of the per-PMU disturbance counts, one box per type.
    fig, ax = plt.subplots(figsize=(12, 6))
    pmu_with_counts.boxplot(column='DisturbanceCount', by=type_col, ax=ax)
    ax.set_xlabel('PMU Type')
    ax.set_ylabel('Disturbance Count')
    ax.set_title('Disturbance Frequency by PMU Type')
    # Clear the figure-level title so only the axes title set above remains.
    plt.suptitle('')

    plt.tight_layout()
    viz.save_figure(fig, '06_03_type_comparison')
    plt.show()

## 5. Service Pattern Analysis

In [None]:
# Analyze OutService patterns: how long PMUs with a recorded OutService date
# stayed in service.
if 'OutService' in pmu_df.columns:
    # Take an explicit copy: a new column is added below, and assigning into
    # a filtered slice of pmu_df is chained assignment.
    out_of_service = pmu_df[pmu_df['OutService'].notna()].copy()
    print(f"PMUs with OutService records: {len(out_of_service)}")

    if len(out_of_service) > 0:
        if 'InService' in out_of_service.columns:
            # Calculate service duration in years from whole elapsed days.
            out_of_service['ServiceDuration_Years'] = (
                out_of_service['OutService'] - out_of_service['InService']
            ).dt.days / 365.25

            print("\nService Duration Statistics:")
            print(f"  Mean: {out_of_service['ServiceDuration_Years'].mean():.2f} years")
            print(f"  Median: {out_of_service['ServiceDuration_Years'].median():.2f} years")
        else:
            # Same tolerance for a missing InService column as the age cell.
            print("InService column not found; cannot compute service duration")
else:
    print("No OutService data available")

## 6. Save Results

In [None]:
# Save operational insights.
# 'Age_Years' only exists when the InService column was present, so export
# whichever of the expected columns are available instead of raising a
# KeyError at the very end of the notebook.
expected_columns = ['SectionID', 'Age_Years', 'DisturbanceCount']
available_columns = [c for c in expected_columns if c in pmu_with_counts.columns]
missing_columns = [c for c in expected_columns if c not in available_columns]
if missing_columns:
    print(f"Columns not available and omitted from results: {missing_columns}")

operational_results = pmu_with_counts[available_columns]
operational_results.to_csv(config.OPERATIONAL_RESULTS, index=False)
print(f"Operational results saved to: {config.OPERATIONAL_RESULTS}")

## Summary

- ✅ Analyzed PMU age and correlation with disturbances
- ✅ Created bathtub curve for failure rates
- ✅ Compared reliability across PMU types
- ✅ Analyzed service patterns

**Next**: Notebook 07 (Statistical Validation)