# Network Incident Analysis Pipeline

This notebook processes network incident data and generates pipeline metrics for operational monitoring.

In [None]:
# Import required libraries
import pandas as pd
import numpy as np
from datetime import datetime, timezone
import logging
import os
import tempfile

# Notebook-wide logger; the root logger is configured to emit INFO and above.
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)

In [None]:
# Import the ETL functions from the scripts module
import sys

# Location of the ETL module, relative to the notebook's working directory.
scripts_dir = '../scripts'

# Only register the directory once: this cell is often re-run, and an
# unconditional append would add a duplicate sys.path entry on every run.
if scripts_dir not in sys.path:
    sys.path.append(scripts_dir)

try:
    from network_incident_etl import transform_incident_frame, log_pipeline_metrics
    print("✅ Successfully imported ETL functions")
except ImportError as e:
    # Best-effort by design: report the problem and let later cells handle the
    # missing functions through their own error handling.
    print(f"❌ Import error: {e}")
    print("Make sure the network_incident_etl.py file exists in the scripts directory")

In [None]:
# Build a small in-memory set of incident records for exercising the pipeline.
# Field order here is the column order of the resulting DataFrame.
_incident_fields = (
    'opened_at',
    'u_resolved',
    'assignment_group',
    'short_description',
    'u_ci_type',
    'priority',
    'incident_state',
)

# One tuple per incident, values aligned with _incident_fields.
# The third record has no resolution timestamp (still in progress).
_incident_rows = [
    ('2025-04-01T09:00:00Z', '2025-04-01T11:30:00Z', 'Network Operations',
     'WiFi connection issues in building A', 'WiFi/AP', '2 - High', 'Resolved'),
    ('2025-04-02T14:00:00Z', '2025-04-02T16:45:00Z', 'Network Support',
     'VPN tunnel down for remote users', 'VPN', '1 - Critical', 'Resolved'),
    ('2025-04-03T10:30:00Z', None, 'Network Operations',
     'Switch port 5 down in data center', 'Network Infrastructure', '1 - Critical', 'In Progress'),
    ('2025-04-04T08:15:00Z', '2025-04-04T09:30:00Z', 'Application Support',
     'DNS resolution slow for internal apps', 'DNS', '3 - Moderate', 'Resolved'),
    ('2025-04-05T11:00:00Z', '2025-04-05T12:15:00Z', 'Network Operations',
     'Performance issues with ClearCase build system', 'Application', '2 - High', 'Resolved'),
]

# Same list-of-dicts shape as before, assembled from the table above.
sample_data = [dict(zip(_incident_fields, row)) for row in _incident_rows]

# Load the records into a DataFrame and show a short preview.
raw_df = pd.DataFrame(sample_data)
record_count = len(raw_df)
print(f"✅ Created sample dataset with {record_count} records")
print("\nSample data preview:")
print(raw_df.head())

In [None]:
# Transform the incident data
# On success tidy_df is the output of transform_incident_frame. On any failure
# (including the ETL import having failed earlier) tidy_df becomes a copy of
# raw_df padded with placeholder columns so the remaining cells can still run.
try:
    tidy_df = transform_incident_frame(raw_df)
    print(f"✅ Successfully transformed {len(tidy_df)} records")
    print("\nTransformed data preview:")
    print(tidy_df[['short_description', 'patternCategory', 'resolutionTimeHrs', 'isActive', 'userImpactEstimate']].head())
except Exception as e:
    print(f"❌ Error during transformation: {e}")
    # Create a fallback DataFrame if transformation fails.
    # NOTE: these are constant placeholders, not values derived from the data,
    # so any statistics computed from the fallback frame are not meaningful.
    tidy_df = raw_df.copy()
    tidy_df['patternCategory'] = 'Other_Network'
    tidy_df['resolutionTimeHrs'] = 0
    tidy_df['isActive'] = False
    tidy_df['userImpactEstimate'] = 10
    # The summary cell reads 'isHighImpact'; without this column the fallback
    # path ended in a KeyError there.
    tidy_df['isHighImpact'] = False

In [None]:
# Metrics are written to a CSV inside the OS temporary directory, so the path
# is valid on Windows as well as POSIX systems.
temp_dir = tempfile.gettempdir()
csv_fallback_path = os.path.join(temp_dir, 'ops_metrics.csv')

print(f"📁 Using fallback CSV path: {csv_fallback_path}")

# engine=None is passed so that, per the original author's note, the CSV
# fallback is used (behaviour of log_pipeline_metrics is not visible here).
# Any failure, including reading the CSV back, is reported and then ignored so
# the notebook keeps running.
try:
    log_pipeline_metrics(raw_df, tidy_df, engine=None, csv_fallback=csv_fallback_path)
    print("✅ Pipeline metrics logged successfully")

    if not os.path.exists(csv_fallback_path):
        # Nothing on disk at the expected location.
        print("⚠️  CSV file not created - metrics may have been logged to console only")
    else:
        print(f"📊 Metrics saved to: {csv_fallback_path}")
        # Load the file back and print it as a plain table.
        metrics_df = pd.read_csv(csv_fallback_path)
        print("\nPipeline Metrics:")
        print(metrics_df.to_string(index=False))

except Exception as err:
    print(f"❌ Error logging metrics: {err}")
    print("Continuing without metrics logging...")

In [None]:
# Display summary statistics
# Every column read below is checked first. The fallback frame built when the
# transform fails does not contain 'isHighImpact', and an unguarded lookup
# aborted the whole summary with a KeyError.
print("📊 PIPELINE SUMMARY")
print("=" * 50)
print(f"Raw records: {len(raw_df)}")
print(f"Transformed records: {len(tidy_df)}")

# Boolean flag columns: the sum is the number of True rows.
for label, column in (("Active incidents", 'isActive'), ("High impact incidents", 'isHighImpact')):
    if column in tidy_df.columns:
        print(f"{label}: {tidy_df[column].sum()}")
    else:
        print(f"{label}: n/a ('{column}' column not available)")

if 'patternCategory' in tidy_df.columns:
    print("\n📈 Pattern Categories:")
    pattern_counts = tidy_df['patternCategory'].value_counts()
    for pattern, count in pattern_counts.items():
        print(f"  {pattern}: {count}")

if 'resolutionTimeHrs' in tidy_df.columns:
    # Only rows with a positive resolution time contribute to the average;
    # rows with 0 or a missing value are excluded by the comparison.
    resolved_incidents = tidy_df[tidy_df['resolutionTimeHrs'] > 0]
    if len(resolved_incidents) > 0:
        avg_resolution = resolved_incidents['resolutionTimeHrs'].mean()
        print(f"\n⏱️  Average resolution time: {avg_resolution:.1f} hours")

print("\n✅ Pipeline execution complete!")