# Deliverable 2: AI Insight Extraction Framework

**Objective:** Build a structured framework that uses the Claude API to automatically extract actionable insights from operational data — trends, anomalies, correlations, facility comparisons, risk assessments, and executive summaries.

**What this notebook demonstrates:**
1. Loading and processing operational metrics data
2. Statistical analysis as input for AI interpretation
3. Six types of AI-generated insights using prompt engineering
4. Visualizations paired with AI narrative for each analysis type
5. Structured output suitable for automated reporting

---

## Setup

In [None]:
import sys
sys.path.insert(0, '../..')  # project root

import importlib.util
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from pathlib import Path

from shared.data_generation.generate import generate_operational_metrics
from shared.utils.processing import run_full_processing
from shared.utils.plotting import apply_portfolio_style, save_figure, COLORS

# Helper to import from digit-prefixed folders
def import_from(module_path, module_name):
    """Load a Python source file by path and return it as a module.

    Folder names that start with a digit (e.g. ``02_insight_extraction``) are
    not valid identifiers, so their contents cannot be reached with a normal
    ``import`` statement; this helper executes the file directly instead.

    Args:
        module_path: Location of the ``.py`` file to execute.
        module_name: Name assigned to the resulting module object.

    Returns:
        The executed module object.

    Raises:
        ImportError: If no import spec or loader can be derived from
            ``module_path`` (for example, an unrecognised file suffix).
    """
    spec = importlib.util.spec_from_file_location(module_name, module_path)
    # spec_from_file_location returns None instead of raising when it cannot
    # choose a loader; fail with a clear message rather than an AttributeError.
    if spec is None or spec.loader is None:
        raise ImportError(
            f'Cannot load module {module_name!r} from {module_path}'
        )
    mod = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(mod)
    return mod

# Notebook-wide pandas display preferences: show up to 20 columns and
# render floats with two decimal places.
for _option_name, _option_value in (
    ('display.max_columns', 20),
    ('display.float_format', '{:.2f}'.format),
):
    pd.set_option(_option_name, _option_value)
print('Setup complete.')

## 1. Load & Process Data

In [None]:
# Reuse the cached CSV when it exists; otherwise generate the dataset and
# persist it so later runs can load it from disk.
data_path = Path('../data/operational_metrics.csv')
if not data_path.exists():
    raw_df = generate_operational_metrics()
    from shared.data_generation.generate import save_data
    save_data(raw_df, '../data')
    print(f'Generated new data: {len(raw_df)} rows')
else:
    raw_df = pd.read_csv(data_path, parse_dates=['date'])
    print(f'Loaded existing data: {len(raw_df)} rows')

# Run the processing pipeline and report the shape of every table it returns
processed = run_full_processing(raw_df)
for name, df in processed.items():
    print(f'  {name}: {df.shape}')

## 2. Trend Analysis

We compute month-over-month changes and visualize trends, then ask Claude to interpret them.

In [None]:
trends = processed['trends']

# Mean month-over-month change per (facility, metric), sorted ascending so
# the lowest averages come first.
avg_mom = (
    trends.groupby(['facility', 'metric'])['mom_change']
    .mean()
    .reset_index(name='avg_mom_change_pct')
    .sort_values('avg_mom_change_pct')
)

fig = px.bar(
    avg_mom,
    x='metric',
    y='avg_mom_change_pct',
    color='facility',
    barmode='group',
    color_discrete_map=COLORS,
)
fig = apply_portfolio_style(fig, 'Average Month-over-Month Change by Metric')
fig.update_xaxes(tickangle=45)
fig.update_layout(height=500)
# Dashed zero line separates positive from negative average change
fig.add_hline(y=0, line_dash='dash', line_color='gray')
save_figure(fig, '../outputs/trend_analysis')
fig.show()

In [None]:
# Trend volatility: standard deviation of the MoM changes for each
# (facility, metric) pair — larger values mean less consistent movement.
vol = (
    trends.groupby(['facility', 'metric'])['mom_change']
    .std()
    .rename('mom_volatility')
    .reset_index()
)

# Plot the most volatile combinations first
vol_ranked = vol.sort_values('mom_volatility', ascending=False)
fig = px.bar(
    vol_ranked,
    x='metric',
    y='mom_volatility',
    color='facility',
    barmode='group',
    color_discrete_map=COLORS,
)
fig = apply_portfolio_style(fig, 'Trend Volatility (MoM Change Std Dev)')
fig.update_xaxes(tickangle=45)
fig.update_layout(height=500)
save_figure(fig, '../outputs/trend_volatility')
fig.show()

In [None]:
# AI Trend Analysis
# Loads the extractor module by file path and sends it the last 200 trend
# rows as CSV text. Any failure is reported instead of stopping the notebook.
# NOTE(review): tail(200) takes the final rows in whatever order `trends` is
# stored — confirm that this slice covers the facilities/metrics intended.
try:
    extractor = import_from('../../02_insight_extraction/src/extractor.py', 'extractor')
    recent_trends_csv = trends.tail(200).to_csv(index=False)
    trend_insight = extractor.analyze_trends(recent_trends_csv)
    print('=== AI TREND ANALYSIS ===')
    print(trend_insight)
except Exception as err:
    print(f'AI analysis unavailable: {err}')
    print('Set ANTHROPIC_API_KEY in .env to enable.')

## 3. Anomaly Detection & Assessment

Z-score-based anomaly detection flags unusual data points. Claude then assesses their business significance.

In [None]:
anomalies = processed['anomalies']
print(f'Total anomalies detected: {len(anomalies)}')

# Frequency breakdowns: one table per grouping column
for heading, column in (('\nBy facility:', 'facility'), ('\nBy metric:', 'metric')):
    print(heading)
    print(anomalies[column].value_counts())

# Ten rows with the largest absolute z-score (kept as the cell's last
# expression so the notebook displays the table)
anomalies.sort_values('z_score', key=lambda scores: scores.abs(), ascending=False).head(10)

In [None]:
# Anomaly timeline
# Scatter of z-scores over time; marker size is the absolute z-score.
# The plot is skipped entirely when no anomalies were flagged.
if len(anomalies) > 0:
    marker_sizes = anomalies['z_score'].abs()
    fig = px.scatter(
        anomalies,
        x='date',
        y='z_score',
        color='facility',
        symbol='metric',
        size=marker_sizes,
        hover_data=['metric', 'value', 'unit'],
        color_discrete_map=COLORS,
    )

    # Symmetric dashed reference lines; only the positive line carries a label
    severity_bands = (
        (2, 'orange', 'Warning'),
        (3, 'red', 'Critical'),
    )
    for level, band_color, label in severity_bands:
        fig.add_hline(y=level, line_dash='dash', line_color=band_color, annotation_text=label)
        fig.add_hline(y=-level, line_dash='dash', line_color=band_color)

    fig = apply_portfolio_style(fig, 'Anomaly Timeline with Severity Bands')
    fig.update_layout(height=500)
    save_figure(fig, '../outputs/anomaly_timeline')
    fig.show()

In [None]:
# AI Anomaly Assessment
# Sends every flagged anomaly row (as CSV text) to the extractor loaded in the
# trend-analysis cell; failures are reported rather than raised.
try:
    anomalies_csv = anomalies.to_csv(index=False)
    anomaly_insight = extractor.analyze_anomalies(anomalies_csv)
    print('=== AI ANOMALY ASSESSMENT ===')
    print(anomaly_insight)
except Exception as err:
    print(f'AI analysis unavailable: {err}')

## 4. Cross-Metric Correlations

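Identify which metrics move together — surfacing candidate causal relationships and operational levers. Correlation alone does not establish causation, so the AI analysis is asked to separate plausible causal links from spurious ones.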

In [None]:
corr = processed['correlations']

# Heatmap for each facility
# `corr` is read as long-format rows (facility, metric_1, metric_2,
# correlation); each facility's rows are expanded into a symmetric matrix.
for facility in raw_df['facility'].unique():
    fac_corr = corr[corr['facility'] == facility]
    metrics = sorted(set(fac_corr['metric_1']) | set(fac_corr['metric_2']))
    if not metrics:
        # No pairwise rows for this facility, so there is nothing to draw
        print(f'No correlation data for {facility}; skipping heatmap.')
        continue

    # Start from NaN with a unit diagonal: a pair that is absent from `corr`
    # stays NaN instead of being displayed as a perfect correlation of 1.00.
    values = np.full((len(metrics), len(metrics)), np.nan)
    np.fill_diagonal(values, 1.0)
    matrix = pd.DataFrame(values, index=metrics, columns=metrics)

    # Mirror each pair across the diagonal to keep the matrix symmetric
    pairs = zip(fac_corr['metric_1'], fac_corr['metric_2'], fac_corr['correlation'])
    for metric_a, metric_b, r_value in pairs:
        matrix.loc[metric_a, metric_b] = r_value
        matrix.loc[metric_b, metric_a] = r_value

    fig = px.imshow(
        matrix, text_auto='.2f', color_continuous_scale='RdBu_r',
        zmin=-1, zmax=1,
    )
    fig = apply_portfolio_style(fig, f'Metric Correlation Matrix — {facility}')
    fig.update_layout(height=500, width=600)
    save_figure(fig, f'../outputs/correlation_{facility.lower().replace(" ", "_")}')
    fig.show()

In [None]:
# Strongest correlations across all facilities
# Keep pairs whose absolute correlation exceeds 0.5, strongest first.
is_strong = corr['correlation'].abs() > 0.5
strong = corr.loc[is_strong].sort_values(
    'correlation', key=lambda r: r.abs(), ascending=False
)
print(f'Strong correlations (|r| > 0.5): {len(strong)}')
strong.head(15)

In [None]:
# AI Correlation Analysis
# Only calls the extractor when at least one strong pair exists; failures are
# reported rather than raised.
try:
    if len(strong) == 0:
        print('No strong correlations to analyze.')
    else:
        strong_csv = strong.to_csv(index=False)
        corr_insight = extractor.analyze_correlations(strong_csv)
        print('=== AI CORRELATION ANALYSIS ===')
        print(corr_insight)
except Exception as err:
    print(f'AI analysis unavailable: {err}')

## 5. Facility Comparison

In [None]:
summary = processed['summary']

# Radar chart comparing facilities
# Normalize metrics to 0-100 scale for comparison
radar_data = summary.pivot_table(index='metric', columns='facility', values='mean')


def _scale_to_percent(row):
    """Min-max scale one metric's per-facility means onto a 0-100 range.

    When every facility has the same mean the spread is zero and the plain
    min-max formula divides by zero (yielding NaN). Such rows are placed at
    the midpoint (50) instead. Missing values remain missing.
    """
    low = row.min()
    span = row.max() - low
    if pd.isna(span):
        # Nothing to scale: the row holds no values
        return row
    if span == 0:
        return row.where(row.isna(), 50.0)
    return (row - low) / span * 100


radar_norm = radar_data.apply(_scale_to_percent, axis=1)

fig = go.Figure()
for facility in radar_norm.columns:
    scores = radar_norm[facility].tolist()
    axis_labels = radar_norm.index.tolist()
    fig.add_trace(go.Scatterpolar(
        # Repeat the first point so the polygon closes; slicing (rather than
        # indexing element 0) also tolerates an empty table.
        r=scores + scores[:1],
        theta=axis_labels + axis_labels[:1],
        fill='toself', name=facility, opacity=0.6,
        line=dict(color=COLORS.get(facility)),
    ))

fig = apply_portfolio_style(fig, 'Facility Performance Radar (Normalized)')
fig.update_layout(height=550, polar=dict(radialaxis=dict(visible=True, range=[0, 100])))
save_figure(fig, '../outputs/facility_radar')
fig.show()

In [None]:
# AI Facility Comparison
# Passes the summary table (as CSV text) to the extractor; failures are
# reported rather than raised.
try:
    summary_csv = summary.to_csv(index=False)
    comparison_insight = extractor.compare_facilities(summary_csv)
    print('=== AI FACILITY COMPARISON ===')
    print(comparison_insight)
except Exception as err:
    print(f'AI analysis unavailable: {err}')

## 6. Risk Assessment & Executive Summary

In [None]:
# AI Risk Assessment
# Inputs, in order: all anomaly rows, then the last 100 trend rows — both
# serialized as CSV text. Failures are reported rather than raised.
try:
    risk_inputs = (
        anomalies.to_csv(index=False),
        processed['trends'].tail(100).to_csv(index=False),
    )
    risk_insight = extractor.assess_risks(*risk_inputs)
    print('=== AI RISK ASSESSMENT ===')
    print(risk_insight)
except Exception as err:
    print(f'AI analysis unavailable: {err}')

In [None]:
# AI Executive Summary
# Inputs, in order: summary table, last 200 trend rows, all anomaly rows —
# each serialized as CSV text. Failures are reported rather than raised.
try:
    summary_csv = summary.to_csv(index=False)
    recent_trends_csv = processed['trends'].tail(200).to_csv(index=False)
    anomalies_csv = anomalies.to_csv(index=False)
    exec_summary = extractor.generate_executive_summary(
        summary_csv, recent_trends_csv, anomalies_csv
    )
    print('=== AI EXECUTIVE SUMMARY ===')
    print(exec_summary)
except Exception as err:
    print(f'AI analysis unavailable: {err}')

## 7. Save All Outputs

In [None]:
# Persist every processed table, plus the strong-correlation subset, as CSV
output_dir = Path('../outputs')
output_dir.mkdir(exist_ok=True)

# (key in `processed`, output file name)
table_exports = (
    ('summary', 'summary_stats.csv'),
    ('trends', 'trends.csv'),
    ('anomalies', 'anomalies.csv'),
    ('correlations', 'correlations.csv'),
)
for table_key, file_name in table_exports:
    processed[table_key].to_csv(output_dir / file_name, index=False)
strong.to_csv(output_dir / 'strong_correlations.csv', index=False)

print('All outputs saved to 02_insight_extraction/outputs/')
# List the directory contents in name order
for f in sorted(output_dir.iterdir()):
    print(f'  {f.name}')

---

## Summary

| Insight Type | Statistical Input | AI Analysis |
|---|---|---|
| Trend Analysis | MoM change computation, volatility metrics | Pattern identification, action recommendations |
| Anomaly Detection | Z-score flagging (threshold = 2.0) | Business impact assessment, investigation priorities |
| Correlation Analysis | Pearson cross-metric correlations | Causal vs. spurious identification, operational levers |
| Facility Comparison | Per-facility summary statistics (min-max normalized for the radar chart) | Rankings, best practice transfer recommendations |
| Risk Assessment | Combined anomaly + trend data | Risk ranking, early warning indicators, mitigation strategies |
| Executive Summary | Summary statistics, recent trends, and detected anomalies | Health assessment, wins/concerns, 30-day and quarterly actions |

**Next:** See [03_strategic_dashboard](../../03_strategic_dashboard/notebooks/) for the interactive dashboard.