# Narrative Gravity Wells - Exploratory Data Analysis

Study: priority3_test

In [None]:
# Notebook environment setup: imports, warning policy, plot styling, banner.

# Standard library
import warnings

# Numerical / statistical stack
import numpy as np
import pandas as pd
from scipy import stats

# Static plotting
import matplotlib.pyplot as plt
import seaborn as sns

# Interactive plotting
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Suppress every warning category for the remainder of the session.
warnings.filterwarnings('ignore')

# Set style
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")

# Banner: title line followed by a 50-character rule.
print("📊 Narrative Gravity Wells Analysis", "=" * 50, sep="\n")

In [None]:
# Load dataset
# Reads the study's feather file into the notebook-wide `data` frame and
# prints a short summary of what was loaded.
print("Loading dataset...")
data = pd.read_feather('../data/priority3_test.feather')

# Size of the loaded table.
row_count, column_count = data.shape
print(f"✅ Loaded {row_count} observations with {column_count} variables")

# Span of the experiment dates.
experiment_dates = data['exp_date']
print(f"Date range: {experiment_dates.min()} to {experiment_dates.max()}")

# Distinct values of the two main grouping columns.
for label, column in (("Frameworks", 'framework'), ("LLM Models", 'llm_model')):
    print(f"{label}: {data[column].unique()}")

## Dataset Overview

In [None]:
# Dataset overview: structure, descriptive statistics, and missing-data table.

# Dataset structure
print("\nDataset Info:")
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() would emit a stray "None" line.
data.info()

# Basic statistics
print("\nDescriptive Statistics:")
print(data.describe())

# Missing data analysis
print("\nMissing Data Analysis:")
missing = data.isnull().sum()
missing_pct = (missing / len(data)) * 100
missing_df = pd.DataFrame({
    'Missing Count': missing,
    'Missing %': missing_pct,
}).sort_values('Missing %', ascending=False)

# Only columns that actually have gaps are reported; say so explicitly when
# there are none instead of printing an empty frame.
columns_with_gaps = missing_df[missing_df['Missing Count'] > 0]
if columns_with_gaps.empty:
    print("No missing values found.")
else:
    print(columns_with_gaps)

## Reliability Analysis

In [None]:
# Reliability analysis: per-framework CV summary plus the share of analyses
# whose CV is at or below the target threshold.

# Reliability by framework
print("\nReliability Analysis by Framework:")
reliability_stats = data.groupby('framework')['cv'].agg([
    'count', 'mean', 'std', 'min', 'max',
]).round(4)
print(reliability_stats)

# Reliability target analysis
target_cv = 0.20
reliable_analyses = data[data['cv'] <= target_cv]

# Rows without a CV value cannot be judged either way, so they are excluded
# from the denominator.
cv_observed = data.dropna(subset=['cv'])
if cv_observed.empty:
    # Avoid a ZeroDivisionError when no CV values are available at all.
    reliability_rate = float('nan')
    print(f"\nReliability Rate (CV ≤ {target_cv}): n/a (no CV values available)")
else:
    reliability_rate = len(reliable_analyses) / len(cv_observed) * 100
    print(f"\nReliability Rate (CV ≤ {target_cv}): {reliability_rate:.1f}%")

# Framework reliability comparison
# Uses the same definition as the overall rate: missing CV values are dropped
# before computing the share, rather than being counted as "not reliable".
# A framework with no CV values at all yields NaN.
framework_reliability = (
    data.groupby('framework')['cv']
    .apply(lambda cv_values: (cv_values.dropna() <= target_cv).mean() * 100)
    .round(1)
)
print("\nReliability Rate by Framework:")
print(framework_reliability.sort_values(ascending=False))

## Framework Performance

In [None]:
# Framework performance: aggregate reliability / cost / timing per framework
# and test whether mean CV differs between frameworks (one-way ANOVA).
from scipy import stats

# Framework performance comparison
print("\nFramework Performance Analysis:")
framework_stats = data.groupby('framework').agg({
    'cv': ['mean', 'std', 'count'],
    'icc': ['mean', 'std'],
    'cost': ['mean', 'sum'],
    'process_time_sec': ['mean', 'sum'],
}).round(4)

# Flatten column names: ('cv', 'mean') -> 'cv_mean'
framework_stats.columns = ['_'.join(col).strip() for col in framework_stats.columns]
print(framework_stats)

# Statistical significance testing
# Missing framework labels are dropped first: comparing against NaN never
# matches any row, which would create an empty group and disable the test.
frameworks = data['framework'].dropna().unique()
cv_by_framework = [data[data['framework'] == f]['cv'].dropna() for f in frameworks]

# The test is only run with at least two frameworks, each having at least two
# CV observations.
if len(cv_by_framework) > 1 and all(len(group) > 1 for group in cv_by_framework):
    f_stat, p_value = stats.f_oneway(*cv_by_framework)
    print(f"\nFramework Effect on Reliability:")
    print(f"F-statistic: {f_stat:.4f}, p-value: {p_value:.4f}")

    if pd.isna(p_value):
        # A NaN p-value means the test is undefined for this data; it must
        # not be reported as "no effect".
        print("⚠️ Test result undefined (p-value is NaN)")
    elif p_value < 0.05:
        print("✅ Significant framework effect detected")
    else:
        print("❌ No significant framework effect")
else:
    print("\n⚠️ Framework effect test skipped: "
          "need at least 2 frameworks with 2+ CV values each")

## Visualization

In [None]:
# Comprehensive visualization suite
# Draws a 2x3 grid of panels on one figure, saves it to
# 'comprehensive_analysis.png', and shows it. Each panel is drawn only when
# the columns it needs are present in `data`.
import matplotlib.pyplot as plt
import seaborn as sns

# CV threshold drawn as the red dashed reference line in panels 1 and 6.
cv_threshold = 0.20

# Set up the plotting environment
plt.figure(figsize=(15, 12))

# 1. Reliability by Framework
plt.subplot(2, 3, 1)
if 'cv' in data.columns and data['cv'].notna().any():
    sns.boxplot(data=data, x='framework', y='cv')
    plt.axhline(y=cv_threshold, color='red', linestyle='--', alpha=0.7, label='Target Threshold')
    plt.title('Reliability by Framework\n(Lower CV = Better)')
    plt.xticks(rotation=45)
    plt.legend()

# 2. Model Performance
plt.subplot(2, 3, 2)
if 'llm_model' in data.columns and 'cv' in data.columns:
    sns.violinplot(data=data, x='llm_model', y='cv')
    plt.title('Model Performance Comparison')
    plt.xticks(rotation=45)

# 3. Cost vs Processing Time
plt.subplot(2, 3, 3)
if 'cost' in data.columns and 'process_time_sec' in data.columns:
    sns.scatterplot(data=data, x='process_time_sec', y='cost', hue='llm_model', alpha=0.7)
    plt.title('Cost vs Processing Time')
    plt.xlabel('Processing Time (seconds)')
    plt.ylabel('Cost (USD)')

# 4. Framework Usage Over Time
plt.subplot(2, 3, 4)
if 'exp_date' in data.columns:
    data_time = data.copy()
    data_time['month'] = pd.to_datetime(data_time['exp_date']).dt.to_period('M')
    framework_timeline = data_time.groupby(['month', 'framework']).size().reset_index(name='count')

    # Put every framework on one shared, chronologically sorted month index.
    # The x values are strings, and matplotlib orders string categories by
    # first appearance; plotting each framework against only its own months
    # can therefore scramble the axis when frameworks cover different months.
    # Months in which a framework has no rows are shown as a count of 0.
    usage_by_month = (
        framework_timeline
        .pivot(index='month', columns='framework', values='count')
        .fillna(0)
        .sort_index()
    )
    month_labels = usage_by_month.index.astype(str).tolist()
    for framework in usage_by_month.columns:
        plt.plot(month_labels, usage_by_month[framework].to_numpy(), marker='o', label=framework)

    plt.title('Framework Usage Timeline')
    plt.xticks(rotation=45)
    plt.legend()

# 5. Well Scores Distribution (if available)
well_columns = [col for col in data.columns if col.startswith('well_')]
if well_columns:
    plt.subplot(2, 3, 5)
    # melt() stacks the well_* columns into 'variable' / 'value' pairs so each
    # column gets its own box.
    well_data = data[well_columns].melt()
    sns.boxplot(data=well_data, x='variable', y='value')
    plt.title('Well Scores Distribution')
    plt.xticks(rotation=45)
    plt.ylabel('Score')

# 6. Reliability Improvement Over Time
plt.subplot(2, 3, 6)
if 'exp_date' in data.columns and 'cv' in data.columns:
    data_time = data.copy()
    data_time['week'] = pd.to_datetime(data_time['exp_date']).dt.to_period('W')
    # groupby sorts its keys, so the weekly series is already chronological.
    weekly_cv = data_time.groupby('week')['cv'].mean()

    plt.plot(weekly_cv.index.astype(str), weekly_cv.values, marker='o', linewidth=2)
    plt.axhline(y=cv_threshold, color='red', linestyle='--', alpha=0.7, label='Target Threshold')
    plt.title('Reliability Improvement Over Time')
    plt.xticks(rotation=45)
    plt.ylabel('Mean CV')
    plt.legend()

plt.tight_layout()
plt.savefig('comprehensive_analysis.png', dpi=300, bbox_inches='tight')
plt.show()

print("\n✅ Visualization suite completed!")
print("📊 Saved: comprehensive_analysis.png")