In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import ipywidgets as widgets
from IPython.display import display
import numpy as np

sns.set_style("whitegrid")
plt.rcParams['figure.figsize'] = (16, 10)
%matplotlib inline

## 1. Load Data

In [None]:
# Read every processed input table in one pass; the order of the paths below
# matches the order of the names on the left-hand side.
hierarchy, numeric_values, clusters, features, summation = (
    pd.read_csv(csv_path)
    for csv_path in (
        "../data/processed/cord_hierarchy.csv",
        "../data/processed/cord_numeric_values.csv",
        "../data/processed/cluster_assignments_kmeans.csv",
        "../data/processed/graph_structural_features.csv",
        "../data/processed/summation_test_results.csv",
    )
)

# Distinct khipu IDs present in the hierarchy table, in ascending order
khipu_list = sorted(pd.unique(hierarchy['KHIPU_ID']))
print(f"✓ Loaded data for {len(khipu_list)} khipus")

## 2. Interactive Khipu Selector

In [None]:
# Dropdown used to pick the khipu to inspect; it starts on the first ID of
# the sorted khipu list.
khipu_dropdown = widgets.Dropdown(
    description='Khipu ID:',
    options=khipu_list,
    value=khipu_list[0],
    style=dict(description_width='initial'),
)

def _show_placeholder(ax, message):
    """Fill an otherwise empty panel with a centered text message."""
    ax.text(0.5, 0.5, message, ha='center', va='center', fontsize=14)
    ax.set_xlim(0, 1)
    ax.set_ylim(0, 1)


def _plot_cluster_comparison(ax, feat, cluster_features, cluster_id):
    """Draw paired bars: one khipu's features next to its cluster's means.

    Returns True when bars were drawn, False when there was nothing to compare.
    """
    # (feature column, tick label, display scale). 'has_numeric' is scaled by
    # 100 so the bar agrees with its 'Numeric %' tick label, mirroring how the
    # text summary prints the same value as a percentage.
    feature_specs = [
        ('num_nodes', 'Size', 1),
        ('depth', 'Depth', 1),
        ('avg_branching', 'Branching', 1),
        ('has_numeric', 'Numeric %', 100),
    ]
    present = [
        spec for spec in feature_specs
        if spec[0] in feat.index and spec[0] in cluster_features.columns
    ]
    if not present or len(cluster_features) == 0:
        return False

    khipu_vals = [feat[col] * scale for col, _, scale in present]
    cluster_means = [cluster_features[col].mean() * scale for col, _, scale in present]

    # Bar positions and tick labels are both derived from the features that
    # are actually present, so they always match the number of bar heights.
    x = np.arange(len(present))
    width = 0.35

    ax.bar(x - width/2, khipu_vals, width, label='This Khipu', color='steelblue', alpha=0.7)
    ax.bar(x + width/2, cluster_means, width, label=f'Cluster {cluster_id} Mean', color='coral', alpha=0.7)

    ax.set_ylabel('Value', fontsize=12)
    ax.set_title('Comparison to Cluster Centroid', fontsize=13, fontweight='bold')
    ax.set_xticks(x)
    ax.set_xticklabels([label for _, label, _ in present], rotation=45, ha='right')
    ax.legend()
    ax.grid(True, alpha=0.3)
    return True


def view_khipu(khipu_id):
    """Display a text summary, a 2x2 figure and a cord table for one khipu.

    Reads the module-level tables ``hierarchy``, ``numeric_values``,
    ``clusters``, ``features`` and ``summation``; nothing is modified.

    Parameters
    ----------
    khipu_id
        Value matched against ``hierarchy['KHIPU_ID']`` and against the
        ``khipu_id`` column of the other tables.

    Returns
    -------
    None
        Output is printed, plotted with matplotlib and shown via ``display``.
        A khipu without a cluster row is still shown (cluster reported as
        'Unassigned'); only a khipu with neither cords nor a cluster row
        produces the "No data found" message and nothing else.
    """
    # Rows belonging to this khipu in each table
    khipu_cords = hierarchy[hierarchy['KHIPU_ID'] == khipu_id]
    khipu_values = numeric_values[numeric_values['khipu_id'] == khipu_id]
    cluster_info = clusters[clusters['khipu_id'] == khipu_id]
    feature_info = features[features['khipu_id'] == khipu_id]
    summation_info = summation[summation['khipu_id'] == khipu_id]

    # Left join keeps every cord; cords without a value get NaN
    khipu_data = khipu_cords.merge(
        khipu_values[['cord_id', 'numeric_value']],
        left_on='CORD_ID',
        right_on='cord_id',
        how='left'
    )

    if khipu_data.empty and cluster_info.empty:
        print(f"No data found for khipu {khipu_id}")
        return

    # A missing cluster row only disables the cluster comparison panel; the
    # rest of the report is still meaningful.
    cluster_id = cluster_info.iloc[0]['cluster'] if len(cluster_info) > 0 else None
    has_cluster = cluster_id is not None and pd.notna(cluster_id)
    feat = feature_info.iloc[0] if len(feature_info) > 0 else None

    # === SECTION 1: Overview ===
    print("="*80)
    print(f"KHIPU {khipu_id} - DETAILED ANALYSIS")
    print("="*80)
    print(f"\nCluster: {cluster_id if has_cluster else 'Unassigned'}")

    if len(khipu_data) > 0 and 'PROVENANCE' in khipu_data.columns:
        prov = khipu_data.iloc[0]['PROVENANCE']
        print(f"Provenance: {prov if pd.notna(prov) else 'Unknown'}")

    total_cords = len(khipu_data)
    print(f"\nTotal Cords: {total_cords}")

    # Level distribution
    level_counts = khipu_data['CORD_LEVEL'].value_counts().sort_index()
    print("\nHierarchy Levels:")
    for level, count in level_counts.items():
        print(f"  Level {level}: {count} cords")

    # === SECTION 2: Numeric Analysis ===
    print("\n" + "-"*80)
    print("NUMERIC VALUES")
    print("-"*80)

    values = khipu_data['numeric_value'].dropna()
    cords_with_values = len(values)
    # Guard the percentage against a khipu that has no cords at all
    coverage_pct = cords_with_values / total_cords * 100 if total_cords else 0.0
    print(f"Cords with numeric values: {cords_with_values}/{total_cords} ({coverage_pct:.1f}%)")

    if cords_with_values > 0:
        print(f"\nValue range: {values.min():.0f} - {values.max():.0f}")
        print(f"Mean: {values.mean():.1f}, Median: {values.median():.1f}")
        print(f"Total sum: {values.sum():.0f}")

    # === SECTION 3: Summation Testing ===
    if len(summation_info) > 0:
        print("\n" + "-"*80)
        print("SUMMATION PATTERNS")
        print("-"*80)

        summ = summation_info.iloc[0]
        has_sum = summ['has_pendant_summation']
        match_rate = summ['pendant_match_rate']

        # NaN is truthy in Python, so a missing flag must be excluded explicitly
        sum_detected = bool(pd.notna(has_sum) and has_sum)
        print(f"Has summation pattern: {'YES' if sum_detected else 'NO'}")
        print(f"Match rate: {match_rate:.1%}")

        if 'num_white_boundaries' in summ.index:
            white_bounds = summ['num_white_boundaries']
            print(f"White boundary markers: {white_bounds}")

    # === SECTION 4: Structural Features ===
    if feat is not None:
        print("\n" + "-"*80)
        print("STRUCTURAL FEATURES")
        print("-"*80)

        print(f"Size (nodes): {feat['num_nodes']:.0f}")
        print(f"Depth: {feat['depth']:.0f} levels")
        print(f"Average branching factor: {feat['avg_branching']:.2f}")
        print(f"Numeric coverage: {feat['has_numeric']*100:.1f}%")

        if 'density' in feat.index:
            print(f"Graph density: {feat['density']:.3f}")

    # === SECTION 5: Visualizations ===
    fig, axes = plt.subplots(2, 2, figsize=(16, 12))
    fig.suptitle(f'Khipu {khipu_id} - Visual Analysis', fontsize=16, fontweight='bold')

    # Plot 1: Hierarchy structure
    ax1 = axes[0, 0]
    if len(level_counts) > 0:
        ax1.bar(level_counts.index, level_counts.values, color='steelblue', alpha=0.7)
        ax1.set_xlabel('Hierarchy Level', fontsize=12)
        ax1.set_ylabel('Number of Cords', fontsize=12)
        ax1.set_title('Cord Distribution by Level', fontsize=13, fontweight='bold')
        ax1.grid(True, alpha=0.3)
    else:
        _show_placeholder(ax1, 'No hierarchy levels')

    # Plot 2: Numeric values distribution
    ax2 = axes[0, 1]
    if cords_with_values > 0:
        ax2.hist(values, bins=20, color='coral', alpha=0.7, edgecolor='black')
        ax2.set_xlabel('Numeric Value', fontsize=12)
        ax2.set_ylabel('Frequency', fontsize=12)
        ax2.set_title('Value Distribution', fontsize=13, fontweight='bold')
        ax2.grid(True, alpha=0.3)
    else:
        _show_placeholder(ax2, 'No numeric values')

    # Plot 3: Cord ordinal vs value
    ax3 = axes[1, 0]
    valued_cords = khipu_data[khipu_data['numeric_value'].notna()]
    if len(valued_cords) > 0:
        colors_by_level = {1: 'blue', 2: 'green', 3: 'orange', 4: 'red'}
        # Sorted so the legend order is stable; a NaN level can never match
        # any row, so it is dropped instead of producing an empty legend entry.
        levels = sorted(valued_cords['CORD_LEVEL'].dropna().unique())
        for level in levels:
            level_data = valued_cords[valued_cords['CORD_LEVEL'] == level]
            ax3.scatter(level_data['CORD_ORDINAL'], level_data['numeric_value'],
                        label=f'Level {level}',
                        color=colors_by_level.get(level, 'gray'),
                        alpha=0.6, s=50)
        ax3.set_xlabel('Cord Position (Ordinal)', fontsize=12)
        ax3.set_ylabel('Numeric Value', fontsize=12)
        ax3.set_title('Values by Position', fontsize=13, fontweight='bold')
        if levels:
            ax3.legend()
        ax3.grid(True, alpha=0.3)
    else:
        _show_placeholder(ax3, 'No positioned values')

    # Plot 4: Comparison to cluster centroid
    ax4 = axes[1, 1]
    comparison_drawn = False
    if feat is not None and has_cluster:
        member_ids = clusters.loc[clusters['cluster'] == cluster_id, 'khipu_id']
        cluster_features = features[features['khipu_id'].isin(member_ids)]
        comparison_drawn = _plot_cluster_comparison(ax4, feat, cluster_features, cluster_id)
    if not comparison_drawn:
        _show_placeholder(ax4, 'No cluster comparison available')

    plt.tight_layout()
    plt.show()

    # === SECTION 6: Cord Details Table ===
    print("\n" + "-"*80)
    print("CORD DETAILS (First 20)")
    print("-"*80)

    display_cols = ['CORD_ID', 'CORD_LEVEL', 'CORD_ORDINAL', 'numeric_value', 'PENDANT_FROM']
    available_cols = [c for c in display_cols if c in khipu_data.columns]
    display(khipu_data[available_cols].head(20))

# Bind the dropdown to view_khipu so the report re-renders on every selection
output = widgets.interactive_output(view_khipu, dict(khipu_id=khipu_dropdown))

# Show the selector first, then the area the report renders into
display(khipu_dropdown, output)

## 3. Export Khipu Report

In [None]:
def export_khipu_report(khipu_id, filename=None):
    """Export one khipu's cords, joined with their numeric values, to CSV.

    Reads the module-level ``hierarchy`` and ``numeric_values`` tables and
    writes the joined rows under ``../data/processed/``.

    Parameters
    ----------
    khipu_id
        Value matched against ``hierarchy['KHIPU_ID']`` and
        ``numeric_values['khipu_id']``.
    filename : str, optional
        Name of the CSV file inside ``../data/processed/``. Defaults to
        ``khipu_<khipu_id>_report.csv``.

    Returns
    -------
    None
        When no cords match ``khipu_id`` nothing is written and a
        "No data found" message is printed instead.
    """
    if filename is None:
        filename = f"khipu_{khipu_id}_report.csv"

    # Get all data for this khipu
    khipu_cords = hierarchy[hierarchy['KHIPU_ID'] == khipu_id]
    khipu_values = numeric_values[numeric_values['khipu_id'] == khipu_id]

    # Left join keeps every cord; cords without a value get NaN
    report = khipu_cords.merge(
        khipu_values[['cord_id', 'numeric_value']],
        left_on='CORD_ID',
        right_on='cord_id',
        how='left'
    )

    # Avoid leaving a header-only file behind (and claiming success) when the
    # ID matches no cords, e.g. because of a typo.
    if report.empty:
        print(f"No data found for khipu {khipu_id}")
        return

    output_path = f"../data/processed/{filename}"
    report.to_csv(output_path, index=False)
    print(f"✓ Exported khipu {khipu_id} report to {output_path}")
    print(f"  {len(report)} cords exported")

# Example usage:
# export_khipu_report(1000000)