In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
matplotlib.use('Agg')  
import os
import json
import re
from datetime import datetime

# Filesystem locations: DATA_FOLDER is scanned for subject folders,
# OUTPUT_FOLDER receives the plots and results.json.
DATA_FOLDER = 'data'
OUTPUT_FOLDER = 'data/outputs'
os.makedirs(OUTPUT_FOLDER, exist_ok=True)

# Run-level record that later cells fill in and finally serialise to JSON.
results = dict(
    status='processing',
    timestamp=datetime.now().isoformat(),
    errors=[],
    warnings=[],
    ground_truth={},
    markers={},
    analysis={},
    plots=[],
)

from analysis_utils import (
    prepare_event_markers_timestamps,
    find_timestamp_offset,
    extract_window_data
)

In [7]:
# NOTE(review): this cell has execution count 7 but sits before the loading and
# analysis cells, and it repeats the final "4. SAVING RESULTS" step. On a
# top-to-bottom run it writes results.json before any metric has been analysed;
# consider deleting it.
results['status'] = 'completed' if len(results['errors']) == 0 else 'completed_with_errors'
results_path = os.path.join(OUTPUT_FOLDER, 'results.json')

# Serialise before opening the file so a serialisation failure cannot leave a
# truncated results.json behind. default=str converts values the json module
# cannot encode natively (e.g. timestamps) into strings instead of raising.
results_json = json.dumps(results, indent=2, default=str)
with open(results_path, 'w') as f:
    f.write(results_json)

print(f"\n✓ Analysis complete! Results saved to {results_path}")
print(f"Status: {results['status']}")
print(f"Plots generated: {len(results['plots'])}")



✓ Analysis complete! Results saved to data/outputs/results.json
Status: completed
Plots generated: 3


In [None]:
print("\n1. LOADING CONFIGURATION")
print("-" * 80)

# Start from empty defaults so later cells never hit undefined names (or values
# left over from a previous run) when no manifest can be loaded.
manifest = {}
analysis_config = {}
selected_metrics = []
baseline_window = {}
task_window = {}

# Find the most recent subject folder: every sub-directory of DATA_FOLDER
# (except the output/scratch folders) that contains a file_manifest.json.
subject_folders = []
for item in os.listdir(DATA_FOLDER):
    item_path = os.path.join(DATA_FOLDER, item)
    if os.path.isdir(item_path) and item not in ['outputs', 'test_temp']:
        manifest_path = os.path.join(item_path, 'file_manifest.json')
        if os.path.exists(manifest_path):
            mtime = os.path.getmtime(item_path)
            subject_folders.append((item, item_path, mtime))

if not subject_folders:
    error_msg = "No subject folder with manifest found"
    print(f"ERROR: {error_msg}")
    results['errors'].append(error_msg)
    results['status'] = 'failed'
else:
    # Sort by modification time and get the most recent
    subject_folders.sort(key=lambda x: x[2], reverse=True)
    folder_name, subject_folder, _ = subject_folders[0]

    print(f"Using subject folder: {folder_name}")

    # Load manifest; an unreadable or malformed file is recorded as an error
    # instead of aborting the notebook, matching how later cells report failures.
    manifest_path = os.path.join(subject_folder, 'file_manifest.json')
    try:
        with open(manifest_path, 'r') as f:
            manifest = json.load(f)
    except (OSError, ValueError) as e:  # json.JSONDecodeError is a ValueError
        error_msg = f"Could not read manifest {manifest_path}: {e}"
        print(f"ERROR: {error_msg}")
        results['errors'].append(error_msg)
        results['status'] = 'failed'
    else:
        print("✓ Manifest loaded")
        print(f"  EmotiBit files: {len(manifest.get('emotibit_files') or [])}")
        print(f"  Event markers: {'Yes' if manifest.get('event_markers') else 'No'}")

        # Get analysis configuration; `or` also covers keys that are present
        # but set to null in the manifest.
        analysis_config = manifest.get('analysis_config') or {}
        selected_metrics = analysis_config.get('selected_metrics') or []
        baseline_window = analysis_config.get('baseline_window') or {}
        task_window = analysis_config.get('task_window') or {}

        print(f"  Selected metrics: {selected_metrics}")
        print(f"  Baseline window: {baseline_window.get('eventMarker', 'Not configured')}")
        print(f"  Task window: {task_window.get('eventMarker', 'Not configured')}")

        if not selected_metrics or not baseline_window.get('eventMarker') or not task_window.get('eventMarker'):
            results['warnings'].append('Analysis configuration incomplete - using defaults')
            print("  ⚠ Warning: Analysis configuration incomplete")

# ============================================================================
# LOAD EVENT MARKERS
# ============================================================================

print("\n2. LOADING EVENT MARKERS")
print("-" * 80)

try:
    # Guard first: without a manifest entry there is nothing to load.
    if not manifest.get('event_markers'):
        raise FileNotFoundError("No event markers file in manifest")

    event_markers_path = manifest['event_markers']['path']
    print(f"Loading from: {event_markers_path}")

    df_markers = pd.read_csv(event_markers_path)
    print(f"✓ Loaded {df_markers.shape[0]} rows")
    print(f"  Columns: {df_markers.columns.tolist()}")

    # Timestamp preparation is delegated to the analysis_utils helper.
    df_markers = prepare_event_markers_timestamps(df_markers)

    # Summary of the marker table kept in the results record.
    markers_summary = {
        'shape': df_markers.shape,
        'columns': list(df_markers.columns),
        'head': df_markers.head(10).to_dict('records')
    }
    results['markers'] = markers_summary

    # Per-condition row counts, only when the file has a 'condition' column.
    if 'condition' in df_markers.columns:
        markers_summary['conditions'] = df_markers['condition'].value_counts().to_dict()

except Exception as e:
    # Any failure is recorded and signalled downstream through df_markers = None.
    error_msg = f"Error loading event markers: {e!s}"
    print(f"ERROR: {error_msg}")
    results['errors'].append(error_msg)
    df_markers = None

In [None]:
def _summarize(values):
    """Return mean, std, min, max and count of `values` as plain Python numbers."""
    return {
        'mean': float(values.mean()),
        'std': float(values.std()),
        'min': float(values.min()),
        'max': float(values.max()),
        'count': int(len(values))
    }


def _skip_metric(message):
    """Print a skip warning and record it in results['warnings']."""
    print(f"  ⚠ Warning: {message}")
    results['warnings'].append(message)


def _save_plot(fig, metric, suffix, label):
    """Save `fig` as '<metric>_<suffix>.png' in OUTPUT_FOLDER and register it in results['plots']."""
    filename = f'{metric}_{suffix}.png'
    plot_path = os.path.join(OUTPUT_FOLDER, filename)
    fig.tight_layout()
    fig.savefig(plot_path, dpi=100, bbox_inches='tight')
    results['plots'].append({
        'name': f'{metric} {label}',
        'path': plot_path,
        'filename': filename
    })
    print(f"    ✓ Saved: {filename}")


def _plot_comparison(metric, baseline_stats, task_stats):
    """Plot 1: bar chart of baseline vs task mean with standard-deviation error bars."""
    categories = ['Baseline', 'Task']
    means = [baseline_stats['mean'], task_stats['mean']]
    stds = [baseline_stats['std'], task_stats['std']]

    fig, ax = plt.subplots(figsize=(10, 6))
    try:
        ax.bar(categories, means, yerr=stds, capsize=10,
               color=['#4CAF50', '#2196F3'], alpha=0.7, edgecolor='black')

        ax.set_ylabel(f'{metric} Value', fontsize=12)
        ax.set_title(f'{metric}: Baseline vs Task Comparison', fontsize=14, fontweight='bold')
        ax.grid(True, alpha=0.3, axis='y', linestyle='--')

        # Pad the value labels by 5% of the largest magnitude so the offset stays
        # positive (label above the error bar) even when the means are negative.
        label_pad = 0.05 * max(abs(m) for m in means)
        for i, (mean, std) in enumerate(zip(means, stds)):
            ax.text(i, mean + std + label_pad, f'{mean:.2f}±{std:.2f}',
                    ha='center', va='bottom', fontsize=10, fontweight='bold')

        _save_plot(fig, metric, 'comparison', 'Comparison')
    finally:
        # Always release the figure, including when drawing or saving fails.
        plt.close(fig)


def _plot_timeseries(metric, baseline_values, task_values, gap=100):
    """Plot 2: baseline and task samples side by side, separated by `gap` index units."""
    n_baseline = len(baseline_values)

    fig, ax = plt.subplots(figsize=(14, 6))
    try:
        ax.plot(np.arange(n_baseline), baseline_values, linewidth=0.8,
                color='#4CAF50', alpha=0.7, label='Baseline')

        # Task data is shifted on the x-axis so the two windows do not overlap.
        task_times = np.arange(len(task_values)) + n_baseline + gap
        ax.plot(task_times, task_values, linewidth=0.8,
                color='#2196F3', alpha=0.7, label='Task')

        # Vertical separator in the middle of the gap
        ax.axvline(x=n_baseline + gap // 2, color='red', linestyle='--', linewidth=2, alpha=0.5)

        ax.set_xlabel('Sample Index', fontsize=12)
        ax.set_ylabel(f'{metric} Value', fontsize=12)
        ax.set_title(f'{metric} Time Series: Baseline vs Task', fontsize=14, fontweight='bold')
        ax.legend(fontsize=10)
        ax.grid(True, alpha=0.3, linestyle='--')

        _save_plot(fig, metric, 'timeseries', 'Time Series')
    finally:
        plt.close(fig)


def _plot_distribution(metric, baseline_values, task_values):
    """Plot 3: box plots comparing the baseline and task value distributions."""
    fig, ax = plt.subplots(figsize=(10, 6))
    try:
        bp = ax.boxplot([baseline_values.to_numpy(), task_values.to_numpy()],
                        patch_artist=True, showmeans=True, meanline=True)
        # Tick labels are set separately: boxplot's `labels` keyword is deprecated
        # in matplotlib 3.9+, while set_xticklabels works on old and new versions.
        ax.set_xticklabels(['Baseline', 'Task'])

        # Color the boxes
        for patch, color in zip(bp['boxes'], ['#4CAF50', '#2196F3']):
            patch.set_facecolor(color)
            patch.set_alpha(0.7)

        ax.set_ylabel(f'{metric} Value', fontsize=12)
        ax.set_title(f'{metric} Distribution: Baseline vs Task', fontsize=14, fontweight='bold')
        ax.grid(True, alpha=0.3, axis='y', linestyle='--')

        _save_plot(fig, metric, 'distribution', 'Distribution')
    finally:
        plt.close(fig)


if df_markers is not None and selected_metrics:

    print("\n3. ANALYZING SELECTED METRICS")
    print("-" * 80)

    for metric in selected_metrics:
        print(f"\nAnalyzing metric: {metric}")
        print("-" * 40)

        try:
            # Find the metric file: first manifest entry whose filename contains '_<metric>.csv'
            metric_file = next(
                (entry['path'] for entry in manifest['emotibit_files']
                 if f'_{metric}.csv' in entry['filename']),
                None
            )

            if not metric_file:
                _skip_metric(f"File for metric {metric} not found - skipping")
                continue

            print(f"  Loading: {os.path.basename(metric_file)}")
            df_metric = pd.read_csv(metric_file)
            print(f"  ✓ Loaded {df_metric.shape[0]} rows")

            # Calculate offset between marker and metric timestamps (analysis_utils helper)
            print(f"  Calculating timestamp offset...")
            offset = find_timestamp_offset(df_markers, df_metric)

            # Extract baseline window data
            print(f"\n  Extracting BASELINE window data...")
            baseline_data = extract_window_data(df_metric, df_markers, offset, baseline_window)

            # Extract task window data
            print(f"\n  Extracting TASK window data...")
            task_data = extract_window_data(df_metric, df_markers, offset, task_window)

            if len(baseline_data) == 0 or len(task_data) == 0:
                _skip_metric(f"Insufficient data for comparison - skipping {metric}")
                continue

            # Get metric column (last column)
            # NOTE(review): assumes the measured value is the last CSV column - confirm
            # against the EmotiBit export format.
            metric_col = df_metric.columns[-1]

            baseline_values = baseline_data[metric_col].dropna()
            task_values = task_data[metric_col].dropna()

            # A window can be non-empty yet hold only missing values; statistics and
            # plots would then be NaN, so skip the metric instead.
            if baseline_values.empty or task_values.empty:
                _skip_metric(f"No non-missing {metric_col} values in baseline or task window - skipping {metric}")
                continue

            # Calculate statistics
            baseline_stats = _summarize(baseline_values)
            task_stats = _summarize(task_values)

            print(f"\n  Baseline {metric}: mean={baseline_stats['mean']:.2f}, std={baseline_stats['std']:.2f}, n={baseline_stats['count']}")
            print(f"  Task {metric}: mean={task_stats['mean']:.2f}, std={task_stats['std']:.2f}, n={task_stats['count']}")

            # Store analysis results; percent change is reported as 0 when the
            # baseline mean is 0 to avoid dividing by zero.
            difference = task_stats['mean'] - baseline_stats['mean']
            if baseline_stats['mean'] != 0:
                percent_change = difference / baseline_stats['mean'] * 100
            else:
                percent_change = 0
            results['analysis'][metric] = {
                'baseline': baseline_stats,
                'task': task_stats,
                'difference': difference,
                'percent_change': percent_change
            }

            # ================================================================
            # CREATE VISUALIZATIONS
            # ================================================================

            print(f"\n  Creating visualizations...")
            _plot_comparison(metric, baseline_stats, task_stats)
            _plot_timeseries(metric, baseline_values, task_values)
            _plot_distribution(metric, baseline_values, task_values)

        except Exception as e:
            # One failing metric must not stop the others: record it and carry on.
            error_msg = f"Error analyzing {metric}: {str(e)}"
            print(f"  ERROR: {error_msg}")
            results['errors'].append(error_msg)
            import traceback
            traceback.print_exc()

else:
    print("\n⚠ Skipping analysis - no event markers or metrics selected")

In [None]:
print("\n4. SAVING RESULTS")
print("-" * 80)

# Final status: any recorded error downgrades the run to 'completed_with_errors'.
results['status'] = 'completed' if len(results['errors']) == 0 else 'completed_with_errors'
results_path = os.path.join(OUTPUT_FOLDER, 'results.json')

# Serialise before opening the file so a serialisation failure cannot leave a
# truncated results.json behind. default=str converts values the json module
# cannot encode natively (e.g. timestamps in the marker preview) into strings.
results_json = json.dumps(results, indent=2, default=str)
with open(results_path, 'w') as f:
    f.write(results_json)

print(f"✓ Analysis complete!")
print(f"  Status: {results['status']}")
print(f"  Plots generated: {len(results['plots'])}")
print(f"  Metrics analyzed: {len(results.get('analysis', {}))}")
if results['errors']:
    print(f"  Errors: {len(results['errors'])}")
if results['warnings']:
    print(f"  Warnings: {len(results['warnings'])}")

print("\n" + "="*80)
print("ANALYSIS COMPLETE")
print("="*80)