In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
matplotlib.use('Agg')  
import os
import json
import re
from datetime import datetime

from analysis_utils import (
    prepare_event_markers_timestamps,
    find_timestamp_offset,
    extract_window_data
)

DATA_FOLDER = 'data'
OUTPUT_FOLDER = 'data/outputs'
os.makedirs(OUTPUT_FOLDER, exist_ok=True)

results = {
    'status': 'processing',
    'timestamp': datetime.now().isoformat(),
    'errors': [],
    'warnings': [],
    'markers': {},
    'analysis': {},
    'plots': []
}

In [2]:
results['status'] = 'completed' if len(results['errors']) == 0 else 'completed_with_errors'
results_path = os.path.join(OUTPUT_FOLDER, 'results.json')

with open(results_path, 'w') as f:
    json.dump(results, f, indent=2)

print(f"\n✓ Analysis complete! Results saved to {results_path}")
print(f"Status: {results['status']}")
print(f"Plots generated: {len(results['plots'])}")



✓ Analysis complete! Results saved to data/outputs/results.json
Status: completed
Plots generated: 0


In [3]:
print("\n1. LOADING CONFIGURATION")
print("-" * 80)

# Find the most recent subject folder
subject_folders = []
for item in os.listdir(DATA_FOLDER):
    item_path = os.path.join(DATA_FOLDER, item)
    if os.path.isdir(item_path) and item not in ['outputs', 'test_temp']:
        manifest_path = os.path.join(item_path, 'file_manifest.json')
        if os.path.exists(manifest_path):
            mtime = os.path.getmtime(item_path)
            subject_folders.append((item, item_path, mtime))

if not subject_folders:
    error_msg = "No subject folder with manifest found"
    print(f"ERROR: {error_msg}")
    results['errors'].append(error_msg)
    results['status'] = 'failed'
else:
    # Sort by modification time and get the most recent
    subject_folders.sort(key=lambda x: x[2], reverse=True)
    folder_name, subject_folder, _ = subject_folders[0]
    
    print(f"Using subject folder: {folder_name}")

    manifest_path = os.path.join(subject_folder, 'file_manifest.json')
    with open(manifest_path, 'r') as f:
        manifest = json.load(f)
    
    print(f"✓ Manifest loaded")
    print(f"  EmotiBit files: {len(manifest.get('emotibit_files', []))}")
    print(f"  Event markers: {'Yes' if manifest.get('event_markers') else 'No'}")
    
    analysis_config = manifest.get('analysis_config', {})
    selected_metrics = analysis_config.get('selected_metrics', [])
    comparison_groups = analysis_config.get('comparison_groups', [])
    
    print(f"  Selected metrics: {selected_metrics}")
    print(f"  Comparison groups: {len(comparison_groups)}")
    
    if len(comparison_groups) < 2:
        results['warnings'].append('Need at least 2 comparison groups')
        print("  ⚠ Warning: Need at least 2 comparison groups")

print("\n2. LOADING EVENT MARKERS")
print("-" * 80)

try:
    if manifest.get('event_markers'):
        event_markers_path = manifest['event_markers']['path']
        print(f"Loading from: {event_markers_path}")
        
        df_markers = pd.read_csv(event_markers_path)
        print(f"✓ Loaded {df_markers.shape[0]} rows")
        print(f"  Columns: {df_markers.columns.tolist()}")
        
        df_markers = prepare_event_markers_timestamps(df_markers)
        
        results['markers'] = {
            'shape': df_markers.shape,
            'columns': list(df_markers.columns),
            'head': df_markers.head(10).replace({np.nan: None}).to_dict('records')
        }
        
        if 'condition' in df_markers.columns:
            results['markers']['conditions'] = df_markers['condition'].value_counts().to_dict()
        
    else:
        raise FileNotFoundError("No event markers file in manifest")
        
except Exception as e:
    error_msg = f"Error loading event markers: {str(e)}"
    print(f"ERROR: {error_msg}")
    results['errors'].append(error_msg)
    df_markers = None


1. LOADING CONFIGURATION
--------------------------------------------------------------------------------
Using subject folder: G3_1.4.2_2025-05-22T204215.885316_eGFub25vdmFhcmlhQHlhaG9vLmNvbQ
✓ Manifest loaded
  EmotiBit files: 1
  Event markers: Yes
  Selected metrics: ['HR']
  Comparison groups: 2

2. LOADING EVENT MARKERS
--------------------------------------------------------------------------------
Loading from: data/G3_1.4.2_2025-05-22T204215.885316_eGFub25vdmFhcmlhQHlhaG9vLmNvbQ/2025-05-23_eGFub25vdmFhcmlhQHlhaG9vLmNvbQ==_event_markers.csv
✓ Loaded 101466 rows
  Columns: ['timestamp', 'EDA', 'HR', 'BI', 'PG', 'event_marker', 'condition']
  ✓ Found 'timestamp' column (ISO format) - converting to unix_timestamp
  ⚠ Dropped 607 rows with invalid timestamps
  ✓ Converted 100859 timestamps from ISO to Unix format


In [4]:
if df_markers is not None and selected_metrics:
    
    print("\n3. ANALYZING SELECTED METRICS")
    print("-" * 80)
    
    for metric in selected_metrics:
        print(f"\nAnalyzing metric: {metric}")
        print("-" * 40)
        
        try:
            # Find and load metric file
            metric_file = None
            for emotibit_file in manifest['emotibit_files']:
                if f'_{metric}.csv' in emotibit_file['filename']:
                    metric_file = emotibit_file['path']
                    break
            
            if not metric_file:
                print(f"  ⚠ Warning: File for metric {metric} not found - skipping")
                continue
            
            print(f"  Loading: {os.path.basename(metric_file)}")
            df_metric = pd.read_csv(metric_file)
            print(f"  ✓ Loaded {df_metric.shape[0]} rows")
            
            # Calculate offset
            print(f"  Calculating timestamp offset...")
            offset = find_timestamp_offset(df_markers, df_metric)
            
            # Extract data for each comparison group
            group_data = {}
            
            for group in comparison_groups:
                group_label = group['label']
                print(f"\n  Extracting data for '{group_label}'...")
                
                data = extract_window_data(df_metric, df_markers, offset, group)
                
                if len(data) == 0:
                    print(f"  ⚠ Warning: No data for group '{group_label}' - skipping")
                    continue
                
                group_data[group_label] = data
            
            # Calculate statistics for each group
            if len(group_data) < 2:
                print(f"  ⚠ Warning: Need at least 2 groups with data - skipping {metric}")
                continue
            
            metric_col = df_metric.columns[-1]
            metric_results = {}
            
            for group_label, data in group_data.items():
                values = data[metric_col].dropna()
                
                stats = {
                    'mean': float(values.mean()),
                    'std': float(values.std()),
                    'min': float(values.min()),
                    'max': float(values.max()),
                    'count': int(len(values))
                }
                
                metric_results[group_label] = stats
                print(f"\n  {group_label}: mean={stats['mean']:.2f}, std={stats['std']:.2f}, n={stats['count']}")
            
            # Store results
            results['analysis'][metric] = metric_results
            
            # ================================================================
            # CREATE VISUALIZATIONS
            # ================================================================
            
            print(f"\n  Creating visualizations...")
            
            # Define color palette for groups (up to 10 groups)
            colors = ['#4CAF50', '#2196F3', '#FF9800', '#9C27B0', '#F44336', 
                      '#00BCD4', '#FFEB3B', '#795548', '#607D8B', '#E91E63']
            
            group_labels = list(metric_results.keys())
            
            # Plot 1: Individual time series for each comparison group
            num_groups = len(group_data)
            fig, axes = plt.subplots(num_groups, 1, figsize=(14, 4 * num_groups), squeeze=False)

            for idx, (group_label, data) in enumerate(group_data.items()):
                ax = axes[idx, 0]
                values = data[metric_col].dropna()
                
                # Convert to elapsed time in seconds from start of this specific event
                timestamps = data['AdjustedTimestamp'].values
                start_time = timestamps.min()
                elapsed_seconds = timestamps - start_time
                
                color = colors[idx % len(colors)]
                
                # Plot line and scatter
                ax.plot(elapsed_seconds, values, color=color, linewidth=1.5, alpha=0.8)
                ax.scatter(elapsed_seconds, values, color=color, s=12, alpha=0.6)
                
                # Add mean line
                mean_val = values.mean()
                ax.axhline(y=mean_val, color='red', linestyle='--', alpha=0.5, 
                        label=f'Mean: {mean_val:.2f}')
                
                # Statistics text box
                stats_text = f'Mean: {values.mean():.2f}\nStd: {values.std():.2f}\nn: {len(values)}'
                ax.text(0.98, 0.97, stats_text, transform=ax.transAxes,
                    verticalalignment='top', horizontalalignment='right',
                    bbox=dict(boxstyle='round', facecolor='white', alpha=0.8),
                    fontsize=9, family='monospace')
                
                ax.set_xlabel('Elapsed Time (seconds)', fontsize=11)
                ax.set_ylabel(f'{metric} Value', fontsize=11)
                ax.set_title(f'{group_label}', fontsize=12, fontweight='bold', color=color)
                ax.grid(True, alpha=0.3, linestyle='--')
                ax.legend(loc='upper left', fontsize=9)

            plt.tight_layout()
            plot1_path = os.path.join(OUTPUT_FOLDER, f'{metric}_individual_timeseries.png')
            plt.savefig(plot1_path, dpi=100, bbox_inches='tight')
            plt.close()

            results['plots'].append({
                'name': f'{metric} Individual Time Series',
                'path': plot1_path,
                'filename': f'{metric}_individual_timeseries.png',
                'url': f'/api/plot/{metric}_individual_timeseries.png'
            })
            print(f"    ✓ Saved: {metric}_individual_timeseries.png")
            
            # Plot 2: Time series - chronological progression
            fig, ax = plt.subplots(figsize=(16, 6))

            # Combine all group data in chronological order with event timestamps
            all_time_series_data = []

            for group_label, data in group_data.items():
                values = data[metric_col].dropna()
                timestamps = data['AdjustedTimestamp'].values
                
                for i, (ts, val) in enumerate(zip(timestamps, values)):
                    all_time_series_data.append({
                        'timestamp': ts,
                        'value': val,
                        'group': group_label
                    })

            # Convert to DataFrame and sort by timestamp
            ts_df = pd.DataFrame(all_time_series_data)
            ts_df = ts_df.sort_values('timestamp').reset_index(drop=True)

            # Convert to elapsed minutes from start
            start_time = ts_df['timestamp'].min()
            ts_df['elapsed_minutes'] = (ts_df['timestamp'] - start_time) / 60

            # Plot the continuous time series with color coding by group
            for idx, group_label in enumerate(group_labels):
                group_segment = ts_df[ts_df['group'] == group_label]
                if len(group_segment) > 0:
                    color = colors[idx % len(colors)]
                    ax.plot(group_segment['elapsed_minutes'], group_segment['value'],
                        color=color, linewidth=1.5, alpha=0.8, label=group_label)
                    ax.scatter(group_segment['elapsed_minutes'], group_segment['value'],
                            color=color, s=8, alpha=0.6)

            # Add vertical lines and labels to mark event boundaries
            event_boundaries = []
            for group_label in group_labels:
                group_segment = ts_df[ts_df['group'] == group_label]
                if len(group_segment) > 0:
                    start_min = group_segment['elapsed_minutes'].min()
                    end_min = group_segment['elapsed_minutes'].max()
                    event_boundaries.append((group_label, start_min, end_min))

            # Draw boundary lines and labels
            y_min, y_max = ax.get_ylim()
            for group_label, start_min, end_min in event_boundaries:
                # Vertical lines at start and end
                ax.axvline(x=start_min, color='black', linestyle='--', alpha=0.3, linewidth=1)
                ax.axvline(x=end_min, color='black', linestyle='--', alpha=0.3, linewidth=1)
                
                # Label in the middle of the section
                mid_min = (start_min + end_min) / 2
                ax.text(mid_min, y_max * 0.97, group_label, 
                    ha='center', va='top', fontweight='bold', fontsize=10,
                    bbox=dict(boxstyle="round,pad=0.4", facecolor="white", alpha=0.9, edgecolor='gray'))

            # Format x-axis for elapsed time
            max_minutes = ts_df['elapsed_minutes'].max()
            if max_minutes <= 5:
                tick_interval = 0.5
            elif max_minutes <= 15:
                tick_interval = 1
            elif max_minutes <= 60:
                tick_interval = 2
            else:
                tick_interval = 5

            tick_positions = np.arange(0, max_minutes + tick_interval, tick_interval)
            ax.set_xticks(tick_positions)
            ax.set_xticklabels([f'{t:.1f}' if t % 1 != 0 else str(int(t)) for t in tick_positions])

            ax.set_xlabel('Elapsed Time (minutes)', fontsize=12)
            ax.set_ylabel(f'{metric} Value (bpm)', fontsize=12)
            ax.set_title(f'{metric} Time Series: Chronological Progression', fontsize=14, fontweight='bold')
            ax.legend(fontsize=10, loc='best')
            ax.grid(True, alpha=0.3, linestyle='--')
            plt.tight_layout()

            plot2_path = os.path.join(OUTPUT_FOLDER, f'{metric}_timeseries.png')
            plt.savefig(plot2_path, dpi=100, bbox_inches='tight')
            plt.close()

            results['plots'].append({
                'name': f'{metric} Time Series',
                'path': plot2_path,
                'filename': f'{metric}_timeseries.png',
                'url': f'/api/plot/{metric}_timeseries.png'
            })
            print(f"    ✓ Saved: {metric}_timeseries.png")
            
            # Plot 3: Comparison summary (bar chart with stats)
            fig, ax = plt.subplots(figsize=(max(10, len(group_data) * 2), 6))

            group_labels = list(metric_results.keys())
            means = [metric_results[label]['mean'] for label in group_labels]
            stds = [metric_results[label]['std'] for label in group_labels]

            x_pos = np.arange(len(group_labels))
            bars = ax.bar(x_pos, means, yerr=stds, capsize=10, 
                        color=[colors[i % len(colors)] for i in range(len(group_labels))], 
                        alpha=0.7, edgecolor='black', linewidth=1.5)

            ax.set_xticks(x_pos)
            ax.set_xticklabels(group_labels, rotation=45, ha='right')
            ax.set_ylabel(f'{metric} Value', fontsize=12)
            ax.set_title(f'{metric}: Statistical Comparison', fontsize=14, fontweight='bold')
            ax.grid(True, alpha=0.3, axis='y', linestyle='--')

            # Add value labels on bars
            for i, (mean, std) in enumerate(zip(means, stds)):
                ax.text(i, mean + std + 0.05 * max(means), f'{mean:.2f}±{std:.2f}',
                    ha='center', va='bottom', fontsize=9, fontweight='bold')

            plt.tight_layout()
            plot3_path = os.path.join(OUTPUT_FOLDER, f'{metric}_comparison.png')
            plt.savefig(plot3_path, dpi=100, bbox_inches='tight')
            plt.close()

            results['plots'].append({
                'name': f'{metric} Statistical Comparison',
                'path': plot3_path,
                'filename': f'{metric}_comparison.png',
                'url': f'/api/plot/{metric}_comparison.png'
            })
            print(f"    ✓ Saved: {metric}_comparison.png")
            
        except Exception as e:
            error_msg = f"Error analyzing {metric}: {str(e)}"
            print(f"  ERROR: {error_msg}")
            results['errors'].append(error_msg)
            import traceback
            traceback.print_exc()

else:
    print("\n⚠ Skipping analysis - no event markers or metrics selected")


3. ANALYZING SELECTED METRICS
--------------------------------------------------------------------------------

Analyzing metric: HR
----------------------------------------
  Loading: 2025-05-22_20-42-36_eGFub25vdmFhcmlhQHlhaG9vLmNvbQ==_emotibit_ground_truth_HR.csv
  ✓ Loaded 5563 rows
  Calculating timestamp offset...
  Event Marker Start: 2025-05-22 13:42:36.738613
  EmotiBit Start: 2025-05-22 20:33:59.566568
  Calculated Offset: -24682.83s (-6.86 hours)

  Extracting data for 'Baseline'...
  Found 2573 occurrences of 'biometric_baseline'
  Extracted 142 data points across all occurrences

  Extracting data for 'SART 1'...
  Found 7578 occurrences of 'sart_1'
  Extracted 396 data points across all occurrences

  Baseline: mean=72.96, std=4.45, n=142

  SART 1: mean=68.17, std=4.94, n=396

  Creating visualizations...
    ✓ Saved: HR_individual_timeseries.png
    ✓ Saved: HR_timeseries.png
    ✓ Saved: HR_comparison.png


In [5]:
print("\n4. SAVING RESULTS")
print("-" * 80)

results['status'] = 'completed' if len(results['errors']) == 0 else 'completed_with_errors'
results_path = os.path.join(OUTPUT_FOLDER, 'results.json')

with open(results_path, 'w') as f:
    json.dump(results, f, indent=2)

print(f"✓ Analysis complete!")
print(f"  Status: {results['status']}")
print(f"  Plots generated: {len(results['plots'])}")
print(f"  Metrics analyzed: {len(results.get('analysis', {}))}")
if results['errors']:
    print(f"  Errors: {len(results['errors'])}")
if results['warnings']:
    print(f"  Warnings: {len(results['warnings'])}")

print("\n" + "="*80)
print("ANALYSIS COMPLETE")
print("="*80)


4. SAVING RESULTS
--------------------------------------------------------------------------------
✓ Analysis complete!
  Status: completed
  Plots generated: 3
  Metrics analyzed: 1

ANALYSIS COMPLETE
