# EEG Data Exploration

Interactive exploration of EEG datasets for long-range correlations.

**Expected:** EEG typically shows H ≈ 0.6-0.9 (persistent, long memory)

## Quick Start:
1. Download some EEG data (see EEG_QUICKSTART.md)
2. Update file paths below
3. Run cells and see what you find!

In [None]:
# Setup: third-party imports, plot styling, and the project's local modules.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path

# Plot appearance applied to the figures drawn in this notebook.
# NOTE(review): the 'seaborn-v0_8-*' style names presumably require a recent
# matplotlib release - confirm against the installed version.
plt.style.use('seaborn-v0_8-whitegrid')
sns.set_palette("deep")
# IPython magic (not plain Python): render figures inline in the notebook.
%matplotlib inline

# Put '../Python' at the front of the module search path. This has to run
# before the eeg_loader / hurst imports below (presumably those modules live
# in that directory). The path is relative, so it depends on the directory
# the notebook is started from.
import sys
sys.path.insert(0, '../Python')

from eeg_loader import EEGLoader, EEGAnalyzer, quick_analysis
from hurst import hurst_rs

print("âœ“ Ready!")

## 1. Load a Single EEG File

**Change the path** to your downloaded EEG file.

In [None]:
# Path of the recording to explore -- EDIT THIS to point at your own file.
# (Per the original note, a .edf file may be given instead of a .csv.)
eeg_file = '../data/eeg/sample_eeg.csv'

# Create the loader for that file, then ask it to load the recording.
loader = EEGLoader(eeg_file)
loader.load()

# Print whatever summary the loader produces for the loaded recording.
summary_text = loader.summary()
print(summary_text)

## 2. Quick Look at Raw Data

In [None]:
# Pick one channel (by index into loader.channels), print basic facts about
# it, and plot the start of the signal.
channel_idx = 0  # Change this to try different channels
channel_name = loader.channels[channel_idx]
data = loader.get_channel(channel_idx)

print(f"Channel: {channel_name}")
print(f"Length: {len(data)} samples")
print(f"Range: [{data.min():.2f}, {data.max():.2f}]")

# Preview at most the first 1000 samples. A shorter recording is drawn in
# full, and the title reports the number of samples actually shown instead
# of always claiming 1000.
n_preview = min(1000, len(data))

plt.figure(figsize=(14, 4))
plt.plot(data[:n_preview], linewidth=0.8, alpha=0.8)
plt.title(f'{channel_name} - First {n_preview} samples')
plt.xlabel('Sample')
# Unit label restored to a readable micro sign (it was mis-encoded text).
# NOTE(review): microvolts is the unit the original label states - confirm
# it matches what the loader returns.
plt.ylabel('Amplitude (μV)')
plt.grid(True, alpha=0.3)
plt.show()

## 3. Calculate Hurst - Does it show long memory?

In [None]:
# Estimate the Hurst exponent of the selected channel, print it with the
# fit quality, and give a plain-language interpretation.
analyzer = EEGAnalyzer(data, channel_name, loader.sampling_rate)

# Hurst
hurst_result = analyzer.calculate_hurst(min_window=10, num_windows=30)

h = hurst_result['hurst']
r2 = hurst_result['r_squared']

print(f"\n{'='*50}")
print(f"Channel: {channel_name}")
print(f"Hurst exponent: {h:.4f}")
print(f"R²: {r2:.4f}")
print(f"{'='*50}\n")

# Interpretation.
# An estimated exponent is practically never *exactly* 0.5, so comparing
# against 0.5 directly would make the "random walk" verdict unreachable.
# Values within H_TOLERANCE of 0.5 are therefore reported as random-walk-like.
# The width of the band is a judgement call - adjust it to taste.
H_TOLERANCE = 0.02

if not np.isfinite(h):
    # A NaN/inf estimate fails both comparisons below and would otherwise be
    # reported as a random walk, which would be misleading.
    print("⚠ Hurst estimate is not finite - check the input signal")
elif h > 0.5 + H_TOLERANCE:
    print("✓ Persistent (long memory)")
    print("  This is EXPECTED for EEG!")
elif h < 0.5 - H_TOLERANCE:
    print("✗ Anti-persistent (unusual for EEG)")
else:
    print("~ Random walk")

if r2 > 0.95:
    print("\n✓ Good fit (R² > 0.95)")
else:
    print(f"\n⚠ Moderate fit (R² = {r2:.3f})")

In [None]:
# Visualize the R/S scaling: the log(R/S) values against log(window size),
# together with the fitted line returned by the Hurst calculation.
log_sizes = hurst_result['log_window_sizes']

plt.figure(figsize=(10, 6))
plt.scatter(log_sizes, hurst_result['log_rs_values'],
            alpha=0.6, s=80, label='Data points')
# Legend text restored to a readable "R²" (it was mis-encoded text).
plt.plot(log_sizes, hurst_result['fitted_log_rs'],
         'r--', linewidth=2.5, label=f'Fit: H={h:.3f} (R²={r2:.3f})')
plt.xlabel('log(Window Size)', fontsize=12)
plt.ylabel('log(R/S)', fontsize=12)
plt.title(f'Hurst Analysis - {channel_name}', fontsize=14, fontweight='bold')
plt.legend(fontsize=11)
plt.grid(True, alpha=0.3)
plt.show()

## 4. Full Overview Plot

4-panel diagnostic: signal, distribution, spectrum, autocorrelation

In [None]:
# Draw the analyzer's overview figure for the channel analysed above.
# Per the original note: set this to a path to save the figure; None is
# what the notebook passes by default.
overview_save_path = None
analyzer.plot_overview(save_path=overview_save_path)

## 5. Compare Multiple Channels

**Try:** Do different brain regions show different H values?

In [None]:
# Estimate the Hurst exponent for the first few channels and collect the
# results, one row per channel, in results_df.
n_channels = min(5, len(loader.channels))  # First 5 or all if less

channel_results = []

for i in range(n_channels):
    ch_name = loader.channels[i]
    ch_data = loader.get_channel(i)

    # Use a loop-local name here. Rebinding the notebook-wide `analyzer`
    # would silently replace the single-channel analyzer, so re-running the
    # overview-plot cell afterwards would show the last channel of this loop.
    # NOTE(review): calculate_hurst() runs with its default parameters here,
    # while the single-channel cell passes min_window=10, num_windows=30, so
    # the two estimates for the same channel may differ - confirm intended.
    ch_analyzer = EEGAnalyzer(ch_data, ch_name, loader.sampling_rate)
    result = ch_analyzer.calculate_hurst()

    # Column keys are kept exactly as they were, in case other cells index
    # results_df by them.
    channel_results.append({
        'Channel': ch_name,
        'Hurst': result['hurst'],
        'RÂ²': result['r_squared'],
        'Mean': np.mean(ch_data),
        'Std': np.std(ch_data)
    })

    # Progress line; the check mark was mis-encoded text and is restored.
    print(f"✓ {ch_name:10s} H={result['hurst']:.4f}")

results_df = pd.DataFrame(channel_results)
print("\nSummary:")
# display() is provided by the IPython/Jupyter environment.
display(results_df)

In [None]:
# Visual comparison: one bar per channel, coloured by the size of its
# Hurst exponent, with a reference line at H = 0.5.
def _hurst_bar_color(hurst_value):
    """Return the bar colour for one Hurst value (same thresholds as before)."""
    if hurst_value > 0.7:
        return 'darkred'
    if hurst_value > 0.5:
        return 'orange'
    return 'blue'


# Build the colours up front and hand them to ax.bar(). Two reasons:
#  * Patch.set_color() sets BOTH face and edge colour, so colouring the bars
#    afterwards discarded the edgecolor='black' requested below.
#  * The comprehension variable does not leak, so the notebook-wide `h`
#    (used in the R/S plot legend) is no longer overwritten by this cell.
bar_colors = [_hurst_bar_color(value) for value in results_df['Hurst']]

fig, ax = plt.subplots(figsize=(12, 6))

bars = ax.bar(results_df['Channel'], results_df['Hurst'], color=bar_colors,
              alpha=0.7, edgecolor='black')

ax.axhline(y=0.5, color='black', linestyle='--', linewidth=2, alpha=0.5, label='H=0.5 (random)')
ax.set_ylabel('Hurst Exponent', fontsize=12)
ax.set_xlabel('Channel', fontsize=12)
ax.set_title('Hurst Exponent by Channel', fontsize=14, fontweight='bold')
ax.legend()
ax.grid(True, alpha=0.3, axis='y')
plt.xticks(rotation=45, ha='right')
plt.tight_layout()
plt.show()

# Colour key. The emoji was mis-encoded text and is restored; the bounds now
# match the thresholds above (0.7 itself is orange, 0.5 itself is blue).
print("\n🎨 Dark red: H>0.7 | Orange: 0.5<H≤0.7 | Blue: H≤0.5")

## 6. Time Series Overlay - Compare Channels Visually

In [None]:
# Draw up to the first 3 channels as vertically stacked panels that share
# one x-axis, each titled with the channel name and its Hurst value taken
# from the same row position of results_df.
n_panels = min(3, n_channels)
fig, axes = plt.subplots(n_panels, 1, figsize=(14, 8), sharex=True)

# With a single panel plt.subplots returns one Axes object rather than a
# sequence; wrap it so the loop and the axes[-1] access below work alike.
if n_panels == 1:
    axes = [axes]

for idx, panel in enumerate(axes):
    ch_data = loader.get_channel(idx)
    ch_name = loader.channels[idx]
    h_val = results_df.iloc[idx]['Hurst']

    # Show at most the first 2000 samples to keep the traces readable
    # (slicing leaves a shorter recording complete).
    plot_data = ch_data[:2000]

    panel.plot(plot_data, linewidth=0.7, alpha=0.8)
    panel.set_ylabel('Amplitude', fontsize=10)
    panel.set_title(f'{ch_name} (H={h_val:.3f})', fontsize=11)
    panel.grid(True, alpha=0.3)

# Only the bottom panel carries the shared x-axis label.
axes[-1].set_xlabel('Sample', fontsize=11)
plt.tight_layout()
plt.show()

## 7. Test Parameter Sensitivity

**Question:** Does the Hurst estimate change with different parameters?

In [None]:
# Re-estimate the Hurst exponent of channel 0 for several minimum window
# sizes (num_windows fixed at 25) and tabulate H and the fit quality.
test_channel = loader.get_channel(0)

min_windows_to_test = [8, 16, 32, 50]

# One hurst_rs() fit per candidate window size, in the listed order.
fits = [
    hurst_rs(test_channel, min_window=window, num_windows=25)
    for window in min_windows_to_test
]

# Column keys are reproduced exactly; the plotting cell indexes by them.
sensitivity_results = [
    {
        'min_window': window,
        'Hurst': fit['hurst'],
        'RÂ²': fit['r_squared'],
    }
    for window, fit in zip(min_windows_to_test, fits)
]

sens_df = pd.DataFrame(sensitivity_results)
print("Parameter Sensitivity Test:")
# display() is provided by the IPython/Jupyter environment.
display(sens_df)

In [None]:
# Plot the sensitivity table: Hurst exponent (left) and fit quality (right)
# against the minimum window size, then report how much H moved.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 5))

ax1.plot(sens_df['min_window'], sens_df['Hurst'], 'o-', markersize=10, linewidth=2)
ax1.set_xlabel('Minimum Window Size', fontsize=11)
ax1.set_ylabel('Hurst Exponent', fontsize=11)
ax1.set_title('Parameter Sensitivity', fontsize=12, fontweight='bold')
ax1.grid(True, alpha=0.3)
# Reference line at H = 0.5.
ax1.axhline(y=0.5, color='gray', linestyle='--', alpha=0.5)

# The column key is kept exactly as the cell that builds sens_df writes it;
# only the displayed axis label is restored to a readable "R²".
ax2.plot(sens_df['min_window'], sens_df['RÂ²'], 'o-', markersize=10,
         linewidth=2, color='orange')
ax2.set_xlabel('Minimum Window Size', fontsize=11)
ax2.set_ylabel('R² (Fit Quality)', fontsize=11)
ax2.set_title('Fit Quality', fontsize=12, fontweight='bold')
ax2.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

# Check stability: spread between the largest and smallest H estimate.
# The status symbols were mis-encoded text and are restored.
h_range = sens_df['Hurst'].max() - sens_df['Hurst'].min()
if h_range < 0.1:
    print(f"✓ Stable: H varies by only {h_range:.3f}")
else:
    print(f"⚠ Sensitive: H varies by {h_range:.3f}")

## 8. Your Experiments!

**Ideas:**
- Load multiple files, compare subjects
- Compare rest vs. task conditions
- Try different frequency bands (filter data)
- Look at specific brain regions (motor cortex C3/C4)
- Calculate on windows (does H change over time?)

In [None]:
# Your experiments here
