# Behavioral EDA 

## Saccade outlier detection



## Setup and Initialization

In [13]:
# Import required libraries
from behavioral_eda_class import BehavioralEDA
from pathlib import Path
import holoviews as hv
from holoviews import opts
import numpy as np
import pandas as pd

# Enable Jupyter notebook display
# Bokeh: direct Bokeh output to the notebook instead of a separate HTML file.
from bokeh.io import output_notebook
output_notebook()
# HoloViews: select Bokeh as the plotting backend for every plot rendered below.
hv.extension('bokeh')

In [14]:
# monkey_name = 'yasmin'  # 'fiona' or 'yasmin'
# base_path = Path.cwd().parent / 'data' / f'{monkey_name}_sst'
# filepath = base_path.parent / 'csst_trials_pkls' / f'all_{monkey_name}_CSST_trials_df.pkl'
# eda = BehavioralEDA(filepath)

In [15]:
# Memory-efficient approach: Process one monkey at a time
# This avoids loading 25GB+ of data simultaneously

# Define processing function
def process_monkey_data(monkey_name, excluded_sessions=None):
    """Load one monkey's trial table and return its summary, plots and plot data.

    The ``BehavioralEDA`` instance is created and released inside this function,
    so the caller only ever holds the returned dictionary.

    Parameters
    ----------
    monkey_name : str
        Monkey identifier as it appears in the pickle filename
        (``all_<monkey_name>_CSST_trials_df.pkl``), e.g. ``'yasmin'`` or ``'fiona'``.
    excluded_sessions : str or iterable of str, optional
        Values of the ``trial_session`` column to drop before any summary or
        plot is computed. ``None`` (the default) applies the built-in
        per-monkey exclusions (three sessions for ``'fiona'``, none for any
        other name). Pass an empty list to keep every session.

    Returns
    -------
    dict or None
        A dictionary with the keys ``basic_summary``, ``signal_delay_plot``,
        ``signal_delay_data``, ``rt_scatter_plot``, ``rt_scatter_data``,
        ``rt_distribution_plot`` and ``rt_distribution_data``; ``None`` when
        the data file is missing or any processing step raises.

    Notes
    -----
    Failures are reported (message plus traceback) rather than raised, so that
    one monkey failing does not stop the others from being processed.
    """
    import traceback  # local import: only needed on the failure path

    # Sessions dropped when the caller does not pass an explicit list.
    default_exclusions = {'fiona': ['fi210628', 'fi210629', 'fi210704']}

    try:
        # <cwd>/../data/csst_trials_pkls/all_<monkey>_CSST_trials_df.pkl
        filepath = (
            Path.cwd().parent
            / 'data'
            / 'csst_trials_pkls'
            / f'all_{monkey_name}_CSST_trials_df.pkl'
        )

        print(f"Loading data for {monkey_name.title()}...")
        print(f"Path: {filepath}")
        print(f"File exists: {filepath.exists()}")

        if not filepath.exists():
            print(f"❌ Data file not found for {monkey_name.title()}")
            return None

        # Create EDA instance
        eda = BehavioralEDA(str(filepath))
        print(f"✓ Successfully loaded {monkey_name.title()}'s data")

        # NOTE(review): the helper's internals are not visible here; the log
        # message below describes what it is expected to do.
        eda.update_stop_trial_failures()
        print("Reset failed trials based on saccade amplitude")

        # Resolve which sessions to drop: explicit argument wins over defaults.
        if excluded_sessions is None:
            to_be_excluded = list(default_exclusions.get(monkey_name, []))
        elif isinstance(excluded_sessions, str):
            # A bare string is one session id, not an iterable of characters.
            to_be_excluded = [excluded_sessions]
        else:
            to_be_excluded = list(excluded_sessions)

        if to_be_excluded:
            eda.df = eda.df[~eda.df['trial_session'].isin(to_be_excluded)]
            print(f"✓ Excluded sessions: {to_be_excluded}")

        # Extract all needed data and plots (call order kept as in the
        # original, in case the EDA methods cache derived columns).
        results = {
            'basic_summary': eda.get_basic_summary(),
            'signal_delay_plot': eda.plot_signal_delay_performance(),
            'signal_delay_data': eda.get_signal_delay_performance_data(),
            'rt_scatter_plot': eda.plot_rt_scatter(),
            'rt_scatter_data': eda.get_rt_scatter_data(),
            'rt_distribution_plot': eda.plot_rt_distributions(),
            'rt_distribution_data': eda.get_rt_distribution_data(),
        }
        print(f"✓ Extracted all plots and data for {monkey_name.title()}")

        # Drop this function's reference to the EDA instance. Objects stored in
        # `results` may still reference parts of the underlying data.
        del eda
        print(f"✓ Freed memory for {monkey_name.title()}")

        return results

    except Exception as e:
        # Best-effort by design: report and continue with the next monkey, but
        # keep the traceback so the failing step can be located.
        print(f"❌ Error processing {monkey_name.title()}'s data: {e}")
        traceback.print_exc()
        return None

# Run the pipeline one monkey at a time; only successful runs are kept
monkeys = ['yasmin', 'fiona']
monkey_results = {}

print("Processing monkeys sequentially to minimize memory usage...")
print("=" * 60)

for monkey in monkeys:
    banner_rule = '=' * 20
    print(f"\n{banner_rule} PROCESSING {monkey.upper()} {banner_rule}")

    result = process_monkey_data(monkey)
    if result:
        # A falsy result (None) means loading or processing failed
        monkey_results[monkey] = result
        print(f"✓ {monkey.title()} processing complete")
    print()

print(f"Successfully processed data for: {list(monkey_results)}")
print("Ready for analysis and plotting!")

Processing monkeys sequentially to minimize memory usage...

Loading data for Yasmin...
Path: /home/barak/Projects/population_analysis/data/csst_trials_pkls/all_yasmin_CSST_trials_df.pkl
File exists: True


Loaded data for yasmin
Total trials: 123,178
Date range: ya230501 to ya230904
✓ Reaction time data available, will add derived columns as needed
✓ Successfully loaded Yasmin's data
Reset failed trials based on saccade amplitude
Processing reaction times and adding to original DataFrame...
✓ Using existing reaction_time column
✓ Reaction time processing completed and added to original DataFrame
✓ Extracted all plots and data for Yasmin
✓ Freed memory for Yasmin
✓ Yasmin processing complete


Loading data for Fiona...
Path: /home/barak/Projects/population_analysis/data/csst_trials_pkls/all_fiona_CSST_trials_df.pkl
File exists: True


Loaded data for fiona
Total trials: 110,358
Date range: fi210628 to fi211125
✓ Reaction time data available, will add derived columns as needed
✓ Successfully loaded Fiona's data
Reset failed trials based on saccade amplitude
✓ Excluded sessions: ['fi210628', 'fi210629', 'fi210704']
Processing reaction times and adding to original DataFrame...
✓ Using existing reaction_time column
✓ Reaction time processing completed and added to original DataFrame
✓ Extracted all plots and data for Fiona
✓ Freed memory for Fiona
✓ Fiona processing complete

Successfully processed data for: ['yasmin', 'fiona']
Ready for analysis and plotting!


In [16]:
# monkey_results['yasmin']['signal_delay_data'][0]

## Basic Summary Comparison

In [17]:
# Report headline statistics for every processed monkey
summary_rule = '=' * 60

for monkey, results in monkey_results.items():
    # Guard clause: nothing to report without a stored summary
    if not results or 'basic_summary' not in results:
        print(f"❌ No data available for {monkey.title()}")
        continue

    print(summary_rule)
    print(f"BASIC SUMMARY - {monkey.upper()}")
    print(summary_rule)

    basic_summary = results['basic_summary']
    print(f"Total trials: {basic_summary['total_trials']:,}")
    print(f"Overall success rate: {basic_summary['overall_success_rate']:.1f}%")

    print("Trial types:")
    for trial_type, count in basic_summary['trial_types'].items():
        print(f"  {trial_type}: {count:,}")
    print()

BASIC SUMMARY - YASMIN
Total trials: 123,178
Overall success rate: 82.5%
Trial types:
  GO: 69,104
  CONT: 28,416
  STOP: 25,658

BASIC SUMMARY - FIONA
Total trials: 110,358
Overall success rate: 84.6%
Trial types:
  GO: 61,460
  CONT: 25,294
  STOP: 23,604



## 1. Signal Delay Performance Comparison

This replicates Figure 1b from the original paper, showing stop error rates and continue success rates as a function of signal delay.

In [18]:
# The signal delay plots were built during processing; gather them per monkey
signal_delay_plots = {}

for monkey, results in monkey_results.items():
    if not results or 'signal_delay_plot' not in results:
        print(f"❌ No signal delay plot available for {monkey.title()}")
        continue
    signal_delay_plots[monkey] = results['signal_delay_plot']
    print(f"✓ Signal delay plot available for {monkey.title()}")

print(f"\nReady to display {len(signal_delay_plots)} signal delay plots")

✓ Signal delay plot available for Yasmin
✓ Signal delay plot available for Fiona

Ready to display 2 signal delay plots


In [19]:
# Overlay both monkeys' signal delay curves; Yasmin's curves are drawn dashed
# so the two monkeys can be told apart in the shared axes.
(signal_delay_plots['yasmin'].opts(
    opts.Curve(line_dash='dashed')
) * signal_delay_plots['fiona']).opts(
    legend_position='bottom_right',
    title='Stop and continue performance',
    xlabel='Stop/Continue signal delay (ms)',
    ylabel='Percentage of saccades (%)',  # fixed typo: was 'Presentage'
    xlim=(0, 300),
)

## 2. RT Scatter Plot Comparison

These plots compare session mean reaction times across different trial types, showing consistency and relationships between GO, Continue, and Error Stop RTs.

In [20]:
# The RT scatter plots were built during processing; gather them per monkey
rt_scatter_plots = {}

for monkey, results in monkey_results.items():
    if not results or 'rt_scatter_plot' not in results:
        print(f"❌ No RT scatter plot available for {monkey.title()}")
        continue
    rt_scatter_plots[monkey] = results['rt_scatter_plot']
    print(f"✓ RT scatter plot available for {monkey.title()}")

print(f"\nReady to display {len(rt_scatter_plots)} RT scatter plots")

✓ RT scatter plot available for Yasmin
✓ RT scatter plot available for Fiona

Ready to display 2 RT scatter plots


In [26]:
# Color Yasmin's Continue_RT and Error_stop_RT scatter elements explicitly
yasmin_rt_scatter = rt_scatter_plots['yasmin'].opts(
    opts.Scatter('Scatter.Continue_RT_yasmin', color='blue'),
    opts.Scatter('Scatter.Error_stop_RT_yasmin', color='red'),
)

# Overlay both monkeys and style the combined figure; the last expression is
# what the notebook renders.
rt_scatter_overlay = yasmin_rt_scatter * rt_scatter_plots['fiona']
rt_scatter_overlay.opts(
    legend_position='bottom_right',
    title='RT Scatter Plot Comparison',
)


## 3. RT Distribution Comparison

These plots replicate Figure 1d, showing the distribution of reaction times for successful continue trials and failed stop trials across different signal delays.

In [22]:
# The RT distribution plots were built during processing; gather them per monkey
rt_dist_plots = {}

for monkey, results in monkey_results.items():
    if not results or 'rt_distribution_plot' not in results:
        print(f"❌ No RT distribution plot available for {monkey.title()}")
        continue
    rt_dist_plots[monkey] = results['rt_distribution_plot']
    print(f"✓ RT distribution plot available for {monkey.title()}")

print(f"\nReady to display {len(rt_dist_plots)} RT distribution plots")

✓ RT distribution plot available for Yasmin
✓ RT distribution plot available for Fiona

Ready to display 2 RT distribution plots


In [23]:
# Show Yasmin's RT distributions, or report that none were produced
if 'yasmin' not in rt_dist_plots:
    print("❌ Yasmin's RT distribution plot not available")
else:
    print("YASMIN - RT Distributions (Figure 1g)")
    yasmin_rt_dist = rt_dist_plots['yasmin'].opts(xlim=(0,550))
    display(yasmin_rt_dist)

YASMIN - RT Distributions (Figure 1g)


In [24]:
# Show Fiona's RT distributions, or report that none were produced
if 'fiona' not in rt_dist_plots:
    print("❌ Fiona's RT distribution plot not available")
else:
    print("FIONA - RT Distributions (Figure 1d)")
    fiona_rt_dist = rt_dist_plots['fiona'].opts(xlim=(0,550))
    display(fiona_rt_dist)

FIONA - RT Distributions (Figure 1d)
