In [9]:
import pandas as pd
import numpy as np
import holoviews as hv
import hvplot

import maestro_file
from pre_proc_helper import single_trial
from pprint import pprint
from pathlib import Path

from bokeh.io import output_notebook
from bokeh.plotting import figure, show

import re
from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor, as_completed
from tqdm import tqdm

output_notebook()



In [10]:
base_path = Path.cwd().parent / 'data' / 'fiona_sst'
dirs = [d for d in base_path.iterdir() if d.is_dir()]
dirs

[PosixPath('/Users/barak/Projects/population_analysis/data/fiona_sst/fi211109')]

In [11]:
base_path = Path.cwd().parent / 'data' / 'fiona_sst'
dirs = [d for d in base_path.iterdir() if d.is_dir()]

files = []
for dir_path in dirs:
    files += list(f for f in dir_path.iterdir() if f.is_file())

len(dirs), len(files), files[:3]

(1,
 2040,
 [PosixPath('/Users/barak/Projects/population_analysis/data/fiona_sst/fi211109/fi211109a.0871'),
  PosixPath('/Users/barak/Projects/population_analysis/data/fiona_sst/fi211109/fi211109a.0429'),
  PosixPath('/Users/barak/Projects/population_analysis/data/fiona_sst/fi211109/fi211109a.1359')])

In [12]:
def get_file_data(file_path: Path) -> dict:
    single_trial_instance = single_trial(file_path)
    if single_trial_instance.set != 'CSST':
        return None
    single_trial_instance.get_saccades()
    single_trial_instance.get_first_relevant_saccade()
    trial_row = single_trial_instance.data_dict
    del trial_row['file_path']
    return trial_row

data_dir = Path.cwd() / "data/fiona_sst/fi211109"
file_path = data_dir / "fi211109a.2040"
test_data = get_file_data(file_path)
del test_data['neural_data']
pprint(test_data.keys())

dict_keys(['blinks', 'dir', 'direction', 'filename', 'first_relevant_saccade', 'go_cue', 'hPos', 'hVel', 'saccades', 'segs_durations', 'segs_times', 'set', 'speed', 'ssd_len', 'ssd_number', 'stop_cue', 'trail_number', 'trail_session', 'trial_failed', 'trial_length', 'trial_name', 'type', 'vPos', 'vVel'])


In [13]:
behaviour_db_file_path = base_path / "SST_fiona_behaviour_db.xlsx"  
behaviour_df = pd.read_excel(behaviour_db_file_path)
behaviour_df = behaviour_df[behaviour_df['Task'] == 'CSST']
print(behaviour_df['Task'].value_counts())
csst_lst = set(behaviour_df['behaviour_session'].tolist())
print(f"Number of unique CSST sessions: {len(csst_lst)}")

Task
CSST    174
Name: count, dtype: int64
Number of unique CSST sessions: 83


In [None]:
def process_file(file):
    try:
        file_data = get_file_data(file)
        if file_data == {}:
            return None
        else:
            return file_data
    except Exception as e:
        print(f'Error processing file {file}: {e}')
        return None

def analyse_files_async(files_list):
    results = []
    with ThreadPoolExecutor() as executor:
        # Submit all tasks to the executor
        futures = {
            executor.submit(process_file, file): 
            file for file in files_list
        }
        
        # Use tqdm to display the progress bar
        for future in tqdm(as_completed(futures), total=len(files_list), desc="Loading files"):
            results.append(future.result())
    
    return results

results = analyse_files_async(files)
# Filter out None results
results = [result for result in results if result is not None]
print(f"Processed {len(results)} valid files out of {len(files)}")
# Create a DataFrame from the results
files_df = pd.DataFrame(results)
files_df

Loading files: 100%|██████████| 10/10 [00:00<00:00, 64527.75it/s]

Processed 9 valid files out of 2040





Unnamed: 0,blinks,dir,direction,filename,first_relevant_saccade,go_cue,hPos,hVel,neural_data,saccades,...,ssd_number,stop_cue,trail_number,trail_session,trial_failed,trial_length,trial_name,type,vPos,vVel
0,,180,L,fi211109a.0624,"[1284, 1362]",1072,"[11.625, 11.625, 11.625, 11.675, 11.675, 11.67...","[-1.8377961148990132, -1.8377961148990132, -3....","{0: [580.03, 768.48, 979.75, 996.15, 1158.18, ...","[[86, 162], [1052, 1110], [1284, 1362], [1749,...",...,,,624,fi211109a,False,2223,GO_L,GO,"[0.075, 0.075, 0.05, 0.025, 0.025, 0.075, 0.07...","[0.5513388344697039, 0.5513388344697039, -0.36..."
1,,0,R,fi211109a.1392,"[1091, 1167]",939,"[11.25, 11.25, 11.25, 11.175, 11.175, 11.225, ...","[4.686380092992484, 4.686380092992484, 5.78905...","{0: [843.71, 1716.44], 1: [94.29, 185.99, 191....","[[87, 164], [574, 629], [1091, 1167], [1567, 1...",...,,,1392,fi211109a,False,2090,GO_R,GO,"[-0.375, -0.375, -0.4, -0.4, -0.4, -0.475, -0....","[-4.962049510227335, -4.962049510227335, -2.11..."
2,"[310, 427]",180,L,fi211109a.0871,"[1548, 1585]",932,"[11.7, 11.7, 11.7, 11.7, 11.7, 11.7, 11.7, 11....","[1.0107878631944571, 1.0107878631944571, 0.275...","{0: [790.45, 864.4000000000001, 1538.15, 1976....","[[19, 105], [270, 308], [275, 342], [407, 544]...",...,1.0,980.0,871,fi211109a,False,1680,STOP_L_SSD1,STOP,"[0.025, 0.025, 0.025, 0.025, 0.075, 0.075, 0.0...","[-1.1945674746843584, -1.1945674746843584, -1...."
3,,180,L,fi211109a.0416,"[1113, 1188]",977,"[10.975, 10.975, 10.975, 10.975, 10.975, 10.97...","[-1.9296859206439638, -2.4810247551136677, -2....","{0: [431.7, 438.37, 453.87, 588.87, 670.82, 15...","[[161, 233], [203, 239], [650, 710], [1113, 11...",...,3.0,1145.0,416,fi211109a,False,2128,CONT_L_SSD3,CONT,"[-0.35, -0.35, -0.35, -0.35, -0.4, -0.45, -0.4...","[0.18377961148990132, -0.8270082517045559, -0...."
4,,180,L,fi211109a.0429,"[1319, 1393]",1088,"[-10.825, -10.825, -10.8, -10.825, -10.825, -1...","[-0.36755922297980265, -0.36755922297980265, 0...","{0: [772.6, 1932.0, 1979.54, 2248.59, 3826.21]...","[[124, 203], [512, 563], [1319, 1393]]",...,,,429,fi211109a,False,2239,GO_L,GO,"[-1.525, -1.525, -1.525, -1.5, -1.5, -1.5, -1....","[-3.951261647032878, -3.951261647032878, -3.95..."
5,,0,R,fi211109a.0240,"[1172, 1243]",1037,"[-11.05, -11.05, -11.05, -11.075, -11.075, -11...","[3.951261647032878, 3.951261647032878, 4.22693...","{3: [1456.53, 1460.46, 1465.46, 2127.88, 2133....","[[80, 157], [748, 804], [1172, 1243]]",...,,,240,fi211109a,False,2188,GO_R,GO,"[-1.025, -1.025, -1.025, -0.95, -0.95, -0.975,...","[-0.36755922297980265, -0.36755922297980265, -..."
6,,180,L,fi211109a.1359,"[1109, 1181]",913,"[-10.75, -10.75, -10.75, -10.75, -10.75, -10.7...","[-3.3080330068182238, -3.3080330068182238, -1....","{0: [397.01, 778.11, 799.1800000000001, 805.36...","[[114, 200], [483, 543], [1109, 1181], [1623, ...",...,,,1359,fi211109a,False,2064,GO_L,GO,"[-0.95, -0.95, -0.95, -0.975, -0.975, -0.975, ...","[0.0, 0.0, -2.7566941723485194, -4.68638009299..."
7,,0,R,fi211109a.1957,"[1434, 1511]",1067,"[-10.55, -10.55, -10.55, -10.65, -10.65, -10.6...","[-2.2053553378788155, -2.2053553378788155, -1....","{0: [1006.44, 1290.79, 1394.49, 1496.39, 1854....","[[252, 332], [749, 807], [1434, 1511]]",...,,,1957,fi211109a,False,2218,GO_R,GO,"[-1.2, -1.2, -1.2, -1.175, -1.175, -1.175, -1....","[-0.09188980574495066, -0.09188980574495066, -..."
8,"[0, 27]",0,R,fi211109a.0885,"[1599, 1652]",1095,"[7.175, 7.175, 7.175, 7.175, 7.175, 7.175, 7.1...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","{0: [1283.97, 1475.67, 1580.66, 1638.03, 1836....","[[7, 140], [111, 177], [939, 996], [1599, 1652]]",...,2.0,1203.0,885,fi211109a,False,1903,STOP_R_SSD2,STOP,"[-27.075, -27.075, -27.075, -27.075, -27.075, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."


In [None]:
files_df.to_pickle(base_path / f'fiona_csst_{files_df['trail_session'][0]}_trials_df.pkl')