In [17]:
import pandas as pd
import numpy as np
import holoviews as hv
import hvplot

import maestro_file
from pre_proc_helper import single_trial
from pprint import pprint
from pathlib import Path

from bokeh.io import output_notebook
from bokeh.plotting import figure, show

import re
from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor, as_completed
from tqdm import tqdm

# Configure Bokeh (imported above from bokeh.io) to render its output inside the notebook.
output_notebook()



In [18]:
# Data root, resolved relative to the parent of the current working directory.
base_path = Path.cwd().parent / 'data' / 'fiona_sst'

# Every sub-directory of base_path is scanned except 'trial_names'.
# Excluding it in the filter (instead of list.remove) avoids a ValueError
# when that folder does not exist.
dirs = [
    d for d in base_path.iterdir()
    if d.is_dir() and d.name != 'trial_names'
]

# Collect every regular file found directly inside the selected directories.
files = []
for dir_path in dirs:
    files.extend(f for f in dir_path.iterdir() if f.is_file())

len(dirs), len(files), files[:3]

(88,
 126165,
 [PosixPath('/home/barak/Projects/population_analysis/data/fiona_sst/fi210824/fi210824a.0744'),
  PosixPath('/home/barak/Projects/population_analysis/data/fiona_sst/fi210824/fi210824a.0954'),
  PosixPath('/home/barak/Projects/population_analysis/data/fiona_sst/fi210824/fi210824a.0687')])

In [19]:
def get_file_data(file_path: Path) -> "dict | None":
    """Load one trial file and return its data as a dict.

    A ``single_trial`` is built from ``file_path``. Trials whose ``set``
    attribute is not ``'CSST'`` are skipped. For CSST trials,
    ``get_saccades()`` and ``get_first_relevant_saccade()`` are called before
    the trial's ``data_dict`` is read (presumably they populate saccade
    fields in it; confirm in ``pre_proc_helper``).

    Args:
        file_path: Path of the trial file to load.

    Returns:
        A new dict holding every entry of the trial's ``data_dict`` except
        ``'file_path'``, or ``None`` when the trial is not part of the
        ``'CSST'`` set.
    """
    trial = single_trial(file_path)
    if trial.set != 'CSST':
        return None

    trial.get_saccades()
    trial.get_first_relevant_saccade()

    # Return a filtered copy rather than deleting from the trial's own
    # data_dict: the trial object is left untouched and a missing
    # 'file_path' key no longer raises KeyError.
    return {
        key: value
        for key, value in trial.data_dict.items()
        if key != 'file_path'
    }

# Smoke-test get_file_data on a single trial file.
# NOTE(review): this path is built from Path.cwd(), whereas base_path is built
# from Path.cwd().parent - confirm which data root is intended.
data_dir = Path.cwd() / "data/fiona_sst/fi211109"
file_path = data_dir / "fi211109a.2040"
test_data = get_file_data(file_path)
if test_data is None:
    # get_file_data returns None for trials outside the 'CSST' set; guard so
    # the key deletion below cannot fail with a TypeError.
    print(f'{file_path} is not a CSST trial')
else:
    # Drop neural_data before listing the remaining keys.
    test_data.pop('neural_data', None)
    pprint(test_data.keys())

dict_keys(['blinks', 'dir', 'direction', 'filename', 'first_relevant_saccade', 'go_cue', 'hPos', 'hVel', 'saccades', 'segs_durations', 'segs_times', 'set', 'speed', 'ssd_len', 'ssd_number', 'stop_cue', 'trail_number', 'trail_session', 'trial_failed', 'trial_length', 'trial_name', 'type', 'vPos', 'vVel'])


In [20]:
# Display how many files were collected and the first three paths.
len(files), files[:3]

(126165,
 [PosixPath('/home/barak/Projects/population_analysis/data/fiona_sst/fi210824/fi210824a.0744'),
  PosixPath('/home/barak/Projects/population_analysis/data/fiona_sst/fi210824/fi210824a.0954'),
  PosixPath('/home/barak/Projects/population_analysis/data/fiona_sst/fi210824/fi210824a.0687')])

In [None]:
def process_file(file):
    """Load one trial file without letting a failure propagate.

    Wraps ``get_file_data`` so that one bad file cannot abort a batch run.

    Args:
        file: Path of the trial file, passed straight to ``get_file_data``.

    Returns:
        Whatever ``get_file_data`` returned, except that an empty dict is
        reported as ``None``. ``None`` is also returned when loading raised
        an exception (the error is printed, not re-raised).
    """
    try:
        loaded = get_file_data(file)
        # An empty dict carries no trial information; report it as "no result".
        return None if loaded == {} else loaded
    except Exception as err:
        print(f'Error processing file {file}: {err}')
        return None

def analyse_files_async(files_list, max_workers=None):
    """Run ``process_file`` over many files on a thread pool.

    Args:
        files_list: Iterable of file paths. It is materialised into a list,
            so generators are accepted as well as sequences.
        max_workers: Optional thread count forwarded to
            ``ThreadPoolExecutor``; ``None`` keeps the executor's default.

    Returns:
        A list with one entry per input file, in the same order as
        ``files_list``. Each entry is the value returned by
        ``process_file`` for that file (``None`` for skipped or failed
        files).
    """
    files_list = list(files_list)
    # Pre-size the output so each result can be stored at its input position.
    # Appending in completion order would make the row order differ between
    # runs.
    results = [None] * len(files_list)

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        # Map each submitted future back to the index of its input file.
        future_to_index = {
            executor.submit(process_file, file): index
            for index, file in enumerate(files_list)
        }

        # tqdm advances as futures complete, regardless of submission order.
        for future in tqdm(
            as_completed(future_to_index),
            total=len(future_to_index),
            desc="Loading files",
        ):
            results[future_to_index[future]] = future.result()

    return results

# Load every discovered file and keep only the entries that produced a row
# (the loader yields None for skipped or failed files).
results = [
    trial_row
    for trial_row in analyse_files_async(files)
    if trial_row is not None
]
print(f"Processed {len(results)} valid files out of {len(files)}")

# One DataFrame row per retained trial dict.
files_df = pd.DataFrame(results)
files_df

Loading files: 100%|██████████| 10/10 [00:00<00:00, 109511.85it/s]

Processed 9 valid files out of 126165





Unnamed: 0,blinks,dir,direction,filename,first_relevant_saccade,go_cue,hPos,hVel,neural_data,saccades,...,ssd_number,stop_cue,trail_number,trail_session,trial_failed,trial_length,trial_name,type,vPos,vVel
0,,180,L,fi210824a.0163,"[1176, 1248]",1016,"[1.2, 1.2, 1.05, 1.05, 1.05, 1.05, 0.925, 0.9,...","[-29.58851744987411, -29.58851744987411, -33.5...","{0: [201.57, 1462.83], 1: [166.8, 325.58000000...","[[0, 88], [1176, 1248], [1222, 1259]]",...,,,163,fi210824a,False,2167,GO_L,GO,"[-5.825, -5.825, -5.275, -4.825, -3.725, -3.72...","[188.098432359914, 188.098432359914, 188.09843..."
1,,0,R,fi210824a.1553,,1073,"[-11.15, -11.15, -11.15, -11.175, -11.175, -11...","[-0.6432286402146545, -0.6432286402146545, -0....","{0: [207.23], 4: [481.2, 966.8], 5: [128.7, 78...","[[229, 314]]",...,4.0,1301.0,1553,fi210824a,False,2001,STOP_R_SSD4,STOP,"[-1.55, -1.55, -1.55, -1.55, -1.55, -1.55, -1....","[1.7459063091540625, 1.7459063091540625, -2.02..."
2,,0,R,fi210824a.1318,"[1306, 1376]",1077,"[1.325, 1.325, 1.325, 1.325, 1.325, 1.325, 1.3...","[-3.491812618308125, -3.491812618308125, -3.49...","{0: [801.95, 835.6800000000001], 3: [128.7, 21...","[[291, 351], [1005, 1067], [1306, 1376]]",...,,,1318,fi210824a,False,2228,GO_R,GO,"[1.425, 1.425, 1.425, 1.425, 1.425, 1.425, 1.4...","[-11.853784941098635, -11.853784941098635, -11..."
3,,180,L,fi210824a.1582,"[1190, 1264]",965,"[0.925, 0.925, 0.925, 0.925, 0.975, 0.975, 0.9...","[-0.4594490287247533, -0.4594490287247533, -1....","{0: [770.7, 1335.97, 1418.8, 1507.85], 1: [203...","[[712, 773], [1190, 1264]]",...,,,1582,fi210824a,False,2116,GO_L,GO,"[1.425, 1.425, 1.525, 1.525, 1.525, 1.525, 1.5...","[-5.237718927462187, -5.237718927462187, -1.83..."
4,"[0, 2]",0,R,fi210824a.0744,"[1347, 1419]",1070,"[6.85, 6.85, 1.875, 1.875, 1.875, 1.875, 2.1, ...","[0.0, 0.0, -124.23501736717328, -188.190322165...","{4: [2178.05], 5: [188.2, 697.4300000000001, 7...","[[0, 100], [67, 127], [297, 353], [1347, 1419]...",...,,,744,fi210824a,False,2221,GO_R,GO,"[-26.075, -26.075, -26.075, -26.075, -26.075, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
5,,0,R,fi210824a.1314,"[1319, 1391]",1007,"[-11.575, -11.575, -11.55, -11.5, -11.5, -11.5...","[-1.9296859206439638, -1.9296859206439638, -1....","{0: [88.88], 3: [399.33, 629.5799999999999], 5...","[[252, 334], [835, 892], [1319, 1391]]",...,,,1314,fi210824a,False,2158,GO_R,GO,"[-1.05, -1.05, -1.05, -1.025, -1.025, -1.025, ...","[-0.18377961148990132, -0.18377961148990132, -..."
6,"[70, 175]",180,L,fi210824a.1702,"[1354, 1428]",1061,"[-23.55, -23.55, -23.55, -23.45, -23.45, -23.4...","[0.0, 0.0, -0.6432286402146545, 4.318820870012...","{0: [1016.38, 1069.75, 1101.35, 1218.25], 1: [...","[[0, 103], [155, 211], [180, 307], [286, 363],...",...,2.0,1193.0,1702,fi210824a,False,2212,CONT_L_SSD2,CONT,"[-3.975, -3.975, -3.975, -3.95, -3.95, -3.85, ...","[14.426699501957252, 14.426699501957252, 13.78..."
7,,180,L,fi210824a.1686,"[1037, 1115]",930,"[-12.25, -12.25, -12.25, -12.25, -12.25, -12.2...","[2.2053553378788155, 2.2053553378788155, 2.481...","{0: [1323.9], 1: [1694.92], 4: [256.37], 5: [4...","[[216, 314], [1037, 1115], [1496, 1554]]",...,,,1686,fi210824a,False,2081,GO_L,GO,"[-0.9, -0.9, -0.975, -0.975, -0.975, -0.95, -0...","[2.8485839780934703, 2.8485839780934703, 1.286..."
8,,180,L,fi210824a.0954,"[1035, 1109]",936,"[12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12....","[0.4594490287247533, 0.4594490287247533, 0.367...","{0: [1722.6], 4: [574.4], 5: [85.28, 357.25, 5...","[[119, 205], [640, 702], [1035, 1109], [1078, ...",...,1.0,1020.0,954,fi210824a,False,2087,CONT_L_SSD1,CONT,"[-0.05, -0.05, -0.05, -0.05, -0.05, -0.05, -0....","[-4.962049510227335, -4.962049510227335, -3.30..."


In [25]:
# Persist the trials DataFrame next to the data root. The folder is created
# first because to_pickle does not create missing parent directories.
output_dir = base_path.parent / 'csst_trials_pkls'
output_dir.mkdir(parents=True, exist_ok=True)
files_df.to_pickle(output_dir / 'fiona_csst_trials_df.pkl')