In [1]:
import pandas as pd
import numpy as np
import holoviews as hv
import hvplot

import maestro_file
from pre_proc_helper import single_trial
from pprint import pprint
from pathlib import Path

from bokeh.io import output_notebook
from bokeh.plotting import figure, show

import re
from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor, as_completed
from tqdm import tqdm

output_notebook()



In [2]:
base_path = Path.cwd().parent / 'data' / 'fiona_sst'
dirs = [d for d in base_path.iterdir() if d.is_dir()]
dirs.remove(base_path / 'trial_names')

files = []
for dir_path in dirs:
    files += list(f for f in dir_path.iterdir() if f.is_file())

len(dirs), len(files), files[:3]

(88,
 126165,
 [PosixPath('/home/barak/Projects/population_analysis/data/fiona_sst/fi210824/fi210824a.0744'),
  PosixPath('/home/barak/Projects/population_analysis/data/fiona_sst/fi210824/fi210824a.0954'),
  PosixPath('/home/barak/Projects/population_analysis/data/fiona_sst/fi210824/fi210824a.0687')])

In [3]:
def get_file_data(file_path: Path) -> dict:
    single_trial_instance = single_trial(file_path)
    if single_trial_instance.set != 'CSST':
        return None
    single_trial_instance.get_saccades()
    single_trial_instance.get_first_relevant_saccade()
    trial_row = single_trial_instance.data_dict
    del trial_row['file_path']
    return trial_row

data_dir = Path.cwd() / "data/fiona_sst/fi211109"
file_path = data_dir / "fi211109a.2040"
test_data = get_file_data(file_path)
del test_data['neural_data']
pprint(test_data.keys())

dict_keys(['blinks', 'dir', 'direction', 'filename', 'first_relevant_saccade', 'go_cue', 'hPos', 'hVel', 'saccades', 'segs_durations', 'segs_times', 'set', 'speed', 'ssd_len', 'ssd_number', 'stop_cue', 'trail_number', 'trail_session', 'trial_failed', 'trial_length', 'trial_name', 'type', 'vPos', 'vVel'])


In [4]:
len(files), files[:3]

(126165,
 [PosixPath('/home/barak/Projects/population_analysis/data/fiona_sst/fi210824/fi210824a.0744'),
  PosixPath('/home/barak/Projects/population_analysis/data/fiona_sst/fi210824/fi210824a.0954'),
  PosixPath('/home/barak/Projects/population_analysis/data/fiona_sst/fi210824/fi210824a.0687')])

In [5]:
def process_file(file):
    try:
        file_data = get_file_data(file)
        if file_data == {}:
            return None
        else:
            return file_data
    except Exception as e:
        print(f'Error processing file {file}: {e}')
        return None

def analyse_files_async(files_list):
    results = []
    with ThreadPoolExecutor() as executor:
        # Submit all tasks to the executor
        futures = {
            executor.submit(process_file, file): 
            file for file in files_list
        }
        
        # Use tqdm to display the progress bar
        for future in tqdm(as_completed(futures), total=len(files_list), desc="Loading files"):
            results.append(future.result())
    
    return results

results = analyse_files_async(files)
# Filter out None results
results = [result for result in results if result is not None]
print(f"Processed {len(results)} valid files out of {len(files)}")
# Create a DataFrame from the results
files_df = pd.DataFrame(results)
files_df

Loading files: 100%|██████████| 126165/126165 [24:38<00:00, 85.35it/s] 


Processed 110358 valid files out of 126165


Unnamed: 0,blinks,dir,direction,filename,first_relevant_saccade,go_cue,hPos,hVel,neural_data,saccades,...,ssd_number,stop_cue,trail_number,trail_session,trial_failed,trial_length,trial_name,type,vPos,vVel
0,,180,L,fi210824a.1272,"[1340, 1413]",1074,"[-11.0, -11.0, -11.0, -11.0, -11.0, -10.975, -...","[-1.6540165034091119, -1.6540165034091119, -3....","{1: [1254.93], 5: [31.33, 813.0, 827.33, 914.9...","[[136, 217], [702, 749], [1340, 1413], [1383, ...",...,,,1272,fi210824a,False,2225,GO_L,GO,"[-1.475, -1.475, -1.475, -1.475, -1.475, -1.47...","[-2.389134949368717, -2.389134949368717, -2.66..."
1,,0,R,fi210824a.0935,"[1248, 1321]",1054,"[11.275, 11.275, 11.25, 11.275, 11.275, 11.275...","[2.7566941723485194, 2.7566941723485194, 3.216...","{1: [415.6], 2: [892.9, 1326.28, 1338.55, 1832...","[[155, 237], [1248, 1321], [1296, 1334]]",...,3.0,1234.0,0935,fi210824a,False,2205,CONT_R_SSD3,CONT,"[-0.55, -0.55, -0.55, -0.55, -0.55, -0.55, -0....","[-0.5513388344697039, -0.5513388344697039, -1...."
2,,0,R,fi210824a.0478,"[1367, 1424]",970,"[2.975, 2.975, 2.975, 2.975, 2.975, 2.9, 2.9, ...","[-15.069928142171907, -15.069928142171907, -13...","{1: [1093.62, 1552.1699999999998], 2: [164.57,...","[[0, 96], [70, 141], [402, 460], [1367, 1424]]",...,1.0,1054.0,0478,fi210824a,False,1754,STOP_R_SSD1,STOP,"[-18.225, -18.225, -17.2, -16.65, -16.65, -15....","[188.098432359914, 188.098432359914, 188.09843..."
3,"[179, 291]",180,L,fi210824a.1313,"[1152, 1226]",1016,"[11.575, 11.575, 11.55, 11.55, 11.55, 11.575, ...","[-1.286457280429309, -1.286457280429309, -2.29...","{0: [537.97, 1364.45], 3: [895.95, 1309.28], 4...","[[132, 208], [271, 413], [388, 456], [648, 711...",...,2.0,1148.0,1313,fi210824a,True,1849,STOP_L_SSD2,STOP,"[-0.775, -0.775, -0.775, -0.775, -0.775, -0.75...","[-0.8270082517045559, -0.8270082517045559, -2...."
4,,180,L,fi210824a.1067,"[1417, 1496]",1046,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.025, -0.025,...","[1.6540165034091119, 1.6540165034091119, 1.654...","{1: [493.8], 4: [924.67], 5: [358.08, 976.1800...","[[619, 659], [1417, 1496]]",...,2.0,1178.0,1067,fi210824a,False,2197,CONT_L_SSD2,CONT,"[0.9, 0.9, 0.9, 0.875, 0.875, 0.85, 0.85, 0.85...","[-0.09188980574495066, -0.09188980574495066, -..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
110353,,0,R,fi211020a.0265,"[1167, 1234]",943,"[-10.925, -10.925, -10.9, -10.9, -10.9, -10.9,...","[5.053939315972286, 5.053939315972286, 3.67559...","{0: [7.38, 332.09, 449.40999999999997, 862.96,...","[[150, 233], [1167, 1234], [1211, 1252]]",...,3.0,1111.0,0265,fi211020a,True,1812,STOP_R_SSD3,STOP,"[-1.2, -1.2, -1.2, -1.225, -1.225, -1.225, -1....","[1.3783470861742597, 1.3783470861742597, 0.091..."
110354,,0,R,fi211020a.0230,"[1213, 1285]",943,"[-9.925, -9.925, -9.925, -9.925, -9.95, -9.95,...","[-0.09188980574495066, -0.09188980574495066, -...","{0: [1937.51, 2065.19, 2073.27, 2083.87, 2138....","[[424, 504], [796, 857], [1213, 1285], [1892, ...",...,,,0230,fi211020a,False,2094,GO_R,GO,"[-1.35, -1.35, -1.45, -1.45, -1.35, -1.35, -1....","[-1.8377961148990132, -1.8377961148990132, -3...."
110355,,0,R,fi211020a.0605,"[1284, 1355]",1055,"[-11.175, -11.175, -11.175, -11.2, -11.2, -11....","[0.9188980574495066, 0.9188980574495066, 1.837...","{0: [640.93, 648.13, 737.18, 1168.99, 1245.110...","[[186, 262], [1050, 1110], [1284, 1355]]",...,1.0,1103.0,0605,fi211020a,False,2206,CONT_R_SSD1,CONT,"[-0.025, -0.025, -0.025, -0.025, -0.025, -0.02...","[-3.1242533953283225, -3.1242533953283225, -2...."
110356,,180,L,fi211020a.0554,"[1045, 1103]",1037,"[11.1, 11.1, 11.15, 11.15, 11.15, 11.15, 11.15...","[-2.2972451436237664, -2.2972451436237664, -0....","{0: [284.81, 948.1600000000001, 1068.51, 1082....","[[216, 298], [1045, 1103], [1252, 1325]]",...,,,0554,fi211020a,False,2188,GO_L,GO,"[-0.425, -0.425, -0.425, -0.425, -0.425, -0.42...","[-0.9188980574495066, -0.9188980574495066, -0...."


In [6]:
files_df.to_pickle(base_path.parent / 'csst_trials_pkls' /f'fiona_csst_trials_df.pkl')