In [1]:
import pandas as pd
import numpy as np
import holoviews as hv
import hvplot

import maestro_file
from pre_proc_helper import single_trial
from pprint import pprint
from pathlib import Path

from bokeh.io import output_notebook
from bokeh.plotting import figure, show

import re
from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor, as_completed
from tqdm import tqdm

output_notebook()



In [2]:
# Root folder of the recordings, located relative to the notebook's parent directory.
base_path = Path.cwd().parent.joinpath('data', 'fiona_sst')
# Keep only the sub-directories found directly under base_path.
dirs = [entry for entry in base_path.iterdir() if entry.is_dir()]
dirs

[PosixPath('/Users/barak/Projects/population_analysis/data/fiona_sst/fi211109')]

In [3]:
# Recomputed here so this cell does not depend on the previous one having run.
base_path = Path.cwd().parent.joinpath('data', 'fiona_sst')
dirs = [entry for entry in base_path.iterdir() if entry.is_dir()]

# Gather every regular file found directly inside each directory in dirs.
files = []
for dir_path in dirs:
    files.extend(entry for entry in dir_path.iterdir() if entry.is_file())

# Quick summary: directory count, file count and a peek at the first three files.
len(dirs), len(files), files[:3]

(1,
 2040,
 [PosixPath('/Users/barak/Projects/population_analysis/data/fiona_sst/fi211109/fi211109a.0871'),
  PosixPath('/Users/barak/Projects/population_analysis/data/fiona_sst/fi211109/fi211109a.0429'),
  PosixPath('/Users/barak/Projects/population_analysis/data/fiona_sst/fi211109/fi211109a.1359')])

In [4]:
def get_file_data(file_path: Path) -> dict | None:
    """Load one trial file and return its data as a row-like dict.

    Builds a ``single_trial`` from ``file_path``, calls ``get_saccades()`` and
    ``get_first_relevant_saccade()`` on it, and returns the trial's
    ``data_dict`` with the 'file_path' entry removed.

    Parameters
    ----------
    file_path : Path
        Path of the trial file to load.

    Returns
    -------
    dict | None
        The trial's ``data_dict`` (the same object the trial instance holds,
        not a copy), or ``None`` when the trial's ``set`` is not 'CSST'.
    """
    trial = single_trial(file_path)
    if trial.set != 'CSST':
        # Only CSST trials are of interest; callers must handle the None.
        return None
    trial.get_saccades()
    trial.get_first_relevant_saccade()
    trial_row = trial.data_dict
    # pop() rather than del: a missing 'file_path' entry is not an error here.
    trial_row.pop('file_path', None)
    return trial_row

# Smoke test: load one known trial file and list the fields it provides.
# NOTE(review): this path is built from Path.cwd(), whereas base_path is built
# from Path.cwd().parent -- confirm which location is intended.
data_dir = Path.cwd() / "data/fiona_sst/fi211109"
file_path = data_dir / "fi211109a.2040"
test_data = get_file_data(file_path)
if test_data is None:
    # get_file_data returns None for trials whose set is not 'CSST'.
    print(f"{file_path.name} is not a CSST trial; no fields to show")
else:
    # Drop neural_data before listing the remaining keys.
    test_data.pop('neural_data', None)
    pprint(test_data.keys())

dict_keys(['blinks', 'dir', 'direction', 'filename', 'first_relevant_saccade', 'go_cue', 'hPos', 'hVel', 'saccades', 'segs_durations', 'segs_times', 'set', 'speed', 'ssd_len', 'ssd_number', 'stop_cue', 'trail_number', 'trail_session', 'trial_failed', 'trial_length', 'trial_name', 'type', 'vPos', 'vVel'])


In [5]:
# Read the behaviour spreadsheet and keep only the rows whose Task is 'CSST'.
behaviour_db_file_path = base_path.joinpath("SST_fiona_behaviour_db.xlsx")
behaviour_df = (
    pd.read_excel(behaviour_db_file_path)
    .loc[lambda frame: frame['Task'] == 'CSST']
)
print(behaviour_df['Task'].value_counts())
# Despite the "_lst" suffix this is a set: the distinct behaviour_session values.
csst_lst = set(behaviour_df['behaviour_session'].tolist())
print(f"Number of unique CSST sessions: {len(csst_lst)}")

Task
CSST    174
Name: count, dtype: int64
Number of unique CSST sessions: 83


In [6]:
def process_file(file):
    """Best-effort wrapper around ``get_file_data`` for a single file.

    Returns whatever ``get_file_data(file)`` returns, except that an empty
    dict is reported as ``None``. Any exception raised along the way is
    printed (not propagated) and ``None`` is returned instead.
    """
    try:
        trial_row = get_file_data(file)
        # An empty dict carries no trial data, so treat it as "no result".
        return None if trial_row == {} else trial_row
    except Exception as e:
        print(f'Error processing file {file}: {e}')
        return None

def analyse_files_async(files_list):
    """Run ``process_file`` on every file in a thread pool and collect the results.

    Parameters
    ----------
    files_list : iterable
        Files to process. Any iterable is accepted; it is materialised once
        so that its length can be used as the progress-bar total.

    Returns
    -------
    list
        One entry per input file, holding whatever ``process_file`` returned
        for it. Entries are appended in completion order, which is generally
        not the input order.
    """
    # Materialise up front so generators/iterators work and the total is known.
    files_list = list(files_list)
    results = []
    with ThreadPoolExecutor() as executor:
        # Submit all tasks to the executor before waiting on any of them.
        futures = [executor.submit(process_file, file) for file in files_list]

        # Use tqdm to display the progress bar; as_completed yields each
        # future as soon as it has finished.
        for future in tqdm(as_completed(futures), total=len(futures), desc="Loading files"):
            results.append(future.result())

    return results

# Load every file concurrently and keep only the entries that are not None.
results = [trial for trial in analyse_files_async(files) if trial is not None]
print(f"Processed {len(results)} valid files out of {len(files)}")
# One row per retained trial, one column per key of the trial dicts.
files_df = pd.DataFrame(results)
files_df

Loading files: 100%|██████████| 2040/2040 [00:25<00:00, 78.96it/s] 

Processed 1837 valid files out of 2040





Unnamed: 0,blinks,dir,direction,filename,first_relevant_saccade,go_cue,hPos,hVel,neural_data,saccades,...,ssd_number,stop_cue,trail_number,trail_session,trial_failed,trial_length,trial_name,type,vPos,vVel
0,"[310, 427]",180,L,fi211109a.0871,"[1548, 1585]",932,"[11.7, 11.7, 11.7, 11.7, 11.7, 11.7, 11.7, 11....","[1.0107878631944571, 1.0107878631944571, 0.275...","{0: [790.45, 864.4000000000001, 1538.15, 1976....","[[19, 105], [270, 308], [275, 342], [407, 544]...",...,1.0,980.0,0871,fi211109a,False,1680,STOP_L_SSD1,STOP,"[0.025, 0.025, 0.025, 0.025, 0.075, 0.075, 0.0...","[-1.1945674746843584, -1.1945674746843584, -1...."
1,,0,R,fi211109a.0612,"[1217, 1290]",998,"[-0.325, -0.325, -0.325, -0.3, -0.3, -0.3, -0....","[-3.491812618308125, -3.491812618308125, -3.03...","{0: [418.8, 571.98, 655.83, 677.12, 784.0, 148...","[[201, 266], [544, 598], [696, 753], [1217, 12...",...,,,0612,fi211109a,False,2149,GO_R,GO,"[-0.825, -0.825, -0.825, -0.7, -0.7, -0.7, -0....","[0.0, 0.0, 1.3783470861742597, 3.9512616470328..."
2,,0,R,fi211109a.0285,"[1234, 1308]",900,"[0.425, 0.425, 0.425, 0.425, 0.425, 0.425, 0.4...","[-0.8270082517045559, -0.8270082517045559, -1....","{0: [105.9, 1375.3200000000002, 1617.800000000...","[[877, 932], [1234, 1308]]",...,,,0285,fi211109a,False,2051,GO_R,GO,"[-0.575, -0.575, -0.525, -0.525, -0.525, -0.5,...","[-4.0431514527778285, -4.0431514527778285, -1...."
3,,180,L,fi211109a.1968,"[1179, 1256]",925,"[11.475, 11.475, 11.45, 11.45, 11.45, 11.475, ...","[2.389134949368717, 2.389134949368717, -1.6540...","{0: [509.34, 512.14, 515.04, 518.64, 747.54, 7...","[[104, 189], [474, 533], [1179, 1256], [1875, ...",...,,,1968,fi211109a,False,2076,GO_L,GO,"[0.325, 0.325, 0.3, 0.325, 0.325, 0.3, 0.3, 0....","[-3.5837024240530755, -3.5837024240530755, -2...."
4,,180,L,fi211109a.1539,"[1264, 1341]",992,"[-11.025, -11.025, -11.025, -11.025, -11.025, ...","[-1.7459063091540625, -1.7459063091540625, -2....","{0: [118.65, 1580.5700000000002, 1754.44, 1865...","[[84, 170], [348, 410], [1264, 1341], [1311, 1...",...,,,1539,fi211109a,False,2143,GO_L,GO,"[-1.05, -1.05, -1.05, -1.05, -1.05, -1.05, -1....","[0.27566941723485194, 0.27566941723485194, 0.3..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1832,"[60, 159]",180,L,fi211109a.0204,"[956, 1036]",918,"[10.9, 10.9, 10.9, 10.9, 10.9, 10.9, 10.925, 1...","[-3.399922812563174, -2.389134949368717, -2.38...","{1: [729.51, 827.9, 1028.52, 1456.69, 1564.17,...","[[23, 90], [139, 267], [235, 305], [956, 1036]...",...,1.0,966.0,0204,fi211109a,False,2069,CONT_L_SSD1,CONT,"[-0.425, -0.425, -0.425, -0.4, -0.4, -0.4, -0....","[-0.6432286402146545, 0.9188980574495066, 0.91..."
1833,,180,L,fi211109a.1574,"[1242, 1319]",981,"[-10.525, -10.525, -10.525, -10.55, -10.55, -1...","[0.6432286402146545, 0.6432286402146545, 0.0, ...","{0: [581.49, 591.28, 632.76, 637.56, 803.81, 8...","[[105, 188], [1242, 1319], [1651, 1706]]",...,,,1574,fi211109a,False,2132,GO_L,GO,"[-0.8, -0.8, -0.8, -0.775, -0.775, -0.775, -0....","[-0.6432286402146545, -0.6432286402146545, 1.1..."
1834,,180,L,fi211109a.1580,"[1116, 1190]",939,"[10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.075, 1...","[0.9188980574495066, 0.9188980574495066, -1.37...","{0: [533.16, 1057.46, 1078.03, 1269.71, 1278.8...","[[139, 220], [1116, 1190], [1157, 1198], [1961...",...,,,1580,fi211109a,False,2090,GO_L,GO,"[-0.325, -0.325, -0.325, -0.325, -0.325, -0.37...","[-2.664804366603569, -2.664804366603569, -2.38..."
1835,"[0, 20]",0,R,fi211109a.0660,"[1252, 1323]",1045,"[7.075, 7.075, 7.075, 7.075, 7.075, 7.075, 7.0...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","{0: [22.18, 173.0, 213.48, 236.25, 307.5, 531....","[[0, 144], [119, 192], [1252, 1323]]",...,4.0,1273.0,0660,fi211109a,True,1974,STOP_R_SSD4,STOP,"[-26.875, -26.875, -26.875, -26.875, -26.875, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."


In [7]:
# Name the pickle after the session of the first row in files_df.
# .iloc[0] selects by position, so it does not depend on the index containing
# the label 0. Taking the value out of the f-string also avoids reusing the
# same quote character inside it, which is a SyntaxError before Python 3.12.
session_name = files_df["trail_session"].iloc[0]
files_df.to_pickle(base_path / f"fiona_csst_{session_name}_trials_df.pkl")