In [1]:
import pandas as pd
import numpy as np
import holoviews as hv
import hvplot

import maestro_file
from pre_proc_helper import single_trial
from pprint import pprint
from pathlib import Path

from bokeh.io import output_notebook
from bokeh.plotting import figure, show

import re
from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor, as_completed
from tqdm import tqdm

output_notebook()



In [2]:
def get_dirs_list(monkey):
    base_path = Path.cwd().parent / 'data' / f'{monkey}_sst'
    dirs = [d for d in base_path.iterdir() if d.is_dir()]
    dirs.remove(base_path / 'trial_names')
    return dirs

def get_files_list(monkey):
    dirs = get_dirs_list(monkey)
    files = []
    for dir_path in dirs:
        files += list(f for f in dir_path.iterdir() if f.is_file())
    return files

# monkey = 'fiona'
monkey = 'yasmin'
base_path = Path.cwd().parent / 'data' / f'{monkey}_sst'
dirs = get_dirs_list(monkey)
files = get_files_list(monkey)
len(dirs), len(files), files[:3]

(57,
 129254,
 [PosixPath('/home/barak/Projects/population_analysis/data/yasmin_sst/ya230528/ya230528a.0088'),
  PosixPath('/home/barak/Projects/population_analysis/data/yasmin_sst/ya230528/ya230528a.0352'),
  PosixPath('/home/barak/Projects/population_analysis/data/yasmin_sst/ya230528/ya230528a.0848')])

In [3]:
def get_file_data(file_path: Path) -> dict:
    single_trial_instance = single_trial(file_path)
    if single_trial_instance.set != 'CSST':
        return None
    single_trial_instance.get_saccades()
    single_trial_instance.get_first_relevant_saccade()
    trial_row = single_trial_instance.data_dict
    del trial_row['file_path']
    return trial_row

# data_dir = Path.cwd() / "data/fiona_sst/fi211109"
# file_path = data_dir / "fi211109a.2040"
# data_dir = Path.cwd() / "data/yasmin_sst/ya230726"
# file_path = data_dir / "ya230726a.1172"
data_dir = Path.cwd() / "data/yasmin_sst/ya230725"
file_path = data_dir / "ya230725a.0108"
test_data = get_file_data(file_path)
del test_data['neural_data']
pprint(test_data['screen_rotation'])

45.0


In [4]:
len(files), files[:3]

(129254,
 [PosixPath('/home/barak/Projects/population_analysis/data/yasmin_sst/ya230528/ya230528a.0088'),
  PosixPath('/home/barak/Projects/population_analysis/data/yasmin_sst/ya230528/ya230528a.0352'),
  PosixPath('/home/barak/Projects/population_analysis/data/yasmin_sst/ya230528/ya230528a.0848')])

In [5]:
def process_file(file):
    try:
        file_data = get_file_data(file)
        if file_data == {}:
            return None
        else:
            return file_data
    except Exception as e:
        print(f'Error processing file {file}: {e}')
        return None

def analyse_files_async(files_list):
    results = []
    with ThreadPoolExecutor() as executor:
        # Submit all tasks to the executor
        futures = {
            executor.submit(process_file, file): 
            file for file in files_list
        }
        
        # Use tqdm to display the progress bar
        for future in tqdm(as_completed(futures), total=len(files_list), desc="Loading files"):
            results.append(future.result())
    
    return results

results = analyse_files_async(files)
# Filter out None results
results = [result for result in results if result is not None]
print(f"Processed {len(results)} valid files out of {len(files)}")
# Create a DataFrame from the results
files_df = pd.DataFrame(results)
files_df

Loading files: 100%|██████████| 129254/129254 [28:16<00:00, 76.18it/s] 


Processed 123178 valid files out of 129254


Unnamed: 0,blinks,dir,direction,filename,first_relevant_saccade,go_cue,hPos,hVel,neural_data,saccades,...,ssd_number,stop_cue,trail_number,trail_session,trial_failed,trial_length,trial_name,type,vPos,vVel
0,"[319, 367]",0,R,ya230528a.1029,,1318,"[0.025, 0.025, 0.025, 0.025, 0.025, 0.025, 0.0...","[1.8377961148990132, 1.8377961148990132, 1.929...","{1: [51.07, 141.87, 344.3], 2: [23.5, 261.77, ...","[[271, 308], [275, 342], [356, 405], [389, 532]]",...,4.0,1546.0,1029,ya230528a,False,2246,STOP_R_SSD4,STOP,"[0.125, 0.125, 0.125, 0.1, 0.1, 0.1, 0.1, 0.1,...","[1.6540165034091119, 1.6540165034091119, 1.654..."
1,"[83, 113]",0,R,ya230528a.1339,"[1690, 1766]",1577,"[-12.175, -12.175, -12.125, -12.1, -12.1, -12....","[0.0, 0.0, 1.8377961148990132, 4.2269310642677...","{1: [163.98, 773.08], 2: [4.95, 55.35, 101.43,...","[[40, 115], [93, 209], [476, 543], [1690, 1766]]",...,,,1339,ya230528a,False,2728,GO_R,GO,"[-1.025, -1.025, -1.0, -1.0, -1.0, -1.0, -1.0,...","[-3.8593718412879277, -3.8593718412879277, -3...."
2,,0,R,ya230528a.0741,"[1249, 1324]",1154,"[-12.175, -12.175, -12.175, -12.2, -12.2, -12....","[7.994413099810707, 7.994413099810707, 7.99441...","{0: [1210.17, 1266.8500000000001, 1269.3700000...","[[70, 156], [431, 508], [1249, 1324], [1859, 1...",...,,,0741,ya230528a,False,2305,GO_R,GO,"[-0.55, -0.55, -0.55, -0.5, -0.5, -0.45, -0.47...","[0.27566941723485194, 0.27566941723485194, 0.2..."
3,,180,L,ya230528a.1430,"[2094, 2145]",1645,"[2.325, 2.325, 2.325, 2.225, 2.225, 2.225, 2.0...","[-15.52937717089666, -15.52937717089666, -15.5...","{2: [89.72, 269.83000000000004, 374.4000000000...","[[0, 30], [24, 169], [2094, 2145]]",...,3.0,1813.0,1430,ya230528a,False,2513,STOP_L_SSD3,STOP,"[-22.075, -22.075, -22.075, -22.075, -22.075, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
4,,0,R,ya230528a.0500,"[1885, 1960]",1662,"[0.75, 0.75, 0.75, 0.75, 0.75, 0.75, 0.75, 0.7...","[-4.0431514527778285, -4.0431514527778285, -2....","{0: [1602.13], 1: [137.78, 155.03, 218.07, 237...","[[290, 461], [439, 511], [512, 601], [1885, 19...",...,,,0500,ya230528a,False,2813,GO_R,GO,"[0.5, 0.5, 0.5, 0.5, 0.45, 0.45, 0.45, 0.425, ...","[18.929299983459835, 18.929299983459835, 12.03..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
123173,,0,R,ya230719a.2431,"[1796, 1851]",1612,"[2.65, 2.65, 2.275, 2.275, 2.275, 1.9, 1.775, ...","[-96.20862661496334, -96.20862661496334, -115....","{0: [118.17, 806.6999999999999, 968.3199999999...","[[0, 189], [466, 558], [547, 582], [550, 585],...",...,1.0,1636.0,2431,ya230719a,False,2336,STOP_R_SSD1,STOP,"[-22.525, -22.525, -22.525, -22.525, -22.525, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
123174,,0,R,ya230719a.0735,"[1916, 1968]",1196,"[-9.275, -9.275, -9.3, -9.3, -9.3, -9.3, -9.25...","[5.237718927462187, 5.237718927462187, 4.04315...","{0: [1.38, 1056.8500000000001, 1222.45, 1418.2...","[[31, 143], [382, 448], [575, 629], [1916, 1968]]",...,4.0,1460.0,0735,ya230719a,False,2160,STOP_R_SSD4,STOP,"[7.925, 7.925, 7.925, 7.925, 7.95, 7.95, 7.925...","[-1.3783470861742597, -1.3783470861742597, -2...."
123175,"[79, 95]",180,L,ya230719a.0359,"[1400, 1474]",1271,"[-9.175, -9.175, -9.175, -9.175, -9.175, -9.17...","[-0.9188980574495066, -0.9188980574495066, 0.8...","{0: [257.65, 329.17999999999995, 540.73, 767.1...","[[29, 122], [95, 222], [469, 527], [1400, 1474...",...,,,0359,ya230719a,False,2422,GO_L,GO,"[8.175, 8.175, 8.15, 8.125, 8.125, 8.125, 8.12...","[2.8485839780934703, 2.8485839780934703, 0.459..."
123176,,0,R,ya230719a.2420,"[1713, 1788]",1361,"[-7.625, -7.625, -7.625, -7.65, -7.65, -7.625,...","[1.4702368919192106, 1.4702368919192106, 1.837...","{0: [1387.02, 1389.47, 1415.02, 1436.02, 1509....","[[53, 141], [334, 396], [365, 405], [630, 685]...",...,2.0,1457.0,2420,ya230719a,False,2512,CONT_R_SSD2,CONT,"[-6.975, -6.975, -7.0, -7.025, -7.025, -7.025,...","[-0.36755922297980265, -0.36755922297980265, -..."


In [None]:
files_df.to_pickle(base_path.parent / 'csst_trials_pkls' /f'{monkey}_csst_trials_df.pkl')

In [8]:
tmp = files_df.iloc[:20].copy()
tmp.to_excel(base_path.parent / 'csst_trials_pkls' /f'{monkey}_csst_trials_df_sample.xlsx')