# Imports

In [72]:
from pathlib import Path
from datetime import datetime
import pandas as pd
from collections import defaultdict
import numpy as np

# Global settings

In [None]:
# Directory under which the experiment folder lives.
ROOT_PATH = Path(r'Z:\Ella')

# Name of the experiment folder inside ROOT_PATH.
EXPERIMENT_NAME = "5_sec_const"

# Stems of the per-fish files to read; each one is loaded as "<stem>.npy".
FILES_TO_LOAD = [
    "angle_timeseries",
    "opto_rec",
    "speed_rec",
    "swim_info_after",
    "swim_info_before",
    "swim_info_during",
]

# Data loading

In [71]:
# Walk ROOT_PATH/EXPERIMENT_NAME/<YYYYMMDD>/<fish dir>/ and load every file
# listed in FILES_TO_LOAD for each fish.
#
# Produces:
#   records  - one metadata dict per fish directory (path, date, fish_id, group_name)
#   exp_dict - nested mapping: experiment name -> group name -> fish id -> {file stem: array}
#   df       - DataFrame built from `records`
records = []
exp_dict = {
    EXPERIMENT_NAME: defaultdict(dict)
}
experiment_path = ROOT_PATH / EXPERIMENT_NAME

# iterdir() yields entries in arbitrary order; sorting keeps the row order of
# `df` and the key order of `exp_dict` stable across runs.
for date_dir_name in sorted(experiment_path.iterdir()):
    if not date_dir_name.is_dir():
        continue  # ignore stray files sitting next to the date directories

    # Date directories are named YYYYMMDD; any other name raises ValueError here.
    date = datetime.strptime(date_dir_name.stem, "%Y%m%d").date()

    # iterdir() already returns full paths, so the date directory is iterated directly.
    for fish_dir_name_path in sorted(date_dir_name.iterdir()):
        if not fish_dir_name_path.is_dir():
            continue

        # Strip the trailing "_Exp00" and "_<EXPERIMENT_NAME>" parts, which
        # leaves "<fish_id>_<group_name>".
        full_fish_name = fish_dir_name_path.stem.removesuffix("_Exp00").removesuffix("_" + EXPERIMENT_NAME)
        fish_id, *group_parts = full_fish_name.split("_")
        group_name = "_".join(group_parts)

        records.append({
            "path": str(fish_dir_name_path),
            "date": date,
            "fish_id": fish_id,
            "group_name": group_name,
        })

        # NOTE(review): entries are keyed by (group_name, fish_id) only, so the
        # same fish id in the same group on two dates would overwrite the
        # earlier entry - confirm ids never repeat across dates.
        # SECURITY: allow_pickle=True unpickles arbitrary objects; only load
        # files from a trusted source.
        exp_dict[EXPERIMENT_NAME][group_name][fish_id] = {
            key: np.load(fish_dir_name_path / f"{key}.npy", allow_pickle=True)
            for key in FILES_TO_LOAD
        }

df = pd.DataFrame.from_records(records)

# Raw data exploration

## Data structure

In [84]:
# Top level of the nested dict: experiment names.
exp_dict.keys()

dict_keys(['5_sec_const'])

In [86]:
exp_dict["5_sec_const"].keys()  # second level: group names

dict_keys(['narp_neg', 'narp_pos'])

In [87]:
exp_dict["5_sec_const"]["narp_neg"].keys()  # third level: fish ids (unique only inside a group)

dict_keys(['F1', 'F2', 'F3', 'F4', 'F5', 'F6'])

In [89]:
exp_dict["5_sec_const"]["narp_neg"]["F1"].keys()  # fourth level: one array per loaded .npy file

dict_keys(['angle_timeseries', 'opto_rec', 'speed_rec', 'swim_info_after', 'swim_info_before', 'swim_info_during'])

## Explore each `.npy` file

### `angle_timeseries`

In [75]:
# Shape of F1's angle_timeseries array (1-D in the recorded output).
exp_dict["5_sec_const"]["narp_neg"]["F1"]["angle_timeseries"].shape

(479816,)

### `opto_rec`

In [77]:
# Shape of F1's opto_rec array (1-D in the recorded output).
exp_dict["5_sec_const"]["narp_neg"]["F1"]["opto_rec"].shape

(479816,)

### `speed_rec`

In [78]:
# Shape of F1's speed_rec array (1-D in the recorded output).
exp_dict["5_sec_const"]["narp_neg"]["F1"]["speed_rec"].shape

(479816,)

### `swim_info_after`

In [90]:
# The loaded array wraps a single Python dict; .item() extracts it.
# NOTE(review): the recorded output shows a 0. entry at the same position in
# both startSwimInd and endSwimInd - possibly a placeholder for a missing
# bout; confirm before using these indices.
exp_dict["5_sec_const"]["narp_neg"]["F1"]["swim_info_after"].item()

{'startSwimInd': array([  6791.,  15905.,  16201.,  19685.,  20058.,  31021.,  31297.,
         35132.,  36923.,  37818.,  39279.,  40702.,  42629.,      0.,
         52348.,  52773.,  57099.,  64090.,  64597.,  69838.,  72065.,
         78283.,  87706.,  92056., 114122., 114463., 155798., 156166.,
        156612., 165539., 188655., 189060., 220471., 220951., 221286.,
        222389., 229795., 230158., 230653., 235729., 236052.]),
 'endSwimInd': array([  6817.,  15929.,  16270.,  19712.,  20069.,  31094.,  31385.,
         35222.,  37022.,  37891.,  39355.,  40805.,  42837.,      0.,
         52359.,  52833.,  57149.,  64130.,  64639.,  69914.,  72119.,
         78539.,  87729.,  92118., 114243., 114971., 155935., 156288.,
        156726., 165604., 188694., 189231., 220674., 221050., 221450.,
        222431., 229847., 230234., 230699., 235824., 236111.]),
 'swim_duration': array([  65. ,   60. ,  172.5,   67.5,   27.5,  182.5,  220. ,  225. ,
         247.5,  182.5,  190. ,  257.5,  52

In [114]:
# Print the shape of every array stored in F1's swim_info_after dict.
swim_info_after = exp_dict["5_sec_const"]["narp_neg"]["F1"]["swim_info_after"].item()
keys_lst = list(swim_info_after)
for k in keys_lst:
    print(k, "     shape: ", swim_info_after[k].shape)

startSwimInd      shape:  (41,)
endSwimInd      shape:  (41,)
swim_duration      shape:  (41,)
swim_bout_num      shape:  (41,)
swim_max_signal      shape:  (41,)
swim_mean_signal      shape:  (41,)
swim_LR_balance      shape:  (41,)
swim_frequency      shape:  (41,)


### `swim_info_before`

In [115]:
# The loaded array wraps a single Python dict; .item() extracts it.
exp_dict["5_sec_const"]["narp_neg"]["F1"]["swim_info_before"].item()

{'startSwimInd': array([2.90000e+01, 2.38700e+03, 3.20600e+03, 5.96300e+03, 7.30800e+03,
        8.02200e+03, 8.67300e+03, 9.51500e+03, 1.02350e+04, 1.14260e+04,
        1.81010e+04, 1.85140e+04, 2.00930e+04, 2.05180e+04, 2.07300e+04,
        2.13800e+04, 2.34400e+04, 2.38710e+04, 2.41090e+04, 2.53330e+04,
        2.60190e+04, 2.68110e+04, 2.76050e+04, 2.88430e+04, 2.99160e+04,
        3.13860e+04, 3.22970e+04, 3.34640e+04, 3.39510e+04, 3.98000e+04,
        4.01310e+04, 4.04530e+04, 4.21220e+04, 4.25030e+04, 4.42730e+04,
        4.51430e+04, 4.69920e+04, 4.81580e+04, 4.91130e+04, 4.99080e+04,
        5.06970e+04, 5.16830e+04, 5.33450e+04, 5.57360e+04, 5.63780e+04,
        5.68420e+04, 5.84870e+04, 6.03980e+04, 6.05340e+04, 6.73390e+04,
        6.97650e+04, 7.00600e+04, 7.19150e+04, 7.23360e+04, 7.36950e+04,
        7.45040e+04, 7.68330e+04, 7.75390e+04, 7.97230e+04, 8.00580e+04,
        8.13550e+04, 8.23960e+04, 8.31190e+04, 8.47740e+04, 8.84130e+04,
        9.63720e+04, 9.66320e+04, 9

In [116]:
# Print the shape of every array stored in F1's swim_info_before dict.
swim_info_before = exp_dict["5_sec_const"]["narp_neg"]["F1"]["swim_info_before"].item()
keys_lst = list(swim_info_before)
for k in keys_lst:
    print(k, "     shape: ", swim_info_before[k].shape)

startSwimInd      shape:  (131,)
endSwimInd      shape:  (131,)
swim_duration      shape:  (131,)
swim_bout_num      shape:  (131,)
swim_max_signal      shape:  (131,)
swim_mean_signal      shape:  (131,)
swim_LR_balance      shape:  (131,)
swim_frequency      shape:  (131,)


### `swim_info_during`

In [117]:
# The loaded array wraps a single Python dict; .item() extracts it.
# In the recorded output swim_duration equals 2.5 * (endSwimInd - startSwimInd),
# e.g. (277 - 140) * 2.5 = 342.5; the unit is not shown here.
exp_dict["5_sec_const"]["narp_neg"]["F1"]["swim_info_during"].item()

{'startSwimInd': array([140., 493., 766., 971.]),
 'endSwimInd': array([ 277.,  537.,  856., 1265.]),
 'swim_duration': array([342.5, 110. , 225. , 735. ]),
 'swim_bout_num': array([ 46.,  24.,  51., 145.]),
 'swim_max_signal': array([3.05871186, 0.38568374, 0.44378919, 0.52889745]),
 'swim_mean_signal': array([0.23964121, 0.16595304, 0.1520898 , 0.16234459]),
 'swim_LR_balance': array([0.55489856, 0.04040048, 0.01047377, 0.09491629]),
 'swim_frequency': array([ 68.70229008, 112.19512195, 116.27906977,  98.63013699])}

In [119]:
# Print the shape of every array stored in F1's swim_info_during dict.
swim_info_during = exp_dict["5_sec_const"]["narp_neg"]["F1"]["swim_info_during"].item()
keys_lst = list(swim_info_during)
for k in keys_lst:
    print(k, "     shape: ", swim_info_during[k].shape)

startSwimInd      shape:  (4,)
endSwimInd      shape:  (4,)
swim_duration      shape:  (4,)
swim_bout_num      shape:  (4,)
swim_max_signal      shape:  (4,)
swim_mean_signal      shape:  (4,)
swim_LR_balance      shape:  (4,)
swim_frequency      shape:  (4,)
