In [None]:
%load_ext autoreload
%autoreload 1

import glob
import numpy as np
import pandas as pd
from collections import OrderedDict

import os

import matplotlib.pyplot as plt
%matplotlib widget

# --- Configuration -----------------------------------------------------------
# Glob pattern matching the processed .mat files to load.
processed_directory = './data/processed_mats/*.mat'
# processed_directory = '/Volumes/AnxietyBU/callbacks/processed/*.mat'

bird = "all_birds"  # just for file name
save_pickle_path = './data/data.pickle'  # set to None to skip the pickle export
save_csv_directory = "./data"  # set to None to skip the CSV export

# Any stimulus trial containing a call type NOT in this list is excluded.
# This includes unlabeled calls, which are stored as 'USV'!
acceptable_call_labels = ['Call', 'Stimulus']

# glob returns matches in arbitrary order; sort so files are loaded in the
# same order on every run.
files = sorted(glob.glob(processed_directory))

# or only specific files. Note: may mess up histograms, which may require data from >1 file
# files = [
#     './data/processed_mats/or60rd49-d1-20240425115050-Block1-PROCESSED.mat',
#     './data/processed_mats/or60rd49-d2-20240426114923-Block1-PROCESSED.mat'
# ]
files

In [None]:
%aimport utils.callbacks
from utils.callbacks import call_mat_stim_trial_loader
from utils.file import multi_index_from_dict

# Load every processed file and stack the results into four tables:
#   df                  - accepted stimulus trials
#   rejected_trials_all - rejected stimulus trials
#   call_types_all      - call-type table returned by the loader
#   calls_all           - individual calls
#
# Per-file frames are collected in lists and concatenated once after the loop,
# instead of re-concatenating the growing tables on every iteration.
stim_trial_frames = []
rejected_trial_frames = []
call_type_frames = []
call_frames = []


def _concat_rows(frames):
    """Stack `frames` row-wise; return an empty DataFrame when the list is empty."""
    return pd.concat(frames, axis='rows') if frames else pd.DataFrame()


for file in files:

    try:
        calls_df, stim_trials, rejected_trials, file_info, call_types = call_mat_stim_trial_loader(
            file,
            acceptable_call_labels=acceptable_call_labels,
            from_notmat=True,
            verbose=False,
        )
    except TypeError:
        # Best-effort loading: report the file and carry on with the rest.
        print(f"Failed to make dataframe for file: {file}")
        continue

    # TODO: make this a nicely editable parameter
    multi_index_info = OrderedDict()
    # multi_index_info['birdname'] = file_info['birdname']
    # multi_index_info['day'] = int(file_info['d'])
    # multi_index_info['block'] = int(file_info['block'])
    multi_index_info['file'] = os.path.split(file)[-1]  # file name without its directory

    # Tag each frame with the keys in multi_index_info (currently only 'file').
    # Presumably these become outer index levels in front of the frame's
    # existing index (keep_current_index=True) - confirm in utils.file.
    stim_trials = multi_index_from_dict(
        stim_trials,
        multi_index_info,
        keep_current_index=True,
    )
    stim_trial_frames.append(stim_trials)

    rejected_trials = multi_index_from_dict(
        rejected_trials,
        multi_index_info,
        keep_current_index=True,
    )
    rejected_trial_frames.append(rejected_trials)

    call_types = multi_index_from_dict(
        call_types,
        multi_index_info,
        keep_current_index=True,
    )
    call_type_frames.append(call_types)

    calls_df = multi_index_from_dict(
        calls_df,
        multi_index_info,
        keep_current_index=True,
    )
    call_frames.append(calls_df)

# Single concatenation per table; falls back to an empty DataFrame when no
# file was loaded (pd.concat raises on an empty list).
df = _concat_rows(stim_trial_frames)
rejected_trials_all = _concat_rows(rejected_trial_frames)
call_types_all = _concat_rows(call_type_frames)
calls_all = _concat_rows(call_frames)

print('Rejected trials:')
rejected_trials_all

In [None]:
# Explain how to read the table displayed below.
notice_lines = [
    "Call types in rejected trials.",
    "Label `USV` means an accepted call was not given a label.",
    "Go back to DeepSqueak & fix ths.",
]
print("\n".join(notice_lines))

# Select the call-type rows whose index matches the rejected trials.
rejected_index = rejected_trials_all.index
rejected_trial_call_types = call_types_all.loc[rejected_index]
rejected_trial_call_types
# TODO: add stim index to rej trial type df (is this the first stim?)

# # see only blocks with a specific call type
#
# label = 'USV'
# label = 'Noise'
# call_types_all.loc[~np.isnan(call_types_all.loc[:, label])]

In [None]:
# Order the trials table by its index, then display it.
df = df.sort_index()
df

In [None]:
# Unique values of the outermost index level of df, in order of first
# appearance. This replaces list(set(...)): set iteration order for strings
# can differ between Python processes, so the list (which is also written to
# the pickle) was not reproducible from run to run.
# NOTE(review): with the current loading cell the only key added to the index
# is 'file', so level 0 looks like file names rather than bird names - confirm.
all_birds = df.index.get_level_values(0).unique().tolist()
all_birds

In [None]:
# Optional step (currently disabled): eliminate all block 0s to account for
# the first-loom bug. The commented-out lines overwrite df, so the data has to
# be reloaded afterwards to get block 0 back.
# NOTE(review): the filter reads index level 2 as the block number - confirm
# that df's index really has the block at that level before enabling it.

# raise Exception('Make sure you want to do this! You will need to reload the data afterward if you want block 0 back.')

# blocks = df.index.get_level_values(2)
# df = df[blocks != 0]

# Show the trials table.
df

In [None]:
# Export all tables to a single pickle file; skipped when save_pickle_path is None.
if save_pickle_path is not None:
    import pickle

    # Everything is bundled in one dict so it can be restored with one load.
    to_save = dict(
        all_birds=all_birds,
        df=df,
        rejected_trials_all=rejected_trials_all,
        calls_all=calls_all,
        call_types_all=call_types_all,
    )

    with open(save_pickle_path, "wb") as f:
        pickle.dump(to_save, file=f)

    # Report only when a file was actually written (previously this printed
    # "Saved to: None" when saving was disabled).
    print(f"Saved to: {save_pickle_path}")

In [None]:
# Export the trials and calls tables as CSV; skipped when save_csv_directory is None.
if save_csv_directory is not None:
    # File names are prefixed with `bird`.
    df.to_csv(os.path.join(save_csv_directory, f"{bird}-trials.csv"))
    calls_all.to_csv(os.path.join(save_csv_directory, f"{bird}-calls.csv"))

    # Report only when files were actually written (previously this printed
    # "Saved to: None" when saving was disabled).
    print(f"Saved to: {save_csv_directory}")