## Done
- full preprocessing on subject 1, no need to visualize
- for-loop over all subjects
- exclude bad trials

## Description

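This notebook reads in the neural and behavioral data from all subjects, excludes bad trials and timed-out trials, and preprocesses the data for downstream modeling. The results are saved as two pickled dictionaries (`X.pickle` for the neural data and `y.pickle` for the choice labels), each keyed by subject index.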

In [1]:
import glob
import os
import pickle
import unittest

import numpy as np
import pandas as pd
import scipy.io as sio
from scipy.io import loadmat

# Directories holding the behavioral CSVs and the electrophysiology .mat files.
beh_dir = '../data/decision-making/data/data_behav'
neur_dir = '../data/decision-making/data/data_ephys'

# glob.glob() returns paths in arbitrary, OS-dependent order. Later cells pair
# the i-th behavior file with the i-th neural file and use i as the row index
# into the bad-trials matrix, so both lists are sorted to make that pairing
# deterministic.
# NOTE(review): this assumes the file names of both modalities sort into the
# same subject order, and that this order matches the rows of the bad-trials
# matrix -- confirm against the data set.
beh_files = sorted(glob.glob(os.path.join(beh_dir, "gamble.data*.csv")))
neur_files = sorted(glob.glob(os.path.join(neur_dir, "*.mat")))

# presumably the sampling frequency in Hz -- TODO confirm
sfreq = 1000

## Bad trials

In [2]:
# Read the bad-trial flags stored under the 'bad_trials_OFC' key of the .mat
# file; the result is indexed as [subject, trial] further down in the notebook.
bad_trials = loadmat(
    os.path.join(beh_dir, 'bad_trials_OFC.mat')
)['bad_trials_OFC']

## Loop through the subjects

In [3]:
# Per-subject outputs, keyed by the subject's position in the file lists.
X_dict = {}
y_dict = {}

# zip() silently stops at the shorter list, which would drop subjects without
# any warning, so require exactly one neural file per behavior file up front.
if len(beh_files) != len(neur_files):
    raise AssertionError(
        "Number of behavior files ({}) does not match number of neural files ({})".format(
            len(beh_files), len(neur_files)
        )
    )

for subject, (beh_file, neur_file) in enumerate(zip(beh_files, neur_files)):
    # Read behavior as a dataframe (one row per trial).
    beh_df = pd.read_csv(beh_file)

    # Flag bad trials: take this subject's row of the bad-trials matrix,
    # truncated to the number of trials this subject actually has.
    beh_df['bad.trial'] = bad_trials[subject, :len(beh_df)]

    # A trial is kept only if it is not flagged as bad and did not time out.
    beh_df['include.trial'] = (
        (beh_df['bad.trial'] == 0) & (beh_df['choice.location'] != 'Timeout')
    )

    # y (ground truth): the choice class of every included trial.
    y = beh_df.loc[beh_df['include.trial'], 'choice.class'].values

    # Load the neural data as a numpy array.
    neur = loadmat(neur_file)['buttonpress_events_hg']

    # Build the X matrix by swapping axes 1 and 2; axis 0 is compared against
    # the number of included trials below.
    X = np.swapaxes(neur, 1, 2)

    # Quality check: the number of included behavior trials must equal the
    # number of neural traces, otherwise X and y are misaligned.
    if X.shape[0] != len(y):
        raise AssertionError(
            "Number of good behavior trials does not match number of neural trials"
            " (subject {}: {} behavior trials vs {} neural trials)".format(
                subject, len(y), X.shape[0]
            )
        )

    # Store this subject's data.
    X_dict[subject] = X
    y_dict[subject] = y

## Save both dictionaries

In [45]:
# X_dict

In [46]:
# y_dict

In [43]:
# Write X_dict to disk as a pickle, using the highest available protocol.
x_pickle_path = '../data/decision-making/data/data_preproc/X.pickle'
with open(x_pickle_path, 'wb') as x_out:
    pickle.dump(X_dict, x_out, protocol=pickle.HIGHEST_PROTOCOL)

In [44]:
# Write y_dict to disk as a pickle, using the highest available protocol.
y_pickle_path = '../data/decision-making/data/data_preproc/y.pickle'
with open(y_pickle_path, 'wb') as y_out:
    pickle.dump(y_dict, y_out, protocol=pickle.HIGHEST_PROTOCOL)