## Import packages and initialize directories

In [1]:
import glob
import os
import pickle
import re
import unittest

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.io as sio
from scipy.io import loadmat
%matplotlib inline

# Data locations, relative to this notebook.
beh_dir = '../../data/decision-making/data/data_behav'
neur_dir = '../../data/decision-making/data/data_ephys'
preproc_dir = '../../data/decision-making/data/data_preproc'


def natural_key(path):
    """Return a sort key that orders the file name of `path` with numeric runs compared as integers.

    The name is split into alternating text / digit tokens, so for example
    's2.mat' sorts before 's10.mat' (a plain string sort would not).
    """
    tokens = re.split(r'(\d+)', os.path.basename(path))
    return [int(tok) if tok.isdigit() else tok for tok in tokens]


# glob.glob returns matches in arbitrary, filesystem-dependent order. The two lists
# are later zipped pairwise and the loop position is used as the subject number,
# so both must be put into the same deterministic order.
beh_files = sorted(glob.glob(os.path.join(beh_dir, "gamble.data*.csv")), key=natural_key)
neur_files = sorted(glob.glob(os.path.join(neur_dir, "*.mat")), key=natural_key)

# NOTE(review): presumably the sampling frequency in Hz -- confirm against the recordings.
sfreq = 1000

## Create a DataFrame in which each row is one subject–electrode–trial combination (= one example)

In [2]:
# Bad-trial flags used for exclusion; indexed below as [subject row, trial column],
# where a value of 0 marks a trial that is kept.
bad_trials = sio.loadmat(os.path.join(beh_dir, 'bad_trials_OFC.mat'))['bad_trials_OFC']

# Game model, which is assumed (not verified here) to be identical across subjects.
game_model = pd.read_csv(os.path.join(beh_dir, 'gamble_choices.csv'))

# Columns the master frame always exposes, in this order; any additional columns
# found in the input files are kept after them.
master_columns = ['subject',
                  'include.trial',
                  'round',
                  'newround.time',
                  'choice.time',
                  'buttonpress.time',
                  'conf.time',
                  'reveal.time',
                  'choice.class',
                  'choice.location',
                  'outcome',
                  'Safe.Bet.Amount',
                  'Risky.Bet.Amount',
                  'Risky.bet.shown.number',
                  'Risky.bet.hidden.number',
                  'Risky.Side',
                  'data',
                  'channel']

# One frame per subject-channel is collected here and concatenated once after the
# loop. Growing a DataFrame with repeated appends copies all accumulated rows each
# time, and DataFrame.append no longer exists in pandas >= 2.0.
channel_frames = []

for sub_index, (beh_file, neur_file) in enumerate(zip(beh_files, neur_files)):
    print(sub_index)
    print()

    ## Read data
    # ------------------------------------------------------------------------------------------------------.
    # behavior
    df = pd.read_csv(beh_file)

    # neural: indexed below as [trial, sample, channel]
    neur = loadmat(neur_file)['buttonpress_events_hg']

    ## Number trials and number electrodes
    # ------------------------------------------------------------------------------------------------------.
    num_trials_beh = len(df)
    num_trials, num_samples, nchan = neur.shape[:3]

    # add subject column on the left: 1-indexed, intended to match the subject ids in the files
    df.insert(0, 'subject', sub_index + 1)

    ## Append game model data
    # ------------------------------------------------------------------------------------------------------.
    df = df.merge(game_model[:num_trials_beh], left_index=True, right_index=True)

    ## Exclude bad trials from entire df: makes it easier to match with neural data
    # ------------------------------------------------------------------------------------------------------.
    keep_trial = (bad_trials[sub_index, :num_trials_beh] == 0) & (df['choice.location'] != 'Timeout')
    df.insert(1, 'include.trial', keep_trial)
    # explicit copy so the later column/index assignments do not act on a view of the unfiltered frame
    df = df[df['include.trial']].copy()

    # The included behavioral trials must line up one-to-one with the neural trials.
    if len(df) != num_trials:
        raise ValueError(
            'subject {}: {} included behavioral trials but {} neural trials'.format(
                sub_index + 1, len(df), num_trials))

    # new index that counts the included trials, i.e. the position along the first axis of `neur`
    df.index = pd.Index(np.arange(num_trials), name='trial_index_subject')

    ## Add neural data
    # ------------------------------------------------------------------------------------------------------.
    # one copy of the behavioral frame per channel; 'data' holds that channel's trace
    # for each trial as a list of samples
    for this_chan in range(nchan):
        traces = pd.Series([list(trace) for trace in neur[:, :, this_chan]],
                           index=df.index, dtype=object)
        channel_frames.append(df.assign(data=traces, channel=this_chan))

if channel_frames:
    df_master = pd.concat(channel_frames, sort=False)
else:
    df_master = pd.DataFrame(columns=master_columns)

# Guarantee the declared columns exist (all-NaN if absent from the inputs) and come first.
extra_columns = [col for col in df_master.columns if col not in master_columns]
df_master = df_master.reindex(columns=master_columns + extra_columns)

# Single running index over all subject-channel-trial rows, built once after the loop.
df_master.index = pd.Index(np.arange(len(df_master)), name='index')

0



of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort)


1

2

3

4

5

6

7

8

9



## Include only gambles where the shown (first) number is 5 or 6

In [3]:
# Keep only rows whose shown risky-bet number is 5 or 6, then renumber them 0..n-1
# under an index named 'index_use'.
shown_number = df_master['Risky.bet.shown.number']
keep_rows = (shown_number == 5) | (shown_number == 6)
df_use = df_master[keep_rows]
df_use = df_use.set_index(pd.Index(np.arange(len(df_use)), name='index_use'))

In [6]:
# Fraction of rows in df_use whose choice class is 'Gamble'.
is_gamble = df_use['choice.class'] == 'Gamble'
np.mean(is_gamble)

0.5698814409484724

## Extract a *separate* X-matrix, and separate y-labels, for each subject

In [8]:
# Half-open sample window [sample_first, sample_last) taken from every trace,
# and the resulting number of samples per example.
sample_first, sample_last = 0, 950
num_samples = sample_last - sample_first

First subject only

In [None]:
# Build one (examples x samples) matrix from the 'data' column of df_use.
# NOTE(review): the heading above says "First subject only", but this uses every
# row of df_use -- confirm which is intended.
num_examples = len(df_use)
X = np.empty([num_examples, num_samples])

# Each row of X receives the [sample_first, sample_last) window of one trace.
# The destination is the full row (width num_samples): reusing the source offsets
# on the destination only works when sample_first == 0.
for this_example, trace in enumerate(df_use['data']):
    X[this_example, :] = np.asarray(trace)[sample_first:sample_last]

Loop over subjects and store in dictionary

In [29]:
# Subject ids present in df_use.
subjects = set(df_use['subject'])

# Per-subject design matrices and labels, keyed by subject id.
Xdict = dict()
ydict = dict()

# Sorted iteration makes the dictionary insertion order reproducible.
for this_subject in sorted(subjects):
    df_subject = df_use[df_use['subject'] == this_subject]
    num_examples = len(df_subject)
    X = np.empty([num_examples, num_samples])

    # Read traces from this subject's own rows. Indexing the pooled df_use by
    # 0..num_examples-1 would fill every subject's matrix from the first rows of
    # the whole table, which would not match the labels stored in ydict.
    # The destination is the full row (width num_samples) so the window also
    # works when sample_first != 0.
    for this_example, trace in enumerate(df_subject['data']):
        X[this_example, :] = np.asarray(trace)[sample_first:sample_last]

    Xdict[this_subject] = X
    ydict[this_subject] = df_subject['choice.class'].values

1
(215, 950)
215
2
(282, 950)
282
3
(2832, 950)
2832
4
(150, 950)
150
5
(2562, 950)
2562
6
(322, 950)
322
7
(473, 950)
473
8
(480, 950)
480
9
(912, 950)
912
10
(544, 950)
544


## Save X and y dictionaries

In [30]:
import pickle

In [36]:
# Create the output directory; exist_ok makes the cell safe to re-run and avoids
# depending on a shell `mkdir` command.
os.makedirs('data', exist_ok=True)

In [45]:
# Serialize the per-subject X matrices with the highest available pickle protocol.
xdict_path = os.path.join('data', 'Xdict.pickle')
with open(xdict_path, 'wb') as xdict_file:
    pickle.dump(Xdict, xdict_file, protocol=pickle.HIGHEST_PROTOCOL)

In [46]:
# Serialize the per-subject label arrays with the highest available pickle protocol.
ydict_path = os.path.join('data', 'ydict.pickle')
with open(ydict_path, 'wb') as ydict_file:
    pickle.dump(ydict, ydict_file, protocol=pickle.HIGHEST_PROTOCOL)

In [47]:
# List the contents of the 'data' output directory.
os.listdir('data')

['Xdict.pickle', 'ydict.pickle']