# Creating Convenient Dataframe
We will compile all the sessions into one dataframe with a rich MultiIndex, so that we can group the data along any dimension we might want. There are two steps: filtering and concatenating. Filtering is needed because, for various reasons, many more sessions were recorded than we want to use. Again, all the heavy lifting is done by the SessionIterator and Session classes, so we just use those to select the appropriate sessions and then concatenate them in the later step. 

In [1]:
import os
import sys
import pickle
import itertools
import datetime
import numpy as np
import pandas as pd
from behavioral_performance.tools import SessionIteratorClass as SI
from behavioral_performance.tools import SessionClass as SC

idx = pd.IndexSlice
SessionIterator = SI.SessionIterator
Session = SC.Session

'''
This script creates a dataframe with all sessions of a given dataset. The
sessions are concatenated into one pandas dataframe which can then be searched
or grouped by either animal or type of session. This script creates 5 datasets:

1) DSR NAIVE
2) DSR TRAINING
3) PSR TRAINING
4) DSR TRAINED
5) PSR TRAINED
'''

# Root of the project tree, $HOME/python/. The trailing slash is kept so that
# relative paths can be appended directly.
ROOT = '%s/python/' % os.environ['HOME']

# Lookup from the 'notes' entry of a session header to the infusion label used
# in the dataframe index. Several raw notes collapse onto the same label, so
# the table is written grouped by target label and flattened into a dict.
notes_to_infusion_string = {
    note: label
    for label, notes in (
        ('OFC-saline', ('OFC-saline',)),
        ('OFC-muscimol', ('OFC-muscimol', 'OFC-Musc-Naive')),
        ('PL-saline', ('PL-saline',)),
        ('PL-muscimol', ('PL-muscimol', 'mPFC-Musc-Naive')),
        ('IPSI-saline', ('ipsiLeft-saline', 'ipsiRight-saline')),
        ('IPSI-muscimol', ('ipsiLeft-muscimol', 'ipsiRight-muscimol')),
        ('CONTRA-saline', ('rOFCxlPL-saline', 'lOFCxrPL-saline')),
        ('CONTRA-muscimol', ('rOFCxlPL-muscimol', 'lOFCxrPL-muscimol')),
        ('Saline-Naive', ('Saline-Naive',)),
    )
    for note in notes
}


# One (regime, training stage, output file prefix) triple per session
# dataframe that gets built and pickled.
datasets = [
    ('deterministic', 'Trained', 'DSR'),
    ('probabilistic', 'Trained', 'PSR'),
    ('deterministic', 'Naive', 'Naive'),
    ('deterministic', 'Training', 'DSR_TRAINING'),
    ('probabilistic', 'Training', 'PSR_TRAINING'),
]

## Filtering
This is the same filtering process (which is important) that was used for the block dataframe, done via the SessionIterator class. 

In [2]:
# Build one trial-level dataframe per (regime, training stage) and pickle it.
# Rows are indexed by (rat, session label, block, trial); the session label is
# the 1-based chronological training session number for 'Training' datasets
# and the canonical infusion string otherwise.
for regime, training, savefile in datasets:

    # sys.stdout.write + print("done") emit the same text as Python 2
    # `print "...",` / `print "done"`, but also parse under Python 3.
    sys.stdout.write("loading %s sessions... " % regime)
    session_iterator = SessionIterator([regime, training])
    print("done")

    # Sanity check: the filter must select the known number of sessions.
    if regime == 'deterministic' and training == 'Trained':
        assert len(session_iterator.sessionList) == 88
    elif regime == 'probabilistic' and training == 'Trained':
        assert len(session_iterator.sessionList) == 82
    elif regime == 'deterministic' and training == 'Naive':
        assert len(session_iterator.sessionList) == 21

    # The name of the second index level depends only on the dataset type, so
    # it is fixed here instead of being (re)bound inside the per-trial loop.
    if training == 'Training':
        sessionID = 'training_session'
    else:
        sessionID = 'Infusion_String'

    sessions = [Session(w) for w in session_iterator.sessionList]
    rats = list(set(sess.ratID for sess in sessions))

    out = []
    for rat in rats:
        tmp_sessions = [sess for sess in sessions if sess.ratID == rat]
        if training == 'Training':
            #only care about chronological order during training
            tmp_sessions.sort(key = lambda x: x.dateObj)

        infusion_type = []
        block_array = []
        trial_array = []
        for sess_index, sess in enumerate(tmp_sessions):
            n_trials = len(sess.info)
            if n_trials == 0:
                # nothing to index; the notes lookup is skipped as well
                continue

            if training == 'Training':
                val_to_add = 1 + sess_index
            else:
                val_to_add = notes_to_infusion_string[sess.header['notes']]

            infusion_type.extend([val_to_add] * n_trials)
            # get_level_values yields the per-row block / trial values
            # directly (same result as levels[i][labels[i]] per row).
            block_array.extend(sess.info.index.get_level_values(0))
            trial_array.extend(sess.info.index.get_level_values(1))
        rat_array = [rat] * len(infusion_type)

        rows = pd.MultiIndex.from_arrays([rat_array,
                                          infusion_type,
                                          block_array,
                                          trial_array],
                                          names=('rat', sessionID,
                                                 'block','trial'))
        vals = np.concatenate([s.info.values for s in tmp_sessions])
        # assumes every session of a rat shares the same columns - TODO confirm
        out.append(pd.DataFrame(vals, index=rows,
                                columns = tmp_sessions[-1].info.columns))

    out = pd.concat(out, axis = 0)
    #convert appropriate columns to integers, no need for floats
    for column in ['SA','GA','Correct','AR',
                   'preChoice', 'Choice']:
        # Replace the column outright: `out.loc[:, column] = ...` writes into
        # the existing column on recent pandas and would keep the old dtype.
        out[column] = out[column].astype('int16')
    out.sort_index(axis = 0, inplace = True)
    out.sort_index(axis = 1, inplace = True)
    with open(ROOT + 'behavioral_performance/data/session_dataframes/' \
                            + savefile + '_SESSIONS_DATAFRAME.p', 'wb') as handle:
        pickle.dump(out, handle)

loading deterministic sessions... {'date': '8_3_2017', 'cohort': '2', 'rat': '6', 'task': 'deterministic', 'notes': 'PL-saline'}
{'date': '11_3_2017', 'cohort': '2', 'rat': '6', 'task': 'deterministic', 'notes': 'OFC-muscimol'}
{'date': '15_3_2017', 'cohort': '2', 'rat': '6', 'task': 'deterministic', 'notes': 'ipsiRight-muscimol'}
{'date': '19_3_2017', 'cohort': '2', 'rat': '6', 'task': 'deterministic', 'notes': 'lOFCxrPL-saline'}
{'date': '10_3_2017', 'cohort': '2', 'rat': '6', 'task': 'deterministic', 'notes': 'OFC-saline'}
{'date': '7_3_2017', 'cohort': '2', 'rat': '6', 'task': 'deterministic', 'notes': 'PL-muscimol'}
{'date': '16_3_2017', 'cohort': '2', 'rat': '6', 'task': 'deterministic', 'notes': 'ipsiRight-saline'}
{'date': '18_3_2017', 'cohort': '2', 'rat': '6', 'task': 'deterministic', 'notes': 'lOFCxrPL-muscimol'}
{'date': '4_3_2017', 'cohort': '2', 'rat': '1', 'task': 'deterministic', 'notes': 'OFC-muscimol'}
{'date': '8_3_2017', 'cohort': '2', 'rat': '1', 'task': 'determini

{'date': '25_7_2017', 'cohort': '4', 'rat': '5', 'task': 'deterministic', 'notes': 'PL-saline'}
{'date': '3_8_2017', 'cohort': '4', 'rat': '5', 'task': 'deterministic', 'notes': 'ipsiRight-saline'}
{'date': '29_7_2017', 'cohort': '4', 'rat': '5', 'task': 'deterministic', 'notes': 'lOFCxrPL-saline'}
{'date': '21_7_2017', 'cohort': '4', 'rat': '5', 'task': 'deterministic', 'notes': 'OFC-muscimol'}
done


  user_expressions, allow_stdin)


loading probabilistic sessions... {'date': '15_9_2017', 'cohort': '5', 'rat': '7', 'task': 'probabilistic', 'notes': 'ipsiLeft-muscimol'}
{'date': '4_9_2017', 'cohort': '5', 'rat': '7', 'task': 'probabilistic', 'notes': 'OFC-muscimol'}
{'date': '8_9_2017', 'cohort': '5', 'rat': '7', 'task': 'probabilistic', 'notes': 'PL-muscimol'}
{'date': '11_9_2017', 'cohort': '5', 'rat': '7', 'task': 'probabilistic', 'notes': 'lOFCxrPL-muscimol'}
{'date': '13_9_2017', 'cohort': '5', 'rat': '7', 'task': 'probabilistic', 'notes': 'ipsiLeft-saline'}
{'date': '5_9_2017', 'cohort': '5', 'rat': '7', 'task': 'probabilistic', 'notes': 'OFC-saline'}
{'date': '7_9_2017', 'cohort': '5', 'rat': '7', 'task': 'probabilistic', 'notes': 'PL-saline'}
{'date': '12_9_2017', 'cohort': '5', 'rat': '7', 'task': 'probabilistic', 'notes': 'lOFCxrPL-saline'}
{'date': '25_10_2017', 'cohort': '5', 'rat': '6', 'task': 'probabilistic', 'notes': 'rOFCxlPL-muscimol'}
{'date': '26_10_2017', 'cohort': '5', 'rat': '6', 'task': 'prob

done
loading deterministic sessions... {'date': '4_4_2018', 'cohort': '6', 'rat': '9', 'task': 'deterministic', 'notes': 'mPFC-Musc-Naive'}
{'date': '3_4_2018', 'cohort': '6', 'rat': '9', 'task': 'deterministic', 'notes': 'Saline-Naive'}
{'date': '9_4_2018', 'cohort': '6', 'rat': '9', 'task': 'deterministic', 'notes': 'OFC-Musc-Naive'}
{'date': '23_3_2018', 'cohort': '6', 'rat': '6', 'task': 'deterministic', 'notes': 'OFC-Musc-Naive'}
{'date': '24_3_2018', 'cohort': '6', 'rat': '6', 'task': 'deterministic', 'notes': 'mPFC-Musc-Naive'}
{'date': '22_3_2018', 'cohort': '6', 'rat': '6', 'task': 'deterministic', 'notes': 'Saline-Naive'}
{'date': '29_3_2018', 'cohort': '6', 'rat': '1', 'task': 'deterministic', 'notes': 'Saline-Naive'}
{'date': '1_4_2018', 'cohort': '6', 'rat': '1', 'task': 'deterministic', 'notes': 'mPFC-Musc-Naive'}
{'date': '30_3_2018', 'cohort': '6', 'rat': '1', 'task': 'deterministic', 'notes': 'OFC-Musc-Naive'}
{'date': '26_3_2018', 'cohort': '6', 'rat': '10', 'task': '

{'date': '20_7_2017', 'cohort': '4', 'rat': '1', 'task': 'deterministic', 'notes': 'TRAINING'}
{'date': '18_7_2017', 'cohort': '4', 'rat': '1', 'task': 'deterministic', 'notes': 'TRAINING'}
{'date': '14_7_2017', 'cohort': '4', 'rat': '1', 'task': 'deterministic', 'notes': 'TRAINING'}
{'date': '27_7_2017', 'cohort': '4', 'rat': '1', 'task': 'deterministic', 'notes': 'TRAINING'}
{'date': '8_7_2017', 'cohort': '4', 'rat': '1', 'task': 'deterministic', 'notes': 'TRAINING'}
{'date': '6_7_2017', 'cohort': '4', 'rat': '1', 'task': 'deterministic', 'notes': 'TRAINING'}
{'date': '25_7_2017', 'cohort': '4', 'rat': '1', 'task': 'deterministic', 'notes': 'TRAINING'}
{'date': '21_7_2017', 'cohort': '4', 'rat': '1', 'task': 'deterministic', 'notes': 'TRAINING'}
{'date': '17_7_2017', 'cohort': '4', 'rat': '1', 'task': 'deterministic', 'notes': 'TRAINING'}
{'date': '19_7_2017', 'cohort': '4', 'rat': '1', 'task': 'deterministic', 'notes': 'TRAINING'}
{'date': '10_7_2017', 'cohort': '4', 'rat': '4', 'ta

{'date': '14_8_2017', 'cohort': '5', 'rat': '5', 'task': 'probabilistic', 'notes': 'TRAINING'}
{'date': '16_8_2017', 'cohort': '5', 'rat': '5', 'task': 'probabilistic', 'notes': 'TRAINING'}
{'date': '12_8_2017', 'cohort': '5', 'rat': '5', 'task': 'probabilistic', 'notes': 'TRAINING'}
{'date': '10_8_2017', 'cohort': '5', 'rat': '5', 'task': 'probabilistic', 'notes': 'TRAINING'}
{'date': '2_5_2017', 'cohort': '3', 'rat': '6', 'task': 'probabilistic', 'notes': 'TRAINING'}
{'date': '17_5_2017', 'cohort': '3', 'rat': '6', 'task': 'probabilistic', 'notes': 'TRAINING'}
{'date': '21_5_2017', 'cohort': '3', 'rat': '6', 'task': 'probabilistic', 'notes': 'TRAINING'}
{'date': '15_5_2017', 'cohort': '3', 'rat': '6', 'task': 'probabilistic', 'notes': 'TRAINING'}
{'date': '31_5_2017', 'cohort': '3', 'rat': '6', 'task': 'probabilistic', 'notes': 'TRAINING'}
{'date': '23_5_2017', 'cohort': '3', 'rat': '6', 'task': 'probabilistic', 'notes': 'TRAINING'}
{'date': '27_5_2017', 'cohort': '3', 'rat': '6', 't

{'date': '5_5_2017', 'cohort': '3', 'rat': '3', 'task': 'probabilistic', 'notes': 'TRAINING'}
{'date': '9_5_2017', 'cohort': '3', 'rat': '3', 'task': 'probabilistic', 'notes': 'TRAINING'}
{'date': '10_5_2017', 'cohort': '3', 'rat': '3', 'task': 'probabilistic', 'notes': 'TRAINING'}
{'date': '26_5_2017', 'cohort': '3', 'rat': '3', 'task': 'probabilistic', 'notes': 'TRAINING'}
{'date': '12_5_2017', 'cohort': '3', 'rat': '3', 'task': 'probabilistic', 'notes': 'TRAINING'}
{'date': '24_5_2017', 'cohort': '3', 'rat': '3', 'task': 'probabilistic', 'notes': 'TRAINING'}
{'date': '2_5_2017', 'cohort': '3', 'rat': '2', 'task': 'probabilistic', 'notes': 'TRAINING'}
{'date': '17_5_2017', 'cohort': '3', 'rat': '2', 'task': 'probabilistic', 'notes': 'TRAINING'}
{'date': '21_5_2017', 'cohort': '3', 'rat': '2', 'task': 'probabilistic', 'notes': 'TRAINING'}
{'date': '15_5_2017', 'cohort': '3', 'rat': '2', 'task': 'probabilistic', 'notes': 'TRAINING'}
{'date': '31_5_2017', 'cohort': '3', 'rat': '2', 'tas

## Naive Experiment
We will create a separate dataframe for each experiment. The simplest experiment was the "Naive" animal experiment, which has the least data, so we'll compile it first.

In [4]:
target = ROOT + 'behavioral_performance/data/dataset_dataframes/'
# Directory the filtering step pickles the per-dataset session dataframes to.
session_dir = ROOT + 'behavioral_performance/data/session_dataframes/'


def _load_pickle(path):
    """Return the object pickled at `path`, closing the file afterwards."""
    with open(path, 'rb') as handle:
        return pickle.load(handle)


def _dump_pickle(obj, path):
    """Pickle `obj` to `path`, closing the file afterwards."""
    with open(path, 'wb') as handle:
        pickle.dump(obj, handle)


def _mid_training_sessions(training_df):
    """Return every rat's middle training session, concatenated row-wise.

    `training_df` must carry 'rat' and 'training_session' index levels.
    For a rat with n sessions, session number n // 2 is picked. Training
    sessions are numbered from 1, so a rat with a single session falls back
    to session 1 rather than looking up the non-existent session 0.
    """
    picked = []
    for _, animal_data in training_df.groupby(level = 'rat'):
        animal_sessions = animal_data.groupby(level = 'training_session')
        mid_session_index = max(1, len(animal_sessions) // 2)
        picked.append(animal_sessions.get_group(mid_session_index))
    return pd.concat(picked, axis=0)


Naive = _load_pickle(session_dir + 'Naive_SESSIONS_DATAFRAME.p')
DSR_TRAINING = _load_pickle(session_dir + 'DSR_TRAINING_SESSIONS_DATAFRAME.p')

#first we make the saline dataset
#from Naive it is quite simple
Naive_Saline = Naive.loc[idx[:,'Saline-Naive',:,:],:]
#excluding sessions from cohort 2 because they had training in other task first
Naive_first_session = \
                DSR_TRAINING.loc[idx[['41','43','44','45','46','47'],1,:,:],:]
Naive_DSR = pd.concat([Naive_Saline, Naive_first_session], axis=0)
_dump_pickle(Naive_DSR, target + 'Naive_DSR.p')

#now we make Naive mPFC and OFC datasets
Naive_mPFC = Naive.loc[idx[:,'PL-muscimol',:,:],:]
Naive_OFC = Naive.loc[idx[:,'OFC-muscimol',:,:],:]
_dump_pickle(Naive_mPFC, target + 'Naive_DSR_mPFC.p')
_dump_pickle(Naive_OFC, target + 'Naive_DSR_OFC.p')


#same for PSR Naive - just take the first session of training df
# NOTE(review): this used to be read from ROOT + 'DATA_structures/
# session_dataframes/'; it now reads from the directory the filtering step
# writes to, like the Naive and DSR_TRAINING frames above - confirm.
PSR_TRAINING = _load_pickle(session_dir + 'PSR_TRAINING_SESSIONS_DATAFRAME.p')
Naive_first_session = PSR_TRAINING.loc[idx[:,1,:,:],:]
_dump_pickle(Naive_first_session, target + 'Naive_PSR.p')

#Mid-training - DSR
out = _mid_training_sessions(DSR_TRAINING)
_dump_pickle(out, target + 'Mid_Training_DSR.p')

#Mid-training - PSR
out = _mid_training_sessions(PSR_TRAINING)
_dump_pickle(out, target + 'Mid_Training_PSR.p')

## DSR and PSR Experiment Data
Now we will repeat the process for all the inactivation data for the main experiments DSR and PSR. 

In [5]:

def _write_pickle(obj, path):
    """Pickle `obj` to `path`, closing the file afterwards."""
    with open(path, 'wb') as handle:
        pickle.dump(obj, handle)


def _export_trained_datasets(trained_df, task):
    """Split a trained-session dataframe by infusion and pickle each part.

    `trained_df` must carry an 'Infusion_String' index level and `task` is
    the file-name suffix ('DSR' or 'PSR'). Each muscimol condition goes to
    its own '<region>_<task>.p' file under `target`; every group whose label
    contains 'saline' is pooled, written to 'Saline_<task>.p' and returned.
    Labels that are neither saline nor a listed muscimol condition are
    skipped.
    """
    muscimol_prefix = {'CONTRA-muscimol': 'CONTRA',
                       'IPSI-muscimol': 'IPSI',
                       'OFC-muscimol': 'OFC',
                       'PL-muscimol': 'mPFC'}
    saline_parts = []
    for label, dataset in trained_df.groupby(level = 'Infusion_String'):
        if 'saline' in label:
            saline_parts.append(dataset)
        elif label in muscimol_prefix:
            _write_pickle(dataset,
                          target + '%s_%s.p' % (muscimol_prefix[label], task))
    pooled_saline = pd.concat(saline_parts, axis=0)
    _write_pickle(pooled_saline, target + 'Saline_%s.p' % task)
    return pooled_saline


#All DSR-Trained datasets: saline, mpfc, ofc, ipsi, contra
# NOTE(review): the trained frames used to be read from ROOT +
# 'DATA_structures/session_dataframes/'; they are now read from the directory
# the filtering step writes its *_SESSIONS_DATAFRAME.p files to - confirm.
with open(ROOT + 'behavioral_performance/data/session_dataframes/'
                 'DSR_SESSIONS_DATAFRAME.p', 'rb') as handle:
    DSR_TRAINED = pickle.load(handle)
saline = _export_trained_datasets(DSR_TRAINED, 'DSR')

#Shuffled dataset: DSR saline trials with the 'Choice' column permuted.
#Work on a copy so the pooled saline frame itself is left untouched.
shuffled = saline.copy()
shuffled['Choice'] = np.random.permutation(saline['Choice'].values)
_write_pickle(shuffled, target + 'shuffled.p')


#All PSR-Trained datasets: saline, mpfc, ofc, ipsi, contra
with open(ROOT + 'behavioral_performance/data/session_dataframes/'
                 'PSR_SESSIONS_DATAFRAME.p', 'rb') as handle:
    PSR_TRAINED = pickle.load(handle)
# `saline` is left bound to the pooled PSR saline frame from here on.
saline = _export_trained_datasets(PSR_TRAINED, 'PSR')


## Rigged XOR Dataset
We will create the rigged linear and XOR rigged datasets based on the original PSR dataset by applying the appropriate rules. 

In [6]:

def _xor_choices(sess):
    """Return the XOR-rigged 'Choice' sequence for one session frame.

    The first choice is kept; every later choice is the previous (already
    rigged) choice XOR the previous trial's 'AR' value. Unrolled, trial t is
    choice[0] ^ AR[0] ^ ... ^ AR[t-1], i.e. a running XOR.
    """
    choice = sess['Choice'].values.astype('int64')
    reward = sess['AR'].values.astype('int64')
    return np.bitwise_xor.accumulate(np.concatenate((choice[:1], reward[:-1])))


def _linear_choices(sess):
    """Return the repeating 0, 1, 1, 1 'Choice' pattern for one session frame."""
    return (np.arange(len(sess)) % 4 > 0).astype('int64')


def _rig_sessions(saline_df, make_choices):
    """Rebuild `saline_df` with 'Choice' replaced per (infusion, rat) group.

    Each group is copied before its 'Choice' column is overwritten with
    `make_choices(group)`, so the result does not depend on whether pandas
    hands out views or copies, and `saline_df` is never modified. The
    column keeps its original dtype.
    """
    rigged_sessions = []
    for _, dataset in saline_df.groupby(level = 'Infusion_String'):
        for _, sess in dataset.groupby(level = 'rat'):
            sess = sess.copy()
            sess['Choice'] = make_choices(sess).astype(sess['Choice'].dtype)
            rigged_sessions.append(sess)
    return pd.concat(rigged_sessions, axis = 0)


#rigged dataset - XOR
# We go session by session (one infusion type of one rat), take the previous
# choice and reward, and do an XOR operation to determine the next choice.
rigged = _rig_sessions(saline, _xor_choices)
with open(target + 'XOR_rigged.p', 'wb') as handle:
    pickle.dump(rigged, handle)

#rigged dataset - linear
# We go session by session, and do a "covert" pattern that a linear model
# could pick up. Specifically, choice is EAST, WEST, WEST, WEST,
# EAST, WEST, WEST, WEST, etc.
rigged = _rig_sessions(saline, _linear_choices)
with open(target + 'linear_rigged.p', 'wb') as handle:
    pickle.dump(rigged, handle)
