### SparklyRGT Template: Baseline and Acquisition Analysis 

**Requirements**
* The data must be an Excel file exported from MEDPC2XL (trial-by-trial data)
* The data, sparklyRGT.py file, and this notebook must all be in the same folder

**Getting started: Please make a copy of this (sparklyRGT_template_2) for each analysis**
- Refer to sparklyRGT_documentation for function information
- Note: depending on your analysis, you will only have to complete certain sections of the sparklyRGT_documentation
- Note: feel free to create a personal template once you've become comfortable - this is just an example

In [1]:
import sparklyRGT as rgt 
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.ticker import MaxNLocator
import scipy.stats as stats
import model_data as md
pd.options.mode.chained_assignment = None
pd.set_option('display.max_rows',100)

I am being executed!


In [2]:
# Enable IPython's autoreload extension; mode 2 re-imports modules before each cell
# is executed, so edits to imported .py files are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

***

# 1) Load data into Python



In [4]:
#folder containing the Excel data files - edit this one line for your own machine
#(Windows example: "C:\\Users\\dexte\\sparklyRGT\\data")
data_dir = '/home/brett/sparklyRGT/data'

#changes working directory to data_dir so the file names used below resolve inside it
os.chdir(data_dir)

#checks current working directory - this must be the last expression in the cell,
#otherwise the notebook does not display the result
os.getcwd()

In [5]:
#CH01 is loaded on its own (not with the other files) because a forced choice session
#was accidentally included for four rats and has to be dropped first
fnames2 = ['CH01_raw-free_S8-19.xlsx']
df2 = rgt.load_data(fnames2)

#remove the forced choice session (session number 6) from df2
rgt.drop_sessions(df2, [6])

#renumber sessions 8-19 as 1-12 so they start from 1; the returned dataframe is the
#last expression of the cell and is therefore displayed
rgt.edit_sessions(
    df2,
    orig_sess = [*range(8, 20)],
    new_sess = [*range(1, 13)],
)

Unnamed: 0,MSN,StartDate,StartTime,Subject,Group,Box,Experiment,Comment,Session,Trial,...,Pun_Persev_H5,Pun_HeadEntry,Pun_Dur,Premature_Resp,Premature_Hole,Rew_Persev_H1,Rew_Persev_H2,Rew_Persev_H3,Rew_Persev_H4,Rew_Persev_H5
0,MisrGT_A-cue,02/22/19,17:22:25,41,0,1,0,,1,1.1,...,0,0,0,1,4,0,0,0,0,0
1,MisrGT_A-cue,02/22/19,17:22:25,41,0,1,0,,1,1.1,...,0,0,0,1,1,0,0,0,0,0
2,MisrGT_A-cue,02/22/19,17:22:25,41,0,1,0,,1,1.1,...,0,0,0,1,5,0,0,0,0,0
3,MisrGT_A-cue,02/22/19,17:22:25,41,0,1,0,,1,1.1,...,0,0,0,1,5,0,0,0,0,0
4,MisrGT_A-cue,02/22/19,17:22:25,41,0,1,0,,1,1.1,...,0,0,0,1,4,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
72643,rGT_B-cue,03/13/19,18:43:20,32,0,5,0,,12,48.1,...,0,0,0,1,1,0,0,0,0,0
72644,rGT_B-cue,03/13/19,18:43:20,32,0,5,0,,12,48.0,...,0,0,0,0,0,0,0,0,0,0
72645,rGT_B-cue,03/13/19,18:43:20,32,0,5,0,,12,49.0,...,0,0,0,0,0,0,0,0,0,0
72646,rGT_B-cue,03/13/19,18:43:20,32,0,5,0,,12,50.0,...,0,0,0,0,0,0,0,0,0,0


In [6]:
#names of the excel files to load together - change these to your own file names
fnames = [
    'BH09_raw-free_S1-5_corrected.xlsx',
    'CH02_raw-free_S8-15.xlsx',
    'NA01_raw_free-choice_S8-18.xlsx',
]

#read every file into one dataframe; reset_sessions = True asks for the session
#numbers to be reset so that they start at 1
df1 = rgt.load_multiple_data(fnames, reset_sessions = True)



In [7]:
#list the sessions found for each subject in df1 (output is indexed by Subject, StartDate
#and Session) to confirm the session numbers look as expected after loading
rgt.check_sessions(df1)

Subject  StartDate            Session
101      2021-06-29 00:00:00  1          104.0
         2021-06-30 00:00:00  2          155.0
         2021-07-01 00:00:00  3          137.0
         2021-07-02 00:00:00  4          128.0
         2021-07-05 00:00:00  5          128.0
102      2021-06-29 00:00:00  1          102.0
         2021-06-30 00:00:00  2          129.0
         2021-07-01 00:00:00  3           83.0
         2021-07-02 00:00:00  4          112.0
         2021-07-05 00:00:00  5           75.1
103      2021-06-29 00:00:00  1           57.0
         2021-06-30 00:00:00  2           84.0
         2021-07-01 00:00:00  3           62.0
         2021-07-02 00:00:00  4           57.0
         2021-07-05 00:00:00  5           58.0
104      2021-06-29 00:00:00  1           75.0
         2021-06-30 00:00:00  2           70.0
         2021-07-01 00:00:00  3           77.1
         2021-07-02 00:00:00  4           46.0
         2021-07-05 00:00:00  5           58.0
105      2021-06-29 00

In [8]:
#stack the multi-file data (df1) and the separately cleaned CH01 data (df2) into one dataframe;
#ignore_index = True discards the original row labels and renumbers the rows 0..n-1
df = pd.concat([df1,df2], ignore_index = True)


In [9]:
#rename MSNs so that the rats on the outcome task and on the random task don't have
#"loss" in the MSN (tasks are matched by MSN substring when the subject lists are built)
msn_renames = {
    'LossrGT_A-losscue_v1': 'outcomeRGT_A',
    'LossrGT_B-losscue_v1': 'outcomeRGT_B',
    'AnarchyrGT_A-losscue_v6': 'RandomRGT_A',
    'AnarchyrGT_B-losscue_v6': 'RandomRGT_B',
}

#one vectorized, exact-match replacement over the whole column instead of looping over
#every row in Python; MSNs that are not keys of msn_renames are left unchanged
df['MSN'] = df['MSN'].replace(msn_renames)

#creates lists of subjects run on each task (classic A, classic B, etc.):
#a Series indexed by MSN whose values are arrays of the unique subjects on that task
task_list = df.groupby(['MSN'])['Subject'].unique()

In [10]:
def _subjects_on(tag):
    """Return one array holding the subjects of every task whose MSN contains ``tag``.

    Reads the notebook-level ``df`` and ``task_list``; versions A and B of a task are
    merged because they share the same unique string in their MSN.
    """
    matching_tasks = [msn for msn in df.MSN.unique() if tag in msn]
    return np.concatenate(task_list[matching_tasks])


#subjects that ran the same task (version A and version B together), selected by the
#unique string in each task name
uncued_subs = _subjects_on('Classic')
#the standard cued task is selected by its two exact MSNs rather than by substring
standard_subs = np.concatenate((task_list['rGT_A-cue'], task_list['rGT_B-cue']))
#MisRGT and RevRGT tasks both refer to reverse-cue RGT, so they are joined (Mis first, then Rev)
reverse_subs = np.concatenate((_subjects_on('Mis'), _subjects_on('Rev')))
outcome_subs = _subjects_on('outcome')
random_subs = _subjects_on('Random')
loss_subs = _subjects_on('oss')

subs = [uncued_subs, standard_subs, reverse_subs, outcome_subs, random_subs, loss_subs]

***
# 2A) Baseline & Acquisition Analysis


In [11]:
#subject arrays for each experimental group, in the order used for group numbering
group_list = [uncued_subs, standard_subs, reverse_subs, outcome_subs, random_subs, loss_subs]

#group number -> display name; position i here labels group_list[i]
group_names = dict(enumerate(['uncued', 'cued', 'reverse', 'outcome', 'random', 'loss']))

title = 'Plot' #for plotting

startsess = 1 #first session you would like to include in figures
endsess = 5 #last session you would like to include in figures

## Data cleaning

### Check session numbers for each rat

In [12]:
#list the sessions found for each subject in the combined dataframe
#NOTE(review): in the output below, StartDate mixes timestamps (e.g. 2003-01-19 00:00:00) with
#raw strings (e.g. 02/22/19) for the same subject - looks like some dates were parsed and
#others were not; confirm the dates are correct before relying on StartDate
rgt.check_sessions(df)

Subject  StartDate            Session
1        2003-01-19 00:00:00  5           72.0
         2003-04-19 00:00:00  6           81.0
         2003-05-19 00:00:00  7           94.1
         2003-07-19 00:00:00  8           78.0
         2003-08-19 00:00:00  9           83.0
         2003-11-19 00:00:00  10          49.1
         2003-12-19 00:00:00  11          53.0
         02/22/19             1           69.1
         02/25/19             2           85.0
         02/26/19             3           75.0
         02/27/19             4           83.0
         03/13/19             12         103.0
2        2003-01-19 00:00:00  5           72.1
         2003-04-19 00:00:00  6           80.1
         2003-05-19 00:00:00  7           88.0
         2003-07-19 00:00:00  8           92.1
         2003-08-19 00:00:00  9           71.0
         2003-11-19 00:00:00  10          99.0
         2003-12-19 00:00:00  11          99.1
         02/22/19             1           57.0
         02/25/19     

### Drop/edit session numbers

In [13]:
# df2 = rgt.drop_sessions(df, [6])
# df2 = rgt.edit_sessions(df, orig_sess = [0], new_sess = [15], subs = "all")

### Check that you dropped/edited the desired session(s)

In [14]:
# rgt.check_sessions(df2) 

## Data processing

### Calculate variables for each rat


In [15]:
#calculate the summary variables from the combined dataframe df; if you created an edited
#dataframe in the "Drop/edit session numbers" cell, pass that dataframe here instead of df
df_sum = rgt.get_summary_data(df)
#display the columns from 'risk1' through 'risk5' (label slices with .loc include both ends)
df_sum.loc[:,'risk1':'risk5'] 

Unnamed: 0,risk1,risk2,risk3,risk4,risk5
1,39.1304,42.8571,29.7297,51.8072,62.3188
2,50.8772,65.7143,60.6557,76.6234,85.9155
3,66.2651,83.6364,89.3617,86.2069,94.9367
4,56.6667,84.9057,77.3585,87.7193,85.7143
5,29.1139,61.6162,34.2466,45.679,39.2857
6,35.4839,45.0549,64.5833,69.8413,79.4872
7,73.494,68.0,80.9524,83.9286,87.0968
8,31.8182,69.4118,81.3084,93.2203,98.2143
9,48.9362,71.4286,85.567,,72.1519
10,9.7561,11.1111,18.9189,,20.0


### Get the risk status of the rats


In [None]:
#classify the rats over sessions startsess..endsess; df_sum is replaced by the returned
#dataframe, which is expected to hold the 'mean_risk' and 'risk_status' columns printed below
#NOTE(review): df_sum is both input and output here - confirm that re-running this cell is harmless
df_sum, risky, optimal = rgt.get_risk_status(df_sum, startsess, endsess)

#show each rat's mean risk score and risk status, then the two returned subject groupings
print(df_sum[['mean_risk','risk_status']]) 
print(risky, optimal) 

### Export your data to an Excel file 


In [None]:
#write df_sum to an Excel file, using group_list for the experimental groups
#NOTE(review): column_name and new_file_name are empty-string placeholders - presumably they
#must be filled in before running; asin = True presumably requests an arcsine transform -
#confirm both against sparklyRGT_documentation
rgt.export_to_excel(df_sum, groups = group_list, column_name = '', new_file_name = '', asin = True)

## Calculate means and SEMs for your experimental groups



In [None]:
#group-level means and SEMs from df_sum, using the groups in group_list labelled by group_names
mean_scores, SEM = rgt.get_means_sem(df_sum, groups = group_list, group_names = group_names)
#display the means; replace this line with SEM (uncomment below) to display the SEMs instead
mean_scores
# SEM

# 2B) Baseline & Acquisition Analysis: Plotting


## Bar plot of P1-P4 % choice


In [None]:
#bar plot of P1-P4 % choice from the group means and SEMs, for sessions startsess..endsess
rgt.choice_bar_plot(startsess, endsess, mean_scores, SEM)

#To save figure (uncomment and edit the file name):
# plt.savefig('BH07 Choice S29-30',facecolor = 'white')

## Line plot of other variables


In [None]:
#line plot of the risk score for sessions startsess..endsess
rgt.rgt_plot('risk', startsess, endsess, title, mean_scores, SEM, group_names = group_names, y_label = 'Risk score') 
#save immediately after plotting, before the next plot is drawn; the session range in the
#file name is built from startsess/endsess so it cannot go stale when those are changed
plt.savefig(f'rgt variants - risk score S{startsess}-{endsess}',facecolor = 'white')

#line plot of premature responding for the same sessions (displayed only, not saved)
rgt.rgt_plot('prem', startsess, endsess, title, mean_scores, SEM, group_names = group_names, y_label = 'Premature responding') 

## Bar plot of other variables



In [None]:
#bar plot of the risk score for sessions startsess..endsess, from the group means and SEMs
rgt.rgt_bar_plot('risk', startsess, endsess, title, mean_scores, SEM, group_names, y_label = 'Risk score')