### SparklyRGT Template: Baseline and Acquisition Analysis 

**Requirements**
* The data must be an Excel file exported from MEDPC2XL (trial-by-trial data) 
* The data, sparklyRGT.py file, and this notebook must all be in the same folder

**Getting started: Please make a copy of this (sparklyRGT_template_2) for each analysis**
- Refer to sparklyRGT_documentation for function information
- Note: depending on your analysis, you will only have to complete certain sections of the sparklyRGT_documentation
- Note: feel free to create a personal template once you've become comfortable - this is just an example

In [1]:
# Analysis, statistics and plotting dependencies used by this template.
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.ticker import MaxNLocator
import scipy.stats as stats
import seaborn as sns
import pingouin as pg
# Turn off pandas' chained-assignment (SettingWithCopy) warning for this session.
pd.options.mode.chained_assignment = None
# Allow up to 100 rows before a displayed DataFrame is truncated.
pd.set_option('display.max_rows',100)
# Re-import edited modules automatically before each cell is executed,
# so changes to sparklyRGT.py are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
# NOTE(review): machine-specific absolute path - change it to the folder that
# contains sparklyRGT.py on your computer. The working directory is switched
# before the import below, presumably so that the module can be found; confirm.
os.chdir('C:\\Users\\dexte\\sparklyRGT\\sparklyRGT_tutorial')
import sparklyRGT as rgt

I am being executed!


***

# 1) Load data into Python



In [2]:
# NOTE(review): machine-specific absolute path - point this at the folder that
# holds your raw data files.
os.chdir('C:\\Users\\dexte\\sparklyRGT\\data')
# File(s) to load. Presumably resolved relative to the working directory set
# above - confirm in sparklyRGT_documentation.
file_names = ['BH08_raw_post-dosing_S31-39.xlsx'] 

# Load the listed file(s) into the trial-by-trial DataFrame used by the rest of the notebook.
df = rgt.load_data(file_names)

# Preview the first rows to confirm the load worked.
df.head()

Unnamed: 0,MSN,StartDate,StartTime,Subject,Group,Box,Experiment,Comment,Session,Trial,...,Pun_Persev_H5,Pun_HeadEntry,Pun_Dur,Premature_Resp,Premature_Hole,Rew_Persev_H1,Rew_Persev_H2,Rew_Persev_H3,Rew_Persev_H4,Rew_Persev_H5
0,rGT_A-cue,2021-07-17,13:58:31,25,0.0,1,0.0,,31,1.1,...,0,0,0,1,1,0,0,0,0,0
1,rGT_A-cue,2021-07-17,13:58:31,25,0.0,1,0.0,,31,1.1,...,0,0,0,1,1,0,0,0,0,0
2,rGT_A-cue,2021-07-17,13:58:31,25,0.0,1,0.0,,31,1.0,...,0,0,0,0,0,0,0,0,0,0
3,rGT_A-cue,2021-07-17,13:58:31,25,0.0,1,0.0,,31,2.1,...,0,0,0,1,3,0,0,0,0,0
4,rGT_A-cue,2021-07-17,13:58:31,25,0.0,1,0.0,,31,2.1,...,0,0,0,1,2,0,0,0,0,0


***
# 2A) Baseline & Acquisition Analysis


In [3]:
# Subject numbers belonging to each cohort.
# In this example: control = Tg negative rats, experimental = Tg positive rats.
control_group = [1, 6, 8, 10, 14, 15, 17, 20, 25, 26, 27, 28, 29, 30, 31, 32]
exp_group = [2, 3, 4, 5, 7, 9, 11, 12, 13, 16, 18, 19, 21, 22, 23, 24]

# Cohorts in analysis order; the labels below are keyed by position in this list.
group_list = [control_group, exp_group]
group_names = dict(enumerate(('tg neg', 'tg pos')))

# Figure title and the first/last session used by the plotting cells.
title = 'SNc inhibition in females'
startsess, endsess = 36, 39

## Data cleaning

### Check session numbers for each rat

In [4]:
# Optional: uncomment to check the session numbers recorded for each rat in df.
# rgt.check_sessions(df)

### Drop/edit session numbers

In [5]:
# Optional: uncomment to drop/edit sessions; fill in the list (presumably the
# session numbers to drop - see sparklyRGT_documentation). The result is stored as df2.
# df2 = rgt.drop_sessions(df, [])

### Check that you dropped/edited the desired session(s)

In [6]:
# Optional: uncomment after creating df2 to verify that the intended session(s) were dropped/edited.
# rgt.check_sessions(df2) 

## Data processing

### Calculate variables for each rat


In [7]:
# Build the summary table. Judging by the displayed output it has one row per rat
# and one column per session/variable combination (e.g. 31P1, prem39).
df_sum = rgt.get_summary_data(df) #pass df2 instead of df if you edited sessions above
df_sum 

Unnamed: 0,31P1,31P2,31P3,31P4,32P1,32P2,32P3,32P4,33P1,33P2,...,trial39,prem31,prem32,prem33,prem34,prem35,prem36,prem37,prem38,prem39
1,23.9437,35.2113,16.9014,23.9437,14.6067,56.1798,7.86517,21.3483,8.62069,51.7241,...,68.0,10.126582,12.745098,10.606061,19.277108,22.680412,25.301205,23.943662,13.043478,12.820513
2,34.0426,62.766,0.0,3.19149,10.3448,88.9655,0.0,0.689655,0.813008,98.374,...,144.0,28.888889,11.585366,23.125,17.791411,23.699422,14.201183,17.901235,19.10828,10.559006
3,1.49254,91.0448,4.47761,2.98507,3.65854,92.6829,2.43902,1.21951,2.5974,80.5195,...,55.0,2.5,10.0,4.545455,6.756757,6.756757,20.833333,9.473684,4.938272,5.172414
4,59.7938,34.0206,4.12371,2.06186,8.23529,91.7647,0.0,0.0,18.9474,78.9474,...,104.0,9.734513,10.309278,2.970297,4.938272,14.159292,7.142857,6.493506,9.708738,6.306306
5,8.69565,33.3333,42.029,15.942,5.17241,22.4138,55.1724,17.2414,11.8644,11.8644,...,67.0,13.580247,16.666667,20.0,29.333333,18.518519,16.438356,16.666667,19.736842,15.189873
6,11.7647,63.5294,0.0,24.7059,3.94737,60.5263,1.31579,34.2105,1.36986,65.7534,...,69.0,4.301075,4.761905,1.204819,4.054054,4.615385,0.0,4.819277,2.352941,4.166667
7,36.5385,42.3077,13.4615,7.69231,19.0476,61.9048,6.34921,12.6984,11.3924,74.6835,...,68.0,44.680851,34.951456,17.171717,12.903226,8.510638,16.513761,20.792079,26.213592,22.727273
8,22.5225,65.7658,5.40541,6.30631,20.1681,65.5462,0.840336,13.4454,21.7391,62.6087,...,132.0,27.922078,13.138686,15.328467,16.546763,30.15873,12.8,5.426357,9.230769,16.981132
9,15.2174,21.7391,43.4783,19.5652,1.81818,9.09091,80.0,9.09091,1.81818,9.09091,...,58.0,40.506329,32.098765,24.657534,61.165049,71.428571,50.617284,68.932039,57.894737,32.55814
10,6.45161,29.0323,24.1935,40.3226,22.3684,34.2105,13.1579,30.2632,21.25,38.75,...,47.0,23.45679,18.085106,12.903226,18.390805,27.941176,25.974026,17.567568,20.895522,25.396825


In [8]:
# NOTE(review): machine-specific absolute path. Switches the working directory
# back to the tutorial folder (it was set to the data folder for loading).
os.chdir('C:\\Users\\dexte\\sparklyRGT\\sparklyRGT_tutorial')
# Long-format version of the summary: one row per Subject and Session, one column per variable.
df_long = rgt.get_long_summary_data(df, df_sum)
df_long.head()

Unnamed: 0,Subject,Session,P1,P2,P3,P4,risk,collect_lat,choice_lat,omit,trial,prem
0,1,31,23.943662,35.211268,16.901408,23.943662,18.309859,2.154889,1.639296,0,71.0,10.126582
1,1,32,14.606742,56.179775,7.865169,21.348315,41.573034,1.836667,1.553596,0,89.0,12.745098
2,1,33,8.62069,51.724138,8.62069,31.034483,20.689655,1.462632,1.600862,1,59.1,10.606061
3,1,34,10.606061,37.878788,13.636364,37.878788,-3.030303,1.524146,1.53697,1,68.1,19.277108
4,1,35,32.0,40.0,4.0,24.0,44.0,1.896182,1.460533,0,76.1,22.680412


### Get the risk status of the rats


In [9]:
# Classify the rats by risk status over sessions startsess-endsess.
# `risky` and `optimal` are read by the "split by risk status" cell further down,
# so this cell has to run for the notebook to execute top to bottom.
# A copy of df_sum is passed so the summary table used by the other cells is not
# altered; the version carrying the risk columns is kept separately as df_risk.
df_risk, risky, optimal = rgt.get_risk_status(df_sum.copy(), startsess, endsess)

print(risky, optimal)
df_risk[['mean_risk', 'risk_status']]

### Export your data to an Excel file 


In [10]:
# Optional: uncomment to export df_sum to an Excel file; fill in column_name and
# new_file_name first (see sparklyRGT_documentation for the meaning of each argument).
# rgt.export_to_excel(df_sum, groups = group_list, column_name = '', new_file_name = '', asin = True)

## Run Anova on selected variables

In [11]:
# Restrict the trial data to a single group before running the ANOVA:
# enable exactly one of the two lines below.
# NOTE(review): drop_subjects appears to modify df in place - the output of the
# following cell lists only control-group subjects. Re-run the notebook from the
# load cell to get the full data set back. TODO confirm in sparklyRGT_documentation.
# df_exp = rgt.drop_subjects(df, control_group) #drop control group 
df_control = rgt.drop_subjects(df, exp_group) #drop exp group
# Display the frame to check which subjects remain.
df

Unnamed: 0,MSN,StartDate,StartTime,Subject,Group,Box,Experiment,Comment,Session,Trial,...,Pun_HeadEntry,Pun_Dur,Premature_Resp,Premature_Hole,Rew_Persev_H1,Rew_Persev_H2,Rew_Persev_H3,Rew_Persev_H4,Rew_Persev_H5,option
0,rGT_A-cue,2021-07-17,13:58:31,25,0.0,1,0.0,,31,1.1,...,0,0,1,1,0,0,0,0,0,0
1,rGT_A-cue,2021-07-17,13:58:31,25,0.0,1,0.0,,31,1.1,...,0,0,1,1,0,0,0,0,0,0
2,rGT_A-cue,2021-07-17,13:58:31,25,0.0,1,0.0,,31,1.0,...,0,0,0,0,0,0,0,0,0,1
3,rGT_A-cue,2021-07-17,13:58:31,25,0.0,1,0.0,,31,2.1,...,0,0,1,3,0,0,0,0,0,0
4,rGT_A-cue,2021-07-17,13:58:31,25,0.0,1,0.0,,31,2.1,...,0,0,1,2,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14674,rGT_B-cue,2021-07-28,13:34:03,32,,5,,,39,90.0,...,0,0,0,0,0,0,0,0,0,2
14675,rGT_B-cue,2021-07-28,13:34:03,32,,5,,,39,91.0,...,0,0,0,0,0,0,0,0,0,2
14676,rGT_B-cue,2021-07-28,13:34:03,32,,5,,,39,92.0,...,0,0,0,0,0,0,0,0,0,2
14677,rGT_B-cue,2021-07-28,13:34:03,32,,5,,,39,93.0,...,0,0,0,0,0,0,0,0,0,0


In [12]:
# Rebuild the long-format summary from the control-only trial data.
# df_control is passed explicitly so this cell states which subset it analyses,
# instead of relying on drop_subjects having modified df as a side effect.
df_long = rgt.get_long_summary_data(df_control, df_sum)
# Confirm that only the expected subjects remain.
df_long.Subject.unique()

array([ 1,  6,  8, 10, 14, 15, 17, 20, 25, 26, 27, 28, 29, 30, 31, 32],
      dtype=int64)

In [13]:
# Run the baseline check on every session from startsess to endsess (inclusive).
# The session list is derived from the configuration cell so it cannot drift out
# of sync with the sessions used for plotting.
# The result is displayed as a pair of dicts keyed by variable name - see
# sparklyRGT_documentation for how to interpret them.
rgt.get_baseline(df_long, sessions = list(range(startsess, endsess + 1)))

({},
 {'P1': 0.8439442352041859,
  'P2': 0.8491793202202587,
  'P3': 0.3891899621148487,
  'P4': 0.33619114337806677,
  'risk': 0.4047651981468341,
  'collect_lat': 0.9070889729151779,
  'choice_lat': 0.19927132854272922,
  'omit': 0.6492948531667078,
  'trial': 0.1140613159360219,
  'prem': 0.2654104132646972})

## Calculate means and SEMs for your experimental groups



In [None]:
# Compute the group means and SEMs from the summary table.
# group_list defines the groups; group_names supplies the label for each position in it.
mean_scores, SEM = rgt.get_means_sem(df_sum, groups = group_list, group_names = group_names)
mean_scores
# Uncomment the line below to display the SEM table instead.
# SEM

### Calculate means and SEMs for your experimental groups, split by risk status and/or sex

In [None]:
def _subjects_with_status(group, status_subjects):
    """Return the members of `group` that also appear in `status_subjects`, keeping group order."""
    kept = []
    for rat in group:
        if rat in status_subjects:
            kept.append(rat)
    return kept


# Cross each treatment group with each risk classification.
control_risky = _subjects_with_status(control_group, risky)
exp_risky = _subjects_with_status(exp_group, risky)
control_optimal = _subjects_with_status(control_group, optimal)
exp_optimal = _subjects_with_status(exp_group, optimal)

group_list_risk = [control_risky, exp_risky, control_optimal, exp_optimal]

# Labels are keyed by position in group_list_risk, so they must be listed in the same order.
group_names_risk = dict(enumerate([
    'Control risky',
    'Experimental risky',
    'Control optimal',
    'Experimental optimal',
]))

# Means and SEMs for the four risk-split groups.
mean_scores_risk, SEM_risk = rgt.get_means_sem(df_sum, group_list_risk, group_names_risk)

mean_scores_risk

# 2B) Baseline & Acquisition Analysis: Plotting


## Bar plot of P1-P4 % choice


In [None]:
# Bar plot of P1-P4 % choice for sessions startsess-endsess, using the group
# means and SEMs computed above.
rgt.choice_bar_plot(startsess, endsess, mean_scores, SEM)

#To save figure:
# NOTE(review): the example file name below refers to a different data set
# (BH07, S29-30) - rename it before saving.
# plt.savefig('BH07 Choice S29-30',facecolor = 'white')

## Line plot of other variables


In [None]:
# Line plot of premature responding across sessions startsess-endsess for each group.
# The session range in the axis label is built from startsess/endsess so the label
# stays correct when the configured sessions change.
rgt.rgt_plot('prem', startsess, endsess, title, mean_scores, SEM, group_names = group_names, y_label = 'Premature responding BH08 Session {}-{}'.format(startsess, endsess)) 

## Bar plot of other variables



In [None]:
# Bar plot of the risk score for sessions startsess-endsess, one bar per group.
# Note: group_names is passed positionally here but by keyword in the other plotting cells.
rgt.rgt_bar_plot('risk', startsess, endsess, title, mean_scores, SEM, group_names, y_label = 'Risk score')

## Plotting by risk status 

In [None]:
# P1-P4 % choice bar plot for the risk-split groups.
# Requires mean_scores_risk and SEM_risk from the "split by risk status" cell above.
rgt.choice_bar_plot(startsess, endsess, mean_scores_risk, SEM_risk)

In [None]:
# Line plot of the risk score across sessions startsess-endsess for the risk-split groups.
# Requires mean_scores_risk, SEM_risk and group_names_risk from the "split by risk status" cell above.
rgt.rgt_plot('risk', startsess, endsess, title, mean_scores_risk, SEM_risk, group_names = group_names_risk, y_label = 'Risk score') 

In [None]:
# Bar plot of premature responding for the risk-split groups.
# Requires mean_scores_risk, SEM_risk and group_names_risk from the "split by risk status" cell above.
rgt.rgt_bar_plot('prem', startsess, endsess, title, mean_scores_risk, SEM_risk, group_names = group_names_risk,y_label = 'Premature responding')