### SparklyRGT Template: choiceRGT

**Requirements**
* The data must be an Excel file from MEDPC2XL (trial-by-trial data)
* The data, sparklyRGT.py file, and this notebook must all be in the same folder

**Getting started: Please make a copy of this notebook (sparklyRGT_template_4) for each analysis**
- Refer to sparklyRGT_documentation for function information
- Note: depending on your analysis, you will only have to complete certain sections of the sparklyRGT_documentation
- Note: feel free to create a personal template once you've become comfortable - this is just an example

In [1]:
import sparklyRGT as rgt 
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.ticker import MaxNLocator
import scipy.stats as stats
pd.options.mode.chained_assignment = None
pd.set_option('display.max_rows',100)

I am being executed!


In [2]:
%load_ext autoreload
%autoreload 2

***

# 1) Load data into Python



In [3]:
# Raw MEDPC2XL workbook(s) to analyse; the list is passed to rgt.load_data unchanged.
file_names = ['Free S33-37 Raw.xlsx'] 
# Load the listed file(s) into the trial-by-trial DataFrame used by every cell below.
df = rgt.load_data(file_names)
# Preview the first rows to confirm the load worked and the expected columns are present.
df.head()

Unnamed: 0,MSN,StartDate,StartTime,Subject,Group,Box,Experiment,Comment,Session,Trial,...,Premature_Resp,Premature_Hole,Rew_Persev_H1,Rew_Persev_H2,Rew_Persev_H4,Rew_Persev_H5,Lever_Latency,Uncued_Chosen,Cued_Chosen,Choice_Omit
0,ChoicerGT_A-FF-CR4,05/23/21,11:26:30,1,0.0,1,0.0,,33,1.0,...,0,0,0,0,0,0,6.97,1,0,0
1,ChoicerGT_A-FF-CR4,05/23/21,11:26:30,1,0.0,1,0.0,,33,2.0,...,0,0,0,0,0,0,2.78,0,1,0
2,ChoicerGT_A-FF-CR4,05/23/21,11:26:30,1,0.0,1,0.0,,33,3.0,...,0,0,0,0,0,0,2.29,0,1,0
3,ChoicerGT_A-FF-CR4,05/23/21,11:26:30,1,0.0,1,0.0,,33,4.1,...,1,5,0,0,0,0,5.48,0,1,0
4,ChoicerGT_A-FF-CR4,05/23/21,11:26:30,1,0.0,1,0.0,,33,4.0,...,0,0,0,0,0,0,1.3,0,1,0


# 4) Choice rGT

## Data cleaning

In [4]:
# Subject IDs assigned to each experimental group (range end is exclusive).
males = list(range(1,29))  # subjects 1-28

females = list(range(30,32))  # subjects 30-31
# NOTE(review): the summary output in this notebook lists subjects 1-32, so subjects 29 and 32
# fall in neither group and `females` has only two members - confirm these ranges are intended.

# Display name for each group; keys presumably correspond to positions in group_list
# (0 -> males, 1 -> females) - confirm against sparklyRGT_documentation.
group_names = {0: 'males',
              1: 'females'} 

# Groups in the order they are passed to the rgt helpers.
group_list = [males,females]

#for plotting: 
title = 'Choice rGT' 

# NOTE(review): the loaded file covers sessions 33-37, which do not overlap 10-15 - confirm
# these bounds before producing figures.
startsess = 10 #first session you would like to include in figures
endsess = 15 #last session you would like to include in figures

In [5]:
# rgt.check_sessions(df)
# df2 = rgt.drop_sessions(df, [28])
# df2 = rgt.edit_sessions(df2, orig_sess = [30, 31], new_sess = [29, 30], subs = "all") 

## Data processing

### Calculate variables for each rat


If you are getting a ValueError, try running the following cell before `rgt.get_summary_data`

In [6]:
# Tidy the raw frame before summarising: discard rows that are entirely empty,
# renumber the index from 0, and cast the two ID columns to plain integers.
df = (
    df.dropna(how='all')
      .reset_index(drop=True)
      .astype({'Session': int, 'Subject': int})
)

In [7]:
# Build the per-subject summary table for the choice rGT task.
# Pass df2 instead of df here if you dropped or renumbered sessions in the data-cleaning step.
df_sum = rgt.get_summary_data(df, task = 'choiceRGT')
df_sum 

Unnamed: 0,33_cued_P1,33_cued_P2,33_cued_P3,33_cued_P4,33_uncued_P1,33_uncued_P2,33_uncued_P3,33_uncued_P4,34_cued_P1,34_cued_P2,...,prem_uncued_35,prem_cued_36,prem_uncued_36,prem_cued_37,prem_uncued_37,pref33,pref34,pref35,pref36,pref37
1,22.9508,8.19672,68.8525,0.0,42.8571,0.0,42.8571,14.2857,24.1935,3.22581,...,0.0,30.0,10.0,6.25,0.0,90.909091,86.746988,89.534884,88.888889,87.671233
2,11.2676,88.7324,0.0,0.0,10.0,90.0,0.0,0.0,22.5806,74.1935,...,33.333333,11.363636,11.764706,11.267606,12.5,87.356322,71.0,74.468085,72.131148,89.873418
3,18.1818,81.8182,0.0,0.0,10.7143,85.7143,3.57143,0.0,8.33333,83.3333,...,17.5,21.212121,13.235294,13.636364,11.538462,28.421053,19.565217,22.330097,32.673267,22.0
4,57.6923,0.0,42.3077,0.0,86.3636,4.54545,9.09091,0.0,41.5385,10.7692,...,28.125,11.111111,15.789474,7.575758,12.121212,71.09375,68.75,70.37037,81.0,66.666667
5,25.0,75.0,0.0,0.0,39.5349,60.4651,0.0,0.0,25.0,75.0,...,0.0,11.111111,5.0,0.0,12.765957,15.789474,23.076923,17.647059,18.367347,17.54386
6,8.77193,29.8246,38.5965,22.807,0.0,57.8947,42.1053,0.0,10.2041,34.6939,...,10.810811,14.285714,17.948718,21.153846,11.111111,75.609756,61.445783,59.340659,58.947368,59.090909
7,0.0,1.81818,98.1818,0.0,0.0,0.0,100.0,0.0,1.81818,0.0,...,20.0,3.508772,12.5,3.174603,50.0,87.692308,89.0625,92.647059,87.692308,88.732394
8,15.5844,57.1429,15.5844,11.6883,21.4286,50.0,21.4286,7.14286,30.137,39.726,...,5.555556,15.068493,6.25,18.666667,14.285714,84.848485,84.536082,80.851064,82.022472,78.125
9,21.6667,25.0,48.3333,5.0,37.5,62.5,0.0,0.0,0.0,97.6744,...,25.0,8.510638,24.0,11.926606,23.076923,86.956522,91.150442,86.554622,78.991597,89.344262
10,0.990099,98.0198,0.990099,0.0,0.0,100.0,0.0,0.0,0.0,100.0,...,28.571429,35.245902,43.478261,20.661157,23.529412,88.8,80.740741,89.552239,84.137931,87.681159


In [8]:
# df_sum

In [9]:
# Spot-check a few session-33 summary columns.
# Only the last expression of a cell is rendered, so just `pref33` appears in the output;
# the first three lookups are evaluated and discarded (they still raise KeyError if a
# column is missing).
df_sum['cued_omit_33']
df_sum['lev_omit33']
df_sum['trial_init33']
df_sum['pref33']

1      90.909091
2      87.356322
3      28.421053
4      71.093750
5      15.789474
6      75.609756
7      87.692308
8      84.848485
9      86.956522
10     88.800000
11     58.441558
12     54.651163
13     89.908257
14     51.282051
15     99.082569
16     12.621359
17     80.303030
18    100.000000
19     33.707865
20     59.200000
21     53.846154
22      7.954545
23     88.095238
24     48.760331
25     39.361702
26     56.115108
27     64.615385
28     96.610169
29     72.727273
30     58.666667
31     71.698113
32     12.500000
Name: pref33, dtype: float64

### Export your data to an Excel file 


In [10]:
# Alternative export through sparklyRGT (currently disabled). If you enable it,
# make sure new_file_name is a string that ends in .xlsx:
# rgt.export_to_excel(df_sum, groups = group_list, column_name = 'sex', new_file_name = 'compare_access2.xlsx', asin = True)
# Write the full summary table to an Excel workbook; the path is relative, so the file
# lands in the current working directory.
df_sum.to_excel("compare_access_Free_S33-37_jun16.xlsx")

## Calculate means and SEMs for your experimental groups



In [11]:
# Compute the means and SEMs of the summary variables for the experimental groups.
# Template requirement: there must be at least 3 males and 3 females.
# NOTE(review): `females` is defined in this notebook as range(30,32), i.e. two subjects -
# confirm the group definitions satisfy this requirement.
mean_scores, SEM = rgt.get_means_sem(df_sum, groups = group_list, group_names = group_names)
# mean_scores
# SEM

## Old test boxes

In [12]:
def _premature_percent(trials, mode):
    """Return per-(Subject, mode) premature-response totals, trial counts and percentages."""
    grouped = trials.groupby(['Subject', mode], as_index=False)
    prem = grouped['Premature_Resp'].sum()
    # Both aggregations use the same grouping, so their rows are in the same order
    # and the positional indices line up.
    prem['Trials'] = grouped['Trial'].count()['Trial']
    prem['prem_percent'] = prem['Premature_Resp'] / prem['Trials'] * 100
    return prem


def get_premature(df_raw,df_sum,mode = 'Session', task = None):
    """Add premature-response percentages to a per-subject summary table.

    For every value of ``mode`` found in ``df_raw`` a column is written to
    ``df_sum`` holding ``sum(Premature_Resp) / count(Trial) * 100`` for each
    subject. Values are aligned on the index of ``df_sum``, which is expected
    to hold subject IDs; subjects with no matching trials get NaN.

    Parameters
    ----------
    df_raw : pandas.DataFrame
        Trial-by-trial data with columns ``Subject``, ``Trial``,
        ``Premature_Resp`` and the column named by ``mode``. When
        ``task == 'choiceRGT'`` it must also contain ``Cued_Chosen`` and
        ``Uncued_Chosen``.
    df_sum : pandas.DataFrame
        Summary table to extend. It is modified in place.
    mode : str, default 'Session'
        Column to split on in addition to ``Subject``.
    task : str or None, default None
        ``'choiceRGT'`` produces separate ``prem_cued_<value>`` and
        ``prem_uncued_<value>`` columns, computed only from trials where the
        respective option was chosen. Any other value produces a single
        ``prem<value>`` column computed from all trials.

    Returns
    -------
    pandas.DataFrame
        The same ``df_sum`` object, with the new columns added.
    """
    if task == 'choiceRGT':
        # Column-name prefix paired with the percentages for that trial subset.
        # Order matters: it fixes the cued/uncued column order within each value.
        sources = [
            ('prem_cued_', _premature_percent(df_raw.loc[df_raw['Cued_Chosen'] == 1], mode)),
            ('prem_uncued_', _premature_percent(df_raw.loc[df_raw['Uncued_Chosen'] == 1], mode)),
        ]
    else:
        sources = [('prem', _premature_percent(df_raw, mode))]

    for num in np.sort(df_raw[mode].unique()):
        for prefix, prem in sources:
            # Index by Subject so the assignment aligns with df_sum's subject index.
            df_sum[prefix + str(num)] = prem.loc[prem[mode] == num].set_index('Subject')['prem_percent']

    return df_sum

In [13]:
# Split the trials by whether the cued or the uncued option was chosen.
df_cued = df[df['Cued_Chosen'].eq(1)]
df_uncued = df[df['Uncued_Chosen'].eq(1)]
# Number of cued-choice trials per subject and session (shown as the cell output).
(
    df_cued
    .groupby(['Subject', 'Session'], as_index=False)['Trial']
    .count()['Trial']
)

0      80
1      72
2      77
3      80
4      64
       ..
155     9
156    19
157    46
158    40
159    35
Name: Trial, Length: 160, dtype: int64

In [14]:
# Per subject and session: summed premature responses and the number of trials,
# computed separately for cued-choice and uncued-choice trials.
prem_resp_cued = (
    df_cued
    .groupby(['Subject', 'Session'], as_index=False)
    .agg(Premature_Resp=('Premature_Resp', 'sum'), Trials=('Trial', 'count'))
)
prem_resp_uncued = (
    df_uncued
    .groupby(['Subject', 'Session'], as_index=False)
    .agg(Premature_Resp=('Premature_Resp', 'sum'), Trials=('Trial', 'count'))
)
# prem_resp_cued['prem_percent'] = prem_resp_cued['Premature_Resp']/(prem_resp_cued['Trials'] + prem_resp_uncued['Trials']) * 100
prem_resp_uncued

Unnamed: 0,Subject,Session,Premature_Resp,Trials
0,1,33,1,8
1,1,34,1,11
2,1,35,0,9
3,1,36,1,10
4,1,37,0,9
...,...,...,...,...
152,32,33,5,63
153,32,34,5,55
154,32,35,8,51
155,32,36,11,49


In [15]:
mode = 'Session'
cued_omit = df_cued.groupby(['Subject',mode], as_index = False)['Omit'].sum()
cued_omit['Trials'] = df_cued.groupby(['Subject',mode],as_index=False)['Trial'].count()['Trial']
cued_omit['omit_percent'] = cued_omit['Omit']/cued_omit['Trials'] * 100
cued_omit

uncued_omit = df_uncued.groupby(['Subject',mode], as_index = False)['Omit'].sum()
uncued_omit['Trials'] = df_uncued.groupby(['Subject',mode],as_index=False)['Trial'].count()['Trial']
uncued_omit['omit_percent'] = uncued_omit['Omit']/cued_omit['Trials'] * 100
uncued_omit
# cued_omit

Unnamed: 0,Subject,Session,Omit,Trials,omit_percent
0,1,33,0,8,0.0
1,1,34,0,11,0.0
2,1,35,0,9,0.0
3,1,36,0,10,0.0
4,1,37,0,9,0.0
...,...,...,...,...,...
152,32,33,0,63,0.0
153,32,34,0,55,0.0
154,32,35,0,51,0.0
155,32,36,0,49,0.0


In [16]:
# Per subject and `mode` level: summed Choice_Omit and the number of trials.
lev_omit = (
    df
    .groupby(['Subject', mode], as_index=False)
    .agg(Choice_Omit=('Choice_Omit', 'sum'), Trials=('Trial', 'count'))
)
lev_omit


Unnamed: 0,Subject,Session,Choice_Omit,Trials
0,1,33,12,100
1,1,34,10,93
2,1,35,12,98
3,1,36,13,103
4,1,37,6,79
...,...,...,...,...
155,32,33,0,72
156,32,34,3,77
157,32,35,1,98
158,32,36,1,90


In [17]:
# Keep only the trials whose Choice_Omit flag is not 1, then count them
# per subject and session.
df_trials_init = df[df['Choice_Omit'].ne(1)]
df_trials_init_count = (
    df_trials_init
    .groupby(['Subject', 'Session'], as_index=False)['Trial']
    .count()
)
df_trials_init_count

Unnamed: 0,Subject,Session,Trial
0,1,33,88
1,1,34,83
2,1,35,86
3,1,36,90
4,1,37,73
...,...,...,...
155,32,33,72
156,32,34,74
157,32,35,97
158,32,36,89


In [18]:
# Number of trials recorded for every subject in every session.
trials = (
    df
    .groupby(['Subject', 'Session'], as_index=False)['Trial']
    .count()
)
# Session-33 counts as a Series indexed by Subject.
trials.query('Session == 33').set_index('Subject')['Trial']

Subject
1     100
2     106
3     123
4     129
5      87
6      83
7      72
8     107
9      92
10    134
11     80
12     88
13    116
14     97
15    140
16    108
17     86
18    149
19     91
20    128
21     66
22    105
23     84
24    121
25     95
26    140
27     70
28    128
29     99
30     75
31    106
32     72
Name: Trial, dtype: int64