Import all packages

In [1]:
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
%matplotlib inline

# Stress Enhanced FearLearning - Freezing Analysis

## Convert dataframe from default Ethovision output

### Import (default) ethovision export file from inactivity analysis

Based on the original structure of the data output, I import with multiindexing to allow for Arena and Freezing thresholds to become columns later on. 

In [2]:
etho_df = pd.read_csv('SEFL_Inactivity_Cohort_1.csv',header = [3,4])

In [3]:
etho_df #examine data frame

Unnamed: 0_level_0,Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Arena 1,Arena 2,Arena 3,Arena 4,Arena 1,Arena 2,Arena 3,Arena 4,Arena 1,Arena 2,Arena 3,Arena 4
Unnamed: 0_level_1,Unnamed: 0_level_1.1,Unnamed: 1_level_1,Unnamed: 2_level_1,.10freeze,.10freeze,.10freeze,.10freeze,.05freeze,.05freeze,.05freeze,.05freeze,.03freeze,.03freeze,.03freeze,.03freeze
0,Trial 1,ContextAtraining,0:00:00-0:00:30,2.12,6.00,5.04,5.68,0.96,2.08,3.40,3.60,0.16,0.80,2.76,2.28
1,Trial 1,ContextAtraining,0:00:30-0:01:00,2.96,6.28,10.84,8.48,1.24,3.80,8.44,5.52,0.64,2.28,6.60,3.48
2,Trial 1,ContextAtraining,0:01:00-0:01:30,4.80,3.24,7.40,10.56,3.04,1.08,3.48,7.56,1.80,0.64,2.32,5.72
3,Trial 1,ContextAtraining,0:01:30-0:02:00,10.28,1.68,10.20,11.44,7.16,0.24,5.56,8.20,5.68,0.16,3.32,6.36
4,Trial 1,ContextAtraining,0:02:00-0:02:30,6.60,2.96,15.08,1.40,3.56,1.64,8.96,0.72,2.40,0.84,6.36,0.36
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
377,Trial 39,ContextBTest,0:01:30-0:02:00,8.16,9.72,,,6.88,7.08,,,6.32,5.68,,
378,Trial 39,ContextBTest,0:02:00-0:02:30,18.16,7.36,,,15.48,3.52,,,13.56,2.52,,
379,Trial 39,ContextBTest,0:02:30-0:03:00,18.12,3.72,,,13.24,1.80,,,10.92,1.36,,
380,Trial 39,ContextBTest,0:03:00-0:03:30,11.04,7.44,,,8.08,5.72,,,7.04,4.72,,


Here, we rename the unnammed columns to something useful and discard the lower level

In [4]:
etho_df = etho_df.rename(columns={"Unnamed: 0_level_0": "Trial", 
                                  "Unnamed: 1_level_0": "Experiment",
                                  "Unnamed: 2_level_0": "Time_Bin",
                                  "Unnamed: 0_level_1": None,
                                  "Unnamed: 1_level_1": None,
                                  "Unnamed: 2_level_1": None                                 
                                 })

You'll see that there are 3 different freezing thresholds that have been collected. For the sake of simplicity, we will drop all but the most stringent setting `.03freeze`. (Would be nice for functionality to turn this into a function/selector for which freezing threshold using if/else)

In [5]:
etho_df_03 = etho_df.drop(columns = ['.10freeze', '.05freeze'], level = 1)

Now, we melt the table so that `Arena` becomes it's own column and the freezing duration is distributed between them for each animal 

In [6]:
etho_melt = etho_df_03.melt(col_level=0,
    id_vars=['Trial', 'Experiment', 'Time_Bin'],#columns to keep 
    var_name= 'Arena', #new column name
    value_name= 'freezing_duration(s)' #new column for freezing
)

### Calculate Percent Freezing for each time bin

Here, we add a column for the length of each bout (30 seconds). Eventually would like to be able to derive this from the time stamp. 

In [7]:
etho_melt.insert(5,"Bout(s)", 30) #same insert as above, adding to far right

Prior to running calculations on columns, verify the data types with `type` and `Loc`.

In [10]:
#veryifying data types prior to division
#type(etho_tidy.loc[0,'freezing duration (s)']) 
#type(etho_tidy.loc[0,'Bout (s)'])

Add a new column called `% Freezing` that takes `freezing duration (s)` divided by `Bout (s)` and multiply by 100.

In [8]:
etho_melt["PcentFreezing"] = (etho_melt['freezing_duration(s)']/ etho_melt['Bout(s)'])*100

Using the `Time_Bin` column, generate a new column called `Time(s)` containing the time bin total in seconds with a `for` loop. Considered converting to timedelta but...did not work.

In [9]:
result = []
for string in etho_melt['Time_Bin']:
    if string == '0:00:00-0:00:30':
        result.append(30)
    elif string == '0:00:30-0:01:00':
        result.append(60)
    elif string == '0:01:00-0:01:30':
        result.append(90)
    elif string == '0:01:30-0:02:00':
        result.append(120)
    elif string == '0:02:00-0:02:30':
        result.append(150)
    elif string == '0:02:30-0:03:00':
        result.append(180)
    elif string == '0:03:00-0:03:30':
        result.append(210)
    elif string == '0:03:30-0:04:00':
        result.append(240)
    elif string == '0:04:00-0:04:30':
        result.append(270)
    elif string == '0:04:30-0:05:00':
        result.append(300)
    elif string == '0:05:00-0:05:30':
        result.append(330)
    elif string == '0:05:30-0:06:00':
        result.append(360)
    elif string == '0:06:30-0:07:00':
        result.append(390)
    elif string == '0:07:00-0:07:30':
        result.append(420)
    else: 
        result.append('NaN')
    
etho_melt['Time(s)'] = result
etho_melt

Unnamed: 0,Trial,Experiment,Time_Bin,Arena,freezing_duration(s),Bout(s),PcentFreezing,Time(s)
0,Trial 1,ContextAtraining,0:00:00-0:00:30,Arena 1,0.16,30,0.533333,30
1,Trial 1,ContextAtraining,0:00:30-0:01:00,Arena 1,0.64,30,2.133333,60
2,Trial 1,ContextAtraining,0:01:00-0:01:30,Arena 1,1.80,30,6.000000,90
3,Trial 1,ContextAtraining,0:01:30-0:02:00,Arena 1,5.68,30,18.933333,120
4,Trial 1,ContextAtraining,0:02:00-0:02:30,Arena 1,2.40,30,8.000000,150
...,...,...,...,...,...,...,...,...
1523,Trial 39,ContextBTest,0:01:30-0:02:00,Arena 4,,30,,120
1524,Trial 39,ContextBTest,0:02:00-0:02:30,Arena 4,,30,,150
1525,Trial 39,ContextBTest,0:02:30-0:03:00,Arena 4,,30,,180
1526,Trial 39,ContextBTest,0:03:00-0:03:30,Arena 4,,30,,210


In a later step `merge`, we identified a couple issues with some `Trial` strings containing additional spaces. We remove them here using the `replace` function. 

In [12]:
etho_shrink = etho_melt.replace('Trial     ', 'Trial ', regex=True) # replace 5 spaces with one, using regular expresion
etho_shrink2 = etho_shrink.replace('Trial    ', 'Trial ', regex=True) #replace the 4 additional spaces with 1

In [13]:
etho_shrink2['Trial'].iloc[0] # check if 5 space has changed

'Trial 1'

In [14]:
etho_shrink2['Trial'].iloc[80] # check if  4 space has changed

'Trial 6'

Several rows also contain NaN, we drop those with `dropna`. 

In [None]:
etho_tidy = etho_shrink2.dropna(axis = 'rows') #requires axis to keep in DataFrame format
etho_tidy

Unnamed: 0,Trial,Experiment,Time_Bin,Arena,freezing_duration(s),Bout(s),PcentFreezing,Time(s)
0,Trial 1,ContextAtraining,0:00:00-0:00:30,Arena 1,0.16,30,0.533333,30
1,Trial 1,ContextAtraining,0:00:30-0:01:00,Arena 1,0.64,30,2.133333,60
2,Trial 1,ContextAtraining,0:01:00-0:01:30,Arena 1,1.80,30,6.000000,90
3,Trial 1,ContextAtraining,0:01:30-0:02:00,Arena 1,5.68,30,18.933333,120
4,Trial 1,ContextAtraining,0:02:00-0:02:30,Arena 1,2.40,30,8.000000,150
...,...,...,...,...,...,...,...,...
1427,Trial 27,ContextBTest,0:01:30-0:02:00,Arena 4,30.00,30,100.000000,120
1428,Trial 27,ContextBTest,0:02:00-0:02:30,Arena 4,30.00,30,100.000000,150
1429,Trial 27,ContextBTest,0:02:30-0:03:00,Arena 4,30.00,30,100.000000,180
1430,Trial 27,ContextBTest,0:03:00-0:03:30,Arena 4,30.00,30,100.000000,210


## Generating a Complete Dataset with Animal Info

### Import metadata 

In [27]:
etho_meta = pd.read_csv('SEFL_Cohort1_metadata.csv')
etho_meta

Unnamed: 0,Cohort,Cage,Dam ID,DOB,Exp Start,Experiment,Mouse,Sex,Condition,Arena,Trial
0,1,119447,279_C_2,6/23/2022,10/4/2022,ContextAtraining,1,F,Ctrl,1,Trial 1
1,1,119447,279_C_2,6/23/2022,10/4/2022,ContextAtraining,2,F,Ctrl,2,Trial 1
2,1,119447,279_C_2,6/23/2022,10/4/2022,ContextAtraining,3,F,Ctrl,3,Trial 1
3,1,119447,279_C_2,6/23/2022,10/4/2022,ContextAtraining,4,F,Ctrl,4,Trial 1
4,1,119476,279_C_2,6/23/2022,10/4/2022,ContextAtraining,5,F,Ctrl,1,Trial 2
...,...,...,...,...,...,...,...,...,...,...,...
91,1,119470,237_E_3,6/26/2022,11/3/2022,ContextBTest,7,M,ELS,3,Trial 37
92,1,119470,237_E_3,6/26/2022,11/3/2022,ContextBTest,8,M,ELS,4,Trial 38
93,1,113342,265_E_5,6/28/2022,11/3/2022,ContextBTest,9,M,ELS,1,Trial 38
94,1,113342,265_E_5,6/28/2022,11/3/2022,ContextBTest,10,M,ELS,2,Trial 39


### Make Sure that Key Columns Match

Adding "Arena" to arena str

In [28]:
etho_meta.insert(9, "concat", "Arena") #add a new column called 'concat' with the str "Arena"

In [29]:
etho_meta['Arena'] = etho_meta[['concat', 'Arena']].astype(str).apply(" ".join, axis=1) #concatenate the two columns using apply function

In [30]:
etho_meta_mod = etho_meta.drop(columns='concat') # drop the unnecessary column

Generate an ID Column combining `Mouse`, `Sex`, and `Condition`

In [31]:
etho_meta_mod #check work

Unnamed: 0,Cohort,Cage,Dam ID,DOB,Exp Start,Experiment,Mouse,Sex,Condition,Arena,Trial
0,1,119447,279_C_2,6/23/2022,10/4/2022,ContextAtraining,1,F,Ctrl,Arena 1,Trial 1
1,1,119447,279_C_2,6/23/2022,10/4/2022,ContextAtraining,2,F,Ctrl,Arena 2,Trial 1
2,1,119447,279_C_2,6/23/2022,10/4/2022,ContextAtraining,3,F,Ctrl,Arena 3,Trial 1
3,1,119447,279_C_2,6/23/2022,10/4/2022,ContextAtraining,4,F,Ctrl,Arena 4,Trial 1
4,1,119476,279_C_2,6/23/2022,10/4/2022,ContextAtraining,5,F,Ctrl,Arena 1,Trial 2
...,...,...,...,...,...,...,...,...,...,...,...
91,1,119470,237_E_3,6/26/2022,11/3/2022,ContextBTest,7,M,ELS,Arena 3,Trial 37
92,1,119470,237_E_3,6/26/2022,11/3/2022,ContextBTest,8,M,ELS,Arena 4,Trial 38
93,1,113342,265_E_5,6/28/2022,11/3/2022,ContextBTest,9,M,ELS,Arena 1,Trial 38
94,1,113342,265_E_5,6/28/2022,11/3/2022,ContextBTest,10,M,ELS,Arena 2,Trial 39


### Merge freezing data with metadata

There are also arenas here that do not correspond with metadata and therefore contain NaN for the metadata. Remove using `dropna` again. 

In [40]:
contextA = etho_tidy[etho_tidy['Experiment'] == 'ContextAtraining']
contextA_meta = etho_meta_mod[etho_meta_mod['Experiment'] == 'ContextAtraining']
sefl_A_df = contextA.merge(contextA_meta, how='outer', on= ['Experiment','Trial', 'Arena']) #, indicator = True)
sefl_A_df = sefl_A_df.dropna(axis = 'rows') #requires axis to keep in DataFrame format
sefl_A_df

Unnamed: 0,Trial,Experiment,Time_Bin,Arena,freezing_duration(s),Bout(s),PcentFreezing,Time(s),Cohort,Cage,Dam ID,DOB,Exp Start,Mouse,Sex,Condition
0,Trial 1,ContextAtraining,0:00:00-0:00:30,Arena 1,0.16,30,0.533333,30,1.0,119447.0,279_C_2,6/23/2022,10/4/2022,1.0,F,Ctrl
1,Trial 1,ContextAtraining,0:00:30-0:01:00,Arena 1,0.64,30,2.133333,60,1.0,119447.0,279_C_2,6/23/2022,10/4/2022,1.0,F,Ctrl
2,Trial 1,ContextAtraining,0:01:00-0:01:30,Arena 1,1.80,30,6.000000,90,1.0,119447.0,279_C_2,6/23/2022,10/4/2022,1.0,F,Ctrl
3,Trial 1,ContextAtraining,0:01:30-0:02:00,Arena 1,5.68,30,18.933333,120,1.0,119447.0,279_C_2,6/23/2022,10/4/2022,1.0,F,Ctrl
4,Trial 1,ContextAtraining,0:02:00-0:02:30,Arena 1,2.40,30,8.000000,150,1.0,119447.0,279_C_2,6/23/2022,10/4/2022,1.0,F,Ctrl
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
400,Trial 6,ContextAtraining,0:05:00-0:05:30,Arena 4,25.72,30,85.733333,330,1.0,119470.0,237_E_3,6/26/2022,10/4/2022,8.0,M,ELS
401,Trial 6,ContextAtraining,0:05:30-0:06:00,Arena 4,17.80,30,59.333333,360,1.0,119470.0,237_E_3,6/26/2022,10/4/2022,8.0,M,ELS
402,Trial 6,ContextAtraining,0:06:00-0:06:30,Arena 4,28.56,30,95.200000,,1.0,119470.0,237_E_3,6/26/2022,10/4/2022,8.0,M,ELS
403,Trial 6,ContextAtraining,0:06:30-0:07:00,Arena 4,26.32,30,87.733333,390,1.0,119470.0,237_E_3,6/26/2022,10/4/2022,8.0,M,ELS


In [39]:
sefl_df = etho_tidy.merge(etho_meta_mod, how='outer', on= ['Experiment','Trial', 'Arena']) #, indicator = True)
sefl_df = sefl_df.dropna(axis = 'rows')
sefl_df

Unnamed: 0,Trial,Experiment,Time_Bin,Arena,freezing_duration(s),Bout(s),PcentFreezing,Time(s),Cohort,Cage,Dam ID,DOB,Exp Start,Mouse,Sex,Condition
0,Trial 1,ContextAtraining,0:00:00-0:00:30,Arena 1,0.16,30.0,0.533333,30,1.0,119447.0,279_C_2,6/23/2022,10/4/2022,1.0,F,Ctrl
1,Trial 1,ContextAtraining,0:00:30-0:01:00,Arena 1,0.64,30.0,2.133333,60,1.0,119447.0,279_C_2,6/23/2022,10/4/2022,1.0,F,Ctrl
2,Trial 1,ContextAtraining,0:01:00-0:01:30,Arena 1,1.80,30.0,6.000000,90,1.0,119447.0,279_C_2,6/23/2022,10/4/2022,1.0,F,Ctrl
3,Trial 1,ContextAtraining,0:01:30-0:02:00,Arena 1,5.68,30.0,18.933333,120,1.0,119447.0,279_C_2,6/23/2022,10/4/2022,1.0,F,Ctrl
4,Trial 1,ContextAtraining,0:02:00-0:02:30,Arena 1,2.40,30.0,8.000000,150,1.0,119447.0,279_C_2,6/23/2022,10/4/2022,1.0,F,Ctrl
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1120,Trial 13,ContextATest,0:03:00-0:03:30,Arena 4,30.00,30.0,100.000000,210,1.0,119470.0,237_E_3,6/26/2022,10/5/2022,8.0,M,ELS
1121,Trial 13,ContextATest,0:03:30-0:04:00,Arena 4,29.76,30.0,99.200000,240,1.0,119470.0,237_E_3,6/26/2022,10/5/2022,8.0,M,ELS
1122,Trial 13,ContextATest,0:04:00-0:04:30,Arena 4,29.40,30.0,98.000000,270,1.0,119470.0,237_E_3,6/26/2022,10/5/2022,8.0,M,ELS
1123,Trial 13,ContextATest,0:04:30-0:05:00,Arena 4,27.88,30.0,92.933333,300,1.0,119470.0,237_E_3,6/26/2022,10/5/2022,8.0,M,ELS


## Analysis

Create Subject Column as a stop-gap until I figure out the metadata merging...

In [20]:
sefl_df

In [30]:
context_A_train = etho_tidy[(etho_tidy.Experiment == 'ContextATest') & (etho_tidy.Arena == 'Arena 1')]
context_A_freeze = np.array(context_A_train.PcentFreezing)

In [31]:
context_A_train

Unnamed: 0,Trial,Experiment,Time_Bin,Arena,freezing_duration(s),Bout(s),PcentFreezing
105,Trial 8,ContextATest,0:00:00-0:00:30,Arena 1,25.32,30,84.400000
106,Trial 8,ContextATest,0:00:30-0:01:00,Arena 1,28.56,30,95.200000
107,Trial 8,ContextATest,0:01:00-0:01:30,Arena 1,22.96,30,76.533333
108,Trial 8,ContextATest,0:01:30-0:02:00,Arena 1,18.08,30,60.266667
109,Trial 8,ContextATest,0:02:00-0:02:30,Arena 1,25.96,30,86.533333
...,...,...,...,...,...,...,...
177,Trial 14,ContextATest,0:03:00-0:03:30,Arena 1,22.80,30,76.000000
178,Trial 14,ContextATest,0:03:30-0:04:00,Arena 1,14.12,30,47.066667
179,Trial 14,ContextATest,0:04:00-0:04:30,Arena 1,8.56,30,28.533333
180,Trial 14,ContextATest,0:04:30-0:05:00,Arena 1,20.84,30,69.466667


We are interested in the effect of SEFL on animals that have experience early life stress. Initially, we will examine group differences independently of sex, and later explore the sex differences. 

In [25]:
#grouped_by_sex = sefl_df.groupby(['Experiment', 'Condition', 'Sex'])
#print(grouped_by_sex)

In [26]:
#group_means = mean()


In [None]:
empty_arena = []
for 

for value in etho_tidy['freezing_duration(s)']:
    if -- use sum/avg to raise error in graphing step