In [1]:
import os

import numpy as np
import pandas as pd
import scipy.stats as st
import scikit_posthocs

import iqplot

import bokeh.io
import bokeh.layouts
import bokeh.plotting
import bokeh.resources
bokeh.io.output_notebook()

## Exploratory Data Analysis

1. Opening the whole Excel workbook so that the individual sheets can be read later.

In [2]:
# Build the workbook location relative to the notebook and open it once;
# the individual sheets are parsed from this handle in later cells.
path = os.path.join(
    os.pardir,
    'data',
    'behavior_data',
    'benCom_Motor_GI_function.xlsx',
)
data = pd.ExcelFile(path)

2. Creating a list of all sheets (tests) that we want to analyse.

In [3]:
# Keep every sheet except those whose name contains 'Weight'.
tests = [sheet for sheet in data.sheet_names if 'Weight' not in sheet]
tests

['Beam_time',
 'Beam_steps',
 'Pole',
 'Wirehang',
 'Adhesive_removal',
 'Hindlimb',
 'Fecal_output',
 'Fecal_score',
 'Water_content',
 'Gut_transit',
 'Bead_exp']

3. Parsing the Excel file into separate datasets (1 test = 1 dataset) and storing them in a dictionary keyed by test/sheet name.

In [4]:
# The value column is named differently from sheet to sheet; map all of the
# known names onto 'Measurement' so later cells can treat every test alike.
measurement_aliases = dict.fromkeys(
    [
        "Trial1",
        "Trial15",
        "Percent_water_content",
        "Time_min",
        "Slips_Step_Trial1",
    ],
    "Measurement",
)

data_dict = {}

for test in tests:
    parsed = data.parse(test)
    renamed = parsed.rename(columns=measurement_aliases)
    # NOTE(review): dropna() removes a row when ANY column is missing, not only
    # 'Measurement' — confirm that is the intended filtering.
    data_dict[test] = renamed.dropna()

4. Plotting the raw data from all the tests for the EDA.

In [5]:
# Draw one strip + box plot per test, split by Genotype and Microbiome.
plots = []

for test in tests:
    df = data_dict[test]

    p = iqplot.stripbox(
        df,
        q='Measurement',
        q_axis='y',
        cats=['Genotype', 'Microbiome'],
        # Hover labels are filled from the 'ID' and 'Cage' columns.
        tooltips=[
            ('Mouse', '@ID'),
            ('Cage', '@Cage')
        ],
        title=test,
        color_column='ID',
    )
    plots.append(p)

# Show all plots together in a two-column grid inside the notebook.
lt = bokeh.layouts.grid(plots, ncols=2)
bokeh.io.show(lt)

# Save every plot as its own HTML file. The target directory is created on
# demand so a fresh checkout without '../figures' does not fail here.
figures_dir = os.path.join('..', 'figures')
os.makedirs(figures_dir, exist_ok=True)

# Pair each plot with its test name directly instead of indexing tests by
# position, so the file name always matches the plot being written.
for test, p in zip(tests, plots):
    bokeh.io.save(
        p,
        filename=os.path.join(figures_dir, 'EDA_Bencom_' + test + '.html'),
        title='Bokeh plot',
        resources=bokeh.resources.CDN)


## Pole descent

In [6]:
# Split the 'Pole' measurements into one array per Genotype/Microbiome group.
work_df = data_dict['Pole']

effect_1, effect_2 = 'Genotype', 'Microbiome'
effect1_lst = work_df[effect_1].unique()
effect2_lst = work_df[effect_2].unique()

group_vals = {}
for genotype in effect1_lst:
    in_genotype = work_df[effect_1] == genotype
    for microbiome in effect2_lst:
        in_group = in_genotype & (work_df[effect_2] == microbiome)
        values = work_df.loc[in_group, 'Measurement'].values
        # Combinations without any rows are left out of the dictionary.
        if len(values) == 0:
            continue
        group_vals[genotype + '_' + microbiome] = values

### NHST

In [7]:
# Omnibus Kruskal-Wallis H-test across all groups collected in group_vals.
res_kw = st.kruskal(*group_vals.values())
res_kw

KruskalResult(statistic=np.float64(6.396406670252844), pvalue=np.float64(0.040835505828134874))

In [8]:
# Pairwise Conover post-hoc test with Benjamini-Hochberg ("fdr_bh") adjustment.
names = list(group_vals.keys())
res = scikit_posthocs.posthoc_conover(list(group_vals.values()), p_adjust="fdr_bh")

# The result's rows/columns are numbered 1..k in the order the groups were
# passed; map each number back to its group name for however many groups
# there are rather than assuming exactly three.
labels = dict(enumerate(names, start=1))
res = res.rename(columns=labels, index=labels)

# The Kruskal-Wallis p-value from the previous cell is repeated on every row.
res['Kruskal-Wallis'] = res_kw.pvalue

# Create the output directory on demand so the export works on a fresh checkout.
out_path = "../output/Bencom_pole_pvals_conover_test.csv"
os.makedirs(os.path.dirname(out_path), exist_ok=True)
res.to_csv(out_path)
res

Unnamed: 0,WT_SPF,ASO_SPF,ASO_SPF-bC,Kruskal-Wallis
WT_SPF,1.0,0.032084,0.313449,0.040836
ASO_SPF,0.032084,1.0,0.157383,0.040836
ASO_SPF-bC,0.313449,0.157383,1.0,0.040836


## Beam cross

In [9]:
# Split the 'Beam_time' measurements into one array per Genotype/Microbiome group.
work_df = data_dict['Beam_time']

effect_1, effect_2 = 'Genotype', 'Microbiome'
effect1_lst = work_df[effect_1].unique()
effect2_lst = work_df[effect_2].unique()

group_vals = {}
for genotype in effect1_lst:
    in_genotype = work_df[effect_1] == genotype
    for microbiome in effect2_lst:
        in_group = in_genotype & (work_df[effect_2] == microbiome)
        values = work_df.loc[in_group, 'Measurement'].values
        # Combinations without any rows are left out of the dictionary.
        if len(values) == 0:
            continue
        group_vals[genotype + '_' + microbiome] = values

### NHST

In [10]:
# Omnibus Kruskal-Wallis H-test across all groups collected in group_vals.
res_kw = st.kruskal(*group_vals.values())
res_kw

KruskalResult(statistic=np.float64(19.539070931093626), pvalue=np.float64(5.7166901878233e-05))

In [11]:
# Pairwise Conover post-hoc test with Benjamini-Hochberg ("fdr_bh") adjustment.
names = list(group_vals.keys())
res = scikit_posthocs.posthoc_conover(list(group_vals.values()), p_adjust="fdr_bh")

# The result's rows/columns are numbered 1..k in the order the groups were
# passed; map each number back to its group name for however many groups
# there are rather than assuming exactly three.
labels = dict(enumerate(names, start=1))
res = res.rename(columns=labels, index=labels)

# The Kruskal-Wallis p-value from the previous cell is repeated on every row.
res['Kruskal-Wallis'] = res_kw.pvalue

# Create the output directory on demand so the export works on a fresh checkout.
out_path = "../output/Bencom_beam_pvals_conover_test.csv"
os.makedirs(os.path.dirname(out_path), exist_ok=True)
res.to_csv(out_path)
res

Unnamed: 0,WT_SPF,ASO_SPF,ASO_SPF-bC,Kruskal-Wallis
WT_SPF,1.0,2.4e-05,1.1e-05,5.7e-05
ASO_SPF,2.4e-05,1.0,0.817787,5.7e-05
ASO_SPF-bC,1.1e-05,0.817787,1.0,5.7e-05


## Sticker removal

In [12]:
# Split the 'Adhesive_removal' measurements into one array per
# Genotype/Microbiome group.
work_df = data_dict['Adhesive_removal']

effect_1, effect_2 = 'Genotype', 'Microbiome'
effect1_lst = work_df[effect_1].unique()
effect2_lst = work_df[effect_2].unique()

group_vals = {}
for genotype in effect1_lst:
    in_genotype = work_df[effect_1] == genotype
    for microbiome in effect2_lst:
        in_group = in_genotype & (work_df[effect_2] == microbiome)
        values = work_df.loc[in_group, 'Measurement'].values
        # Combinations without any rows are left out of the dictionary.
        if len(values) == 0:
            continue
        group_vals[genotype + '_' + microbiome] = values

### NHST

In [13]:
# Omnibus Kruskal-Wallis H-test across all groups collected in group_vals.
res_kw = st.kruskal(*group_vals.values())
res_kw

KruskalResult(statistic=np.float64(13.944527996805817), pvalue=np.float64(0.000937527939525821))

In [14]:
# Pairwise Conover post-hoc test with Benjamini-Hochberg ("fdr_bh") adjustment.
names = list(group_vals.keys())
res = scikit_posthocs.posthoc_conover(list(group_vals.values()), p_adjust="fdr_bh")

# The result's rows/columns are numbered 1..k in the order the groups were
# passed; map each number back to its group name for however many groups
# there are rather than assuming exactly three.
labels = dict(enumerate(names, start=1))
res = res.rename(columns=labels, index=labels)

# The Kruskal-Wallis p-value from the previous cell is repeated on every row.
res['Kruskal-Wallis'] = res_kw.pvalue

# Create the output directory on demand so the export works on a fresh checkout.
out_path = "../output/Bencom_adhesive_removal_pvals_conover_test.csv"
os.makedirs(os.path.dirname(out_path), exist_ok=True)
res.to_csv(out_path)
res

Unnamed: 0,WT_SPF,ASO_SPF,ASO_SPF-bC,Kruskal-Wallis
WT_SPF,1.0,0.000168,0.037984,0.000938
ASO_SPF,0.000168,1.0,0.030212,0.000938
ASO_SPF-bC,0.037984,0.030212,1.0,0.000938


## Wirehang

In [15]:
# Split the 'Wirehang' measurements into one array per Genotype/Microbiome group.
work_df = data_dict['Wirehang'].copy()

effect_1, effect_2 = 'Genotype', 'Microbiome'
effect1_lst = work_df[effect_1].unique()
effect2_lst = work_df[effect_2].unique()

group_vals = {}
for genotype in effect1_lst:
    in_genotype = work_df[effect_1] == genotype
    for microbiome in effect2_lst:
        in_group = in_genotype & (work_df[effect_2] == microbiome)
        values = work_df.loc[in_group, 'Measurement'].values
        # Combinations without any rows are left out of the dictionary.
        if len(values) == 0:
            continue
        group_vals[genotype + '_' + microbiome] = values


### NHST

In [16]:
# Omnibus Kruskal-Wallis H-test across all groups collected in group_vals.
res_kw = st.kruskal(*group_vals.values())
res_kw

KruskalResult(statistic=np.float64(20.615435162861054), pvalue=np.float64(3.3374528074606554e-05))

In [17]:
# Pairwise Conover post-hoc test with Benjamini-Hochberg ("fdr_bh") adjustment.
names = list(group_vals.keys())
res = scikit_posthocs.posthoc_conover(list(group_vals.values()), p_adjust="fdr_bh")

# The result's rows/columns are numbered 1..k in the order the groups were
# passed; map each number back to its group name for however many groups
# there are rather than assuming exactly three.
labels = dict(enumerate(names, start=1))
res = res.rename(columns=labels, index=labels)

# The Kruskal-Wallis p-value from the previous cell is repeated on every row.
res['Kruskal-Wallis'] = res_kw.pvalue

# Create the output directory on demand so the export works on a fresh checkout.
out_path = "../output/Bencom_wirehang_pvals_conover_test.csv"
os.makedirs(os.path.dirname(out_path), exist_ok=True)
res.to_csv(out_path)
res

Unnamed: 0,WT_SPF,ASO_SPF,ASO_SPF-bC,Kruskal-Wallis
WT_SPF,1.0,8e-06,4e-06,3.3e-05
ASO_SPF,8e-06,1.0,0.763306,3.3e-05
ASO_SPF-bC,4e-06,0.763306,1.0,3.3e-05


## Hindlimb

In [18]:
# Split the 'Hindlimb' measurements into one array per Genotype/Microbiome group.
work_df = data_dict['Hindlimb'].copy()

effect_1, effect_2 = 'Genotype', 'Microbiome'
effect1_lst = work_df[effect_1].unique()
effect2_lst = work_df[effect_2].unique()

group_vals = {}
for genotype in effect1_lst:
    in_genotype = work_df[effect_1] == genotype
    for microbiome in effect2_lst:
        in_group = in_genotype & (work_df[effect_2] == microbiome)
        values = work_df.loc[in_group, 'Measurement'].values
        # Combinations without any rows are left out of the dictionary.
        if len(values) == 0:
            continue
        group_vals[genotype + '_' + microbiome] = values

### NHST

In [19]:
# Omnibus Kruskal-Wallis H-test across all groups collected in group_vals.
res_kw = st.kruskal(*group_vals.values())
res_kw

KruskalResult(statistic=np.float64(12.939849801939362), pvalue=np.float64(0.0015493420826462657))

In [20]:
# Pairwise Conover post-hoc test with Benjamini-Hochberg ("fdr_bh") adjustment.
names = list(group_vals.keys())
res = scikit_posthocs.posthoc_conover(list(group_vals.values()), p_adjust="fdr_bh")

# The result's rows/columns are numbered 1..k in the order the groups were
# passed; map each number back to its group name for however many groups
# there are rather than assuming exactly three.
labels = dict(enumerate(names, start=1))
res = res.rename(columns=labels, index=labels)

# The Kruskal-Wallis p-value from the previous cell is repeated on every row.
res['Kruskal-Wallis'] = res_kw.pvalue

# Create the output directory on demand so the export works on a fresh checkout.
out_path = "../output/Bencom_hindlimb_pvals_conover_test.csv"
os.makedirs(os.path.dirname(out_path), exist_ok=True)
res.to_csv(out_path)
res

Unnamed: 0,WT_SPF,ASO_SPF,ASO_SPF-bC,Kruskal-Wallis
WT_SPF,1.0,0.000654,0.516273,0.001549
ASO_SPF,0.000654,1.0,0.002659,0.001549
ASO_SPF-bC,0.516273,0.002659,1.0,0.001549


## Fecal output

In [21]:
# Split the 'Fecal_output' measurements into one array per
# Genotype/Microbiome group.
work_df = data_dict['Fecal_output'].copy()

effect_1, effect_2 = 'Genotype', 'Microbiome'
effect1_lst = work_df[effect_1].unique()
effect2_lst = work_df[effect_2].unique()

group_vals = {}
for genotype in effect1_lst:
    in_genotype = work_df[effect_1] == genotype
    for microbiome in effect2_lst:
        in_group = in_genotype & (work_df[effect_2] == microbiome)
        values = work_df.loc[in_group, 'Measurement'].values
        # Combinations without any rows are left out of the dictionary.
        if len(values) == 0:
            continue
        # NOTE(review): the int32 cast truncates any fractional values — confirm
        # the measurements in this sheet are whole numbers.
        group_vals[genotype + '_' + microbiome] = values.astype('int32')

### NHST

In [22]:
# Omnibus Kruskal-Wallis H-test across all groups collected in group_vals.
res_kw = st.kruskal(*group_vals.values())
res_kw

KruskalResult(statistic=np.float64(3.136874538356031), pvalue=np.float64(0.20837055514468025))

In [23]:
# Pairwise Conover post-hoc test with Benjamini-Hochberg ("fdr_bh") adjustment.
names = list(group_vals.keys())
res = scikit_posthocs.posthoc_conover(list(group_vals.values()), p_adjust="fdr_bh")

# The result's rows/columns are numbered 1..k in the order the groups were
# passed; map each number back to its group name for however many groups
# there are rather than assuming exactly three.
labels = dict(enumerate(names, start=1))
res = res.rename(columns=labels, index=labels)

# The Kruskal-Wallis p-value from the previous cell is repeated on every row.
res['Kruskal-Wallis'] = res_kw.pvalue

# Create the output directory on demand so the export works on a fresh checkout.
out_path = "../output/Bencom_fecoutput_pvals_conover_test.csv"
os.makedirs(os.path.dirname(out_path), exist_ok=True)
res.to_csv(out_path)
res

Unnamed: 0,WT_SPF,ASO_SPF,ASO_SPF-bC,Kruskal-Wallis
WT_SPF,1.0,0.230255,0.230255,0.208371
ASO_SPF,0.230255,1.0,0.847247,0.208371
ASO_SPF-bC,0.230255,0.847247,1.0,0.208371


## Fecal score

In [24]:
# Split the 'Fecal_score' measurements into one array per
# Genotype/Microbiome group.
work_df = data_dict['Fecal_score'].copy()

effect_1, effect_2 = 'Genotype', 'Microbiome'
effect1_lst = work_df[effect_1].unique()
effect2_lst = work_df[effect_2].unique()

group_vals = {}
for genotype in effect1_lst:
    in_genotype = work_df[effect_1] == genotype
    for microbiome in effect2_lst:
        in_group = in_genotype & (work_df[effect_2] == microbiome)
        values = work_df.loc[in_group, 'Measurement'].values
        # Combinations without any rows are left out of the dictionary.
        if len(values) == 0:
            continue
        group_vals[genotype + '_' + microbiome] = values

### NHST

In [25]:
# Omnibus Kruskal-Wallis H-test across all groups collected in group_vals.
res_kw = st.kruskal(*group_vals.values())
res_kw

KruskalResult(statistic=np.float64(19.117927136007715), pvalue=np.float64(7.056589752860742e-05))

In [26]:
# Pairwise Conover post-hoc test with Benjamini-Hochberg ("fdr_bh") adjustment.
names = list(group_vals.keys())
res = scikit_posthocs.posthoc_conover(list(group_vals.values()), p_adjust="fdr_bh")

# The result's rows/columns are numbered 1..k in the order the groups were
# passed; map each number back to its group name for however many groups
# there are rather than assuming exactly three.
labels = dict(enumerate(names, start=1))
res = res.rename(columns=labels, index=labels)

# The Kruskal-Wallis p-value from the previous cell is repeated on every row.
res['Kruskal-Wallis'] = res_kw.pvalue

# Create the output directory on demand so the export works on a fresh checkout.
out_path = "../output/Bencom_fecal_score_pvals_conover_test.csv"
os.makedirs(os.path.dirname(out_path), exist_ok=True)
res.to_csv(out_path)
res

Unnamed: 0,WT_SPF,ASO_SPF,ASO_SPF-bC,Kruskal-Wallis
WT_SPF,1.0,2e-06,0.006583,7.1e-05
ASO_SPF,2e-06,1.0,0.004568,7.1e-05
ASO_SPF-bC,0.006583,0.004568,1.0,7.1e-05


## Bead expulsion

In [27]:
# Split the 'Bead_exp' measurements into one array per Genotype/Microbiome group.
work_df = data_dict['Bead_exp'].copy()

effect_1, effect_2 = 'Genotype', 'Microbiome'
effect1_lst = work_df[effect_1].unique()
effect2_lst = work_df[effect_2].unique()

group_vals = {}
for genotype in effect1_lst:
    in_genotype = work_df[effect_1] == genotype
    for microbiome in effect2_lst:
        in_group = in_genotype & (work_df[effect_2] == microbiome)
        values = work_df.loc[in_group, 'Measurement'].values
        # Combinations without any rows are left out of the dictionary.
        if len(values) == 0:
            continue
        # NOTE(review): the int32 cast truncates any fractional values — confirm
        # the measurements in this sheet are whole numbers.
        group_vals[genotype + '_' + microbiome] = values.astype('int32')

### NHST

In [28]:
# Omnibus Kruskal-Wallis H-test across all groups collected in group_vals.
res_kw = st.kruskal(*group_vals.values())
res_kw

KruskalResult(statistic=np.float64(14.764041721977692), pvalue=np.float64(0.0006223419477943169))

In [29]:
# Pairwise Conover post-hoc test with Benjamini-Hochberg ("fdr_bh") adjustment.
names = list(group_vals.keys())
res = scikit_posthocs.posthoc_conover(list(group_vals.values()), p_adjust="fdr_bh")

# The result's rows/columns are numbered 1..k in the order the groups were
# passed; map each number back to its group name for however many groups
# there are rather than assuming exactly three.
labels = dict(enumerate(names, start=1))
res = res.rename(columns=labels, index=labels)

# The Kruskal-Wallis p-value from the previous cell is repeated on every row.
res['Kruskal-Wallis'] = res_kw.pvalue

# Create the output directory on demand so the export works on a fresh checkout.
out_path = "../output/Bencom_beadexp_pvals_conover_test.csv"
os.makedirs(os.path.dirname(out_path), exist_ok=True)
res.to_csv(out_path)
res

Unnamed: 0,WT_SPF,ASO_SPF,ASO_SPF-bC,Kruskal-Wallis
WT_SPF,1.0,0.000101,0.010535,0.000622
ASO_SPF,0.000101,1.0,0.031326,0.000622
ASO_SPF-bC,0.010535,0.031326,1.0,0.000622


## Beam steps/slips

In [30]:
# Split the 'Beam_steps' measurements into one array per Genotype/Microbiome group.
work_df = data_dict['Beam_steps']

effect_1, effect_2 = 'Genotype', 'Microbiome'
effect1_lst = work_df[effect_1].unique()
effect2_lst = work_df[effect_2].unique()

group_vals = {}
for genotype in effect1_lst:
    in_genotype = work_df[effect_1] == genotype
    for microbiome in effect2_lst:
        in_group = in_genotype & (work_df[effect_2] == microbiome)
        values = work_df.loc[in_group, 'Measurement'].values
        # Combinations without any rows are left out of the dictionary.
        if len(values) == 0:
            continue
        group_vals[genotype + '_' + microbiome] = values

### NHST

In [31]:
# Omnibus Kruskal-Wallis H-test across all groups collected in group_vals.
res_kw = st.kruskal(*group_vals.values())
res_kw

KruskalResult(statistic=np.float64(22.74821132624602), pvalue=np.float64(1.1489170486191648e-05))

In [32]:
# Pairwise Conover post-hoc test with Benjamini-Hochberg ("fdr_bh") adjustment.
names = list(group_vals.keys())
res = scikit_posthocs.posthoc_conover(list(group_vals.values()), p_adjust="fdr_bh")

# The result's rows/columns are numbered 1..k in the order the groups were
# passed; map each number back to its group name for however many groups
# there are rather than assuming exactly three.
labels = dict(enumerate(names, start=1))
res = res.rename(columns=labels, index=labels)

# The Kruskal-Wallis p-value from the previous cell is repeated on every row.
res['Kruskal-Wallis'] = res_kw.pvalue

# Create the output directory on demand so the export works on a fresh checkout.
out_path = "../output/Bencom_beamsteps_pvals_conover_test.csv"
os.makedirs(os.path.dirname(out_path), exist_ok=True)
res.to_csv(out_path)
res

Unnamed: 0,WT_SPF,ASO_SPF,ASO_SPF-bC,Kruskal-Wallis
WT_SPF,1.0,1e-06,5.059004e-07,1.1e-05
ASO_SPF,1.173407e-06,1.0,0.8423726,1.1e-05
ASO_SPF-bC,5.059004e-07,0.842373,1.0,1.1e-05


## Gut transit

In [33]:
# Split the 'Gut_transit' measurements into one array per
# Genotype/Microbiome group.
work_df = data_dict['Gut_transit']

effect_1, effect_2 = 'Genotype', 'Microbiome'
effect1_lst = work_df[effect_1].unique()
effect2_lst = work_df[effect_2].unique()

group_vals = {}
for genotype in effect1_lst:
    in_genotype = work_df[effect_1] == genotype
    for microbiome in effect2_lst:
        in_group = in_genotype & (work_df[effect_2] == microbiome)
        values = work_df.loc[in_group, 'Measurement'].values
        # Combinations without any rows are left out of the dictionary.
        if len(values) == 0:
            continue
        # NOTE(review): the int32 cast truncates any fractional values — confirm
        # the measurements in this sheet are whole numbers.
        group_vals[genotype + '_' + microbiome] = values.astype('int32')

### NHST

In [34]:
# Omnibus Kruskal-Wallis H-test across all groups collected in group_vals.
res_kw = st.kruskal(*group_vals.values())
res_kw

KruskalResult(statistic=np.float64(5.089413843386435), pvalue=np.float64(0.0784960541004698))

In [35]:
# Pairwise Conover post-hoc test with Benjamini-Hochberg ("fdr_bh") adjustment.
names = list(group_vals.keys())
res = scikit_posthocs.posthoc_conover(list(group_vals.values()), p_adjust="fdr_bh")

# The result's rows/columns are numbered 1..k in the order the groups were
# passed; map each number back to its group name for however many groups
# there are rather than assuming exactly three.
labels = dict(enumerate(names, start=1))
res = res.rename(columns=labels, index=labels)

# The Kruskal-Wallis p-value from the previous cell is repeated on every row.
res['Kruskal-Wallis'] = res_kw.pvalue

# Create the output directory on demand so the export works on a fresh checkout.
out_path = "../output/Bencom_gut_transit_pvals_conover_test.csv"
os.makedirs(os.path.dirname(out_path), exist_ok=True)
res.to_csv(out_path)
res

Unnamed: 0,WT_SPF,ASO_SPF,ASO_SPF-bC,Kruskal-Wallis
WT_SPF,1.0,0.096629,0.096629,0.078496
ASO_SPF,0.096629,1.0,0.933,0.078496
ASO_SPF-bC,0.096629,0.933,1.0,0.078496


## Water content

In [36]:
# Split the 'Water_content' measurements into one array per
# Genotype/Microbiome group.
work_df = data_dict['Water_content'].copy()

effect_1, effect_2 = 'Genotype', 'Microbiome'
effect1_lst = work_df[effect_1].unique()
effect2_lst = work_df[effect_2].unique()

group_vals = {}
for genotype in effect1_lst:
    in_genotype = work_df[effect_1] == genotype
    for microbiome in effect2_lst:
        in_group = in_genotype & (work_df[effect_2] == microbiome)
        values = work_df.loc[in_group, 'Measurement'].values
        # Combinations without any rows are left out of the dictionary.
        if len(values) == 0:
            continue
        group_vals[genotype + '_' + microbiome] = values

### NHST

In [37]:
# Omnibus Kruskal-Wallis H-test across all groups collected in group_vals.
res_kw = st.kruskal(*group_vals.values())
res_kw

KruskalResult(statistic=np.float64(9.846132118046107), pvalue=np.float64(0.007276785554293323))

In [38]:
# Pairwise Conover post-hoc test with Benjamini-Hochberg ("fdr_bh") adjustment.
names = list(group_vals.keys())
res = scikit_posthocs.posthoc_conover(list(group_vals.values()), p_adjust="fdr_bh")

# The result's rows/columns are numbered 1..k in the order the groups were
# passed; map each number back to its group name for however many groups
# there are rather than assuming exactly three.
labels = dict(enumerate(names, start=1))
res = res.rename(columns=labels, index=labels)

# The Kruskal-Wallis p-value from the previous cell is repeated on every row.
res['Kruskal-Wallis'] = res_kw.pvalue

# Create the output directory on demand so the export works on a fresh checkout.
out_path = "../output/Bencom_watercont_pvals_conover_test.csv"
os.makedirs(os.path.dirname(out_path), exist_ok=True)
res.to_csv(out_path)
res

Unnamed: 0,WT_SPF,ASO_SPF,ASO_SPF-bC,Kruskal-Wallis
WT_SPF,1.0,0.005609,0.02096,0.007277
ASO_SPF,0.005609,1.0,0.386822,0.007277
ASO_SPF-bC,0.02096,0.386822,1.0,0.007277
