In [1]:
import os

import numpy as np
import pandas as pd
import scipy.stats as st
import scikit_posthocs

import iqplot

import bokeh.io
import bokeh.layouts
import bokeh.plotting
import bokeh.resources
bokeh.io.output_notebook()

## Exploratory Data Analysis

1. Loading both Excel workbooks (one per experiment) so that all of their sheets can be read later.

In [2]:
# Both experiment workbooks sit side by side in ../data/behavior_data.
behavior_dir = os.path.join('..', 'data', 'behavior_data')
path_1 = os.path.join(behavior_dir, 'Fp1_Motor_GI_function.xlsx')
path_2 = os.path.join(behavior_dir, 'Fp2_Motor_GI_function.xlsx')

# Open the workbooks once; individual sheets are parsed later on demand.
data_1, data_2 = pd.ExcelFile(path_1), pd.ExcelFile(path_2)

2. Creating a list of all sheets (tests) that we want to analyse.

In [3]:
# Keep every sheet of the first workbook whose name does not contain 'Weight'.
tests = [sheet for sheet in data_1.sheet_names if 'Weight' not in sheet]
tests

['Beam_time',
 'Beam_steps',
 'Pole',
 'Wirehang',
 'Adhesive_removal',
 'Hindlimb',
 'Fecal_output',
 'Fecal_score',
 'Water_content',
 'Gut_transit',
 'Bead_exp']

3. Parsing the Excel files into separate datasets (one dataset per test, combining both experiments) and storing them in a dictionary keyed by the test/sheet name.

In [4]:
data_dict = {}

# Sheet-specific column names that hold the measured value; all of them are
# unified under 'Measurement' so later cells can treat every test alike.
# NOTE(review): assumes each sheet contains at most one of these columns,
# otherwise the rename would create duplicate 'Measurement' columns - confirm.
measurement_columns = {
    "Trial1": "Measurement",
    "Trial15": "Measurement",
    "Percent_water_content": "Measurement",
    "Time_min": "Measurement",
    "Slips_Step_Trial1": "Measurement",
}

for test in tests:
    # Tag every row with the experiment it came from before stacking.
    temp_df_1 = data_1.parse(test)
    temp_df_1['Experiment'] = 'Exp1'
    temp_df_2 = data_2.parse(test)
    temp_df_2['Experiment'] = 'Exp2'

    # ignore_index=True gives the combined frame unique row labels; without it
    # each experiment keeps its own 0..n-1 labels and the labels collide.
    temp_df = pd.concat((temp_df_1, temp_df_2), ignore_index=True)
    temp_df = temp_df.rename(columns=measurement_columns)

    # Rows with a missing value in any column are discarded.
    temp_df = temp_df.dropna()
    data_dict[test] = temp_df


4. Plotting the raw data from all the tests for the EDA.

In [5]:
plots = []

# One strip/box plot per test, with points grouped by Genotype and Microbiome.
for test in tests:
    df = data_dict[test]

    p = iqplot.stripbox(
        df,
        q='Measurement',
        q_axis='y',
        cats=['Genotype', 'Microbiome'],
        tooltips=[
            ('Mouse', '@ID'),
            ('Cage', '@Cage')
        ],
        title=test,
        color_column='ID',
    )
    plots.append(p)

# Show all plots together in a two-column grid.
lt = bokeh.layouts.grid(plots, ncols=2)
bokeh.io.show(lt)

# Save every plot as a standalone HTML file. zip pairs each plot with its test
# name directly (no positional indexing into `tests`), and os.path.join builds
# the path the same way the input paths are built.
for test_name, plot in zip(tests, plots):
    bokeh.io.save(
        plot,
        filename=os.path.join('..', 'figures', 'EDA_Fp_' + test_name + '.html'),
        title='Bokeh plot',
        resources=bokeh.resources.CDN)


## Pole descent

In [6]:
# Split the Pole measurements into one array per Genotype x Microbiome group.
work_df = data_dict['Pole']

effect_1, effect_2 = 'Genotype', 'Microbiome'
effect1_lst = work_df[effect_1].unique()
effect2_lst = work_df[effect_2].unique()

group_vals = {}
for genotype in effect1_lst:
    in_genotype = work_df[effect_1] == genotype
    for microbiome in effect2_lst:
        in_group = in_genotype & (work_df[effect_2] == microbiome)
        values = work_df.loc[in_group, 'Measurement'].values
        # Combinations without any rows are left out of the comparison.
        if len(values) == 0:
            continue
        group_vals['_'.join((genotype, microbiome))] = values

### NHST

In [7]:
# Omnibus Kruskal-Wallis H-test across all Pole groups.
res_kw = st.kruskal(*group_vals.values())
res_kw

KruskalResult(statistic=np.float64(16.76288375109955), pvalue=np.float64(0.0002290794032219133))

In [8]:
# Pairwise Conover post-hoc comparisons with Benjamini-Hochberg (FDR) adjustment.
names = list(group_vals.keys())
res = scikit_posthocs.posthoc_conover(list(group_vals.values()), p_adjust="fdr_bh")

# The result is labelled 1..k in input order; map every label back to its
# group name instead of hard-coding exactly three groups.
labels = dict(enumerate(names, start=1))
res = res.rename(columns=labels, index=labels)

# Keep the omnibus p-value next to the pairwise ones in the exported table.
res['Kruskal-Wallis'] = res_kw.pvalue
res.to_csv("../output/Fp_pole_pvals_conover_test.csv")
res

Unnamed: 0,WT_SPF,ASO_SPF,ASO_SPF-Fp,Kruskal-Wallis
WT_SPF,1.0,0.003223,7.8e-05,0.000229
ASO_SPF,0.003223,1.0,0.225595,0.000229
ASO_SPF-Fp,7.8e-05,0.225595,1.0,0.000229


## Beam cross

In [9]:
# Split the Beam_time measurements into one array per Genotype x Microbiome group.
work_df = data_dict['Beam_time']

effect_1, effect_2 = 'Genotype', 'Microbiome'
effect1_lst = work_df[effect_1].unique()
effect2_lst = work_df[effect_2].unique()

group_vals = {}
for genotype in effect1_lst:
    in_genotype = work_df[effect_1] == genotype
    for microbiome in effect2_lst:
        in_group = in_genotype & (work_df[effect_2] == microbiome)
        values = work_df.loc[in_group, 'Measurement'].values
        # Combinations without any rows are left out of the comparison.
        if len(values) == 0:
            continue
        group_vals['_'.join((genotype, microbiome))] = values

### NHST

In [10]:
# Omnibus Kruskal-Wallis H-test across all Beam_time groups.
res_kw = st.kruskal(*group_vals.values())
res_kw

KruskalResult(statistic=np.float64(33.47993423726103), pvalue=np.float64(5.369386334288302e-08))

In [11]:
# Pairwise Conover post-hoc comparisons with Benjamini-Hochberg (FDR) adjustment.
names = list(group_vals.keys())
res = scikit_posthocs.posthoc_conover(list(group_vals.values()), p_adjust="fdr_bh")

# The result is labelled 1..k in input order; map every label back to its
# group name instead of hard-coding exactly three groups.
labels = dict(enumerate(names, start=1))
res = res.rename(columns=labels, index=labels)

# Keep the omnibus p-value next to the pairwise ones in the exported table.
res['Kruskal-Wallis'] = res_kw.pvalue
res.to_csv("../output/Fp_beam_pvals_conover_test.csv")
res

Unnamed: 0,WT_SPF,ASO_SPF,ASO_SPF-Fp,Kruskal-Wallis
WT_SPF,1.0,5.278404e-10,4.73346e-07,5.369386e-08
ASO_SPF,5.278404e-10,1.0,0.04624185,5.369386e-08
ASO_SPF-Fp,4.73346e-07,0.04624185,1.0,5.369386e-08


## Sticker removal

In [12]:
# Split the Adhesive_removal measurements into one array per Genotype x Microbiome group.
work_df = data_dict['Adhesive_removal']

effect_1, effect_2 = 'Genotype', 'Microbiome'
effect1_lst = work_df[effect_1].unique()
effect2_lst = work_df[effect_2].unique()

group_vals = {}
for genotype in effect1_lst:
    in_genotype = work_df[effect_1] == genotype
    for microbiome in effect2_lst:
        in_group = in_genotype & (work_df[effect_2] == microbiome)
        values = work_df.loc[in_group, 'Measurement'].values
        # Combinations without any rows are left out of the comparison.
        if len(values) == 0:
            continue
        group_vals['_'.join((genotype, microbiome))] = values

### NHST

In [13]:
# Omnibus Kruskal-Wallis H-test across all Adhesive_removal groups.
res_kw = st.kruskal(*group_vals.values())
res_kw

KruskalResult(statistic=np.float64(21.392739644850415), pvalue=np.float64(2.26269287718972e-05))

In [14]:
# Pairwise Conover post-hoc comparisons with Benjamini-Hochberg (FDR) adjustment.
names = list(group_vals.keys())
res = scikit_posthocs.posthoc_conover(list(group_vals.values()), p_adjust="fdr_bh")

# The result is labelled 1..k in input order; map every label back to its
# group name instead of hard-coding exactly three groups.
labels = dict(enumerate(names, start=1))
res = res.rename(columns=labels, index=labels)

# Keep the omnibus p-value next to the pairwise ones in the exported table.
res['Kruskal-Wallis'] = res_kw.pvalue
res.to_csv("../output/Fp_adhesive_pvals_conover_test.csv")
res

Unnamed: 0,WT_SPF,ASO_SPF,ASO_SPF-Fp,Kruskal-Wallis
WT_SPF,1.0,3e-06,0.027018,2.3e-05
ASO_SPF,3e-06,1.0,0.001981,2.3e-05
ASO_SPF-Fp,0.027018,0.001981,1.0,2.3e-05


## Wirehang

In [15]:
# Split the Wirehang measurements into one array per Genotype x Microbiome group.
work_df = data_dict['Wirehang'].copy()

effect_1, effect_2 = 'Genotype', 'Microbiome'
effect1_lst = work_df[effect_1].unique()
effect2_lst = work_df[effect_2].unique()

group_vals = {}
for genotype in effect1_lst:
    in_genotype = work_df[effect_1] == genotype
    for microbiome in effect2_lst:
        in_group = in_genotype & (work_df[effect_2] == microbiome)
        values = work_df.loc[in_group, 'Measurement'].values
        # Combinations without any rows are left out of the comparison.
        if len(values) == 0:
            continue
        group_vals['_'.join((genotype, microbiome))] = values


### NHST

In [16]:
# Omnibus Kruskal-Wallis H-test across all Wirehang groups.
res_kw = st.kruskal(*group_vals.values())
res_kw

KruskalResult(statistic=np.float64(32.49329266401278), pvalue=np.float64(8.79368993946784e-08))

In [17]:
# Pairwise Conover post-hoc comparisons with Benjamini-Hochberg (FDR) adjustment.
names = list(group_vals.keys())
res = scikit_posthocs.posthoc_conover(list(group_vals.values()), p_adjust="fdr_bh")

# The result is labelled 1..k in input order; map every label back to its
# group name instead of hard-coding exactly three groups.
labels = dict(enumerate(names, start=1))
res = res.rename(columns=labels, index=labels)

# Keep the omnibus p-value next to the pairwise ones in the exported table.
res['Kruskal-Wallis'] = res_kw.pvalue
res.to_csv("../output/Fp_wirehang_pvals_conover_test.csv")
res

Unnamed: 0,WT_SPF,ASO_SPF,ASO_SPF-Fp,Kruskal-Wallis
WT_SPF,1.0,1.799057e-09,4.237998e-07,8.79369e-08
ASO_SPF,1.799057e-09,1.0,0.08390708,8.79369e-08
ASO_SPF-Fp,4.237998e-07,0.08390708,1.0,8.79369e-08


## Hindlimb

In [18]:
# Split the Hindlimb measurements into one array per Genotype x Microbiome group.
work_df = data_dict['Hindlimb'].copy()

effect_1, effect_2 = 'Genotype', 'Microbiome'
effect1_lst = work_df[effect_1].unique()
effect2_lst = work_df[effect_2].unique()

group_vals = {}
for genotype in effect1_lst:
    in_genotype = work_df[effect_1] == genotype
    for microbiome in effect2_lst:
        in_group = in_genotype & (work_df[effect_2] == microbiome)
        values = work_df.loc[in_group, 'Measurement'].values
        # Combinations without any rows are left out of the comparison.
        if len(values) == 0:
            continue
        group_vals['_'.join((genotype, microbiome))] = values

### NHST

In [19]:
# Omnibus Kruskal-Wallis H-test across all Hindlimb groups.
res_kw = st.kruskal(*group_vals.values())
res_kw

KruskalResult(statistic=np.float64(36.31550996676287), pvalue=np.float64(1.3007301499034528e-08))

In [20]:
# Pairwise Conover post-hoc comparisons with Benjamini-Hochberg (FDR) adjustment.
names = list(group_vals.keys())
res = scikit_posthocs.posthoc_conover(list(group_vals.values()), p_adjust="fdr_bh")

# The result is labelled 1..k in input order; map every label back to its
# group name instead of hard-coding exactly three groups.
labels = dict(enumerate(names, start=1))
res = res.rename(columns=labels, index=labels)

# Keep the omnibus p-value next to the pairwise ones in the exported table.
res['Kruskal-Wallis'] = res_kw.pvalue
res.to_csv("../output/Fp_hindlimb_pvals_conover_test.csv")
res

Unnamed: 0,WT_SPF,ASO_SPF,ASO_SPF-Fp,Kruskal-Wallis
WT_SPF,1.0,1.959081e-11,0.000837,1.30073e-08
ASO_SPF,1.959081e-11,1.0,4e-06,1.30073e-08
ASO_SPF-Fp,0.0008374047,4.220273e-06,1.0,1.30073e-08


## Fecal output

In [21]:
# Split the Fecal_output measurements into one array per Genotype x Microbiome group.
work_df = data_dict['Fecal_output'].copy()

effect_1, effect_2 = 'Genotype', 'Microbiome'
effect1_lst = work_df[effect_1].unique()
effect2_lst = work_df[effect_2].unique()

group_vals = {}
for genotype in effect1_lst:
    in_genotype = work_df[effect_1] == genotype
    for microbiome in effect2_lst:
        in_group = in_genotype & (work_df[effect_2] == microbiome)
        values = work_df.loc[in_group, 'Measurement'].values
        # Combinations without any rows are left out of the comparison.
        if len(values) == 0:
            continue
        # Values are cast to int32 before testing.
        # NOTE(review): the cast truncates any fractional part - confirm the
        # measurements are whole numbers.
        group_vals['_'.join((genotype, microbiome))] = values.astype('int32')

### NHST

In [22]:
# Omnibus Kruskal-Wallis H-test across all Fecal_output groups.
res_kw = st.kruskal(*group_vals.values())
res_kw

KruskalResult(statistic=np.float64(0.9055013970355891), pvalue=np.float64(0.6358766388563656))

In [23]:
# Pairwise Conover post-hoc comparisons with Benjamini-Hochberg (FDR) adjustment.
names = list(group_vals.keys())
res = scikit_posthocs.posthoc_conover(list(group_vals.values()), p_adjust="fdr_bh")

# The result is labelled 1..k in input order; map every label back to its
# group name instead of hard-coding exactly three groups.
labels = dict(enumerate(names, start=1))
res = res.rename(columns=labels, index=labels)

# Keep the omnibus p-value next to the pairwise ones in the exported table.
res['Kruskal-Wallis'] = res_kw.pvalue
res.to_csv("../output/Fp_fecal_output_pvals_conover_test.csv")
res

Unnamed: 0,WT_SPF,ASO_SPF,ASO_SPF-Fp,Kruskal-Wallis
WT_SPF,1.0,0.764094,0.764094,0.635877
ASO_SPF,0.764094,1.0,0.808259,0.635877
ASO_SPF-Fp,0.764094,0.808259,1.0,0.635877


## Fecal score

In [24]:
# Split the Fecal_score measurements into one array per Genotype x Microbiome group.
work_df = data_dict['Fecal_score'].copy()

effect_1, effect_2 = 'Genotype', 'Microbiome'
effect1_lst = work_df[effect_1].unique()
effect2_lst = work_df[effect_2].unique()

group_vals = {}
for genotype in effect1_lst:
    in_genotype = work_df[effect_1] == genotype
    for microbiome in effect2_lst:
        in_group = in_genotype & (work_df[effect_2] == microbiome)
        values = work_df.loc[in_group, 'Measurement'].values
        # Combinations without any rows are left out of the comparison.
        if len(values) == 0:
            continue
        group_vals['_'.join((genotype, microbiome))] = values

### NHST

In [25]:
# Omnibus Kruskal-Wallis H-test across all Fecal_score groups.
res_kw = st.kruskal(*group_vals.values())
res_kw

KruskalResult(statistic=np.float64(29.876034355653413), pvalue=np.float64(3.254629593845345e-07))

In [26]:
# Pairwise Conover post-hoc comparisons with Benjamini-Hochberg (FDR) adjustment.
names = list(group_vals.keys())
res = scikit_posthocs.posthoc_conover(list(group_vals.values()), p_adjust="fdr_bh")

# The result is labelled 1..k in input order; map every label back to its
# group name instead of hard-coding exactly three groups.
labels = dict(enumerate(names, start=1))
res = res.rename(columns=labels, index=labels)

# Keep the omnibus p-value next to the pairwise ones in the exported table.
res['Kruskal-Wallis'] = res_kw.pvalue
res.to_csv("../output/Fp_fecal_score_pvals_conover_test.csv")
res

Unnamed: 0,WT_SPF,ASO_SPF,ASO_SPF-Fp,Kruskal-Wallis
WT_SPF,1.0,1.278526e-07,0.8916665,3.25463e-07
ASO_SPF,1.278526e-07,1.0,1.103301e-07,3.25463e-07
ASO_SPF-Fp,0.8916665,1.103301e-07,1.0,3.25463e-07


## Bead expulsion

In [27]:
# Split the Bead_exp measurements into one array per Genotype x Microbiome group.
work_df = data_dict['Bead_exp'].copy()

effect_1, effect_2 = 'Genotype', 'Microbiome'
effect1_lst = work_df[effect_1].unique()
effect2_lst = work_df[effect_2].unique()

group_vals = {}
for genotype in effect1_lst:
    in_genotype = work_df[effect_1] == genotype
    for microbiome in effect2_lst:
        in_group = in_genotype & (work_df[effect_2] == microbiome)
        values = work_df.loc[in_group, 'Measurement'].values
        # Combinations without any rows are left out of the comparison.
        if len(values) == 0:
            continue
        # Values are cast to int32 before testing.
        # NOTE(review): the cast truncates any fractional part - confirm the
        # measurements are whole numbers.
        group_vals['_'.join((genotype, microbiome))] = values.astype('int32')

### NHST

In [28]:
# Omnibus Kruskal-Wallis H-test across all Bead_exp groups.
res_kw = st.kruskal(*group_vals.values())
res_kw

KruskalResult(statistic=np.float64(16.59959075672096), pvalue=np.float64(0.00024856768423161965))

In [29]:
# Pairwise Conover post-hoc comparisons with Benjamini-Hochberg (FDR) adjustment.
names = list(group_vals.keys())
res = scikit_posthocs.posthoc_conover(list(group_vals.values()), p_adjust="fdr_bh")

# The result is labelled 1..k in input order; map every label back to its
# group name instead of hard-coding exactly three groups.
labels = dict(enumerate(names, start=1))
res = res.rename(columns=labels, index=labels)

# Keep the omnibus p-value next to the pairwise ones in the exported table.
res['Kruskal-Wallis'] = res_kw.pvalue
res.to_csv("../output/Fp_beadexp_pvals_conover_test.csv")
res

Unnamed: 0,WT_SPF,ASO_SPF,ASO_SPF-Fp,Kruskal-Wallis
WT_SPF,1.0,0.00014,0.233252,0.000249
ASO_SPF,0.00014,1.0,0.001678,0.000249
ASO_SPF-Fp,0.233252,0.001678,1.0,0.000249


## Beam steps/slips

In [30]:
# Split the Beam_steps measurements into one array per Genotype x Microbiome group.
work_df = data_dict['Beam_steps']

effect_1, effect_2 = 'Genotype', 'Microbiome'
effect1_lst = work_df[effect_1].unique()
effect2_lst = work_df[effect_2].unique()

group_vals = {}
for genotype in effect1_lst:
    in_genotype = work_df[effect_1] == genotype
    for microbiome in effect2_lst:
        in_group = in_genotype & (work_df[effect_2] == microbiome)
        values = work_df.loc[in_group, 'Measurement'].values
        # Combinations without any rows are left out of the comparison.
        if len(values) == 0:
            continue
        group_vals['_'.join((genotype, microbiome))] = values

### NHST

In [31]:
# Omnibus Kruskal-Wallis H-test across all Beam_steps groups.
res_kw = st.kruskal(*group_vals.values())
res_kw

KruskalResult(statistic=np.float64(43.17107993699116), pvalue=np.float64(4.2220084947516963e-10))

In [32]:
# Pairwise Conover post-hoc comparisons with Benjamini-Hochberg (FDR) adjustment.
names = list(group_vals.keys())
res = scikit_posthocs.posthoc_conover(list(group_vals.values()), p_adjust="fdr_bh")

# The result is labelled 1..k in input order; map every label back to its
# group name instead of hard-coding exactly three groups.
labels = dict(enumerate(names, start=1))
res = res.rename(columns=labels, index=labels)

# Keep the omnibus p-value next to the pairwise ones in the exported table.
res['Kruskal-Wallis'] = res_kw.pvalue
res.to_csv("../output/Fp_beamslip_pvals_conover_test.csv")
res

Unnamed: 0,WT_SPF,ASO_SPF,ASO_SPF-Fp,Kruskal-Wallis
WT_SPF,1.0,6.479769e-15,4.917254e-09,4.222008e-10
ASO_SPF,6.479769e-15,1.0,0.0002391631,4.222008e-10
ASO_SPF-Fp,4.917254e-09,0.0002391631,1.0,4.222008e-10


## Gut transit

In [33]:
# Split the Gut_transit measurements into one array per Genotype x Microbiome group.
work_df = data_dict['Gut_transit']

effect_1, effect_2 = 'Genotype', 'Microbiome'
effect1_lst = work_df[effect_1].unique()
effect2_lst = work_df[effect_2].unique()

group_vals = {}
for genotype in effect1_lst:
    in_genotype = work_df[effect_1] == genotype
    for microbiome in effect2_lst:
        in_group = in_genotype & (work_df[effect_2] == microbiome)
        values = work_df.loc[in_group, 'Measurement'].values
        # Combinations without any rows are left out of the comparison.
        if len(values) == 0:
            continue
        # Values are cast to int32 before testing.
        # NOTE(review): the cast truncates any fractional part - confirm the
        # measurements are whole numbers.
        group_vals['_'.join((genotype, microbiome))] = values.astype('int32')

### NHST

In [34]:
# Omnibus Kruskal-Wallis H-test across all Gut_transit groups.
res_kw = st.kruskal(*group_vals.values())
res_kw

KruskalResult(statistic=np.float64(5.683500126943781), pvalue=np.float64(0.0583235067592959))

In [35]:
# Pairwise Conover post-hoc comparisons with Benjamini-Hochberg (FDR) adjustment.
names = list(group_vals.keys())
res = scikit_posthocs.posthoc_conover(list(group_vals.values()), p_adjust="fdr_bh")

# The result is labelled 1..k in input order; map every label back to its
# group name instead of hard-coding exactly three groups.
labels = dict(enumerate(names, start=1))
res = res.rename(columns=labels, index=labels)

# Keep the omnibus p-value next to the pairwise ones in the exported table.
res['Kruskal-Wallis'] = res_kw.pvalue
res.to_csv("../output/Fp_gut_transit_pvals_conover_test.csv")
res

Unnamed: 0,WT_SPF,ASO_SPF,ASO_SPF-Fp,Kruskal-Wallis
WT_SPF,1.0,0.068786,0.957712,0.058324
ASO_SPF,0.068786,1.0,0.068786,0.058324
ASO_SPF-Fp,0.957712,0.068786,1.0,0.058324


## Water content

In [36]:
# Split the Water_content measurements into one array per Genotype x Microbiome group.
work_df = data_dict['Water_content'].copy()

effect_1, effect_2 = 'Genotype', 'Microbiome'
effect1_lst = work_df[effect_1].unique()
effect2_lst = work_df[effect_2].unique()

group_vals = {}
for genotype in effect1_lst:
    in_genotype = work_df[effect_1] == genotype
    for microbiome in effect2_lst:
        in_group = in_genotype & (work_df[effect_2] == microbiome)
        values = work_df.loc[in_group, 'Measurement'].values
        # Combinations without any rows are left out of the comparison.
        if len(values) == 0:
            continue
        group_vals['_'.join((genotype, microbiome))] = values

### NHST

In [37]:
# Omnibus Kruskal-Wallis H-test across all Water_content groups.
res_kw = st.kruskal(*group_vals.values())
res_kw

KruskalResult(statistic=np.float64(14.355730243671783), pvalue=np.float64(0.0007632956471597887))

In [38]:
# Pairwise Conover post-hoc comparisons with Benjamini-Hochberg (FDR) adjustment.
names = list(group_vals.keys())
res = scikit_posthocs.posthoc_conover(list(group_vals.values()), p_adjust="fdr_bh")

# The result is labelled 1..k in input order; map every label back to its
# group name instead of hard-coding exactly three groups.
labels = dict(enumerate(names, start=1))
res = res.rename(columns=labels, index=labels)

# Keep the omnibus p-value next to the pairwise ones in the exported table.
res['Kruskal-Wallis'] = res_kw.pvalue
res.to_csv("../output/Fp_watercont_pvals_conover_test.csv")
res

Unnamed: 0,WT_SPF,ASO_SPF,ASO_SPF-Fp,Kruskal-Wallis
WT_SPF,1.0,0.000409,0.004558,0.000763
ASO_SPF,0.000409,1.0,0.246482,0.000763
ASO_SPF-Fp,0.004558,0.246482,1.0,0.000763
