# One-Way ANOVA with Dunnett Posthoc Comparisons 

Performs ANOVAs and Dunnett posthoc comparisons for ANOVAs using a Python wrapper for R. 
 

### User Input

In [1]:
f_in = "Shank3B_coupling_connected_nmda.csv"

### Import Statements for Python and Library loading for R

In [2]:
import rpy2
import rpy2.robjects as ro
from rpy2.robjects.packages import importr
import pandas as pd
import numpy as np
import os
from rpy2.robjects import r, pandas2ri
from IPython.display import display, HTML


base = importr('base')
utils = importr('utils')
desc_tools = importr('DescTools')
fsa = importr('FSA')
rcompanion = importr('rcompanion')
mcv = importr('multcompView')
lsmeans = importr('lsmeans')
stats = importr('stats')
dunnett = ro.r['DunnettTest']

### Read in the Data via Python

There are well-documented bugs in the transferring of a pandas dataframe to an R dataframe, so the offical way to ensure your analysis is formatted correctly, you have to parse out the columns into individual variables at the start.

In [3]:
# Read the clean data into a pandas dataframe
df = pd.read_csv(f_in)

# Parse the data in local R variables
# Pandas dataframes do not faithfully convert to R dataframes
cc = ro.vectors.FloatVector (df.cc)
cc_nmda = ro.vectors.FloatVector (df.cc_nmda)
cc_pchange = ro.vectors.FloatVector (df.cc_pchange)
gj = ro.vectors.FloatVector (df.gj)
gj_nmda = ro.vectors.FloatVector (df.gj_nmda)
gj_pchange = ro.vectors.FloatVector (df.gj_pchange)
vm = ro.vectors.FloatVector (df.vm1)
vm_nmda = ro.vectors.FloatVector (df.vm1_nmda)
rin = ro.vectors.FloatVector (df.rin1)
rin_nmda = ro.vectors.FloatVector (df.rin1_nmda)
dist = ro.vectors.FloatVector (df.distance)
strain = ro.vectors.FactorVector (df.strain)

# Convert local R variables to global R variables
# Does not work if you do it all in one step
ro.globalenv ['strain'] = strain
ro.globalenv ['cc'] = cc
ro.globalenv ['cc_nmda'] = cc_nmda
ro.globalenv ['cc_pchange'] = cc_pchange
ro.globalenv ['gj'] = gj
ro.globalenv ['gj_nmda'] = gj_nmda
ro.globalenv ['gj_pchange'] = gj_pchange
ro.globalenv ['vm'] = vm
ro.globalenv ['vm_nmda'] = vm_nmda
ro.globalenv ['rin'] = rin
ro.globalenv ['rin_nmda'] = rin_nmda
ro.globalenv ['dist'] = dist

# Make a list of measurements to be analyzed
analyze = [cc, cc_nmda, cc_pchange, gj, gj_nmda, gj_pchange, vm, vm_nmda, rin, 
           rin_nmda]
labels = ["cc", "cc_nmda", "cc_pchange", "gj", "gj_nmda", "gj_pchange", "vm", 
          "vm_nmda", "rin", "rin_nmda"]

### One-Way ANOVA with mouse strain as Factor

In [18]:
def get_anova1 (dep_var, label, df_stat):
    """User passes the the dependent variable, measurement label, and the output
    dataframe (pandas). The function calculates the linear model, summarizes the data, 
    then performs an anova."""
    ro.globalenv['dep_var'] = dep_var
    lm_x = stats.lm("dep_var ~ strain")
    lm_summary = base.summary(lm_x)
    anova_results = stats.anova(lm_x)
    anova_summary = base.summary(anova_results)
    
    # Parse the chaotic output of lm() and anova() into a pandas dataframe
    df_groups = int(anova_results[0][0])
    df_err = int(anova_results[0][1])
    ss_groups = float(anova_results[1][0])
    ss_err = float(anova_results[1][1])
    ms_groups = float(anova_results[2][0])
    ms_err = float(anova_results[2][1])
    f_stat = float(anova_results[3][0])    
    p_value = float(anova_results[4][0])
    stat = {'measure': label, 'df_group': df_groups, 'ss_groups': ss_groups, 
            'ss_err':ss_err, 'ms_groups': ms_groups, 'ms_err': ms_err, 
            'df_err': df_err, 'f_stat': f_stat, 'p_value': p_value}
    df_stat = df_stat.append(stat, ignore_index = True)
    df_stat = df_stat[['measure', 'df_group', 'df_err', 'ss_groups',
                       'ss_err', 'ms_groups', 'ms_err','f_stat', 'p_value']]
    
    return df_stat

# Create and empty pandas dataframe to catch the output of the get_anova1() function
df_anova1 = pd.DataFrame()

# Cycle through the measurements and perform one way anova on each with strain as the ind. var
i = 0
while i < len(analyze):
    df_anova1 = get_anova1(analyze[i], labels[i], df_anova1)
    i = i + 1   

# Display the results in a table
display(HTML(df_anova1.to_html()))

Unnamed: 0,measure,df_group,df_err,ss_groups,ss_err,ms_groups,ms_err,f_stat,p_value
0,cc,3.0,65.0,6.014555,27.65002,2.004852,0.425385,4.713029,0.004882
1,cc_nmda,3.0,65.0,11.1268,135.8449,3.708933,2.089922,1.774675,0.16072
2,cc_pchange,3.0,65.0,21507700.0,187201400.0,7169233.0,2880022.0,2.489298,0.068088
3,gj,3.0,65.0,45074.02,206528.1,15024.67,3177.355,4.728674,0.004794
4,gj_nmda,3.0,65.0,107234.6,903533.7,35744.86,13900.52,2.571477,0.061675
5,gj_pchange,3.0,65.0,11186970.0,94162230.0,3728991.0,1448650.0,2.574115,0.061479
6,vm,3.0,65.0,216.3371,2528.243,72.11236,38.89605,1.853976,0.146168
7,vm_nmda,3.0,65.0,776.8292,11363.3,258.9431,174.82,1.481198,0.227873
8,rin,3.0,65.0,41086.9,1236990.0,13695.63,19030.61,0.719664,0.543843
9,rin_nmda,3.0,65.0,193202.9,1417382.0,64400.96,21805.88,2.953376,0.038972


### Dunnett Pairwise Comparisons with Strain as Factor

In [19]:
 def get_dunnett1(dep_var, label, df_dun):
        dunnett_results = dunnett(x = dep_var, g = strain, control = 'wt')
        pairs = base.labels(dunnett_results[0])[0]
        calculations = base.labels(dunnett_results[0][1])
        i = 0
        for pair in pairs:
            row = {'measurement': label,
                   'pair': pair,
                   'diff': dunnett_results[0][i+0],
                   'lwr_ci': dunnett_results[0][i+1],
                   'upr_ci': dunnett_results[0][i+2],
                   'p_val': dunnett_results[0][i+3]}
            df_dun = df_dun.append(row, ignore_index = True)
            i = i + 4
        return df_dun 

# Create an empty pandas dataframe to catch the data from the get_dunnett1 function
df_dun1 = pd.DataFrame()

i = 0
while i < len(analyze)-1:
    df_dun1 = get_dunnett1(analyze[i], labels[i], df_dun1)
    i = i + 1   
    
df_dun1 = get_dunnett1(analyze[i], labels[i], df_dun1)
df_dun1 = df_dun1[['measurement','pair', 'diff', 'lwr_ci', 'upr_ci', 'p_val']]
display(HTML(df_dun1.to_html()))

Unnamed: 0,measurement,pair,diff,lwr_ci,upr_ci,p_val
0,cc,het_b-wt,-0.814449,-0.496233,-0.594873,-1.439434
1,cc,het_c-wt,-1.006289,-1.076748,-0.189465,0.013822
2,cc,ko_b-wt,-0.112998,0.007075,0.05842,0.011325
3,cc_nmda,het_b-wt,0.770378,-0.33335,-0.492271,-0.614921
4,cc_nmda,het_c-wt,-1.463905,-1.560363,2.155677,0.797205
5,cc_nmda,ko_b-wt,0.57582,0.416846,0.831325,0.56763
6,cc_pchange,het_b-wt,1660.640101,83.58631,-42.658788,34.431219
7,cc_pchange,het_c-wt,-1243.577649,-1296.496468,3286.848983,1410.750269
8,cc_pchange,ko_b-wt,1211.178892,0.044287,0.997762,0.999643
9,gj,het_b-wt,-68.044175,-48.860113,-50.665678,-122.058789


### Reference code for remotely installing R packages through python

In [None]:
utils.chooseCRANmirror(ind=1) 
utils.install_packages ("DescTools")
utils.install_packages("rcompanion")
utils.install_packages("lsmeans")
utils.install_packages("multcompView")
utils.install_packages("FSA")