# Two-Way ANOVA with Dunnett Posthoc Comparisons 

Performs ANOVAs and Dunnett posthoc comparisons for ANOVAs using a Python wrapper for R. 
 

### User Input

In [2]:
f_in = "coupling_connected_nmda.csv"

### Import Statements for Python and Library loading for R

In [3]:
import rpy2
import rpy2.robjects as ro
from rpy2.robjects.packages import importr
import pandas as pd
import numpy as np
import os
from rpy2.robjects import r, pandas2ri
from IPython.display import display, HTML


base = importr('base')
utils = importr('utils')
desc_tools = importr('DescTools')
fsa = importr('FSA')
rcompanion = importr('rcompanion')
mcv = importr('multcompView')
lsmeans = importr('lsmeans')
stats = importr('stats')
dunnett = ro.r['DunnettTest']

### Read in the Data via Python

There are well-documented bugs in the transferring of a pandas dataframe to an R dataframe, so the offical way to ensure your analysis is formatted correctly, you have to parse out the columns into individual variables at the start.

In [78]:
# Read the clean data into a pandas dataframe
df_in = pd.read_csv(f_in)

# Rearrange data for 2-way Anova (indexed)
df_control = df_in[['strain', "distance", "cc", "gj", "vm1", "rin1"]]
df_control['treatment'] = 'control'
df_treated = df_in[['strain', "distance", "cc_nmda", "gj_nmda", "vm1_nmda", "rin1_nmda"]]
df_treated.columns = ['strain', "distance", "cc", "gj", "vm1", "rin1"]
df_treated['treatment'] = 'nmda'

df = df_control.append(df_treated)
df['interaction'] = df.strain + "_" + df.treatment
display(HTML(df.to_html()))

# Parse the data in local R variables
# Pandas dataframes do not faithfully convert to R dataframes
cc = ro.vectors.FloatVector (df.cc)
gj = ro.vectors.FloatVector (df.gj)
vm = ro.vectors.FloatVector (df.vm1)
rin = ro.vectors.FloatVector (df.rin1)
dist = ro.vectors.FloatVector (df.distance)
strain = ro.vectors.FactorVector (df.strain)
treatment = ro.vectors.FactorVector (df.treatment)
interaction = ro.vectors.FactorVector (df.interaction)


# Convert local R variables to global R variables
# Does not work if you do it all in one step
ro.globalenv ['strain'] = strain
ro.globalenv ['cc'] = cc
ro.globalenv ['gj'] = gj
ro.globalenv ['vm'] = vm
ro.globalenv ['rin'] = rin
ro.globalenv ['dist'] = dist
ro.globalenv ['treatment'] = treatment
ro.globalenv ['interaction'] = interaction

# Make a list of measurements to be analyzed
analyze = [cc, gj, vm, rin]
labels = ["cc",  "gj", "vm", "rin"]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':


Unnamed: 0,strain,distance,cc,gj,vm1,rin1,treatment,interaction
0,het_c,10.0,0.069556,4.460774,-37.92,128.763561,control,het_c_control
1,wt,10.0,0.067104,2.115295,-50.45,424.019656,control,wt_control
2,wt,30.0,1.284205,170.57822,-39.08,64.165716,control,wt_control
3,wt,30.0,0.236654,36.881944,-52.5,75.295994,control,wt_control
4,het_b,20.0,0.129783,5.824517,-39.8088,369.584123,control,het_b_control
5,het_b,20.0,0.099758,2.699185,-44.3908,222.821828,control,het_b_control
6,het_b,0.0,0.279514,12.593841,-41.2103,348.481933,control,het_b_control
7,het_b,0.0,0.291173,8.355504,-43.4743,221.947547,control,het_b_control
8,het_b,10.0,0.214582,17.586433,-43.2295,67.882628,control,het_b_control
9,het_b,5.0,0.280175,16.734346,-47.7369,59.379018,control,het_b_control


### Two-Way ANOVA with Mouse Strain and Treatment as Factors

In [61]:
def get_anova2 (dep_var, label, df_stat):
    """User passes the the dependent variable, the group label, and output dataframe (pandas).
        The function calculates the linear model, summarizes the data, then performs an 
        anova."""
    ro.globalenv['dep_var'] = dep_var
    lm_x = stats.lm("dep_var ~ strain + treatment +  strain:treatment")
    lm_summary = base.summary(lm_x)
    anova_results = stats.anova(lm_x)
    anova_summary = base.summary(anova_results)
    
    # Parse the chaotic output of lm() and anova() into a pandas dataframe
    df_strain = int(anova_results[0][0])
    df_treatment =int(anova_results[0][1])
    df_interaction = int(anova_results[0][2])
    df_res = int(anova_results[0][3])
    ss_strain = format(round(anova_results[1][0],3), 'f')
    ss_treatment = format(round(anova_results[1][1],3), 'f')
    ss_interaction = format(round(anova_results[1][2],3), 'f')
    ss_res = format(round(anova_results[1][3],3), 'f')
    ms_strain = anova_results[2][0]
    ms_treatment = anova_results[2][1]
    ms_interaction = anova_results[2][2]
    ms_res = anova_results[2][3]
    f_strain = anova_results[3][0]
    f_treatment = anova_results[3][1]
    f_interaction = anova_results[3][2]
    f_res = anova_results[3][3]
    p_strain = anova_results[4][0]
    p_treatment = anova_results[4][1]
    p_interaction = anova_results[4][2]
    p_res = anova_results[4][3]
    
    # Assemble the rows for the dataframe
    row1 = {'measure': label, 'comparison': 'strain', 'df': df_strain, 'ss': ss_strain,
           'ms': ms_strain, 'f_stat': f_strain, 'p_val': p_strain}
    row2 = {'measure': label, 'comparison': 'treatment', 'df': df_treatment, 'ss': ss_treatment,
           'ms': ms_treatment, 'f_stat': f_treatment, 'p_val': p_treatment}
    row3 = {'measure': label, 'comparison': 'interaction', 'df': df_interaction, 'ss': ss_interaction,
           'ms': ms_interaction, 'f_stat': f_interaction, 'p_val': p_interaction}
    row4 = {'measure': label, 'comparison': 'residual', 'df': df_res, 'ss': ss_res,
           'ms': ms_res, 'f_stat': f_res, 'p_val': p_res}
    
    # Append the rows to the dataframe
    df_stat = df_stat.append([row1,row2,row3,row4], ignore_index = True, sort = False)    
    
    return df_stat

# Create and empty pandas dataframe to catch the output of the get_anova1() function
df_anova2 = pd.DataFrame()

# Cycle through the measurements and perform one way anova on each with strain as the ind. var
i = 0
while i < len(analyze):
    df_anova2 = get_anova2(analyze[i], labels[i], df_anova2)
    i = i + 1   

# Display the results in a table
display(HTML(df_anova2.to_html()))

Unnamed: 0,measure,comparison,df,ss,ms,f_stat,p_val
0,cc,strain,3,8.17,2.72349,2.165533,0.095196
1,cc,treatment,1,1.096,1.095524,0.871085,0.352385
2,cc,interaction,3,8.971,2.990295,2.377678,0.072837
3,cc,residual,130,163.495,1.257654,,
4,gj,strain,3,89645.211,29881.736962,3.499468,0.017457
5,gj,treatment,1,7805.54,7805.540394,0.914112,0.3408
6,gj,interaction,3,62663.402,20887.800648,2.446183,0.066787
7,gj,residual,130,1110061.72,8538.936304,,
8,vm,strain,3,782.962,260.987253,2.442373,0.06711
9,vm,treatment,1,417.107,417.106831,3.903373,0.050307


### Dunnett Pairwise Comparisons with Strain as Factor

In [83]:
 def get_dunnett(dep_var, label, df_dun):
        dunnett_results = dunnett(x = dep_var, g = interaction, control = 'wt_control')
        pairs = base.labels(dunnett_results[0])[0]
        calculations = base.labels(dunnett_results[0][1])
        i = 0
        for pair in pairs:
            row = {'measurement': label,
                   'pair': pair,
                   'diff': dunnett_results[0][i+0],
                   'lwr_ci': dunnett_results[0][i+1],
                   'upr_ci': dunnett_results[0][i+2],
                   'p_val': dunnett_results[0][i+3]}
            df_dun = df_dun.append(row, ignore_index = True, sort = False)
            i = i + 4
        return df_dun 

# Create an empty pandas dataframe to catch the data from the get_dunnett1 function
df_dun = pd.DataFrame()

i = 0
while i < len(analyze)-1:
    df_dun = get_dunnett(analyze[i], labels[i], df_dun)
    i = i + 1   

# In case of unintentional sorting, rearrange the columns of the df to make sense
df_dun = df_dun[['measurement','pair', 'diff', 'lwr_ci', 'upr_ci', 'p_val' ]]
display(HTML(df_dun.to_html()))

Unnamed: 0,measurement,pair,diff,lwr_ci,upr_ci,p_val
0,cc,het_b_control-wt_control,-0.814449,0.670502,-0.496233,-0.433227
1,cc,het_b_nmda-wt_control,-0.594873,-0.592148,-0.099877,-1.99997
2,cc,het_c_control-wt_control,-0.515018,-1.463747,-1.400741,-1.508932
3,cc,het_c_nmda-wt_control,-1.506206,-1.003244,0.371071,1.856022
4,cc,ko_b_control-wt_control,0.47128,0.534287,0.319185,0.321911
5,cc,ko_b_nmda-wt_control,0.803491,0.320917,0.534849,0.638853
6,cc,wt_nmda-wt_control,0.763655,0.37889,0.383873,0.999899
7,gj,het_b_control-wt_control,-68.044175,56.432559,-48.860113,-47.110315
8,gj,het_b_nmda-wt_control,-50.665678,-58.551604,2.428757,-165.729719
9,gj,het_c_control-wt_control,-41.252985,-128.58218,-126.832382,-125.983072


### Reference code for remotely installing R packages through python

In [None]:
utils.chooseCRANmirror(ind=1) 
utils.install_packages ("DescTools")
utils.install_packages("rcompanion")
utils.install_packages("lsmeans")
utils.install_packages("multcompView")
utils.install_packages("FSA")