# One-Way ANOVA with Dunnett Posthoc Comparisons 

Performs ANOVAs and Dunnett posthoc comparisons for ANOVAs using a Python wrapper for R. 
 

### User Input

In [1]:
f_in = "coupling_connected.csv"
exp_name = "Demo"

### Import Statements for Python and Library loading for R

In [2]:
import rpy2
import rpy2.robjects as ro
from rpy2.robjects.packages import importr
import pandas as pd
import numpy as np
import os
from rpy2.robjects import r, pandas2ri
from IPython.display import display, HTML


base = importr('base')
utils = importr('utils')
desc_tools = importr('DescTools')
fsa = importr('FSA')
rcompanion = importr('rcompanion')
mcv = importr('multcompView')
lsmeans = importr('lsmeans')
stats = importr('stats')
dunnett = ro.r['DunnettTest']

### Read in the Data via Python

There are well-documented bugs in the transferring of a pandas dataframe to an R dataframe, so the offical way to ensure your analysis is formatted correctly, you have to parse out the columns into individual variables at the start.

In [3]:
# Function for saving pandas dataframes to csv spreadsheets
def save_csv (df_out, file_out):
    df_out.to_csv(file_out, index = False)
    
# Read the clean data into a pandas dataframe
df = pd.read_csv(f_in)

# Parse the data in local R variables
# Pandas dataframes do not faithfully convert to R dataframes
cc = ro.vectors.FloatVector (df.cc)
cc_nmda = ro.vectors.FloatVector (df.cc_nmda)
cc_pchange = ro.vectors.FloatVector (df.cc_pchange)
gj = ro.vectors.FloatVector (df.gj)
gj_nmda = ro.vectors.FloatVector (df.gj_nmda)
gj_pchange = ro.vectors.FloatVector (df.gj_pchange)
vm = ro.vectors.FloatVector (df.vm1)
vm_nmda = ro.vectors.FloatVector (df.vm1_nmda)
rin = ro.vectors.FloatVector (df.rin1)
rin_nmda = ro.vectors.FloatVector (df.rin1_nmda)
dist = ro.vectors.FloatVector (df.distance)
strain = ro.vectors.FactorVector (df.strain)

# Convert local R variables to global R variables
# Does not work if you do it all in one step
ro.globalenv ['strain'] = strain
ro.globalenv ['cc'] = cc
ro.globalenv ['cc_nmda'] = cc_nmda
ro.globalenv ['cc_pchange'] = cc_pchange
ro.globalenv ['gj'] = gj
ro.globalenv ['gj_nmda'] = gj_nmda
ro.globalenv ['gj_pchange'] = gj_pchange
ro.globalenv ['vm'] = vm
ro.globalenv ['vm_nmda'] = vm_nmda
ro.globalenv ['rin'] = rin
ro.globalenv ['rin_nmda'] = rin_nmda
ro.globalenv ['dist'] = dist

# Make a list of measurements to be analyzed
analyze = [cc, cc_nmda, cc_pchange, gj, gj_nmda, gj_pchange, vm, vm_nmda, rin, 
           rin_nmda]
labels = ["cc", "cc_nmda", "cc_pchange", "gj", "gj_nmda", "gj_pchange", "vm", 
          "vm_nmda", "rin", "rin_nmda"]

### One-Way ANOVA with mouse strain as Factor

In [4]:
def get_anova (dep_var, label, df_anovas):
    """User passes the the dependent variable and measurement label. 
    The function calculates the linear model, summarizes the data, 
    then performs an anova. Output is saved as a .csv spreadsheet in
    the working directory."""
    df_anova = pd.DataFrame()
    display(HTML('<h4> One-Way ANOVA Results: ' + label))
    ro.globalenv['dep_var'] = dep_var
    lm_x = stats.lm("dep_var ~ strain")
    lm_summary = base.summary(lm_x)
    anova_results = stats.anova(lm_x)
    anova_summary = base.summary(anova_results)
    
    # Parse the chaotic output of lm() and anova() into a pandas dataframe
    df_groups = int(anova_results[0][0])
    df_err = int(anova_results[0][1])
    ss_groups = float(anova_results[1][0])
    ss_err = float(anova_results[1][1])
    ms_groups = float(anova_results[2][0])
    ms_err = float(anova_results[2][1])
    f_stat = float(anova_results[3][0])    
    p_value = float(anova_results[4][0])
    row = {'measure': label, 'df_group': df_groups, 'ss_groups': ss_groups, 
            'ss_err':ss_err, 'ms_groups': ms_groups, 'ms_err': ms_err, 
            'df_err': df_err, 'f_stat': f_stat, 'p_value': p_value}
    df_anova = df_anova.append(row, ignore_index = True)
    df_anova = df_anova[['measure', 'df_group', 'df_err', 'ss_groups',
                       'ss_err', 'ms_groups', 'ms_err','f_stat', 'p_value']]
    display(HTML(df_anova.to_html())) # Display the results in a table
    df_anovas = df_anovas.append(df_anova)
    return df_anovas

# Cycle through the measurements and perform one way anova on each 
# with strain as the ind. var
df_anovas = pd.DataFrame()
i = 0
while i < len(analyze):
    df_anovas = get_anova(analyze[i], labels[i], df_anovas)
    i = i + 1   
save_csv (df_anovas, exp_name + '_one_way_anova.csv' ) # Save the dataframe to a csv spreadsheet



Unnamed: 0,measure,df_group,df_err,ss_groups,ss_err,ms_groups,ms_err,f_stat,p_value
0,cc,3.0,82.0,5.129109,35.925878,1.709703,0.43812,3.902358,0.011678


Unnamed: 0,measure,df_group,df_err,ss_groups,ss_err,ms_groups,ms_err,f_stat,p_value
0,cc_nmda,3.0,64.0,2.877577,33.591824,0.959192,0.524872,1.827478,0.151053


Unnamed: 0,measure,df_group,df_err,ss_groups,ss_err,ms_groups,ms_err,f_stat,p_value
0,cc_pchange,3.0,64.0,169403.856105,2236488.0,56467.952035,34945.12439,1.615904,0.194394


Unnamed: 0,measure,df_group,df_err,ss_groups,ss_err,ms_groups,ms_err,f_stat,p_value
0,gj,3.0,82.0,42646.087054,257283.246771,14215.362351,3137.60057,4.530648,0.005454


Unnamed: 0,measure,df_group,df_err,ss_groups,ss_err,ms_groups,ms_err,f_stat,p_value
0,gj_nmda,3.0,64.0,44912.491768,257204.866481,14970.830589,4018.826039,3.725175,0.015603


Unnamed: 0,measure,df_group,df_err,ss_groups,ss_err,ms_groups,ms_err,f_stat,p_value
0,gj_pchange,3.0,64.0,232027.222359,4041551.0,77342.407453,63149.237993,1.224756,0.307988


Unnamed: 0,measure,df_group,df_err,ss_groups,ss_err,ms_groups,ms_err,f_stat,p_value
0,vm,3.0,82.0,51.450197,3645.797634,17.150066,44.460947,0.385733,0.763558


Unnamed: 0,measure,df_group,df_err,ss_groups,ss_err,ms_groups,ms_err,f_stat,p_value
0,vm_nmda,3.0,64.0,769.644387,11360.047436,256.548129,177.500741,1.445336,0.237891


Unnamed: 0,measure,df_group,df_err,ss_groups,ss_err,ms_groups,ms_err,f_stat,p_value
0,rin,3.0,82.0,54526.578506,1397999.0,18175.526169,17048.763751,1.066091,0.368127


Unnamed: 0,measure,df_group,df_err,ss_groups,ss_err,ms_groups,ms_err,f_stat,p_value
0,rin_nmda,3.0,64.0,102934.784362,845069.557246,34311.594787,13204.211832,2.598534,0.059844


### Dunnett Pairwise Comparisons with Strain as Factor

In [6]:
 def get_dunnett(dep_var, label):
        """User passes the the dependent variable and measurement label. 
        The function performs planned comparisons using the Dunnett test 
        and summarizes the data. Output is saved as a .csv spreadsheet in
        the working directory."""
        
        df_dun = pd.DataFrame()
        display(HTML('<h4>Dunnett Planned Comparisons: ' + label))
        dunnett_results = dunnett(x = dep_var, g = strain, control = 'group1')
        pairs = base.labels(dunnett_results[0])
        calculations = base.labels(dunnett_results[0][1])
        i = 0
        for pair in pairs[0]:
            groups = pair.split('-')
            row = {'measurement': label,
                   'group1': groups[0],
                   'group2': groups[1],
                   'diff': dunnett_results[0][i],
                   'lwr_ci': dunnett_results[0][i+3],
                   'upr_ci': dunnett_results[0][i+6],
                   'p_val': dunnett_results[0][i+9]}
            df_dun = df_dun.append(row, ignore_index = True)
            i = i + 1
        df_dun = df_dun[['measurement','group1', 'group2', 'diff', 'lwr_ci', 'upr_ci', 'p_val']]
        display(HTML(df_dun.to_html()))

# Cycle through the measurements and perform Dunnett planned comparisons on 
# each with strain as the ind. var      
i = 0
while i < len(analyze)-1:
    get_dunnett(analyze[i], labels[i])
    i = i + 1   


Unnamed: 0,measurement,group1,group2,diff,lwr_ci,upr_ci,p_val
0,cc,group2,group1,-0.457058,-0.938514,0.024397,0.067098
1,cc,group3,group1,-0.551472,-1.099086,-0.003858,0.047869
2,cc,group4,group1,-0.580431,-1.019249,-0.141614,0.006067


Unnamed: 0,measurement,group1,group2,diff,lwr_ci,upr_ci,p_val
0,cc_nmda,group2,group1,-0.33335,-0.900428,0.233728,0.371943
1,cc_nmda,group3,group1,-0.421336,-1.146356,0.303685,0.381277
2,cc_nmda,group4,group1,-0.492271,-1.028017,0.043475,0.079395


Unnamed: 0,measurement,group1,group2,diff,lwr_ci,upr_ci,p_val
0,cc_pchange,group2,group1,83.58631,-62.735474,229.908094,0.39548
1,cc_pchange,group3,group1,57.843295,-129.232146,244.918737,0.811985
2,cc_pchange,group4,group1,-42.658788,-180.896241,95.578666,0.812868


Unnamed: 0,measurement,group1,group2,diff,lwr_ci,upr_ci,p_val
0,gj,group2,group1,-50.517694,-91.261204,-9.774184,0.01089
1,gj,group3,group1,-48.02183,-94.363992,-1.679669,0.040198
2,gj,group4,group1,-49.711964,-86.847187,-12.576741,0.005408


Unnamed: 0,measurement,group1,group2,diff,lwr_ci,upr_ci,p_val
0,gj_nmda,group2,group1,-49.539072,-99.160048,0.081903,0.050657
1,gj_nmda,group3,group1,-40.742116,-104.183564,22.699332,0.300555
2,gj_nmda,group4,group1,-60.980361,-107.85976,-14.100962,0.007156


Unnamed: 0,measurement,group1,group2,diff,lwr_ci,upr_ci,p_val
0,gj_pchange,group2,group1,49.017647,-147.680265,245.715559,0.889275
1,gj_pchange,group3,group1,47.38,-204.102368,298.862368,0.947254
2,gj_pchange,group4,group1,-92.901429,-278.731711,92.928854,0.504735


Unnamed: 0,measurement,group1,group2,diff,lwr_ci,upr_ci,p_val
0,vm,group2,group1,-0.552567,-5.402648,4.297513,0.986847
1,vm,group3,group1,0.856314,-4.660226,6.372855,0.968201
2,vm,group4,group1,1.366382,-3.05417,5.786935,0.808018


Unnamed: 0,measurement,group1,group2,diff,lwr_ci,upr_ci,p_val
0,vm_nmda,group2,group1,-6.773946,-17.202301,3.654408,0.291744
1,vm_nmda,group3,group1,0.243952,-13.088915,13.57682,0.999945
2,vm_nmda,group4,group1,1.838115,-8.014069,11.690299,0.948651


Unnamed: 0,measurement,group1,group2,diff,lwr_ci,upr_ci,p_val
0,rin,group2,group1,-14.791205,-109.765496,80.183086,0.967896
1,rin,group3,group1,-13.117876,-121.142784,94.907032,0.984192
2,rin,group4,group1,43.696177,-42.867093,130.259447,0.492501


### Tukey HSD Posthoc Comparisons

In [7]:
def get_tukey(dep_var, label, df_tukey):
    lm_x = stats.lm("dep_var ~ strain")
    tukey_aov = stats.aov(lm_x)
    tukey_results = stats.TukeyHSD(tukey_aov)
    calculations = base.labels(tukey_results[0])[1]
    col_length = int(len(tukey_results[0])/len(calculations))
    comparisons = base.labels(tukey_results[0])[0]

    i = 0
    while i < col_length:
        comparison = comparisons[i].split('-')
        row = {'group1': comparison[0],
               'group2': comparison[1],
                'diff': tukey_results[0][i],
                'lwr' : tukey_results[0][i + col_length],
                'upr' : tukey_results[0][i + col_length*2],
                'p_adj' : tukey_results[0][i + col_length*3]}

        df_tukey = df_tukey.append(row, ignore_index = True)
        df_tukey = df_tukey[['group1', 'group2', 'diff', 'upr','lwr', 'p_adj']]
        i = i + 1
    return df_tukey
    

i = 0
while i < len(analyze)-1:
    df_tukey = pd.DataFrame()
    display(HTML('<h4> Tukey HSD: ' + labels[i]))
    df_tukey = get_tukey (analyze[i], labels[i], df_tukey)
    display(HTML(df_tukey.to_html()))
    i = i + 1   
    




Unnamed: 0,group1,group2,diff,upr,lwr,p_adj
0,group2,group1,13.013907,110.895488,-84.867674,0.98506
1,group3,group1,55.297498,180.441134,-69.846137,0.650532
2,group4,group1,90.62822,183.101809,-1.845369,0.056798
3,group3,group2,42.283591,172.242191,-87.675008,0.826184
4,group4,group2,77.614313,176.506541,-21.277915,0.173923
5,group4,group3,35.330722,161.266413,-90.60497,0.880541


Unnamed: 0,group1,group2,diff,upr,lwr,p_adj
0,group2,group1,13.013907,110.895488,-84.867674,0.98506
1,group3,group1,55.297498,180.441134,-69.846137,0.650532
2,group4,group1,90.62822,183.101809,-1.845369,0.056798
3,group3,group2,42.283591,172.242191,-87.675008,0.826184
4,group4,group2,77.614313,176.506541,-21.277915,0.173923
5,group4,group3,35.330722,161.266413,-90.60497,0.880541


Unnamed: 0,group1,group2,diff,upr,lwr,p_adj
0,group2,group1,13.013907,110.895488,-84.867674,0.98506
1,group3,group1,55.297498,180.441134,-69.846137,0.650532
2,group4,group1,90.62822,183.101809,-1.845369,0.056798
3,group3,group2,42.283591,172.242191,-87.675008,0.826184
4,group4,group2,77.614313,176.506541,-21.277915,0.173923
5,group4,group3,35.330722,161.266413,-90.60497,0.880541


Unnamed: 0,group1,group2,diff,upr,lwr,p_adj
0,group2,group1,13.013907,110.895488,-84.867674,0.98506
1,group3,group1,55.297498,180.441134,-69.846137,0.650532
2,group4,group1,90.62822,183.101809,-1.845369,0.056798
3,group3,group2,42.283591,172.242191,-87.675008,0.826184
4,group4,group2,77.614313,176.506541,-21.277915,0.173923
5,group4,group3,35.330722,161.266413,-90.60497,0.880541


Unnamed: 0,group1,group2,diff,upr,lwr,p_adj
0,group2,group1,13.013907,110.895488,-84.867674,0.98506
1,group3,group1,55.297498,180.441134,-69.846137,0.650532
2,group4,group1,90.62822,183.101809,-1.845369,0.056798
3,group3,group2,42.283591,172.242191,-87.675008,0.826184
4,group4,group2,77.614313,176.506541,-21.277915,0.173923
5,group4,group3,35.330722,161.266413,-90.60497,0.880541


Unnamed: 0,group1,group2,diff,upr,lwr,p_adj
0,group2,group1,13.013907,110.895488,-84.867674,0.98506
1,group3,group1,55.297498,180.441134,-69.846137,0.650532
2,group4,group1,90.62822,183.101809,-1.845369,0.056798
3,group3,group2,42.283591,172.242191,-87.675008,0.826184
4,group4,group2,77.614313,176.506541,-21.277915,0.173923
5,group4,group3,35.330722,161.266413,-90.60497,0.880541


Unnamed: 0,group1,group2,diff,upr,lwr,p_adj
0,group2,group1,13.013907,110.895488,-84.867674,0.98506
1,group3,group1,55.297498,180.441134,-69.846137,0.650532
2,group4,group1,90.62822,183.101809,-1.845369,0.056798
3,group3,group2,42.283591,172.242191,-87.675008,0.826184
4,group4,group2,77.614313,176.506541,-21.277915,0.173923
5,group4,group3,35.330722,161.266413,-90.60497,0.880541


Unnamed: 0,group1,group2,diff,upr,lwr,p_adj
0,group2,group1,13.013907,110.895488,-84.867674,0.98506
1,group3,group1,55.297498,180.441134,-69.846137,0.650532
2,group4,group1,90.62822,183.101809,-1.845369,0.056798
3,group3,group2,42.283591,172.242191,-87.675008,0.826184
4,group4,group2,77.614313,176.506541,-21.277915,0.173923
5,group4,group3,35.330722,161.266413,-90.60497,0.880541


Unnamed: 0,group1,group2,diff,upr,lwr,p_adj
0,group2,group1,13.013907,110.895488,-84.867674,0.98506
1,group3,group1,55.297498,180.441134,-69.846137,0.650532
2,group4,group1,90.62822,183.101809,-1.845369,0.056798
3,group3,group2,42.283591,172.242191,-87.675008,0.826184
4,group4,group2,77.614313,176.506541,-21.277915,0.173923
5,group4,group3,35.330722,161.266413,-90.60497,0.880541


### Reference code for remotely installing R packages through python

In [None]:
utils.chooseCRANmirror(ind=1) 
utils.install_packages ("DescTools")
utils.install_packages("rcompanion")
utils.install_packages("lsmeans")
utils.install_packages("multcompView")
utils.install_packages("FSA")