# Exercise 10: Multivariate models

## Table of Contents

* Multivariate models w/o interactions
    * One-way ANOVA
    * Post-hoc testing
    * Adjusting for multiple comparisons
* Multivariate models w/ interactions
    * Two-way ANOVA
    
## Setup

In [None]:
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import scipy.stats as sps
import pingouin as pg
import statsmodels.api as sm
import statsmodels.formula.api as smf
import os

# For retina displays only 
from IPython.display import set_matplotlib_formats
set_matplotlib_formats('retina')
%matplotlib inline

## Multivariate models w/o interactions

In [None]:
# Creating some fake data: three independent samples of n normal draws each,
# sharing the same spread (scale=2) but with different means (0.5, 2 and 0).
n = 20

# Dedicated, seeded generator so that "Restart & Run All" reproduces the same
# samples (and therefore the same ANOVA / post-hoc tables) every time.
rng_one_way = np.random.default_rng(seed=10)

X1 = rng_one_way.normal(loc=.5, scale=2, size=n)
X2 = rng_one_way.normal(loc=2, scale=2, size=n)
X3 = rng_one_way.normal(loc=0, scale=2, size=n)

In [None]:
# Wide -> long: statsmodels' formula interface wants one row per observation,
# with the sample it came from stored in a 'Group' column and the value in 'X'.
samples = {'X1': X1, 'X2': X2, 'X3': X3}
df = pd.DataFrame(samples)
df_m = pd.melt(
    df,
    value_vars=list(samples),
    var_name='Group',
    value_name='X',
)
df_m

### One-way ANOVA

In [None]:
# One-way ANOVA with scipy.stats: takes the raw samples, one argument per group
groups = (X1, X2, X3)
sps.f_oneway(*groups)

In [None]:
# Same one-way ANOVA via statsmodels: fit an OLS model on the long-format frame,
# then build the ANOVA table from the fit.
# "C()" marks Group as a categorical variable in the formula.

ols_spec = smf.ols(formula='X ~ C(Group)', data=df_m)
model = ols_spec.fit()
sm.stats.anova_lm(model)

### Post-hoc testing 

In [None]:
# Post-hoc pairwise t-tests between the levels of Group, Bonferroni-corrected

results = model.t_test_pairwise(term_name='C(Group)', method='bonferroni')
results.result_frame

In [None]:
from statsmodels.stats.multicomp import pairwise_tukeyhsd

In [None]:
# Tukey's HSD on the long-format data: values in 'X', group labels in 'Group'
tukey = pairwise_tukeyhsd(endog=df_m['X'], groups=df_m['Group'], alpha=0.05)
tukey.summary()

### Adjusting for multiple comparisons

In [None]:
# Exercise: calculate adjusted p-values for all comparisons between X1, X2, and X3. 



## Multivariate models w/ interactions

### Two-way ANOVA

In [None]:
# Creating some fake data with no interaction: the group means are
# wild-type/small 0, wild-type/large 2, mutant/small 0.5, mutant/large 2.5,
# i.e. the "large" shift (+2) is the same for both types.
n = 15

# Dedicated, seeded generator so that "Restart & Run All" reproduces the same
# table (and therefore the same two-way ANOVA results) every time.
rng_two_way = np.random.default_rng(seed=2)

age_w_s = rng_two_way.normal(loc=0, scale=2, size=n)
age_w_l = rng_two_way.normal(loc=2, scale=2, size=n)
age_m_s = rng_two_way.normal(loc=.5, scale=2, size=n)
age_m_l = rng_two_way.normal(loc=2.5, scale=2, size=n)
age = np.concatenate([age_w_s, age_w_l, age_m_s, age_m_l])

# Factor labels, repeated so they line up with the concatenation order above
typ = np.repeat(["wild-type", "wild-type", "mutant", "mutant"], repeats=[len(age_w_s), len(age_w_l), len(age_m_s), len(age_m_l)])
size = np.repeat(["small", "large", "small", "large"], repeats=[len(age_w_s), len(age_w_l), len(age_m_s), len(age_m_l)])
df_2 = pd.DataFrame({'type': typ, 'size': size, 'age': age})
df_2

In [None]:
# Plotting the data: one box per (type, size) combination
grouping = ['type', 'size']
df_2.boxplot(by=grouping, figsize=(5, 4))

In [None]:
# Two-way ANOVA, main effects only (no interaction term in the formula)
additive_spec = smf.ols(formula='age ~ C(type) + C(size)', data=df_2)
two_way_result = additive_spec.fit()
sm.stats.anova_lm(two_way_result)

In [None]:
# Two-way ANOVA with the type-by-size interaction term added.
# Note: this rebinds `two_way_result`, replacing the main-effects-only fit.
interaction_spec = smf.ols(formula='age ~ C(type) + C(size) + C(type):C(size)', data=df_2)
two_way_result = interaction_spec.fit()
sm.stats.anova_lm(two_way_result)

In [None]:
# Exercise: Generate data with an interaction effect 





