# Lesson 7 (aka Lesson 13): ANOVA Continued

In [13]:
import numpy as np
import pandas as pd
import scipy.stats as stats

## Grand Mean

In [3]:
# Three groups ('a', 'b', 'c') with three observations each; one column per group.
food_samples = {
    'a': [2, 4, 3],
    'b': [6, 5, 7],
    'c': [8, 9, 10],
}
foods_df = pd.DataFrame(food_samples)
foods_df

Unnamed: 0,a,b,c
0,2,6,8
1,4,5,9
2,3,7,10


In [4]:
# Grand mean = mean of every observation in the table. Averaging all values
# directly (instead of averaging the three group means) gives the same result
# for these equal-sized groups and stays correct if group sizes ever differ.
grand_mean = foods_df.values.mean()

# Parenthesised single-argument print behaves the same on Python 2 and Python 3.
print('Grand mean: {:.2f}'.format(grand_mean))

Grand mean: 6.00


## Group Means

In [7]:
# Sample mean of each group (one group per column of foods_df).
a_mean, b_mean, c_mean = (foods_df[group].mean() for group in ('a', 'b', 'c'))

# Parenthesised single-argument print behaves the same on Python 2 and Python 3.
print('Group means: a: {:.2f}, b: {:.2f}, c: {:.2f}'.format(a_mean, b_mean, c_mean))

Group means: a: 3.00, b: 6.00, c: 9.00


## Sum of Squares Between Groups

In [8]:
# Per-group sample size n, read from column 'a' (all three food groups hold
# the same number of observations).
samp_size = foods_df['a'].count()

# Squared distance of each group mean from the grand mean.
a_ss = (a_mean - grand_mean) ** 2
b_ss = (b_mean - grand_mean) ** 2
c_ss = (c_mean - grand_mean) ** 2

# SS_between = n * sum over groups of (group mean - grand mean)^2
btwn_ss = samp_size * np.array([a_ss, b_ss, c_ss]).sum()

# Parenthesised single-argument print behaves the same on Python 2 and Python 3.
print('Sum of squares between groups: {:.2f}'.format(btwn_ss))

Sum of squares between groups: 54.00


## Sum of Squares Within Groups

In [9]:
# SS_within: squared deviation of every observation from its own group mean,
# summed over all observations in all groups.
sq_dev_by_group = [
    (foods_df['a'] - a_mean) ** 2,
    (foods_df['b'] - b_mean) ** 2,
    (foods_df['c'] - c_mean) ** 2,
]
wi_ss = np.array(sq_dev_by_group).sum()

# Parenthesised single-argument print behaves the same on Python 2 and Python 3.
print('Sum of squares within groups: {:.2f}'.format(wi_ss))

Sum of squares within groups: 6.00


## Degrees of Freedom

In [10]:
# Derive the degrees of freedom from the data instead of hard-coding them, so
# they stay correct if foods_df changes.
n_groups = foods_df.shape[1]            # k: one column per group
n_obs = int(foods_df.count().sum())     # N: total number of non-missing observations
btwn_dof = n_groups - 1                 # k - 1  (2 for this data)
wi_dof = n_obs - n_groups               # N - k  (6 for this data)

## Mean Squares

In [11]:
# Mean square = sum of squares divided by its degrees of freedom.
btwn_ms = btwn_ss / btwn_dof
wi_ms = wi_ss / wi_dof

# Parenthesised single-argument print behaves the same on Python 2 and Python 3.
print('Mean squares: between: {:.2f}, within: {:.2f}'.format(btwn_ms, wi_ms))

Mean squares: between: 27.00, within: 1.00


## F-Statistic

In [15]:
# One-way ANOVA across the three food groups. The result is indexable;
# element 0 is the F statistic that gets printed below.
f_stat = stats.f_oneway(foods_df['a'], foods_df['b'], foods_df['c'])

# Parenthesised single-argument print behaves the same on Python 2 and Python 3.
print('F-statistic: {:.2f}'.format(f_stat[0]))

F-statistic: 27.00


## Total Sum of Squares (Squared Deviations from the Grand Mean)

In [19]:
# Sum of the squared deviations of every observation from the grand mean.
sq_dev_from_grand = (foods_df - grand_mean) ** 2
gm_dev = sq_dev_from_grand.values.sum()

# Parenthesised single-argument print behaves the same on Python 2 and Python 3.
print('Deviation from grand mean: {:.2f}'.format(gm_dev))

Deviation from grand mean: 60.00


## Tukey's HSD

In [20]:
# Hard-coded q value used by the formula below.
# NOTE(review): presumably the Studentized-range critical value looked up for
# alpha = 0.05 with 3 groups and 6 within-group degrees of freedom; confirm
# against a q table if the data or alpha change.
q = 4.34

# Tukey's HSD = q * sqrt(MS_within / n)
tukeys = q * np.sqrt(wi_ms / samp_size)

# Parenthesised single-argument print behaves the same on Python 2 and Python 3.
print("Tukey's: {:.2f}".format(tukeys))

Tukey's: 2.51


## Significant Differences

In [21]:
# Pairwise differences between the group means.
ab_diff = a_mean - b_mean
bc_diff = b_mean - c_mean
ac_diff = a_mean - c_mean

# Parenthesised single-argument print behaves the same on Python 2 and Python 3.
print('Differences: ab: {}, bc: {}, ac: {}'.format(ab_diff, bc_diff, ac_diff))

Differences: ab: -3.0, bc: -3.0, ac: -6.0


## Cohen's d for Multiple Comparisons

In [22]:
# NOTE: despite its name, pooled_var holds sqrt(MS_within), i.e. a standard
# deviation rather than a variance. The name is left unchanged so any other
# cell that refers to it keeps working.
pooled_var = np.sqrt(wi_ms)

# Cohen's d for each pair: difference of group means divided by sqrt(MS_within).
ab_d = (a_mean - b_mean) / pooled_var
bc_d = (b_mean - c_mean) / pooled_var
ac_d = (a_mean - c_mean) / pooled_var

# Parenthesised single-argument print behaves the same on Python 2 and Python 3.
print("Cohen's: ab: {:.2f}, bc: {:.2f}, ac: {:.2f}".format(ab_d, bc_d, ac_d))

Cohen's: ab: -3.00, bc: -3.00, ac: -6.00


## Eta^2 ($\eta^2$)

In [23]:
# Eta squared ("Ada^2" in the variable name and label):
# SS_between / (SS_between + SS_within).
total_ss = btwn_ss + wi_ss
ada_sq = btwn_ss / total_ss

# Parenthesised single-argument print behaves the same on Python 2 and Python 3.
print('Ada^2: {:.2f}'.format(ada_sq))

Ada^2: 0.90


## Different Sample Sizes

In [25]:
# Four groups of unequal size. Shorter groups are padded with None so that
# every column has the same length (7).
drug_samples = {
    'p': [1.5, 1.3, 1.8, 1.6, 1.3, None, None],
    '1': [1.6, 1.7, 1.9, 1.2, None, None, None],
    '2': [2.0, 1.4, 1.5, 1.5, 1.8, 1.7, 1.4],
    '3': [2.9, 3.1, 2.8, 2.7, None, None, None],
}
drugs_df = pd.DataFrame(drug_samples)
drugs_df

Unnamed: 0,1,2,3,p
0,1.6,2.0,2.9,1.5
1,1.7,1.4,3.1,1.3
2,1.9,1.5,2.8,1.8
3,1.2,1.5,2.7,1.6
4,,1.8,,1.3
5,,1.7,,
6,,1.4,,


In [27]:
# Per-group means of drugs_df. Series.mean() skips missing values by default,
# so the padding in the shorter columns does not affect the result.
p_mean, one_mean, two_mean, three_mean = (
    drugs_df[group].mean() for group in ('p', '1', '2', '3')
)

# Parenthesised single-argument print behaves the same on Python 2 and Python 3.
print('Means: p: {:.2f}, 1: {:.2f}, 2: {:.2f}, 3: {:.2f}'.format(p_mean, one_mean, two_mean, three_mean))

Means: p: 1.50, 1: 1.60, 2: 1.61, 3: 2.88
