In [1]:
import numpy       as np
import pandas      as pd
import scipy.stats as stats
import statsmodels.api         as     sm
from   statsmodels.formula.api import ols

#### example 1

In [2]:
sectionA = np.array([1,2,3])
sectionB = np.array([2,3,5,4,7])
sectionC = np.array([6,8,10,9]) 

In [3]:
# normality tests
stats.shapiro(sectionA)
stats.shapiro(sectionB)
stats.shapiro(sectionC)

ShapiroResult(statistic=0.9713736176490784, pvalue=0.8499705791473389)

In [4]:
# variance equality test
stats.levene(sectionA, sectionB, sectionC)

LeveneResult(statistic=0.5483558994197292, pvalue=0.5960483263976416)

In [5]:
# one way ANOVA
f_test, p_val = stats.f_oneway(sectionA, sectionB, sectionC)
print(f_test)
print(p_val)

12.760273972602741
0.0023590748981715767


In [6]:
# one way ANOVA using statsmodels OLS
marks_df = pd.DataFrame()

In [8]:
df1= pd.DataFrame({'section': 'A', 'marks':sectionA})
df2= pd.DataFrame({'section': 'B', 'marks':sectionB})
df3= pd.DataFrame({'section': 'C', 'marks':sectionC})

In [9]:
marks_df = marks_df.append(df1) 
marks_df = marks_df.append(df2) 
marks_df = marks_df.append(df3) 

In [10]:
marks_df.head()
marks_df.shape

(12, 2)

In [11]:
mod = ols('marks ~ section', data = marks_df).fit()
aov_table = sm.stats.anova_lm(mod, typ=2)
print(aov_table)

          sum_sq   df          F    PR(>F)
section    72.45  2.0  12.760274  0.002359
Residual   25.55  9.0        NaN       NaN


In [12]:
crit = stats.f.ppf(q = 1-0.05, dfn = 2, dfd = 9)
print('F critical value for 2 and 9 df with .95 confidence %3.2f' %crit)

F critical value for 2 and 9 df with .95 confidence 4.26


In [13]:
p_value = 1 -stats.f.cdf(12.760274, dfn = 2, dfd = 9)
print('P value for 2 and 9 df with .95 confidence for the calculated F value %3.5f' % p_value)

P value for 2 and 9 df with .95 confidence for the calculated F value 0.00236


#### example 2

In [19]:
sectionA = np.array([5,4,6,7,2])
sectionB = np.array([9,0,1,2])
sectionC = np.array([1,5,9]) 

In [20]:
# one way ANOVA
f_test, p_val = stats.f_oneway(sectionA, sectionB, sectionC)
print(f_test)
print(p_val)

0.4393078512396694
0.6575955772545194


In [21]:
# one way ANOVA using statsmodels OLS
marks_df = pd.DataFrame()

In [22]:
df1= pd.DataFrame({'section': 'A', 'marks':sectionA})
df2= pd.DataFrame({'section': 'B', 'marks':sectionB})
df3= pd.DataFrame({'section': 'C', 'marks':sectionC})

In [23]:
marks_df = marks_df.append(df1) 
marks_df = marks_df.append(df2) 
marks_df = marks_df.append(df3) 

In [25]:
# if data was in a CSV file like marks.csv we could have directly read it and gone to next step
# it will have two colums, section and marks

In [24]:
mod = ols('marks ~ section', data = marks_df).fit()
aov_table = sm.stats.anova_lm(mod, typ=2)
print(aov_table)

          sum_sq   df         F    PR(>F)
section     9.45  2.0  0.439308  0.657596
Residual   96.80  9.0       NaN       NaN
