In [281]:
import numpy as np
import pandas as pd
import scipy.stats as stats
import statsmodels.api as sm
from statsmodels.formula.api import ols

In [287]:
# Load the raw export; the path is relative to the notebook's working directory.
df = pd.read_csv("sleep_omega3.csv")

# Columns carried into the analysis frame.
analysis_columns = [
    'group',
    'Omega3_base', 'Omega3_3months',
    'sleep_cbcl_base', 'sleep_cbcl_3months',
    'sleep_cbcl_6months', 'sleep_cbcl_12months',
]

# Keep every row whose group code is neither 5 nor 6.  The explicit .copy()
# makes `data` an independent frame, so the indicator columns added below are
# written to `data` itself rather than to a slice of `df` (which is what
# triggers pandas' SettingWithCopyWarning).
data = df.loc[~df['group'].isin([5, 6]), analysis_columns].copy()

# 0/1 indicator columns derived from the group code:
#   group_omega3     -> 1 when group is 2 or 3
#   group_cbt        -> 1 when group is 1 or 3
#   group_omega3_cbt -> 1 when group is 3
data['group_omega3'] = data['group'].isin([2, 3]).astype(int)
data['group_cbt'] = data['group'].isin([1, 3]).astype(int)
data['group_omega3_cbt'] = (data['group'] == 3).astype(int)
data.head()

Unnamed: 0,group,Omega3_base,Omega3_3months,sleep_cbcl_base,sleep_cbcl_3months,sleep_cbcl_6months,sleep_cbcl_12months,group_omega3,group_cbt,group_omega3_cbt
0,2,21.114324,25.688973,0.0,1.0,0.0,0.0,1,0,0
1,4,15.382893,,3.0,,,,0,0,0
2,2,16.724842,17.01435,4.0,5.0,3.0,3.0,1,0,0
3,4,15.321093,16.146781,4.0,1.0,1.0,2.0,0,0,0
4,3,16.370078,16.82079,0.0,0.0,,0.0,1,1,1


In [288]:
def short_smry(series, decimals=2, ddof=0):
    """Format a numeric sample as the string ``'mean (sd)'``.

    Parameters
    ----------
    series : array-like
        Values to summarise (anything ``np.mean`` / ``np.std`` accept).
    decimals : int, default 2
        Number of decimal places both statistics are rounded to.
    ddof : int, default 0
        Delta degrees of freedom forwarded to ``np.std``.  The default of 0
        is ``np.std``'s own default (population standard deviation) and keeps
        the historical output of this helper; pass ``ddof=1`` for the sample
        standard deviation.

    Returns
    -------
    str
        ``'<mean> (<sd>)'`` with both numbers rounded to ``decimals`` places.
    """
    centre = round(np.mean(series), decimals)
    spread = round(np.std(series, ddof=ddof), decimals)
    # !s forces str() on each value, matching plain string concatenation.
    return f"{centre!s} ({spread!s})"
# "mean (sd)" summary of every measurement column, one row per value of
# group_omega3.  The other group-coding columns are removed *before*
# aggregating so no summaries are computed for columns that would be
# discarded anyway.
data_smry = (
    data
    .drop(columns=['group', 'group_cbt', 'group_omega3_cbt'])
    .groupby('group_omega3')
    .agg(short_smry)
)

# Persist the table first, then leave it as the cell's final expression so
# the notebook actually renders it (to_csv returns None and would otherwise
# suppress the display).
data_smry.to_csv('cbcl_table.csv')
data_smry

In [289]:
# One-way ANOVA of sleep_cbcl_base across group codes 1-4.  Each group's
# scores are extracted separately and missing values are dropped before the
# samples are handed to scipy.
baseline_by_group = [
    data.loc[data['group'] == code, 'sleep_cbcl_base'].dropna()
    for code in (1, 2, 3, 4)
]
stats.f_oneway(*baseline_by_group)

F_onewayResult(statistic=1.6197876790301087, pvalue=0.1849798978443969)

In [290]:
def anova(x, y):
    """Fit ``y ~ C(x)`` by OLS on the notebook-level ``data`` frame and print
    the resulting ANOVA table.

    Parameters
    ----------
    x : str
        Name of the column used as a categorical factor.
    y : str
        Name of the outcome column.

    Prints a ``y ~ x`` header line followed by the table returned by
    ``sm.stats.anova_lm(..., typ=2)``; returns ``None``.
    """
    print(f"{y} ~ {x}")
    formula = f"{y}~ C({x})"
    fitted = ols(formula, data).fit()
    table = sm.stats.anova_lm(fitted, typ=2)
    print(table)

# Run anova() for every grouping column against every CBCL sleep outcome,
# iterating outcomes fastest so the printed tables are grouped by factor.
grouping_columns = ['group', 'group_omega3', 'group_cbt', 'group_omega3_cbt']
outcome_columns = [
    'sleep_cbcl_base',
    'sleep_cbcl_3months',
    'sleep_cbcl_6months',
    'sleep_cbcl_12months',
]
anova_pairs = [(g, v) for g in grouping_columns for v in outcome_columns]
for group, variable in anova_pairs:
    anova(group, variable)

sleep_cbcl_base ~ group
              sum_sq     df         F   PR(>F)
C(group)   12.494748    3.0  1.619788  0.18498
Residual  717.385111  279.0       NaN      NaN
sleep_cbcl_3months ~ group
              sum_sq     df         F    PR(>F)
C(group)    1.993783    3.0  0.237532  0.870116
Residual  553.986415  198.0       NaN       NaN
sleep_cbcl_6months ~ group
              sum_sq     df         F    PR(>F)
C(group)    9.617048    3.0  1.624545  0.184971
Residual  386.762952  196.0       NaN       NaN
sleep_cbcl_12months ~ group
              sum_sq     df         F    PR(>F)
C(group)   12.363500    3.0  1.479431  0.221903
Residual  476.345072  171.0       NaN       NaN
sleep_cbcl_base ~ group_omega3
                     sum_sq     df         F    PR(>F)
C(group_omega3)    3.755545    1.0  1.453344  0.229006
Residual         726.124313  281.0       NaN       NaN
sleep_cbcl_3months ~ group_omega3
                     sum_sq     df         F    PR(>F)
C(group_omega3)    1.980198    1.0  