In [14]:
import numpy as np
from scipy.stats import ttest_1samp, ttest_ind, mannwhitneyu, levene, shapiro, wilcoxon
from statsmodels.stats.power import ttest_power
import pandas as pd

In [27]:
weight = np.array([
# sugar consumption in grams and stature (0=diabetic, 1=non diabetic)
[9.31, 0],
[7.76, 0],
[6.98, 1],
[7.88, 1],
[8.49, 1],
[10.05, 1],
[8.80, 1],
[10.88, 1],
[6.13, 1],
[7.90, 1],
[11.51, 0],
[12.59, 0],
[7.05, 1],
[11.85, 0],
[9.99, 0],
[7.48, 0],
[8.79, 0],
[8.69, 1],
[9.68, 0],
[8.58, 1],
[9.19, 0],
[8.11, 1]])

In [28]:
# Seperating the data into 2 groups
group1 = weight[:, 1] == 0
group1 = weight[group1][:, 0]
group2 = weight[:, 1] == 1
group2 = weight[group2][:, 0]

In [29]:
# two-sample t-test
# null hypothesis: the two groups have the same mean
# this test assumes the two groups have the same variance...
# (can be checked with tests for equal variance - Levene)
# independent groups: e.g., how boys and girls fare at an exam
# dependent groups: e.g., how the same class fare at 2 different exams
t_statistic, p_value = ttest_ind(group1, group2)
print(t_statistic, p_value)

2.3730593334 0.0277774161135


In [30]:
# p_value < 0.05 => alternative hypothesis:
# they don't have the same mean at the 5% significance level
print ("two-sample t-test p-value=", p_value)

two-sample t-test p-value= 0.0277774161135


In [31]:
# two-sample wilcoxon test
# a.k.a Mann Whitney U - Used when samples are not normally distributed
u, p_value = mannwhitneyu(group1, group2)
print ("two-sample wilcoxon-test p-value=", p_value)

two-sample wilcoxon-test p-value= 0.0221568961416


In [32]:
# pre and post-Exercise food energy intake
intake = np.array([
[5460, 3980],
[5230, 4890],
[5640, 3885],
[6180, 5160],
[6390, 5645],
[6512, 4650],
[6765, 6109],
[7515, 5975],
[7515, 6790],
[8230, 6970],
[8770, 7335],
])

In [33]:
# Seperating data into 2 groups
pre = intake[:, 0]
post = intake[:, 1]

In [34]:
# paired t-test: doing two measurments on the same experimental unit
# e.g., before and after exercise
t_statistic, p_value = ttest_1samp(post - pre, 0)
print(t_statistic, p_value)

-7.75063325783 1.55218941481e-05


In [35]:
# p < 0.05 => alternative hypothesis:
# the difference in mean is not equal to 0
print ("paired t-test p-value=", p_value)

paired t-test p-value= 1.55218941481e-05


In [36]:
# alternative to paired t-test when data has an ordinary scale or when not
# normally distributed
z_statistic, p_value = wilcoxon(post - pre)

print ("paired wilcoxon-test p-value=", p_value)

paired wilcoxon-test p-value= 0.00334561811585


In [37]:
# For checking equality of variance between groups
# Null Hypothesis: Variances are equal
levene(pre,post)

LeveneResult(statistic=0.055086879201557228, pvalue=0.81682315923152338)

In [38]:
# For checking Normality distribution of each distribution
# Null Hypothesis: Distribution is Normal
shapiro(post)
shapiro(pre)

(0.9533762335777283, 0.6872939467430115)

In [39]:
# Calculating Power of Test
# Compute the difference in Means between 2 sample means and divide by pooled Standard Deviation 
# number of Observations/tuples
# Set the alpha value to 0.05 and alternative values 'two-sided' , 'larger' , 'smaller'
(np.mean(pre) - np.mean(post)) / np.sqrt(((11-1)*np.var(pre)+(11-1)*np.var(post)) / 11+11-2)



0.77970485269756429

In [40]:
print(ttest_power(0.79, nobs=11, alpha=0.05, alternative='two-sided'))

0.65686785415


In [41]:
# Caculating power of test for the energ dataset

(np.mean(group1) - np.mean(group2)) / np.sqrt(((9-1)*np.var(group1)+(13-1)*np.var(group2)) / 9+13-2)

0.38743013314377195

In [42]:
print(ttest_power(0.528, nobs=22, alpha=0.10, alternative='two-sided'))

0.773595410907
