In [12]:
import numpy as np
from scipy.stats import ttest_1samp, ttest_ind, mannwhitneyu, levene, shapiro, wilcoxon
from statsmodels.stats.power import ttest_power

In [2]:
energ = np.array([
# energy expenditure in mJ and stature (0=obese, 1=lean)
[9.21, 0],
[7.53, 1],
[7.48, 1],
[8.08, 1],
[8.09, 1],
[10.15, 1],
[8.40, 1],
[10.88, 1],
[6.13, 1],
[7.90, 1],
[11.51, 0],
[12.79, 0],
[7.05, 1],
[11.85, 0],
[9.97, 0],
[7.48, 1],
[8.79, 0],
[9.69, 0],
[9.68, 0],
[7.58, 1],
[9.19, 0],
[8.11, 1]])

In [3]:
# Seperating the data into 2 groups
group1 = energ[:, 1] == 0
group1 = energ[group1][:, 0]
group2 = energ[:, 1] == 1
group2 = energ[group2][:, 0]

In [4]:
# two-sample t-test
# null hypothesis: the two groups have the same mean
# this test assumes the two groups have the same variance...
# (can be checked with tests for equal variance - Levene)
# independent groups: e.g., how boys and girls fare at an exam
# dependent groups: e.g., how the same class fare at 2 different exams
t_statistic, p_value = ttest_ind(group1, group2)
print(t_statistic, p_value)

3.9455649161549835 0.0007989982111700593


In [5]:
# p_value < 0.05 => alternative hypothesis:
# they don't have the same mean at the 5% significance level
print ("two-sample t-test p-value=", p_value)

two-sample t-test p-value= 0.0007989982111700593


In [6]:
# two-sample wilcoxon test
# a.k.a Mann Whitney U - Used when samples are not normally distributed
u, p_value = mannwhitneyu(group1, group2)
print ("two-sample wilcoxon-test p-value=", p_value)

two-sample wilcoxon-test p-value= 0.0010608066929400244


In [7]:
# pre and post-surgery energy intake
intake = np.array([
[5260, 3910],
[5470, 4220],
[5640, 3885],
[6180, 5160],
[6390, 5645],
[6515, 4680],
[6805, 5265],
[7515, 5975],
[7515, 6790],
[8230, 6900],
[8770, 7335],
])

In [8]:
# Seperating data into 2 groups
pre = intake[:, 0]
post = intake[:, 1]

In [9]:
# paired t-test: doing two measurments on the same experimental unit
# e.g., before and after a treatment
t_statistic, p_value = ttest_1samp(post - pre, 0)
print(t_statistic, p_value)

-11.941392877647603 3.059020942934875e-07


In [10]:
# p < 0.05 => alternative hypothesis:
# the difference in mean is not equal to 0
print ("paired t-test p-value=", p_value)

paired t-test p-value= 3.059020942934875e-07


In [20]:
# alternative to paired t-test when data has an ordinary scale or when not
# normally distributed
z_statistic, p_value = wilcoxon(post - pre)

print ("paired wilcoxon-test p-value=", p_value)

paired wilcoxon-test p-value= 0.0033300139117459797


In [21]:
# For checking equality of variance between groups
# Null Hypothesis: Variances are equal
levene(pre,post)

LeveneResult(statistic=0.07826946818973061, pvalue=0.7825293782115069)

In [22]:
# For checking Normality distribution of each distribution
# Null Hypothesis: Distribution is Normal
shapiro(post)

(0.9363633990287781, 0.47873449325561523)

In [23]:
# Calculating Power of Test
# Compute the difference in Means between 2 sample means and divide by pooled Standard Deviation 
# number of Observations/tuples
# Set the alpha value to 0.05 and alternative values 'two-sided' , 'larger' , 'smaller'
(np.mean(pre) - np.mean(post)) / np.sqrt(((11-1)*np.var(pre)+(11-1)*np.var(post)) / 11+11-2)



0.8703477369680591

In [24]:
print(ttest_power(0.87, nobs=11, alpha=0.05, alternative='two-sided'))

0.7392948947066131


In [25]:
# Caculating power of test for the energ dataset

(np.mean(group1) - np.mean(group2)) / np.sqrt(((9-1)*np.var(group1)+(13-1)*np.var(group2)) / 9+13-2)

0.5874624977414711

In [26]:
print(ttest_power(0.587, nobs=22, alpha=0.10, alternative='two-sided'))

0.8456736280306766
