#### Import statements

In [108]:
import numpy as np
import pandas as pd
from scipy.stats import ttest_1samp, wilcoxon
from statsmodels.stats.power import ttest_power
import scipy.stats as scipy_stat
from statsmodels.stats.weightstats import ztest

In [109]:
# daily intake of energy in kJ for 11 women
daily_intake = np.array([5260,5470,5640,6180,6390,6515,
                         6805,7515,7515,8230,8770])

In [110]:
# one sample t-test
# null hypothesis: expected value = 7725
t_statistic, p_value = ttest_1samp(daily_intake, 7725)
print(t_statistic,p_value)

-2.8207540608310198 0.018137235176105812


In [111]:
# p_value < 0.05 => alternative hypothesis:
# data deviate significantly from the hypothesis that the mean
# is 7725 at the 5% level of significance
print ("one-sample t-test p-value=", p_value)

one-sample t-test p-value= 0.018137235176105812


In [112]:
# one sample wilcoxon-test
z_statistic, p_value = wilcoxon(daily_intake - 7725)
print ("one-sample wilcoxon-test p-value", p_value)

one-sample wilcoxon-test p-value 0.0244140625


In [113]:
# Calculating Power of Test
# Compute the difference in Means between Population and Sample and divide it by Standard Deviation
# Set the number of Observations
# Set the alpha value and alternative values 'two-sided' , 'larger' , 'smaller'

(np.mean(daily_intake) - 7725) / np.std(daily_intake)

-0.8920007551395173

In [114]:
print(ttest_power(-0.892, nobs=11, alpha=0.05, alternative='two-sided'))

0.7600146595982609


#### Workout

In [115]:
data = pd.read_csv("insurance.csv")

In [116]:
data.info()
data["age"].mean()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1338 entries, 0 to 1337
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   age       1338 non-null   int64  
 1   sex       1338 non-null   object 
 2   bmi       1338 non-null   float64
 3   children  1338 non-null   int64  
 4   smoker    1338 non-null   object 
 5   region    1338 non-null   object 
 6   charges   1338 non-null   float64
dtypes: float64(2), int64(2), object(3)
memory usage: 73.3+ KB


39.20702541106129

##### Two tailed test
H0 - Average age is 40  /  Ha - Average age is not equal to 40

In [117]:
# Using scipy package
std1, pvalue1 = ttest_1samp(data["age"],40)
print(std1,pvalue1)

# p-value less than 5% - Ho rejected.

-2.064487357318845 0.03916402791493093


In [118]:
# Using statsmodels package
std2, pvalue2 = ztest(data["age"],value=40)
print(std2,pvalue2)

-2.064487357318845 0.03897153137969687


##### One tailed test - lower tail
Ho - Average age is greater than equal to 40 /  Ha - Average age is less than 40

In [119]:
def one_sample_one_tailed(sample_data, popmean, alpha=0.05, alternative='greater'):
    t, p = ttest_1samp(sample_data, popmean)
    print ('t:',t)
    print ('p:',p)
    if alternative == 'larger' and (p/2 < alpha) and t > 0:
        print ('Reject Null Hypothesis for greater-than test')
    elif alternative == 'smaller' and (p/2 < alpha) and t < 0:
        print ('Reject Null Hypothesis for less-thane test')
    else:
        print ('Failed to reject Null Hypothesis')

In [120]:
# Using scipy package
# std1, pvalue1 = ttest_1samp(data["age"],40)
# pvalue1 = pvalue1 /2
# print(std1,pvalue1)
# print(ttest_1samp(data["age"],40))

one_sample_one_tailed(data["age"],40,alternative='smaller')


# ttest_1samp - question to be raised
# if alternative == 'less' and (p/2 < alpha) and t < 0:


t: -2.064487357318845
p: 0.03916402791493093
Reject Null Hypothesis for less-thane test


In [121]:
std2, pvalue2 = ztest(data["age"],value=40,alternative="smaller")
print(std2,pvalue2)
# p-value less than 5% - Ho rejected.  

-2.064487357318845 0.019485765689848434


##### One tailed test - upper tail
Ho - Average age is less than equal to 40 /  Ha - Average age is greater than 40

In [122]:
one_sample_one_tailed(data["age"],40,alternative='larger')

t: -2.064487357318845
p: 0.03916402791493093
Failed to reject Null Hypothesis


In [123]:
std2, pvalue2 = ztest(data["age"],value=40,alternative="larger")
print(std2,pvalue2)
# p-value greater than 5% - Failed to reject Ho  

-2.064487357318845 0.9805142343101516
