# 1 sample tests

## Testing means with unknown variance

In [1]:
from scipy import stats
import numpy as np

### H0: μ = 10 <br>  Ha: μ $\neq$ 10 

In [2]:
# Measurements from the 10 sampled containers
sample = [
    10.2, 9.7, 10.1, 10.3, 10.1,
    9.8, 9.9, 10.4, 10.3, 9.8,
]

In [3]:
# Population mean stated by H0 (μ = 10)
hypothesized_mean = 10

https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.ttest_1samp.html

In [4]:
# One-sample t-test of the sample mean against the hypothesized mean.
# Note: by default, ttest_1samp returns the p-value for a two-tailed test.
t_statistic, p_value = stats.ttest_1samp(sample, popmean = hypothesized_mean)

In [5]:
# Two-tailed p-value returned by ttest_1samp
p_value

0.46004898227096647

In [6]:
# Compare the p-value with the 5% significance level and report the decision
α = 0.05
print('Reject H0' if p_value < α else 'Cannot reject H0')

Cannot reject H0


### H0: μ $\geq$ 10 $\;\;\;\;\;\;$ H0: μ $\leq$ 10  <br>  Ha: μ < 10 $\;\;\;\;\;\;\;$ Ha: μ > 10

In [7]:
# For a one-tailed test the two-tailed p-value is halved ONLY when the t statistic
# lies on the side of Ha; when it lies on the opposite side the p-value is 1 - p/2.
p_value_greater = p_value/2 if t_statistic > 0 else 1 - p_value/2   # Ha: μ > 10
p_value_less = 1 - p_value_greater                                   # Ha: μ < 10

# Value checked in the next cell: the right-tailed test (Ha: μ > 10).
# Use p_value_less instead to test Ha: μ < 10.
p_value_1tailed = p_value_greater

In [8]:
# Decision for the one-tailed test at the 5% significance level
α = 0.05
print('Reject H0' if p_value_1tailed < α else 'Cannot reject H0')

Cannot reject H0


## Testing proportion

In [9]:
import numpy as np
from statsmodels.stats.proportion import proportions_ztest

In [10]:
# proportions_ztest(count, nobs, value=None, alternative='two-sided')
#
# count: number of successes in nobs
# nobs: number of trials
# value: hypothesized proportion (in a one-sample test)
# alternative: alternative hypothesis
#              = 'smaller' if p < hypothesized_proportion
#              = 'larger'  if p > hypothesized_proportion
#              = 'two-sided' if p != hypothesized_proportion
#
# prop_var: by default, the standard error is calculated using the sample proportion
#           and not the hypothesised proportion. This is the classical approach and is
#           not recommended, so the test below sets it to the hypothesised
#           proportion = 0.2.

# Observed proportion, number of observations, and the proportion stated by H0
sample_proportion = 0.175
sample_size = 400
hypothesized_proportion = 0.2

https://www.statsmodels.org/stable/generated/statsmodels.stats.proportion.proportions_ztest.html

In [11]:
# Convert the observed proportion back into a count of successes
num_successes = sample_proportion*sample_size
num_trials = sample_size

# Two-sided one-sample z-test for a proportion.
# prop_var is tied to hypothesized_proportion (instead of a second hard-coded 0.2) so the
# standard error always uses the proportion under H0, even if the hypothesis is changed.
z_statistic, p_value = proportions_ztest(num_successes, num_trials, value = hypothesized_proportion, alternative = 'two-sided', prop_var = hypothesized_proportion)

### H0: $p$ = 0.2 <br>  Ha: $p$ $\neq$ 0.2

In [12]:
# Decision for the proportion test at the 5% significance level
α = 0.05
print('Reject H0' if p_value < α else 'Cannot reject H0')

Cannot reject H0


# 2 sample tests

## Comparing samples with unknown variances

### H0: $μ_{1}$ = $μ_{2}$ <br>  Ha: $μ_{1}$ $\neq$ $μ_{2}$

In [13]:
# Two independent samples of 10 observations each
sample1 = [
    10.2, 9.7, 10.1, 10.3, 10.1,
    9.8, 9.9, 10.4, 10.3, 9.8,
]
sample2 = [
    10.1, 9.2, 10.3, 11, 10.2,
    9.1, 9.2, 10.3, 10.7, 9.3,
]

In [14]:
# Two-sample t-test for independent samples; the returned p-value is two-tailed.
# equal_var = True is scipy's default (pooled-variance test), spelled out here.
# NOTE(review): the section title says "unknown variances" - if the population variances
# cannot be assumed equal, Welch's test (equal_var = False) may be intended; confirm.
t_statistic, p_value = stats.ttest_ind(sample1, sample2, equal_var = True)

In [15]:
# Report the p-value, then the decision at the 5% significance level
α = 0.05
print('P value = ', p_value)
print('Reject H0' if p_value < α else 'Cannot reject H0')

P value =  0.6099583638259121
Cannot reject H0


## Comparing proportions

The proportions of men and women who selected watching television as their main leisure-time activity are summarised in the given table.
<br>
Use α = 0.05.

### Solution

### H0: $p_{1}$ = $p_{2}$ <br>  Ha: $p_{1}$ $\neq$ $p_{2}$

In [16]:
import numpy as np
from statsmodels.stats import proportion

In [17]:
# Successes and sample sizes, one entry per group (same group order in both arrays)
successes = np.array(
    [63, 60]
)
sample_sizes = np.array(
    [150, 200]
)

In [18]:
# Two-sample z-test for proportions (two-sided alternative).
# With two counts and no `value` given, H0 is that the two proportions are equal.
z_statistic, p_value = proportion.proportions_ztest(
    count = successes,
    nobs = sample_sizes,
    alternative = 'two-sided',
)

In [19]:
# z statistic returned by the two-sample proportions test
z_statistic

2.3270751647490346

In [20]:
# Report the p-value, then the decision at the 5% significance level
α = 0.05
print('P value = ', p_value)
print('Reject H0' if p_value < α else 'Cannot reject H0')

P value =  0.019961265070318403
Reject H0


#### Conclusion: The proportions are statistically different

## Comparing Matched Samples

### H0: $μ_{1}$ $\leq$ $μ_{2}$ <br>  Ha: $μ_{1}$ > $μ_{2}$

In [21]:
# Matched (paired) observations: element i of each list belongs to the same pair
sample_8hr = [
    6, 6, 7, 4,
    3, 9, 7, 6,
]
sample_5hr = [
    5, 4, 7, 3,
    5, 8, 5, 6,
]

In [22]:
from scipy.stats import ttest_rel

In [23]:
# Paired-samples t-test on the matched observations; the returned p-value is two-tailed
test_statistic, p_value = ttest_rel(sample_8hr, sample_5hr)

In [24]:
# Ha: μ1 > μ2 is a right-tailed test, so the p-value is the upper-tail probability of the
# paired t statistic with n - 1 degrees of freedom (n = number of pairs).
# Computing it from test_statistic instead of `p_value = p_value/2`:
#   - equals half the two-tailed p-value whenever the statistic is positive,
#   - stays correct when the statistic is negative (halving would understate it),
#   - does not shrink the p-value again if this cell is re-run.
p_value = stats.t.sf(test_statistic, df = len(sample_8hr) - 1)

α = 0.05
if p_value < α:
    print('Reject H0')
else:
    print('Cannot reject H0')

Cannot reject H0


In [25]:
# One-tailed p-value computed in the previous cell
p_value

0.1084187728094503