In [1]:
import numpy as np
from scipy.stats import ttest_1samp, wilcoxon
from statsmodels.stats.power import ttest_power

### `Wilcoxon`
- The **one-sample Wilcoxon** signed-rank test is a non-parametric alternative to the one-sample t-test for data that cannot be assumed to be normally distributed. It is used to determine whether the median of the population from which the sample was drawn is equal to a known standard value (i.e. a theoretical value).


### `Power of test`
- The **power of a binary hypothesis test** is the probability that the test rejects the null hypothesis (H0) when a specific alternative hypothesis (H1) is true — i.e., it is the probability of *not* making a type II error. The statistical power ranges from 0 to 1, and as statistical power increases, the probability of making a type II error (wrongly failing to reject the null hypothesis) decreases.

**Power** = 1 − β = 1 − P(type II error) 

In [2]:
# Sample data: daily energy intake (in kJ) recorded for 11 women.
daily_intake = np.array([
    5260, 5470, 5640, 6180, 6390, 6515,
    6805, 7515, 7515, 8230, 8770,
])

In [3]:
# One-sample t-test.
# H0: the expected (population mean) daily intake equals 7725 kJ.
t_statistic, p_value = ttest_1samp(daily_intake, popmean=7725)
print(t_statistic,p_value)

-2.8207540608310198 0.018137235176105812


In [4]:
# Manual one-sample t-statistic, matching the value ttest_1samp reports:
#     t = (x_bar - mu_0) / (s / sqrt(n))
# where s is the *sample* standard deviation (ddof=1) and n is the sample size.
daily_intake = np.array([5260,5470,5640,6180,6390,6515,
                         6805,7515,7515,8230,8770])
mue = 7725  # hypothesised mean under H0
n_obs = daily_intake.size  # derive n from the data instead of hard-coding 11
standard_error = np.std(daily_intake, ddof=1) / np.sqrt(n_obs)
Tstatic = (np.mean(daily_intake) - mue) / standard_error
print(Tstatic)

-2.8207540608310198


## Question_1
## How is the t-statistic calculated by `ttest_1samp` in the block above (block 3)? What formula is used there? My manual NumPy calculation does not match the answer.

From the videos, I see the formula for the t-statistic as
$t = (\bar{x} - \mu) / (\sigma_{pop} / \sqrt{n})$, i.e. `(x_bar - mue)/(std_pop/sqrt(size of sample))`

I calculated it in two ways, as shown in the block below (see `z1` and `z2`), and did not get -2.8207540608310198 either way.
Also, how is the p-value (0.018137235176105812) calculated in the above example? Please explain.

## Question 2:
Why is sqrt(n) not used in the denominator in block 7?



In [9]:
# Scratch comparison: which denominator reproduces ttest_1samp's statistic?
daily_intake = np.array([
    5260, 5470, 5640, 6180, 6390, 6515,
    6805, 7515, 7515, 8230, 8770,
])
mue = 7725
mean_gap = np.mean(daily_intake) - mue  # numerator shared by every variant

# Variant A: np.std with its default ddof=0 (population SD), scaled by sqrt(11).
z1 = mean_gap / (np.std(daily_intake) / np.sqrt(11))
print(z1)

# Variant B: population SD with no sqrt(n) scaling at all.
z2 = mean_gap / np.std(daily_intake)
print(z2)

# Variant C: population SD scaled by sqrt(10), i.e. sqrt(n - 1).
# Algebraically the same as variant D below; only float rounding differs.
z1 = mean_gap / (np.std(daily_intake) / np.sqrt(10))
print(z1)

# Variant D: sample SD (ddof=1) scaled by sqrt(11).
# Computed and printed twice, exactly as the cell originally did.
sample_sd = np.std(daily_intake, ddof=1)
for attempt in range(2):
    z1 = mean_gap / (sample_sd / np.sqrt(11))
    print(z1)

# Reference value from scipy for comparison with the variants above.
t_statistic, p_value = ttest_1samp(daily_intake, 7725)
print(t_statistic,p_value)

-2.9584318175114594
-0.8920007551395173
-2.82075406083102
-2.8207540608310198
-2.8207540608310198
-2.8207540608310198 0.018137235176105812


In [5]:
# Interpretation: a p-value below 0.05 favours the alternative hypothesis,
# i.e. at the 5% significance level the data deviate significantly from
# the hypothesis that the mean is 7725.
print("one-sample t-test p-value=", p_value)

one-sample t-test p-value= 0.018137235176105812


In [6]:
# One-sample Wilcoxon signed-rank test, run on the differences between each
# observation and the hypothesised value 7725.
# NOTE(review): scipy documents the first return value as a signed-rank sum
# statistic rather than a z-score; the name `z_statistic` is kept unchanged.
# This cell also rebinds `p_value`, replacing the t-test p-value.
differences = daily_intake - 7725
z_statistic, p_value = wilcoxon(differences)
print("one-sample wilcoxon-test p-value", p_value)

one-sample wilcoxon-test p-value 0.0244140625


In [7]:
# Power of the test -- inputs required:
#   1. Effect size: (sample mean - hypothesised mean) / standard deviation.
#      This is a standardized difference, not a test statistic, so it is
#      not divided by sqrt(n).
#   2. The number of observations.
#   3. The alpha level and the alternative: 'two-sided', 'larger' or 'smaller'.
#
# NOTE(review): np.std defaults to ddof=0 (population SD) here, whereas the
# manual t-statistic cell uses ddof=1 -- confirm which SD is intended.
mean_shift = np.mean(daily_intake) - 7725
mean_shift / np.std(daily_intake)

-0.8920007551395173

In [8]:
# Power of a two-sided one-sample t-test at alpha = 0.05 with 11 observations.
# The effect size is the standardized mean difference, entered by hand and
# rounded to three decimals.
effect_size = -0.892
power = ttest_power(effect_size, nobs=11, alpha=0.05, alternative='two-sided')
print(power)

0.760014659598261
