# Hypotheis Testing Exercise

## Calculate the p-value:

The p-value is calculated based on the chosen distribution and the test statistic value. 
- Lower-tailed test: p-value = CDF(test statistic). 
- Upper-tailed test: p-value = 1 - CDF(test statistic). 
- Two-tailed test: p-value = 2 * min{CDF(test statistic), 1 - CDF(test statistic)}. If the distribution is symmetric around 0, you can simplify this to: p-value = 2 * CDF(-|test statistic|). 

In [None]:
import numpy as np
from scipy import stats

results = []

# 1. One-sample z-test
n1,mean1,sample_mean1, std_dev1, alpha1 = 25,70,69,3,0.05
z1 =(sample_mean1 - mean1)/(std_dev1 / np.sqrt(n1))
p1 = 2 * (1 - stats.norm.cdf(abs(z1)))
results.append(("Q1", z1,p1,p1 < alpha1))

# 2. One-sample z-test
n2,mean2,sample_mean2, std_dev2, alpha2 = 100, 800,790,40,0.05
z2 = (sample_mean2 - mean2) / (std_dev2/np.sqrt(n2))
p2 = 2 * (1 - stats.norm.cdf(abs(z2)))
results.append(("Q2", z2,p2,p2 < alpha2))

# 3. One-sample z-test, left-tailed
n3,mean3,sample_mean3,std_dev3,alpha3 =  36,1200,1182,60,0.02
z3 = (sample_mean3 - mean3) / (std_dev3 / np.sqrt(n3))
p3 = stats.norm.cdf(z3)
results.append(("Q3", z3,p3,p3<alpha3))
results.append(("Q3",z3,p3, p3 < alpha3))

# 4. One-sample z-test
n4,mean4,sample_mean4,std_dev4,alpha4 = 20,6,6.05,0.2,0.05
z4 = (sample_mean4 -mean4) /(std_dev4 / np.sqrt(n4))
p4 = 2 * (1-stats.norm.cdf(abs(z4)))
results.append(("Q4", z4,p4,p4<alpha4))

# 5.One-sample t-test
sample5 = [64,57,65,59,66,58,68,67]
t5,p5 = stats.ttest_1samp(sample5,popmean=60)
results.append(("Q5", t5,p5,p5< 0.10))

# 6. One-sample t-test (right tailed)
sample6 = [392,396,386,309,388,384,403,397,401,391,400,402,394,394,406,406,400]
t6,p6 = stats.ttest_1samp(sample6, popmean=400, alternative="greater")
results.append(("Q6", t6,p6,p6 < 0.05))

# 7. One-proportion z-test
n7,x7,p0_7,alpha7 = 500,235,0.40,0.05
phat7 = x7/n7
z7 = (phat7 - p0_7)/np.sqrt(p0_7* (1-p0_7)/n7)
p7 = stats.norm.cdf(z7) # left tail
results.append(("Q7", z7,p7,p7<alpha7))

# 8. One-proportion z-test
n8,x8,p0_8, alpha8 = 900,243,0.30,0.05
phat8 = x8/n8
z8 = (phat8-p0_8) /np.sqrt(p0_8 * (1-p0_8)/n8)
p8 = 1-stats.norm.cdf(z8)
results.append(("Q8",z8,p8,p8<alpha8))

# 9. Paired t-test
s1 = [64,66,89,77]
s2 = [62,68,85,82]
t9,p9 = stats.ttest_rel(s1,s2)

# 10. Paired t-test
s1_10 = [68,65,66,66,67,66,66,64,69,63]
s2_10 = [65,62,64,65,65,4,59,63,65,58]
t10, p10 =stats.ttest_rel(s1_10, s2_10)
results.append(("Q10",t10,p10,p10<0.05))

print(results)

[('Q1', np.float64(-1.6666666666666667), np.float64(0.09558070454562939), np.False_), ('Q2', np.float64(-2.5), np.float64(0.012419330651552318), np.True_), ('Q3', np.float64(-1.8), np.float64(0.03593031911292579), np.False_), ('Q3', np.float64(-1.8), np.float64(0.03593031911292579), np.False_), ('Q4', np.float64(1.118033988749891), np.float64(0.2635524772829745), np.False_), ('Q5', np.float64(1.9540168418367887), np.float64(0.09162379533890302), np.True_), ('Q6', np.float64(-1.6519512342151643), np.float64(0.9409846004338415), np.False_), ('Q7', np.float64(3.195048252113467), np.float64(0.9993009626793942), np.False_), ('Q8', np.float64(-1.9639610121239295), np.float64(0.9752326932821865), np.False_), ('Q10', np.float64(1.5207949430752485), np.float64(0.1626352860001018), np.False_)]


In [18]:
import numpy as np
from scipy import stats
mu_0 = 400
n = 17
sample_data = [392, 396, 386, 309, 388, 384, 403, 397, 401, 391, 400, 402, 394, 394, 406, 406, 400]
alpha = 0.05
x_bar = sum(sample_data)/n
print(f"x_bar = {x_bar}")

s = np.sqrt(sum([pow(i - x_bar, 2) for i in sample_data])/(n-1))
print(f"s = {s}")

t = (x_bar-mu_0) / (s/np.sqrt(n))
print(f"t = {t}")

p_value =  1 - stats.t.cdf(t, n-1)
print(f"p_value = {p_value}")

x_bar = 391.11764705882354
s = 22.169467610153543
t = -1.6519512342151643
p_value = 0.9409846004338414


In [22]:
n7, x7, p0_7, alpha7 = 500, 235, 0.40, 0.05
phat7 = x7 / n7
print(f" phat = {phat7}")
z7 = (phat7 - p0_7) / np.sqrt(p0_7 * (1 - p0_7) / n7)
print(f"z = {z7}")
p7 = 1 - stats.norm.cdf(z7)  
print(f" p_value = {p7}")

 phat = 0.47
z = 3.195048252113467
 p_value = 0.0006990373206058331


In [None]:
n8,x8,p0_8, alpha8 = 900,243,0.30,0.05
phat8 = x8/n8
print(f" phat = {phat8}")

z8 = (phat8-p0_8) /np.sqrt(p0_8 * (1-p0_8)/n8)
print(f"z = {z8}")

p8 = 1-stats.norm.cdf(z8)
print(f" p_value = {p8}")


 phat = 0.27
z = -1.9639610121239295
 p_value = 0.9752326932821865


In [35]:
import numpy as np
from scipy import stats
s1 = [64,66,89,77]
s2 = [62,68,85,82]
n = 4
d = [a - b for a,b in zip (s1,s2)]
d_bar = sum(d) / n
print(f"d_bar = {d_bar}")
s_d = np.sqrt(sum([pow(i - d_bar,2) for i in d]) / (n -1))
print(f"s_d = {s_d}")
t = (d_bar - 0) / (s_d /np.sqrt(n))
print(f"t = {t}")
p_value = 2 * (1 - stats.t.cdf(abs(t), n-1))
print(f"p_value = {p_value}")


d_bar = -0.25
s_d = 4.031128874149275
t = -0.12403473458920847
p_value = 0.90913171228949


In [38]:
import numpy as np
from scipy import stats
s1 = [68,65,66,66,67,66,66,64,69,63]
s2 = [65,62,64,65,65,64,59,63,65,58]
n = 10
d = [a - b for a,b in zip (s1,s2)]
d_bar = sum(d) / n
print(f"d_bar = {d_bar}")
s_d = np.sqrt(sum([pow(i - d_bar,2) for i in d]) / (n -1))
print(f"s_d = {s_d}")
t = (d_bar - 0) / (s_d /np.sqrt(n))
print(f"t = {t}")
p_value = 2 * (1 - stats.t.cdf(abs(t), n-1))
print(f"p_value = {p_value}")


d_bar = 3.0
s_d = 1.8856180831641267
t = 5.031152949374528
p_value = 0.0007082552579338319


In [1]:
from scipy.stats import norm
print(norm.ppf(0.95))
print(norm.cdf(1.6448536269514722))

1.6448536269514722
0.95
