<a href="https://colab.research.google.com/github/arnavxyz/scaler_probability_statistics/blob/main/Lec_3_Z_test_contd.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import numpy as np
import pandas as pd
import random
from scipy.stats import norm

# Critical Value

In [3]:
# Observed Z-statistic
z = (1850-1800)/(100/50**0.5)
print(z)

p_val = 1 - norm.cdf(z)
print(p_val)

3.5355339059327378
0.00020347600872250293


In [4]:
# mu = 1800 std = 100 n = 50 alpha = 0.01
# step-1: compute Z-critical
z_cr = norm.ppf(1 - 0.01)
print(z_cr)

se = 100/np.sqrt(50)
x_cr = 1800 + (z_cr*se)
print(x_cr)

2.3263478740408408
1832.8995271426638


As avg. weekly sales for 50 stores is 1850 which is greater than 1832, we can reject the null hypothesis

# Confidence Interval

In [5]:
# Observed sales in 50 stores 1850, what can be the population mean
# pop. mean lies between [sample mean - Z99*SE,sample mean + Z99*SE] with 99% confidence
z = norm.ppf(0.99)
lb = 1850 - z*se
ub = 1850 + z*se
lb,ub

(1817.1004728573362, 1882.8995271426638)

# Power of Test

In [6]:
from statsmodels.stats import power

In [7]:
data = [55, 45, 52, 48, 55, 52, 52, 53, 48, 52, 53, 47, 54, 51, 52,
        51, 48, 52, 53, 54, 51, 51, 52, 54, 47, 52, 53, 48, 51, 54]
samp_mean = np.mean(data)
samp_std = np.std(data)
sample_size = len(data)
print(samp_mean, samp_std)

51.333333333333336 2.5342103744997613


In [8]:
hypo_mean = 50
alpha = 0.05
effect_size = (samp_mean - hypo_mean) / samp_std
print(effect_size)

0.5261336417646574


In [9]:
power = power.zt_ind_solve_power(effect_size=effect_size,
                                 nobs1=sample_size,
                                 alpha=alpha,
                                 ratio=0,
                                 alternative='two-sided')

print('Power of the test:', power)

Power of the test: 0.8216812302268112


# Two Sample Z-test

In [10]:
np.random.seed(123)

#generating 100 patients recovery time between 5days and 20days
sample1 = np.random.uniform(5, 20, 100)
#generating 90 patients recovery times between 5 and 30 days
sample2 = np.random.uniform(5, 30, 90)

sample1 = np.round(sample1,0)
sample2 = np.round(sample2,0)

print(sample1[:4])
print(sample2[:4])

[15.  9.  8. 13.]
[18. 22.  8.  8.]


In [11]:
x1 = np.mean(sample1)
x2 = np.mean(sample2)
s1 = np.std(sample1)
s2 = np.std(sample2)
n1= len(sample1)
n2 = len(sample2)

print(x1,s1)
print(x2,s2)

def test_stat(x1,x2,s1,s2,n1,n2):
  num = x1-x2
  den = np.sqrt((s1**2/n1) + (s2**2/n2))
  return num/den

z_stat = test_stat(x1,x2,s1,s2,n1,n2)
print(z_stat)

12.47 3.6890513685770223
17.92222222222222 7.169732333106659
-6.483126653564481


In [12]:
p_val = 2*(1 - norm.cdf(abs(z_stat)))
print(p_val)

8.984102350950707e-11


In [13]:
from statsmodels.stats import weightstats

z_score, pval = weightstats.ztest(x1 = sample1, x2 = sample2, value = 0, alternative = 'two-sided')
print("Z-score: ", z_score)
print("p-value: ", pval)



Z-score:  -6.649347653377499
p-value:  2.943948874914636e-11


In [14]:
# left tailed test
# H1: mu1<mu2
p_val = norm.cdf(z_stat)
p_val

4.492050878648636e-11

In [15]:
# left tailed test --> alternative = "smaller"
z_score, pval = weightstats.ztest(x1 = sample1, x2 = sample2, value = 0, alternative = 'smaller')
print("Z-score: ", z_score)
print("p-value: ", pval)

Z-score:  -6.649347653377499
p-value:  1.471974437457318e-11


In [16]:
# right tailed test --> alternative = "larger"
# h1: mu1>mu2
z_score, pval = weightstats.ztest(x1 = sample1, x2 = sample2, value = 0, alternative = 'larger')
print("Z-score: ", z_score)
print("p-value: ", pval)

Z-score:  -6.649347653377499
p-value:  0.9999999999852802


In [19]:
z_stat = test_stat(30,32,3,2.5,50,60)
print(z_stat)

-3.751832396884334


In [21]:
p_val = norm.cdf(z_stat)
print(p_val)

8.777340507285842e-05
