## One sample Z test

In [1]:
import statsmodels.api as sm
from scipy.stats import norm
import scipy.stats as stats
import numpy as np
import math

### Hypothesis Testing

In [65]:
population_proportion = 0.85

### Alpha

In [66]:
alpha = 0.05

In [4]:
# Sample data
success_count = 789  # Number of successes
fail_count = 189
total_count = 987   # Total number of trials
n=total_count

In [5]:
p_hat = success_count/total_count
print("p_hat", p_hat)

p_hat 0.7993920972644377


In [6]:
# Calculate the standard error
standard_error = math.sqrt((population_proportion * (1 - population_proportion)) / n)

# Calculate the test statistic (Z-score)
z_score = (p_hat - population_proportion) / standard_error

print(z_score)

-4.452683475222582


### P-value

In [7]:
p_value = 2 * (1 - norm.cdf(abs(z_score)))
print(p_value)

# Check if the p-value is less than alpha
if p_value < alpha:
    print(f"Reject the null hypothesis. There is evidence of a difference from the population proportion.")
else:
    print(f"Fail to reject the null hypothesis. There is no evidence of a difference from the population proportion.")

# Print the test statistics and p-value
print(f"Z-statistic: {z_score}")
print(f"P-value: {p_value}")

8.480372172492423e-06
Reject the null hypothesis. There is evidence of a difference from the population proportion.
Z-statistic: -4.452683475222582
P-value: 8.480372172492423e-06


### Critical Value

In [8]:

critical_value = norm.ppf(1 - alpha/2)  # Calculate the critical value

print(f"Critical Value: {critical_value}")

Critical Value: 1.959963984540054


### Confidence Interval

In [9]:
# Calculate the margin of error
margin_of_error = critical_value * (p_hat * (1 - p_hat) / n)**0.5

# Calculate the confidence interval
confidence_interval = (p_hat - margin_of_error, p_hat + margin_of_error)

print(f"Confidence Interval: {confidence_interval}")

Confidence Interval: (0.7744091622232083, 0.8243750323056671)


# Alternate Method

In [67]:
import math
from scipy.stats import stats

In [68]:
alpha=0.05

In [86]:
population_mean = 40  # Estimated population average
population_variance = 9  # Population variance

In [87]:
sample_mean = 40.7  # Average waist circumference in your sample
sample_variance = 10  # Sample variance
sample_size = 55

In [88]:

# Calculate the standard error of the sample mean
standard_error = math.sqrt(population_variance / sample_size)

# Calculate the Z-score
z_score = (sample_mean - population_mean) / (standard_error)

In [89]:
# Find the critical Z-value for a two-tailed test at alpha/2 level
critical_value = norm.ppf(1 - alpha / 2)


In [90]:
# Determine whether to reject the null hypothesis
if abs(z_score) > critical_value:
    print("Reject the null hypothesis. There is evidence that the average waist circumference is different from the estimated population average.")
else:
    print("Fail to reject the null hypothesis. There is no evidence that the average waist circumference is different from the estimated population average.")

# Print the Z-score, critical value, and significance level
print(f"Z-score: {z_score:.4f}")
print(f"Critical Z-value: {critical_value:.4f}")
print(f"Significance Level (alpha): {alpha}")

Fail to reject the null hypothesis. There is no evidence that the average waist circumference is different from the estimated population average.
Z-score: 1.7304
Critical Z-value: 1.9600
Significance Level (alpha): 0.05


In [91]:
# Calculate the margin of error
margin_of_error = critical_value * standard_error

# Calculate the confidence interval
confidence_interval = (sample_mean - margin_of_error, sample_mean + margin_of_error)

print(f"Confidence Interval: {confidence_interval}")

Confidence Interval: (39.907155530714114, 41.49284446928589)


### Two Sample Z-test

In [1]:

from scipy.stats import norm
import scipy.stats as stats
import numpy as np
import math

### Hypothesis Testing

In [2]:
delta = 0

### Alpha

In [3]:
alpha = 0.05

In [4]:
# Sample data for the first group
s1 = 356.13 # Sample proportion for the first group
n1 = 1319        # Sample size for the first group

# Sample data for the second group
s2 = 461.65  # Sample proportion for the second group
n2 = 1319        # Sample size for the second group

In [5]:
p_hat_1 = s1/n1
p_hat_2 = s2/n2

print("P hat 1", p_hat_1)
print("P hat 2", p_hat_2)

P hat 1 0.27
P hat 2 0.35


### Z Stats

In [6]:
# Calculate the standard error for each group
se_1 = math.sqrt((p_hat_1 * (1 - p_hat_1)) / n1)
se_2 = math.sqrt((p_hat_2 * (1 - p_hat_2)) / n2)

# Calculate the test statistic (Z-score)
z_score = (p_hat_1 - p_hat_2-delta) / math.sqrt(se_1**2 + se_2**2)

print("z_score : ",z_score)


z_score :  -4.458845147244199


### P- Value

In [8]:
# Calculate the two-tailed p-value
p_value = 2 * (1 - norm.cdf(abs(z_score)))
print(f"P-value: {p_value}")

P-value: 8.240242421964084e-06


In [9]:
# Determine whether to reject the null hypothesis based on the p-value
if p_value < alpha:
    print(f"Reject the null hypothesis. There is evidence of a difference between the two proportions.")
else:
    print(f"Fail to reject the null hypothesis. There is no evidence of a difference between the two proportions.")

Reject the null hypothesis. There is evidence of a difference between the two proportions.


### Critical value

In [10]:
critical_value = norm.ppf(1 - alpha/2)
print("Critical Value :",critical_value )

Critical Value : 1.959963984540054


## Confidence Interval

In [17]:
# Calculate the confidence interval for each group
conf_interval = (p_hat_1-p_hat_2 - critical_value * math.sqrt(se_1**2 + se_2**2), p_hat_1-p_hat_2 + critical_value * math.sqrt(se_1**2 + se_2**2))

print("Confidence Interval : ", conf_interval)


Confidence Interval :  (-0.1151654102318653, -0.044834589768134626)


In [19]:
# Determine whether the confidence intervals overlap or not
if conf_interval[1] < delta or conf_interval[0] > delta:
    print(f"Reject the null hypothesis. There is evidence of a difference between the two proportions.")
else:
    print(f"Fail to reject the null hypothesis. There is no evidence of a difference between the two proportions.")


Reject the null hypothesis. There is evidence of a difference between the two proportions.


# Odds ratio

In [43]:
import scipy.stats as stats
import numpy as np
import math

In [44]:
alpha = 0.05

In [45]:
observed = np.array([[12612,9248],[500,752]])

### Odds Ratio

In [47]:
odds_ratio = (observed[0, 0] * observed[1, 1]) / (observed[0, 1] * observed[1, 0])
print("Odds Ratio : ", odds_ratio)

Odds Ratio :  2.0510865051903115


### P- value

In [48]:
odds_ratio, p_value = stats.fisher_exact(observed)
print("Odd ratio : ", odds_ratio)
print("p Value :",p_value)

Odd ratio :  2.0510865051903115
p Value : 1.6236973626213926e-34


### Critical Value

In [49]:
critical_value = norm.ppf(1 - alpha/2)
print("Critical Value :",critical_value )

Critical Value : 1.959963984540054


### Confidence Interval

In [50]:
ln_OR = np.log(odds_ratio)
se_ln_OR = np.sqrt(1 / observed[0, 0] + 1 / observed[0, 1] + 1 / observed[1, 0] + 1 / observed[1, 1])
z_alpha_2 = stats.norm.ppf(1 - alpha/2)

In [51]:
lower_bound = np.exp(ln_OR - z_alpha_2 * se_ln_OR)
upper_bound = np.exp(ln_OR + z_alpha_2 * se_ln_OR)

In [52]:
alpha = 0.05
z_test = (ln_OR - np.log(1)) / se_ln_OR 
print("Z Score :",z_test)

Z Score : 12.11292240283246


In [53]:
p_value = 2 * (1 - stats.norm.cdf(np.abs(z_test)))
print("p Value : ", p_value)

p Value :  0.0


In [55]:
# Output results
print(f"Odds Ratio: {odds_ratio:.8f}")
print(f"95% Confidence Interval for OR: ({lower_bound:.8f}, {upper_bound:.8f})")
print("P-value for the hypothesis test : ",p_value)


Odds Ratio: 2.05108651
95% Confidence Interval for OR: (1.82600756, 2.30390933)
P-value for the hypothesis test :  0.0


In [30]:
# Decide based on the p-value
if p_value < alpha:
    print("Reject the null hypothesis. There is evidence of a significant association.")
else:
    print("Fail to reject the null hypothesis. There is no evidence of a significant association.")

Reject the null hypothesis. There is evidence of a significant association.


## T- Test statistic

In [56]:
# Set the significance level (alpha)
alpha = 0.05

In [57]:
sample_mean = 11750  # Average white blood cell count
sample_size = 354    # Sample size
sample_stddev = 1725  # Standard deviation

In [58]:
population_mean = 11000  # Upper normal range

In [59]:
df = sample_size - 1
print("Degree of Freedom : ", df)

Degree of Freedom :  353


In [60]:
# Perform a one-sample t-test
t_statistic = (sample_mean - population_mean) / (sample_stddev / np.sqrt(sample_size))
print("Test Statistic : ", t_statistic)


Test Statistic :  8.180385966185556


In [61]:
p_value = 2 * (1 - stats.t.cdf(np.abs(t_statistic), df))
print("p value : ",p_value)

p value :  5.10702591327572e-15


### Critical Value

In [62]:
# Calculate the critical t-value for a two-tailed test
t_critical = stats.t.ppf(1 - alpha/2, df)
print(f"Critical t-value: {t_critical:.4f}")

Critical t-value: 1.9667


In [63]:
# Calculate the standard error of the mean
se_mean = sample_stddev / (sample_size ** 0.5)


In [39]:
# Calculate the confidence interval for the mean
margin_of_error = t_critical * se_mean
confidence_interval = (sample_mean - margin_of_error, sample_mean + margin_of_error)


In [40]:
# Output results
print(f"T-statistic: {t_statistic:.4f}")
print(f"P-value: {p_value:.4f}")
print(f"Critical t-value: {t_critical:.4f}")
print(f"95% Confidence Interval: ({confidence_interval[0]:.4f}, {confidence_interval[1]:.4f})")


T-statistic: 8.1804
P-value: 0.0000
Critical t-value: 1.9667
95% Confidence Interval: (11569.6870, 11930.3130)


In [64]:
# Decide based on the p-value
if p_value < alpha:
    print("Reject the null hypothesis. There is evidence that the average white blood cell count is different from 11,000.")
else:
    print("Fail to reject the null hypothesis. There is no evidence that the average white blood cell count is different from 11,000.")

Reject the null hypothesis. There is evidence that the average white blood cell count is different from 11,000.


# NcNemar test

In [42]:
from scipy.stats import chi2
from scipy.stats import chi2_contingency
import scipy.stats as stats
import numpy as np

In [43]:
alpha = 0.05

In [44]:
df=1

In [45]:
# Create a contingency table with paired data
table = np.array([[306, 142], [376, 709]])

In [46]:
# Perform McNemar's test
chi2_stat=(table[0,1] - table[1,0])**2 / (table[0,1] + table[1,0])
print(f"Chi-squared statistic: {chi2_stat:.8f}")

Chi-squared statistic: 105.70656371


In [47]:
# Perform the McNemar's test
p_value = 1 - stats.chi2.cdf(chi2_stat, df)
print(f"P-value: {p_value:.4f}")

P-value: 0.0000


### Critical Value

In [48]:
critical_value = stats.chi2.ppf(1 - alpha, df)
print("Critical value :",critical_value)

Critical value : 3.841458820694124


In [49]:

# Decide based on the p-value
if p_value < alpha:
    print("Reject the null hypothesis. There is evidence of a significant difference.")
else:
    print("Fail to reject the null hypothesis. There is no evidence of a significant difference.")

Reject the null hypothesis. There is evidence of a significant difference.


# Chi Square Test

In [92]:
import numpy as np
from scipy.stats import chi2_contingency
from scipy.stats import chi2
import scipy.stats as stats

In [93]:
alpha = 0.05

In [108]:
observed = np.array([[98,86,35],[29,47,23],[17,9,17]])
print(observed)

[[98 86 35]
 [29 47 23]
 [17  9 17]]


In [109]:
chi2, p, dof, expected = chi2_contingency(observed)

#### Critical value

In [110]:
critical_value = stats.chi2.ppf(1 - alpha, dof)
print("Critical value :",critical_value)

Critical value : 9.487729036781154


In [111]:
print("Chi Square Test Statistic :",chi2)

Chi Square Test Statistic : 19.449043202964845


In [112]:
print("p value :",p)

p value : 0.0006413156788376182


In [113]:
print("Degree of Freedom :", dof)

Degree of Freedom : 4


In [114]:
print("Expected values : \n",expected)

Expected values : 
 [[87.35734072 86.14404432 45.49861496]
 [39.49030471 38.94182825 20.56786704]
 [17.15235457 16.91412742  8.93351801]]
