# p- value one sample T-Test

In [4]:
import scipy.stats as stats

sample = [68, 72, 75, 80, 65, 78, 77, 73, 69, 74, 76, 71, 79, 66, 70]
population_mean = 75  # Hypothetical population mean

# Perform One-Sample T-Test
t_stat, p_value = stats.ttest_1samp(sample, population_mean)
print(f"T-Statistic: {t_stat}")
print(f"P-Value: {p_value}")
# Decision
if p_value < 0.05:
    print("Conclusion: Reject the null hypothesis (Significant difference).")
else:
    print("Conclusion: Fail to reject the null hypothesis (No significant difference).")

T-Statistic: -1.7623050920413947
P-Value: 0.09982632814120534
Conclusion: Fail to reject the null hypothesis (No significant difference).


# One Sample Z-Test

In [8]:
from scipy.stats import norm
#Given data
x_bar = 75   # Sample mean
mu = 70      # Population mean
sigma = 10   # Population standard deviation
n = 50       # Sample size
# Compute Z-score
z = (x_bar - mu) / (sigma / (n ** 0.5))
# Compute p-value (two-tailed test)
p_value = 2 * (1 - norm.cdf(abs(z)))
print(f"Z-score: {z}")
print(f"P-value: {p_value}")

Z-score: 3.5355339059327378
P-value: 0.00040695201744500586


# Two Sample Z-Test

In [9]:
from scipy.stats import norm
# Given data
x1, x2 = 100, 95  # Sample means
sigma1, sigma2 = 15, 12  # Population standard deviations
n1, n2 = 40, 35  # Sample sizes
# Compute Z-score
z = (x1 - x2) / ((sigma1**2 / n1 + sigma2**2 / n2) ** 0.5)
# Compute p-value
p_value = 2 * (1 - norm.cdf(abs(z)))
print(f"Z-score: {z}")
print(f"P-value: {p_value}")

Z-score: 1.602162088866968
P-value: 0.10911977103433701


# Z-Test for Proportions

In [12]:
from scipy.stats import norm
# Given data
x1, n1 = 120, 500  # Sample 1: 120 successes out of 500
x2, n2 = 100, 500  # Sample 2: 100 successes out of 500
# Compute proportions
p1, p2 = x1 / n1, x2 / n2
# Compute pooled proportion
p = (x1 + x2) / (n1 + n2)
# Compute Z-score
z = (p1 - p2) / ((p * (1 - p) * (1/n1 + 1/n2)) ** 0.5)
# Compute p-value
p_value = 2 * (1 - norm.cdf(abs(z)))
print(f"Z-score: {z}")
print(f"P-value: {p_value}")

Z-score: 1.5267620413811476
P-value: 0.1268201936774891


# T-Test

In [13]:
import numpy as np
import scipy.stats as stats

# Sample Data
# One-Sample T-Test: Comparing sample mean to a population mean
sample = [68, 72, 75, 80, 65, 78, 77, 73, 69, 74, 76, 71, 79, 66, 70]
population_mean = 75  

# Independent (Two-Sample) T-Test: Comparing two independent groups
group_A = [85, 87, 90, 92, 88, 91, 86, 84, 89, 93]
group_B = [78, 80, 75, 74, 79, 81, 76, 77, 82, 83]

# Paired T-Test: Comparing before and after (same subjects)
before = [65, 70, 72, 68, 74, 75, 69, 71, 73, 76]
after  = [78, 75, 80, 74, 79, 83, 77, 78, 80, 85]

# Perform One-Sample T-Test
t_stat_one, p_value_one = stats.ttest_1samp(sample, population_mean)

# Perform Independent (Two-Sample) T-Test
t_stat_ind, p_value_ind = stats.ttest_ind(group_A, group_B)

# Perform Paired T-Test
t_stat_paired, p_value_paired = stats.ttest_rel(before, after)

# Print results
print("\n=== One-Sample T-Test ===")
print(f"T-Statistic: {t_stat_one}, P-Value: {p_value_one}")
print("Conclusion: ", "Significant difference" if p_value_one < 0.05 else "No significant difference")

print("\n=== Independent Two-Sample T-Test ===")
print(f"T-Statistic: {t_stat_ind}, P-Value: {p_value_ind}")
print("Conclusion: ", "Significant difference" if p_value_ind < 0.05 else "No significant difference")

print("\n=== Paired T-Test ===")
print(f"T-Statistic: {t_stat_paired}, P-Value: {p_value_paired}")
print("Conclusion: ", "Significant difference" if p_value_paired < 0.05 else "No significant difference")


=== One-Sample T-Test ===
T-Statistic: -1.7623050920413947, P-Value: 0.09982632814120534
Conclusion:  No significant difference

=== Independent Two-Sample T-Test ===
T-Statistic: 7.385489458759963, P-Value: 7.503138172108932e-07
Conclusion:  Significant difference

=== Paired T-Test ===
T-Statistic: -10.363636363636362, P-Value: 2.6559167524918187e-06
Conclusion:  Significant difference


# chi-square Test

chi-square test of Independence

In [17]:
import scipy.stats as stats
import numpy as np
# Observed data (contingency table)
#          Like  Dislike
# Male     50      30
# Female   20      40

observed = np.array([[50, 30], [20, 40]])

# Perform Chi-Square Test of Independence
chi2_stat, p_value, dof, expected = stats.chi2_contingency(observed)

print(f"Chi-Square Statistic: {chi2_stat}")
print(f"P-Value: {p_value}")
print(f"Degrees of Freedom: {dof}")
print(f"Expected Frequencies:\n{expected}")

# Interpretation
if p_value < 0.05:
    print("Reject Null Hypothesis: There is a significant relationship.")
else:
    print("Fail to Reject Null Hypothesis: No significant relationship.")

Chi-Square Statistic: 10.529166666666667
P-Value: 0.0011750518530845063
Degrees of Freedom: 1
Expected Frequencies:
[[40. 40.]
 [30. 30.]]
Reject Null Hypothesis: There is a significant relationship.


chi-squre test of goodness of fit

In [19]:
import scipy.stats as stats
import numpy as np

# Observed frequencies (actual count in data)
observed = np.array([60, 40, 50])  # Math, Science, Arts

# Expected frequencies (based on expected percentage)
expected = np.array([50, 50, 50])  # Assuming equal preference

# Perform Chi-Square Goodness-of-Fit Test
chi2_stat, p_value = stats.chisquare(observed, expected)

print(f"Chi-Square Statistic: {chi2_stat}")
print(f"P-Value: {p_value}")

# Interpretation
if p_value < 0.05:
    print("Reject Null Hypothesis: The distribution is significantly different.")
else:
    print("Fail to Reject Null Hypothesis: The distribution follows the expected pattern.")

Chi-Square Statistic: 4.0
P-Value: 0.1353352832366127
Fail to Reject Null Hypothesis: The distribution follows the expected pattern.


# F-test and Anova

In [20]:
import scipy.stats as stats

# Sample data: Three groups representing different study techniques
group1 = [78, 85, 88, 92, 75]
group2 = [80, 83, 85, 90, 79]
group3 = [65, 70, 72, 68, 75]

# Perform One-Way ANOVA
F_statistic, p_value = stats.f_oneway(group1, group2, group3)

print(f"F-Statistic: {F_statistic}")
print(f"P-Value: {p_value}")

# Interpretation
if p_value < 0.05:
    print("Reject Null Hypothesis: At least one group is significantly different.")
else:
    print("Fail to Reject Null Hypothesis: No significant difference between groups.")

F-Statistic: 10.96750902527076
P-Value: 0.001955235028289677
Reject Null Hypothesis: At least one group is significantly different.


In [21]:
import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import ols

# Creating the dataset
data = pd.DataFrame({
    'Diet': ['Keto', 'Keto', 'Keto', 'Vegan', 'Vegan', 'Vegan', 'Low-Carb', 'Low-Carb', 'Low-Carb'],
    'Exercise': ['No Exercise', 'Moderate', 'High', 'No Exercise', 'Moderate', 'High', 'No Exercise', 'Moderate', 'High'],
    'WeightLoss': [2.1, 3.4, 5.0, 1.5, 2.8, 4.5, 2.0, 3.2, 4.8]
})

# Performing Two-Way ANOVA
model = ols('WeightLoss ~ C(Diet) + C(Exercise) + C(Diet):C(Exercise)', data=data).fit()
anova_table = sm.stats.anova_lm(model)

# Display results
print(anova_table)

                      df        sum_sq   mean_sq    F  PR(>F)
C(Diet)              2.0  5.088889e-01  0.254444  0.0     NaN
C(Exercise)          2.0  1.268222e+01  6.341111  0.0     NaN
C(Diet):C(Exercise)  4.0  1.111111e-02  0.002778  0.0     NaN
Residual             0.0  6.310887e-29       inf  NaN     NaN


  (model.ssr / model.df_resid))
