In [None]:
import numpy as np
from scipy import stats

In [None]:
# Hypothesis test for difference in two population means assuming the population variances are equal.
def equal_var_t_test(sample1, sample2, alpha):
    """
    Performs a two-sided, two-sample t-test assuming equal population variances
    (pooled-variance Student's t-test), without using the built-in ttest_ind.

    Args:
      sample1: A 1-D list or numpy array of numbers representing the first sample.
      sample2: A 1-D list or numpy array of numbers representing the second sample.
      alpha: The significance level; the null hypothesis (equal means) is
        rejected when the p-value is strictly less than alpha.

    Returns:
      A dictionary with the keys "t-statistic", "p-value",
      "degrees of freedom" and "conclusion".

    Raises:
      ValueError: If either sample is empty, or if the two samples together
        hold fewer than three observations (no degrees of freedom are left
        to estimate the pooled variance).
    """
    x1 = np.asarray(sample1, dtype=float)
    x2 = np.asarray(sample2, dtype=float)
    n1 = x1.size
    n2 = x2.size

    if n1 == 0 or n2 == 0:
        raise ValueError("Both samples must contain at least one observation.")

    # Degrees of freedom
    df = n1 + n2 - 2
    if df < 1:
        raise ValueError(
            "The samples must contain at least three observations in total."
        )

    mean1 = x1.mean()
    mean2 = x2.mean()

    # Pooled standard deviation. Pooling the sums of squared deviations is
    # algebraically the same as pooling (n - 1) * s**2, but it stays defined
    # when one sample holds a single observation (its sum of squares is 0,
    # whereas its ddof=1 standard deviation would be NaN).
    ss1 = np.sum((x1 - mean1) ** 2)
    ss2 = np.sum((x2 - mean2) ** 2)
    sp = np.sqrt((ss1 + ss2) / df)

    # t-statistic
    t_stat = (mean1 - mean2) / (sp * np.sqrt(1 / n1 + 1 / n2))

    # p-value (two-tailed). The survival function is used instead of 1 - cdf
    # because the latter cancels to exactly 0.0 once the cdf rounds to 1.0.
    p_value = 2 * stats.t.sf(abs(t_stat), df)

    conclusion = "Fail to reject the null hypothesis."
    if p_value < alpha:
        conclusion = "Reject the null hypothesis."

    return {
        "t-statistic": t_stat,
        "p-value": p_value,
        "degrees of freedom": df,
        "conclusion": conclusion,
    }


In [None]:
# Generate two independent, normally distributed samples of 10 observations each.
# Both populations share the same mean, so the null hypothesis of equal means
# is true by construction.

mu1 = 15
mu2 = 15

# NOTE(review): the population standard deviations differ (1 vs 2) although the
# pooled t-test applied to these samples assumes equal variances - confirm
# this is intentional.
std1 = 1
std2 = 2

# A seeded generator makes the samples (and every downstream result) identical
# on each Restart & Run All.
rng = np.random.default_rng(42)
data1 = rng.normal(mu1, std1, 10)
data2 = rng.normal(mu2, std2, 10)

# Save to text files, one value per line with five decimal places
np.savetxt("sample_data1.txt", data1, fmt="%.5f")
np.savetxt("sample_data2.txt", data2, fmt="%.5f")

In [None]:
# Run the hand-written pooled-variance t-test at the 5% significance level
# and print a short report.
result = equal_var_t_test(data1, data2, alpha=0.05)

report_lines = [
    "Test Results:",
    f"t-statistic: {result['t-statistic']:.5f}",
    f"p-value: {result['p-value']:.5f}",
    f"Conclusion: {result['conclusion']}",
]
print("\n".join(report_lines))

Test Results:
t-statistic: -0.02368
p-value: 0.98137
Conclusion: Fail to reject the null hypothesis.


In [None]:
# Hypothesis test for difference in two population means assuming the population
# variances are equal, using SciPy's built-in function ttest_ind.
t_stat, p_value = stats.ttest_ind(data1, data2, equal_var=True)
print(f"t-statistic = {t_stat:.5f}, p-value = {p_value:.5f}")

t-stitistic = -0.02368, p-value = 0.98137


In [None]:
# Hypothesis test for the mean difference when the two samples are related (paired)

def paired_t_test(sample1, sample2, alpha):
    """
    Performs a two-sided paired t-test by running a one-sample t-test on the
    element-wise differences sample1 - sample2 against a mean of zero.

    Args:
      sample1: A list or numpy array representing the first sample.
      sample2: A list or numpy array representing the second sample; its
        i-th element must be the partner of the i-th element of sample1.
      alpha: The significance level; the null hypothesis (mean difference of
        zero) is rejected when the p-value is strictly less than alpha.

    Returns:
      A dictionary containing the t-statistic, p-value, and a conclusion
      based on the hypothesis test.

    Raises:
      ValueError: If the two samples do not have the same shape, since the
        observations could then not be matched up pair by pair.
    """
    first = np.asarray(sample1)
    second = np.asarray(sample2)

    # Without this check numpy would silently broadcast a length-1 sample
    # against a longer one and produce a meaningless "paired" result.
    if first.shape != second.shape:
        raise ValueError(
            "Paired samples must have the same shape, got "
            f"{first.shape} and {second.shape}."
        )

    # Calculate the differences between paired observations
    differences = first - second

    # Perform a one-sample t-test on the differences
    t_stat, p_value = stats.ttest_1samp(differences, 0)  # Test if the mean difference is zero

    conclusion = "Fail to reject the null hypothesis."
    if p_value < alpha:
        conclusion = "Reject the null hypothesis."

    return {
        "t-statistic": t_stat,
        "p-value": p_value,
        "conclusion": conclusion,
    }

In [None]:
# Generate an example of two related (paired) datasets: the second sample is the
# first one plus independent zero-mean noise, so the true mean difference is zero.
# A seeded generator makes the samples identical on each Restart & Run All.
rng = np.random.default_rng(43)
data_related1 = rng.normal(50, 10, 20)
data_related2 = data_related1 + rng.normal(0, 3, 20)

# Save to text files, one value per line with four decimal places
np.savetxt("sample_data3.txt", data_related1, fmt="%.4f")
np.savetxt("sample_data4.txt", data_related2, fmt="%.4f")

In [None]:
# Run the hand-written paired t-test at the 5% significance level
# and print a short report.
result = paired_t_test(data_related1, data_related2, alpha=0.05)

paired_report_lines = [
    "Paired t-test Results:",
    f"t-statistic: {result['t-statistic']:.5f}",
    f"p-value: {result['p-value']:.5f}",
    f"Conclusion: {result['conclusion']}",
]
print("\n".join(paired_report_lines))

Paired t-test Results:
t-statistic: 1.24702
p-value: 0.22754
Conclusion: Fail to reject the null hypothesis.


In [None]:
# Paired t-test on the same related samples using SciPy's built-in ttest_rel.
t_statistic, p_value = stats.ttest_rel(data_related1, data_related2)

print(
    f"t-statistic: {t_statistic:.5f}",
    f"p-value: {p_value:.5f}",
    sep="\n",
)


t-statistic: 1.24702
p-value: 0.22754
