In [13]:
import numpy as np

# One-Sample T-Test
The one-sample t-test tests wether the mean of a sample differs significantly 
from a known or hypothesized population mean. It assumes the data is normally 
distributed.

* $H_0$ is that the sample mean $\bar{x}$ is equal to the population mean $\mu$.
* $H_1$:   $\bar{x} \neq \mu$

Test statistic:

$$ t = \frac{\bar{x}-\mu}{s/\sqrt{n} }$$

In [26]:
from scipy import stats 
class TTest:

    def __init__(self):
        self.t_stat = None
        self.p_value = None
    
    def one_sample_t_test(self, sample: list, mu: float) -> float:
        """
        Perform a one-sample t-test to check if the sample mean is significantly
        different from the population mean

        Parameters:
            sample: array-like. 
                The sample data.
            mu: float.
                The population mean, 

        Returns:
            t-stat: float.
                The estimated t-statistic
            p-value: float.
                The estimated p-value for the t-test. This is the probability of 
                obtaining test results *at least as extreme* as the observed in
                the data
        """

        # GUarantee that sample is a numpy array
        X = np.array(sample)

        # Sample size n and degrees of freedom df
        n = len(X)
        df = n-1
        
        # Compute sample statistics 
        X_bar = sum(X)/n                  #Sample mean
        X_std = (sum( (X-X_bar)**2 )/(n-1))**0.5 # Sample standard deviation

        # t-statistic
        self.t_stat = (X_bar - mu)/(X_std/(n**0.5))
        
        # Two-Tailed P-Value 
        self.p_value = 2 * (1 - stats.t.cdf( abs(self.t_stat, df))   )

        return self.t_stat, self.p_value


In [27]:
sample = [2.5, 3.0, 2.8, 3.5, 3.1, 2.9, 3.0]
ttest = TTest()
t_stat, p_val = ttest.one_sample_t_test(sample, mu=3.0)
print("T-Statistic:", t_stat)
print("P-Value:", p_val)

T-Statistic: -0.2487080016869026
P-Value: 1.188119086490217


# Two-Sample T-Test (t-test for independent samples)
The two-sample t-test compares the means of two independent samples to determine 
if they come from populations with the same mean. It assumes that both samples 
are normally distributed and have equal variances.


* $H_0$:  The means of the two samples are equal ($\bar{x_1} = \bar{x_2}$)
* $H_1$:  ($\bar{x_1} = \bar{x_2}$)$

Test statistic:

$$ t = \frac{\bar{x_1}-\bar{x_2}}{  \sqrt{ \frac{s_1^2}{n_1} + \frac{s_2^2}{n_2} }  }$$

In [45]:
from scipy import stats 
class TTest:

    
    def two_sample_t_test(self, sample_1: list, sample_2: list) -> float:
        """
        Perform a two-sample t-test to check if the sample mean is significantly
        different from the population mean.
        Assumption: both samples have the same variance.

        Parameters:
            sample_1: array-like. 
                The first sample data.
            sample_2: array-like. 
                The second sample data.
            mu: float.
                The population mean, 

        Returns:
            t-stat: float.
                The estimated t-statistic
            p-value: float.
                The estimated p-value for the t-test. This is the probability of 
                obtaining test results *at least as extreme* as the observed in
                the data
        """

        # GUarantee that sample is a numpy array
        X1= np.array(sample_1)
        X2= np.array(sample_2)

        # Sample size n and degrees of freedom df
        n1, n2 = len(X1), len(X2)
        df = n1 + n2 -2
        
        # Compute sample statistics 
        X_bar1 = sum(X1)/n1                  #Sample mean
        X_bar2 = sum(X2)/n2                  #Sample mean

        X_std1 = (sum( (X1-X_bar1)**2 )/(n1-1))*0.5 # Sample standard deviation
        X_std2 = (sum( (X2-X_bar2)**2 )/(n2-1))*0.5 # Sample standard deviation

        # t-statistic
        denominator = ( X_std1**2/n1 + X_std2**2/n2 )**0.5
        self.t_stat = (X_bar1 - X_bar2)/denominator
        # p-value
        self.p_value = 2 * (1 - stats.t.cdf( abs(self.t_stat), df)   )

        return self.t_stat, self.p_value


In [46]:
# Example data
sample1 = [75, 78, 74, 72, 77]
sample2 = [68, 65, 70, 67, 69]

# Perform the t-test
ttest = TTest()
t_stat, p_val = ttest.two_sample_t_test(sample1, sample2)
print(f"T-Statistic: {t_stat}, P-Value: {p_val}")

T-Statistic: 4.869896747011, P-Value: 0.0012401343566106338


# Anova (Analysis of Variance)

# Chi-Squared Test for Independence

