In [15]:
import numpy as np
from scipy.special import gamma

def probability_function(my_list : float):
    sample_mean = sum(my_list)/len(my_list)

    all_x_vals = []
    for i in my_list: 
        x_vals = (i - sample_mean)**2
        all_x_vals.append(x_vals)

        std_dev = np.sqrt(((1/(len(my_list)-1))) * np.sum(all_x_vals))
        
    return sample_mean, std_dev


In [3]:
def finding_t0(sample_mean, population_mean, std_dev, my_list):
    t0 = (sample_mean - population_mean)/(std_dev / np.sqrt(len(my_list)))
    return t0

In [10]:
def t_distribution_pdf(x : float, nu : int):

    coeff = gamma((nu + 1) / 2) / (np.sqrt(nu * np.pi) * gamma(nu / 2))
    density = coeff * (1 + x**2 / nu) ** (-0.5 * (nu + 1))
   
    return density

def find_t_star(prob : float, nu : int, x_start :float = 0, x_end : float = 20, num_points=10000):
    # Define the x values
    x = np.linspace(x_start, x_end, num_points)
   
    # Apply the density function to the x values
    y = t_distribution_pdf(x, nu)
   
    # This next line is the integration (exercise: why does this work?)
    cdf = np.cumsum(y) * (x[1] - x[0])
    
    # Find the t-value where the cumulative probability reaches half of the required probability
    target_half_prob = prob / 2
    index = np.where(cdf >= target_half_prob)[0][0]
    
    return x[index]


In [16]:
#Insert any list, probability and expected mean
my_list = [92.64,79.00,84.79,97.41,93.68,65.23,84.50,73.49,73.97,79.11]
nu = len(my_list) - 1 
prob = 0.95
population_mean = 75

#Code to call on functions
sample_mean, std_dev = probability_function(my_list)
print(f"The sample mean is {sample_mean}")
print(f"The standard deviation is {std_dev}")

t0 = finding_t0(sample_mean, population_mean, std_dev, my_list)
print(f"t0 is {t0}")

t_star = find_t_star(prob, nu, x_start=0, x_end=20, num_points=10000)
print(f"t_star is {t_star}")

The sample mean is 82.382
The standard deviation is 10.193467189005581
t0 is 2.290087686017293
t_star is 2.2522252225222523


In [17]:
def comparing_t0_tstar():
    if -t_star <= t0 <= t_star:
        print("t0 is in the range of [-t_star, t_star]:", True)
    else:
        print("t0 is in the range of [-t_star, t_star]:", False) 

comparing_t0_tstar()

t0 is in the range of [-t_star, t_star]: False


In [None]:
#Because t0 is not in the range of [-t_star, t_star], there is statistical evidence to reject the null hypothesis. 
#This code concludes that there is a statistically different mean test score in this new test compared to the national average of 75 on the previous test. 
#Because t0 is in the right tail of the distribution (i.e. is a positive number), the average test scores on the new test will be greater than 75, making this 
# new test beneficial to students. 