In [29]:
## Kritik Assignment no. 7!!

In [30]:
import numpy as np
from scipy.special import gamma

def t_distribution_pdf(x, nu):
    """
    Compute the probability density of the t-distribution
    at a given point x with nu degrees of freedom.
    Parameters:
    x (float): The point at which to evaluate the density.
    nu (int): The degrees of freedom of the t-distribution.
    Returns:
    density (float): The probability density at point x for
    the t-distribution with nu degrees of freedom.
    """
    coeff = gamma((nu + 1) / 2) / (np.sqrt(nu * np.pi) * gamma(nu / 2))
    density = coeff * (1 + x**2 / nu) ** (-0.5 * (nu + 1))
    return density


In [31]:
t_scores = [92.64,79.00,84.79,97.41,93.68,65.23,84.50,73.49,73.97,79.11] # test scores

def mean_std(data):
    '''
    Based on a given set of data, this function calculates the mean value, as well
    as the standard deviation.
    '''
    sum_0 = 0 #intialize (for mean)
    sum_std = 0 #initialize (for std dev)
    
    for i in range(len(data)):
        sum_0 += data[i]
    mean = sum_0 / len(data) # mean
    
    for i in range(len(data)):
        sum_std += (data[i] - mean)**2
    std = np.sqrt((1/(len(data)-1))*sum_std) # std deviation
    
    return mean, std
    
print(mean_std(t_scores)) # based on this, average test score is 82.382% +/- 10.193%

(82.382, 10.193467189005581)


In [32]:
def t_0(mu, data):
    '''
    Calculates t0 for a set of data, based on a given µ value and a mean value 
    (calculacted using the function from the previous cell).
    '''
    t0 = ((mean_std(data)[0])-mu)/((mean_std(data)[1])/np.sqrt(len(data)))
    return t0
    
print(t_0(75, t_scores)) ## null hypothesis: mu = 75

2.290087686017293


In [33]:
def find_t_star(prob, nu, x_start=0, x_end=20, num_points=10000):
    """
    Find the t-value t* for a given cumulative probability
    and degrees of freedom.
    Parameters:
    prob (float): The cumulative probability (between 0 and 1).
    nu (int): The degrees of freedom of the t-distribution.
    x_start (float): The start point for numerical integration.
    x_end (float): The end point for numerical integration.
    20 will almost always be big enough.
    num_points (int): The number of points to use in
    the numerical integration.
    Returns:
    float: The t-value t* such that the area between [-t*, t*]
    equals the given probability.
    """
    
    # Define the x values
    x = np.linspace(x_start, x_end, num_points)
    
    # Apply the density function to the x values
    y = t_distribution_pdf(x, nu)
    
    # This next line is the integration (exercise: why does this work?)
    cdf = np.cumsum(y) * (x[1] - x[0])
    
    # Find the t-value where the cumulative probability reaches half of the
    #required probability
    target_half_prob = prob / 2
    index = np.where(cdf >= target_half_prob)[0][0]
    
    return x[index]

print(find_t_star(0.95, 9, x_start=0, x_end=20, num_points=1000)) 
# prob is 0.95 (based on the 95% confidence interval)
# nu is n-1: 10 data points, so nu = 9


2.1821821821821823


In [34]:
def true_false_det(mu, data):
    '''
    This functions purpose is to determine whether t0 is on the interval of [-t*, t*]. This is done by comparing
    the absolute value of t0 to t*. If the absolute value is greater than t*, that means that t0 is not located
    on the interval, and is either < -t* or > t*.
    '''
    t0_ = t_0(mu, data) # calculates t0 based on function from second cell
    t_star_ = find_t_star(0.95, 9, x_start=0, x_end=20, num_points=1000) # calculates t* based on function from third cell
    if abs(t0_) > t_star_:
        return "False"
    else:
        return "True"
    
print(true_false_det(75, t_scores)) ## based on mu=75 (predicted / national average) and our set of test scores

## Tells us that these test scores are different from the national average, in a
## statistically significant way.

False


In [35]:
'''

For the data given in the scenario, apply your homebrew t-test. What
is your conclusion? Can we conclude µ = 75? Based on the value of
t0, would you say that the statistical evidence suggests that the new
teaching techniques is beneficial, or detrimental?

'''

## My response: Based on the above t-test, we can conclude that µ =/= 75. In other words,
## the null hypothesis is incorrect and, as the researcher expected, the average for the 
## students taught using his new teaching technique is significantly different than the
## national average. Based on the value of t0, which is higher than t*, we can conclude
## that the averages for students taught with this new technique are *higher* than the 
## national average. Thus the technique is beneficial, and should be implemented. 



'\n\nFor the data given in the scenario, apply your homebrew t-test. What\nis your conclusion? Can we conclude µ = 75? Based on the value of\nt0, would you say that the statistical evidence suggests that the new\nteaching techniques is beneficial, or detrimental?\n\n'