In [1]:
import math
import statistics

In [41]:
# Sample data to measure the effect of caffine on muscle metabolism
placebo = [
    105,
    119,
    100,
    97,
    96,
    101,
    94,
    95,
    98
]

caffine = [
    96,
    99,
    94,
    89,
    96,
    93,
    88,
    105,
    88
]

In [38]:
# Functions

# Calculate the mean value of an array
def mean(array):
    """
    Standard mean calculation by dividing the sum of a sample
    by the number of samples
    """
    return (1/len(array)) * sum(array)

# Calculate the variance of a sample
def var(array):
    """
    To get the variance, you need to sum the result of subtracting the mean from
    each item in your sample and squaring it.
    
    v = E(x - M)^2
    """
    m = mean(array)
    return sum([math.pow(i - m, 2) for i in array])

# Standard Deviation of Population Mean
def sd_population(array):
    """
    Calculates the standard deviation of a population.
    Meaning, you have the entire sample, you are not estimating.
    
    Why do we square/square root back? https://www.mathsisfun.com/data/standard-deviation.html
    """
    m = mean(array) # mean
    N = len(array) # number of samples in population
    s = var(array) # variance
    return math.sqrt(s * (1 / N))

# Standard Deviation of Sample Mean (Bessel's Correction)
def sd_sample(array):
    """
    The only difference between this function and sd_population is that in this function,
    we divide by (n-1) rather than N. We do this to correct for the bias in the sample
    which tends to bring the standard deviation down.
    
    This video explains the theory behind it well: https://www.youtube.com/watch?v=ANsVodOu1Tg
    """
    m = mean(array) # sample mean
    n = len(array) - 1 # corrected n
    s = var(array) # variance
    return math.sqrt(s * (1 / n))

# Standard Error of the Mean
def sem(array):
    """
    The SEM is the distance between the population mean (the real mean) and
    the sample mean (a subset of the population). Meaning that if the population mean is 10,
    and the sample mean is 10.5, then the SEM is 0.5
    
    The SEM is inversely proportional to the sample population. The larger the sample, 
    the closer to the real mean the sample population will be. Meaning that as you 
    collect more data, your mean will get closer and closer to the true mean, and your
    SEM will shrink closer to zero. Bigger sample = smaller SEM
    
    SEM is an estimate, NOT the actual distance between the population and sample means.
    
    Another way of saying it is, the degree to which the sample mean is likely to vary
    from the true population mean.
    """
    o = sd_sample(array) # SE (Standard Error)
    n = len(array) # sample size
    return o / math.sqrt(n)

def variance_pooled(array_1, array_2):
    """
    Calculates an estimator for the pooled variance of two difference samples.
    """
    dof = len(array_1) + len(array_2) - 2 # Degrees of freedom
    var_1 = var(array_1)
    var_2 = var(array_2)
    return (var_1 + var_2) / dof
    

# T-Test
def ttest(array_1, array_2):
    """
    Independent samples ttest. Compares the means of two independent samples
    to give you a t value.
    """
    m1 = mean(array_1)
    m2 = mean(array_2)
    s = variance_pooled(array_1, array_2)
    n1 = len(array_1)
    n2 = len(array_2)
    return (m1 - m2) / math.sqrt((s / n1) + (s / n2))
    

In [16]:
print("Placebo mean: {}".format(mean(placebo)))
print("Caffine mean: {}".format(mean(caffine)))

Placebo mean: 100.55555555555554
Caffine mean: 94.22222222222221


In [17]:
print("Placebo standard deviation corrected: {}".format(sd_sample(placebo)))
print("Caffine standard deviation corrected: {}".format(sd_sample(caffine)))

Placebo standard deviation corrected: 7.69920630830073
Caffine standard deviation corrected: 5.6075346137535735


In [18]:
print(sem(placebo))

2.56640210276691


In [49]:
v_pooled = variance_pooled(placebo, caffine)
print(v_pooled)
print(mean([var(placebo), var(caffine)]))

45.3611111111111
362.8888888888888


In [39]:
print(ttest(placebo, caffine))

1.994788065026537
