# Student's t-test for significantly different means in Python:

In [1]:
import numpy as np
import timeit
import subprocess
import sys
import os


# Make the project's statistics helpers importable and load them
sys.path.insert(0, '../lib_stat')
from lib_stat import avevar, betai

# Compile the Fortran source code (using a Python script).
# The script is run with the interpreter that executes this notebook and
# without an intermediate shell; a failed build is reported instead of being
# silently ignored, because the Fortran timing further down needs the binary.
compile_status = subprocess.run(
    [sys.executable, '../lib_stat/compile_fortran.py']
).returncode
if compile_status != 0:
    print('WARNING: ../lib_stat/compile_fortran.py exited with status '
          + str(compile_status) + '; the Fortran benchmark may not work.',
          file=sys.stderr)

# Sample data: the two arrays differ only in their first element
data1 = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9])
data2 = np.array([1.1, 2, 3, 4, 5, 6, 7, 8, 9])

Function definition:

In [2]:
def ttest(data1, data2):
    """Student's t-test for significantly different means.

    Given the arrays ``data1`` and ``data2``, return Student's ``t`` and
    its significance ``prob``. The data are assumed to be drawn from
    populations with the same true variance, so a pooled variance is used.
    Small values of ``prob`` indicate that the arrays have significantly
    different means.

    Parameters
    ----------
    data1, data2 : sequence
        The two samples. Each must support ``len()`` and is passed
        unchanged to ``avevar``.

    Returns
    -------
    t
        Student's t statistic, ``(ave1 - ave2)`` divided by the standard
        error of the difference of the means.
    prob
        Significance of ``t`` as returned by ``betai``.

    Raises
    ------
    ValueError
        If either sample is empty, or if the samples together contain
        fewer than three values (zero degrees of freedom).
    """
    n1 = len(data1)
    n2 = len(data2)

    # Reject inputs for which the formulas below would divide by zero.
    if n1 < 1 or n2 < 1:
        raise ValueError("ttest requires two non-empty samples")

    # degrees of freedom
    df = n1 + n2 - 2.0
    if df <= 0:
        raise ValueError(
            "ttest requires at least three values in total "
            "(degrees of freedom must be positive)"
        )

    # presumably avevar returns (mean, variance) of its argument -- the
    # helper lives in lib_stat; confirm there
    ave1, var1 = avevar(data1)
    ave2, var2 = avevar(data2)

    # pooled variance
    var = ((n1 - 1.0) * var1 + (n2 - 1.0) * var2) / df
    # Student's t
    t = (ave1 - ave2) / np.sqrt(var * (1.0 / n1 + 1.0 / n2))
    # significance
    prob = betai(0.5 * df, 0.5, df / (df + t**2))

    return t, prob

### Calling the ttest function in Python:

In [3]:
# Compare the two sample arrays and show Student's t with its significance
outcome = ttest(data1, data2)
t, prob = outcome

print(t, prob)

-0.008635140708713388 0.9932169925849851


## Timing against Fortran:

Test function:

In [4]:
def test(n_calls=1000):
    """Benchmark driver: run ttest on the sample arrays repeatedly.

    Parameters
    ----------
    n_calls : int, optional
        Number of times ``ttest(data1, data2)`` is evaluated. Defaults to
        1000, the count that was previously hard-coded.
        NOTE(review): presumably this should match the repetition count
        of the Fortran driver it is compared with -- confirm.

    The results of the individual calls are discarded; only the elapsed
    time is of interest.
    """
    for _ in range(n_calls):
        ttest(data1, data2)

#### Python:

In [5]:
# Time the pure-Python implementation through the test() driver defined above.
%timeit test()

14.9 ms ± 60.2 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


#### Fortran:

In [6]:
# Time one run of the external fmain_ttest program
# (presumably the executable produced by compile_fortran.py -- confirm).
# NOTE: every timed call launches a new process, so the measurement includes
# process start-up overhead in addition to the computation itself.
%timeit subprocess.call(['../lib_stat/fmain_ttest'])

5.43 ms ± 53.5 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
