In [20]:
import numpy as np
import numpy.random as npr
import math
import scipy.special as sp

trace_0, trace_1 = npr.sample(10), npr.sample(10)

In [4]:
def values(trace):
    return np.mean(trace), np.var(trace), len(trace)

In [3]:
def test_statistic(mean_0, var_0, car_0, mean_1, var_1, car_1):
    return (mean_0 - mean_1) / math.sqrt(var_0 / car_0 + var_1 / car_1)

In [13]:
test_statistic(*values(trace_0), *values(trace_1))

NameError: name 'trace_0' is not defined

In [17]:
def degree_of_freedom(var_0, car_0, var_1, car_1):
    nom = (var_0 / car_0 + var_1 / car_1)**2
    den = (var_0 / car_0)**2 / (car_0 - 1) + (var_1 / car_1)**2 / (car_1 - 1)

    return nom / den

In [56]:
m0, v0, c0 = values(trace_0)
m1, v1, c1 = values(trace_0)

ts = test_statistic(m0, v0, c0, m1, v1, c1)
dof = degree_of_freedom(v0, c0, v1, c1)

def student_t_dist(t, v):
    """
    The Student's t-distribution as described on page 3.

    :param t: the test statistic of the two traces.
    :param v: the degree of freedom of the two traces.
    :return: the Student's t-distribution.
    """
    return sp.gamma((v+1) / 2) / (math.sqrt(np.pi * v) * sp.gamma(v/2)) * (1 + t**2 / v) ** (-(v+1)/2)

In [22]:
student_t_dist(ts, dof)

0.1645786808778906

In [52]:
# Student's t-probability distribution function
from scipy.stats import t as t_pdf

def cdf(t, v):
    """
    The cumulative distribution function as described on page 3.

    The aim of a t-test is to provide a quantitative value as a probability that the mean μ of two sets are different.

    :param t: the test statistic of the two traces.
    :param v: the degree of freedom of the two traces.
    :return: the cumulative distribution function.
    """
    return 2 * t_pdf.cdf(-abs(t), v)

In [75]:
# Test from paper, top right of page 3.
p = cdf(1.8, 10000)

p

0.07189076588292889

In [76]:
def reject_h0(left, right, prob=.01):
    """
    H0 being "left and right are from different distributions". The traces should be normally distributed for the
    Student's t-test to work.

    :param left: trace 0.
    :param right: trace 1.
    :param prob: the desired probability.
    :return: whether H0 can be rejected.
    """
    mean_0, var_0, car_0 = values(left)
    mean_1, var_1, car_1 = values(right)

    t = test_statistic(mean_0, var_0, car_0, mean_1, var_1, car_1)
    v = degree_of_freedom(var_0, car_0, var_1, car_1)
    if t > 4.5 and v > 1000:
        # Explanation at of section 2, page 3 bottom left.
        return True

    return cdf(t, v) < prob

In [74]:
reject_h0(trace_0, trace_0)

1.0


False