In [6]:
import numpy as np
from sklearn import datasets
from scipy.stats import f

from src.trace_set.database import Database
from src.trace_set.set_hw import TraceSetHW
from src.trace_set.transform import fixed_fixed
from src.tvla.tvla import prepare_tvla

In [None]:
def two_sample_t2_test(x, y):
    nx, p = x.shape
    ny, _ = y.shape

    delta = np.mean(x, axis=0) - np.mean(y, axis=0)

    sx = np.cov(x, rowvar=False)
    sy = np.cov(y, rowvar=False)

    s_pooled = ((nx - 1) * sx + (ny - 1) * sy) / (nx + ny - 2)
    t_squared = (nx * ny) / (nx + ny) * np.matmul(np.matmul(delta.transpose(), np.linalg.inv(s_pooled)), delta)

    statistic = t_squared * (nx + ny - p - 1) / (p * (nx + ny - 2))

    f_value = f(p, nx + ny - p - 1)
    p_value = 1 - f_value.cdf(statistic)

    print(f"Test statistic: {statistic}\nDegrees of freedom: {p} and {nx + ny - p - 1}\np-value: {p_value}")

    return statistic, p_value

iris = datasets.load_iris()
versicolor = iris.data[iris.target == 1, :2]
virginica = iris.data[iris.target == 2, :2]
two_sample_t2_test(versicolor, virginica)

## Test statistic: 15.82660099191812
## Degrees of freedom: 2 and 97
## p-value: 1.1259783253558808e-06

In [23]:
TRACE_SET = TraceSetHW(Database.ascad_none)
X, Y = prepare_tvla(*TRACE_SET.profile())

X_LIM = X
Y_RANDOM = Y.copy()
np.random.shuffle(Y_RANDOM)

A, B = X_LIM[~Y], X_LIM[Y]
A_RAND, B_RAND = X_LIM[~Y_RANDOM], X_LIM[Y_RANDOM]

two_sample_t2_test(A, B)
two_sample_t2_test(A_RAND, B_RAND)

Test statistic: 285.22198236337806
Degrees of freedom: 1977 and 143026
p-value: 1.1102230246251565e-16
Test statistic: 1.0009020559617412
Degrees of freedom: 1977 and 143026
p-value: 0.4846278378517652


(1.0009020559617412, 0.4846278378517652)