# TVLA using Hamming Weights

In [1]:
import numpy as np
from tqdm import tqdm

from src.data.slices import tvla_slice
from src.data.traceloader import AscadRandomKey

In [2]:
# Shared loader instance; the cells below read `ascad.default.profile` and `ascad.random.profile` from it.
ascad = AscadRandomKey()

In [3]:
from src.tvla.t import make_t_test

def prepare_hw(make_test, samples):
    """
    Prepares traces for TVLA using Hamming weights as a key distance measure.

    The samples are split into the group selected by `filter_by_hw(True)` and the group selected by
    `filter_by_hw(False)`. Each group is cut into slices by `tvla_slice`, and every slice is reduced
    to its mean and variance along axis 0.

    @param make_test: Factory that is called with `slc_mv.shape[2]` and returns the statistical test.
    @param samples: Object exposing `filter_by_hw(bool)`; presumably a trace set - confirm against caller.
    @return: Tuple `(test, slc_mv)`. `slc_mv[0]` holds the statistics of the `filter_by_hw(True)` group,
        `slc_mv[1]` those of the `filter_by_hw(False)` group. Assuming every slice is a 2-D array, the
        last axis of `slc_mv` indexes (mean, variance).
    """
    target = samples.filter_by_hw(True)
    rest = samples.filter_by_hw(False)

    slc_target = tvla_slice(target)
    slc_rest = tvla_slice(rest)

    def mean_var(slc):
        # np.longdouble is the portable spelling of extended precision: np.float128 is only defined
        # on platforms whose long double is 128 bits wide, so referencing it fails elsewhere.
        return np.array([(s.mean(axis=0), s.var(axis=0)) for s in slc], dtype=np.longdouble)

    stacked = np.array([mean_var(slc_target), mean_var(slc_rest)])
    # Swap axes 2 and 3 so the (mean, variance) pair ends up on the last axis.
    slc_mv = np.moveaxis(stacked, 2, 3)

    test = make_test(slc_mv.shape[2])

    return test, slc_mv

In [4]:
def device_fails(left: np.ndarray, right: np.ndarray, p: float):
    """
    Decides whether a device fails: it does when, at any position, both test results lie below p.

    @param left: Test values of the first comparison (array or any array-like sequence).
    @param right: Test values of the second comparison; must broadcast against `left`.
    @param p: Threshold that both values at a position have to stay strictly below.
    @return: NumPy boolean, True if at least one position is below p in both `left` and `right`.
    """
    # asarray lets plain Python sequences through; arrays are passed along without copying.
    both_below = (np.asarray(left) < p) & (np.asarray(right) < p)
    return both_below.any()


def tvla(test, left, right, p=.1, debug=False):
    """
    Runs a statistical test on A-vs-A and A-vs-B sample pairs and reports which comparison fails.

    @param test: Statistical test function that takes two samples and returns test values.
    @param left: Four sample sets, all treated as samples from distribution A.
    @param right: Four sample sets; the first two are ignored, the last two are treated as samples
        from distribution B.
    @param p: Threshold handed to `device_fails`; a comparison fails when both of its test results
        drop below p at the same position.
    @param debug: If True, assert that both inputs contain exactly four sample sets and print the
        median test value of each comparison.
    @return: Tuple (A against A fails, A against B fails).
    """
    if debug:
        assert len(left) == 4
        assert len(right) == 4

    # All four entries of `left` come from distribution A.
    a_first, a_second, a_third, a_fourth = left
    # Only the last two entries of `right` are used; they stand for distribution B.
    _, _, b_first, b_second = right

    # A against A: pair the first half of `left` with its second half.
    same_dist = (test(a_first, a_third), test(a_second, a_fourth))

    # A against B: reuse the first half of `left` against the B samples.
    cross_dist = (test(a_first, b_first), test(a_second, b_second))

    if debug:
        print("Median p-value, A vs. A: ", np.median(same_dist))
        print("Median p-value, A vs. B: ", np.median(cross_dist))

    # A comparison fails only if both of its independent test runs agree.
    return device_fails(*same_dist, p), device_fails(*cross_dist, p)

### Tackling varying p-values

The range of $p$-values for which A vs. A fails (and A vs. B does not) in the non-randomized dataset varies. These ranges do not always overlap;
therefore there is no single fixed $p$-value for which leakage is detected.

#### Solution 1

Find the lowest $p$-value for which A vs. A fails.

In [5]:
def lowest_fail(test, slices):
    """
    Searches for the lowest threshold at which the A vs. A comparison of `tvla` fails.

    The threshold starts at 0 and is refined one decimal digit per pass: it is raised in increments
    of the current step until A vs. A fails, then moved one step back before the step shrinks by a
    factor of 10.

    NOTE: the inner search only stops once `tvla` reports an A vs. A failure, so this function does
    not return if no threshold ever triggers one.

    @param test: Statistical test function forwarded to `tvla`.
    @param slices: Indexable pair; `slices[0]` and `slices[1]` are passed to `tvla` as left and right.
    @return: The last threshold at which A vs. A failed (up to floating-point rounding).
    """
    left, right = slices[0], slices[1]

    threshold = 0.
    step = 1
    while step > 10 ** -10:
        step /= 10

        # Walk upwards until the A vs. A comparison fails at the current resolution.
        while True:
            if tvla(test, left, right, p=threshold)[0]:
                break
            threshold += step

        # Step back below the failing value so the next, finer pass can approach it again.
        threshold -= step

    # Undo the final step back: this is the failing value found by the finest pass.
    return threshold + step

And run it 1000 times:

In [6]:
def run_lowest_fail(times, debug=False):
    """
    Repeats the lowest-fail experiment and counts the outcomes on both trace sets.

    Each iteration prepares `ascad.default.profile` and `ascad.random.profile` anew, derives the
    threshold from the default traces via `lowest_fail`, and applies `tvla` with that threshold to
    both trace sets.

    @param times: Number of iterations to run.
    @param debug: If True, print the threshold and both TVLA results, and run `tvla` in debug mode.
    @return: Tuple (acc_tp, acc_fp): the number of iterations in which A vs. A failed while A vs. B
        did not, counted on the default traces and on the random traces respectively.
    """
    def report(*parts):
        # Only emit output when the caller asked for it.
        if debug:
            print(*parts)

    hits_default = 0
    hits_random = 0

    for _ in tqdm(range(times)):
        default_test, default_slices = prepare_hw(make_t_test, ascad.default.profile)
        random_test, random_slices = prepare_hw(make_t_test, ascad.random.profile)

        # The threshold is calibrated on the default traces and reused for the random ones.
        p_value = lowest_fail(default_test, default_slices)
        report(f"p-value: {p_value}\n")

        tvla_default = tvla(default_test, default_slices[0], default_slices[1], p=p_value, debug=debug)
        report("Default TVLA:", tvla_default, "\n")

        tvla_random = tvla(random_test, random_slices[0], random_slices[1], p=p_value, debug=debug)
        report("Random TVLA:", tvla_random, "\n")

        default_aa, default_ab = tvla_default
        random_aa, random_ab = tvla_random

        # Count an iteration when A vs. A fails but A vs. B does not.
        hits_default += default_aa * (not default_ab)
        hits_random += random_aa * (not random_ab)

    return hits_default, hits_random

# Run 1000 iterations; the cell evaluates to the (acc_tp, acc_fp) tuple returned by run_lowest_fail.
run_lowest_fail(1000)

  return np.array([traces[s] for s in np.array_split(select, 4)])
  1%|          | 11/1000 [00:21<32:41,  1.98s/it]


KeyboardInterrupt: 

Example result for 1000 runs: `(427, 65)`

#### Solution 2

Taking more than 1 random slice. `TODO`
