# General trace loader
Interface for loading various traces. Tasks include:
- Resolving class imbalance (TODO)
- Noise reduction (TODO)
- General trace format (DOING)

In [1]:
class TraceLoader:
    """Placeholder for the general trace loader; it does not store or process anything yet."""

    def __init__(self, trace_data):
        """
        Accepts the trace data without using it.

        :param trace_data: currently ignored.
        """

In [2]:
from settings.ascad import ASCADData
from collections import Counter
import pandas as pd

# Data set handle used by the cells below, which index it with "Profiling_traces" and "Attack_traces".
# NOTE(review): presumably the random-key ASCAD database - confirm in settings.ascad.
ascad_h5 = ASCADData.random_key()

In [3]:
import numpy as np

class TraceGroup:
    """
    A group of traces with their labels, offering cached per-label selections.

    :param trace_group: object that can be indexed with "traces" and "labels". ``labels[i]``
        is used as the label of ``traces[i]``.
    :param trace_range: iterable of the sample values that form the bins of a contingency
        table, in output order.
    """

    def __init__(self, trace_group, trace_range):
        self.t_range = trace_range

        self.traces = trace_group["traces"]
        self.labels = trace_group["labels"]

        # Per-label caches: selected traces and contingency tables.
        self.tk_cache = {}
        self.ct_cache = {}

    def filter_traces(self, label):
        """
        Selects the traces that carry a given label.

        :param label: the label to select on.
        :return: numpy array with the rows of ``traces`` whose label equals ``label``.
            The result is cached, so repeated calls with the same label return the same array.
        """
        if label not in self.tk_cache:
            ixs = np.where(np.array(self.labels) == label)[0]
            self.tk_cache[label] = np.array(self.traces[ixs])

        return self.tk_cache[label]

    def contingency_table(self, label):
        """
        Builds a contingency table from the traces in the dataset for a given label.

        :param label: the label for which the contingency table will be built.
        :return: numpy integer array with one entry per value in ``t_range`` (in that order),
            holding how often that value occurs in the traces of ``label``. Values that are
            not in ``t_range`` are ignored. The result is cached per label.
        """
        if label not in self.ct_cache:
            # Count every sample value in a single pass over all selected traces, rather
            # than building one Counter (and one DataFrame row) per trace.
            values, counts = np.unique(self.filter_traces(label), return_counts=True)
            occurrences = dict(zip(values.tolist(), counts.tolist()))

            # Bins that never occur in the selected traces count as zero.
            table = [occurrences.get(value, 0) for value in self.t_range]
            self.ct_cache[label] = np.array(table, dtype=int)

        return self.ct_cache[label]


In [4]:
# Wrap the profiling traces in a TraceGroup; the trailing expression displays the shape of its traces.
prof_group = TraceGroup(ascad_h5["Profiling_traces"], ASCADData.data_range)
prof_group.traces.shape

(200000, 1400)

In [5]:
# Wrap the attack traces in a TraceGroup; the trailing expression displays the shape of its traces.
att_group = TraceGroup(ascad_h5["Attack_traces"], ASCADData.data_range)
att_group.traces.shape

(100000, 1400)

In [6]:
# Shape of the subset of profiling traces that carry label 1.
prof_group.filter_traces(1).shape

(768, 1400)

In [7]:
# Load "tvla.classic_la" as `nb`; the test cells below call nb.calc_t and nb.calc_chi.
# NOTE(review): the recorded output of this cell is a TypeError raised right after
# "importing Jupyter notebook from None", so the loader apparently did not resolve a path
# for the module - verify the module name and the working directory before relying on `nb`.
from settings.nbloader import NotebookLoader
nb = NotebookLoader().load_module("tvla.classic_la")

importing Jupyter notebook from None


TypeError: expected str, bytes or os.PathLike object, not NoneType

### $t$ test
The aim of a $t$-test is to provide a quantitative value, expressed as a probability, for whether the means $\mu$ of two sets are different.

For the $t$-test, high $p$-values give reason to reject $H_0$ being "left and right are from different distributions". The traces should be normally distributed for Student's $t$-test to work.


In [None]:
# Run nb.calc_t on three pairs of contingency tables:
#   exact - the profiling table for label 1 against itself,
#   same  - profiling label 1 against attack label 1,
#   diff  - profiling label 1 against attack label 2.
print("exact: ", nb.calc_t([prof_group.contingency_table(1), prof_group.contingency_table(1)]))
print("same:  ", nb.calc_t([prof_group.contingency_table(1), att_group.contingency_table(1)]))
print("diff:  ", nb.calc_t([prof_group.contingency_table(1), att_group.contingency_table(2)]))

### $\chi^2$ test

For $\chi^2$, small p-values give reason to reject $H_0$ =
"the occurrences of these observations are independent".
For $t$, large p-values give reason to reject $H_0$ =
"the samples in both sets are drawn from the same population".

In [None]:
# Run nb.calc_chi on three pairs of contingency tables:
#   exact - the profiling table for label 1 against itself,
#   same  - profiling label 1 against attack label 1,
#   diff  - profiling label 1 against attack label 2.
print("exact: ", nb.calc_chi([prof_group.contingency_table(1), prof_group.contingency_table(1)]))
print("same:  ", nb.calc_chi([prof_group.contingency_table(1), att_group.contingency_table(1)]))
print("diff:  ", nb.calc_chi([prof_group.contingency_table(1), att_group.contingency_table(2)]))