# ASCAD with countermeasures

In [8]:
import numpy as np
import seaborn as sns
from matplotlib import pyplot as plt
from scipy.stats import pearsonr
from tensorflow.keras.layers import *
from tensorflow.keras.models import *
from tensorflow.python.keras.optimizer_v2.adam import Adam
from tqdm.keras import TqdmCallback

from src.dlla.hw import NUM_CLASSES, plot_predictions, dlla_hw, dlla_p_gradient, prepare_traces_dl
from src.dlla.wegener import make_mlp_wegener, wegener_p_gradient
from src.tools.lists import random_divide, randomize
from src.tools.plotter import init_plots, plot_p_gradient, PALETTE_GRADIENT, store_sns
from src.trace_set.database import Database
from src.trace_set.pollution import Pollution, PollutionType
from src.trace_set.set_hw import TraceSetHW
from src.trace_set.transform import reduce_fixed_fixed
from src.tvla.tvla import Tvla

init_plots()

TRACE_LENGTH = 1400

In [9]:
# ERROR = 'delay, $\sigma = 0.125$'
ERROR = 'jitter, $\lambda$ = 0.75'

POLL_TYPES = {
    'delay, $\sigma = 0.125$': Pollution(PollutionType.delay, .05),
    'jitter, $p$ = 0.1': Pollution(PollutionType.jitter, .75)
}

ascad = TraceSetHW(Database.ascad).open('r')
TRACES, HW = np.array(ascad['profile/traces']), np.array(ascad['profile/hw'])[:200000]
TRACES_ATT, HW_ATT = np.array(ascad['attack/traces']), np.array(ascad['attack/hw'])

X9, Y9, X9_ATT, Y9_ATT = prepare_traces_dl(TRACES, HW, TRACES_ATT, HW_ATT)
(X2, Y2), (X2_ATT, Y2_ATT) = reduce_fixed_fixed(X9, Y9), reduce_fixed_fixed(X9_ATT, Y9_ATT)

ascad.close()

### Model creation

With model hyper-parameters to be optimized.

In [10]:
def make_mlp(x, y, x_attack, y_attack, params):
    mdl = Sequential()
    mdl.add(Dense(100, activation=params['activation'], input_shape=(x.shape[1],)))
    mdl.add(Dense(100, activation=params['activation']))
    mdl.add(Dense(100, activation=params['activation']))
    mdl.add(Dense(100, activation=params['activation']))
    mdl.add(Dense(NUM_CLASSES, activation='softmax'))

    mdl.compile(optimizer=params['optimizer'], loss=params['losses'], metrics=['accuracy'])

    out = mdl.fit(x, y, shuffle=True, validation_data=(x_attack, y_attack), batch_size=params['batch_size'],
                  epochs=params['epochs'], verbose=False, callbacks=[TqdmCallback(verbose=0)])

    return out, mdl

### Brute-force Hyper parameter optimization

Talos brute-force scan for optimal hyperparameters

In [11]:
# import talos
#
# scan_obj = talos.Scan(x=x_prof,    y=y_prof,    x_val=x_att,    y_val=y_att,    model=make_mlp,    params=model_parameters,    experiment_name=".cache/talos", print_params=True)

### Fit model based on the optimal model found by Talos.

In [12]:
NUM_PROFILING = 50000

best_model = make_mlp(X9[:NUM_PROFILING], Y9[:NUM_PROFILING], X9_ATT, Y9_ATT, {
    'activation':'relu',
    'optimizer': Adam(learning_rate=0.001),
    'losses': 'categorical_crossentropy',
    'batch_size': 150,
    'epochs': 5
})[1]

0epoch [00:00, ?epoch/s]

### HW Prediction

Predict the hamming weight by taking the weighted mean for the predicted probabilities for each class.

#### Example: Some prediction

Probabilities for each class:

In [13]:
best_model.predict(X9_ATT[:1])[0]

array([0.00131694, 0.01984817, 0.09218685, 0.18328694, 0.29731563,
       0.28010947, 0.0969699 , 0.02709987, 0.00186622], dtype=float32)

**Predicted hamming weight label**, calculated by taking the weighted mean using the predicted probabilities.

In [14]:
np.sum(best_model.predict(X9_ATT[:1])[0] * range(8 + 1))

4.130340748466551

In [15]:
dlla_hw(best_model, X9_ATT, Y9_ATT)

(0.471489353630537, 0.4148896239744506)

### Plot p-gradient.

In [16]:
def split_traces(x, y):
    a = x[np.where(y < 4)]
    b = x[np.where(y > 4)]

    min_len = min(len(a), len(b))
    return a[:min_len], b[:min_len]

A_ATT, B_ATT = split_traces(TRACES_ATT, HW_ATT)

In [17]:
MAX_LEN = round(len(A_ATT) / 2)
TVLA_AB = Tvla(A_ATT.shape[1])
TVLA_AB.add(A_ATT[:MAX_LEN], B_ATT)
TVLA_BB = Tvla(TRACE_LENGTH)
TVLA_BB.add(*random_divide(B_ATT))


Computing Central Moments:   0%|          | 0/6 [00:00<?, ?it/s][A
Computing Central Moments:  67%|██████▋   | 4/6 [00:00<00:00,  9.20it/s][A
Computing Central Moments:  83%|████████▎ | 5/6 [00:00<00:00,  5.83it/s][A
Computing Central Moments: 100%|██████████| 6/6 [00:01<00:00,  5.32it/s][A

Computing Central Moments:   0%|          | 0/6 [00:00<?, ?it/s][A
Computing Central Moments:  17%|█▋        | 1/6 [00:00<00:01,  4.53it/s][A
Computing Central Moments:  50%|█████     | 3/6 [00:00<00:00,  9.27it/s][A
Computing Central Moments:  83%|████████▎ | 5/6 [00:01<00:00,  2.45it/s][A
Computing Central Moments: 100%|██████████| 6/6 [00:02<00:00,  2.46it/s][A

Computing min-p gradients:   0%|          | 0/3 [00:00<?, ?it/s][A
Computing min-p gradients:  33%|███▎      | 1/3 [00:16<00:32, 16.05s/it][A
Computing min-p gradients:  67%|██████▋   | 2/3 [00:38<00:19, 19.90s/it][A
Computing min-p gradients: 100%|██████████| 3/3 [01:14<00:00, 24.91s/it][A

Computing Central Moments:   0%|

ValueError: could not broadcast input array from shape (1000,) into shape (1400,)

In [None]:
plot_p_gradient(dict([(f"$\\mu_{{{d}}}$", TVLA_AB.p_gradient(d)) for d in range(1, 4)]),
                "TVLA performance, first 3 statistical moment orders\nMasked AES with 1400 sample points", palette=PALETTE_GRADIENT)

In [None]:
G = TVLA_AB.plot_p_values(2)
store_sns(G, "p-values-ascad")

In [None]:
sns.lineplot(data=np.bincount(np.round(TVLA_AB.min_p[2] * 1000).astype(int), minlength=100))

In [None]:
MAX_WEIGHTS = np.max(best_model.layers[0].get_weights()[0], axis=1)

sns.lineplot(data=MAX_WEIGHTS)
AX2 = plt.twinx()
G = sns.lineplot(data=TVLA_AB.min_p[2], ax=AX2, color=sns.color_palette()[1], alpha=.5)
G.set(yscale="log")
G.invert_yaxis()

In [None]:
print("p-value for uncorrelated system producing these datasets")
pearsonr(MAX_WEIGHTS, TVLA_AB.min_p[2])[1]

In [None]:
plot_p_gradient({
    "A vs. B": TVLA_AB.p_gradient(2),
    "FP check": TVLA_BB.p_gradient(2)
}, "TVLA performance validation ($t$-test for $\\mu_2$ with min-$p$)\nMasked AES with 1400 sample points",
    palette=PALETTE_GRADIENT,
    file_name="tvla-validation-mu2")

PG_LEN = len(TVLA_AB.p_gradient(2))

In [None]:
PG_DLLA_9 = dlla_p_gradient(best_model, X9_ATT, Y9_ATT)["A vs. B"]
PG_DLLA_9_RANDOM = dlla_p_gradient(best_model, X9_ATT, randomize(Y9_ATT))["A vs. B"]

plot_p_gradient({
    "A vs. B": np.array(PG_DLLA_9),
    "FP check": np.array(PG_DLLA_9_RANDOM),
}, "DL-LA performance validation (9-class)\nMasked AES with 1400 sample points",
    palette=PALETTE_GRADIENT,
    file_name="dlla-9-validation"
)

In [None]:
X_PROF_2, Y_PROF_2 = reduce_fixed_fixed(X9, Y9)
X_ATT_2, Y_ATT_2 = reduce_fixed_fixed(X9_ATT, Y9_ATT)
Y_ATT_2_RANDOM = randomize(Y_ATT_2)

In [None]:
MODEL_WEGENER_AB = make_mlp_wegener(X_PROF_2[:NUM_PROFILING], Y_PROF_2[:NUM_PROFILING], X_ATT_2, Y_ATT_2)

PG_DLLA_2_AB = wegener_p_gradient(MODEL_WEGENER_AB, X_ATT_2[:PG_LEN], Y_ATT_2[:PG_LEN])
PG_DLLA_2_RANDOM =  wegener_p_gradient(MODEL_WEGENER_AB, X_ATT_2[:PG_LEN], Y_ATT_2_RANDOM[:PG_LEN])

plot_p_gradient({
    "A vs. B": np.array(PG_DLLA_2_AB),
    "FP check": np.array(PG_DLLA_2_RANDOM),
}, "DL-LA performance validation (Wegener)\nMasked AES with 1400 sample points",
    palette=PALETTE_GRADIENT,
    file_name="dlla-2-validation"
)

### Experiment - performance using t-test distinguisher

In [None]:
# PG_DLLA_2_AB_T = wegener_t_test_p_gradient(MODEL_WEGENER_AB, X_ATT_2[:PG_LEN], Y_ATT_2[:PG_LEN])
# PG_DLLA_2_RANDOM_T = wegener_t_test_p_gradient(MODEL_WEGENER_AB, X_ATT_2[:PG_LEN], Y_ATT_2_RANDOM[:PG_LEN])

# plot_p_gradient({
#     "A vs. B": np.array(PG_DLLA_2_AB_T),
#     "FP check": np.array(PG_DLLA_2_RANDOM_T),
# }, "DL-LA performance validation (Wegener) on ASCAD.\nEvaluation using $t$-test",
#     palette=PALETTE_GRADIENT,
#     file_name="dlla-2-validation"
# )

# plot_p_gradient({
#     "Binomial test": np.array(PG_DLLA_2_AB[:4000]),
#     "t-test": np.array(PG_DLLA_2_AB_T[:4000]),
# }, "DL-LA performance (Wegener) on ASCAD.\nEvaluation using different statistical tests\n")

# plot_p_gradient({
#     "DL-LA (Wegener, binomial test)": np.array(PG_DLLA_2_AB),
#     "TVLA $\\mu_2$": TVLA_AB.p_gradient(2)
# },"LA method performance\nmasked AES with 1400 sample points\n")

In [None]:
plot_p_gradient({
    "DL-LA (9-class)": np.array(PG_DLLA_9),
    "DL-LA (Wegener)": np.array(PG_DLLA_2_AB),
    "TVLA $\\mu_2$ (min-$p$)": TVLA_AB.p_gradient(2)
},"LA method performance\nmasked AES with 1400 sample points",
    file_name="ascad-default-all"
)