# Lecture 4: Password CPA Attack - Attack

In this example we improve the password check once more, this time to make it resistant against the attack from the last tutorial.

## Improving the code

Let's first recap the password checking loop from the last lecture:
```c
for(uint8_t i = 0; i < sizeof(stored_password); i++)
{
    if (stored_password[i] != passwd[i])
    {
        password_wrong = 1;
    }
}
```

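The difference-based attack discussed in the last example worked because the power consumption differs whenever the code inside the `if` clause is executed. The following code addresses this: it performs the same operations for every byte, OR-ing the XOR of each byte pair into `password_wrong`, so the result is non-zero exactly when at least one byte differs.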

```c
uint8_t password_wrong = 0;
for(uint8_t i = 0; i < sizeof(stored_password); i++)
{
    password_wrong |= stored_password[i] ^ passwd[i];
}
```

This is an excerpt from `4_password_fixed.c`.

In [None]:
import securec
from securec import util
# Initialise through the securec helper; the returned scope and target
# objects are used by the cells below.
scope, target = util.init()

In [None]:
# Compile the hardened password check and flash it (presumably onto the
# attached target -- confirm against securec.util).
securec.util.compile_and_flash('./4_password_fixed.c')

In [None]:
import struct
import time
import warnings
import numpy as np

# Apply the scope's default configuration before any trace is captured.
scope.default_setup()

def capture(attempt, samples=500):
    """Send one password attempt to the target and record the power trace.

    Args:
        attempt: The guess as ``bytes``, as a ``str`` (encoded as ISO-8859-1,
            so every code point up to 255 maps to exactly one byte) or as a
            single ``int`` byte value.
        samples: Number of ADC samples to record for the trace.

    Returns:
        A tuple ``(trace, accepted)``. ``trace`` is the captured trace as a
        NumPy array. ``accepted`` is ``True`` when the first byte of the
        target's reply is zero (presumably the firmware's ``password_wrong``
        flag -- confirm against the firmware source).
    """
    scope.adc.samples = samples
    if isinstance(attempt, str):
        attempt = attempt.encode('iso-8859-1')
    elif isinstance(attempt, int):
        attempt = bytes([attempt])
    # Arm the scope before sending so the capture covers the password check.
    scope.arm()
    # Pad short attempts with NUL bytes up to 10 bytes; longer input is sent
    # unchanged.
    target.simpleserial_write('p', attempt.ljust(10, b'\x00'))
    result = target.simpleserial_read('p', 1)
    return np.array(util.capture()), not bool(result[0])

In [None]:
import math
import pandas as pd
from bokeh.plotting import figure, show 
from bokeh.io import output_notebook
from bokeh.models import CrosshairTool, LinearColorMapper
from bokeh.palettes import Category10_10, Turbo256, Inferno256
from bokeh.models import Span, Label, BoxAnnotation
from bokeh.layouts import column, row
from bokeh.transform import linear_cmap

# Render Bokeh plots inline in the notebook.
output_notebook()

## Pearson correlation coefficient
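Although the loop no longer branches, the value `stored_password[i] ^ passwd[i]` that it processes still depends on the secret, so we can look for its statistical footprint in the power traces. A useful statistical tool for this is the *Pearson correlation coefficient*. For two random variables $X, Y$ it is defined as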

$$\rho_{X,Y} := \frac{\mathrm{Cov}(X, Y)}{\sqrt{\mathrm{Var}(X)} \sqrt{\mathrm{Var}(Y)}} \ \in [-1, 1]\,.$$

For two samples of finite length $x = (x_1, \dots, x_n)$, $y = (y_1, \dots, y_n)$ it can be defined as

$$r_{x,y} := \frac{\sum_{i=1}^n (x_i - \bar x)(y_i - \bar y)}{\sqrt{\sum_{i=1}^n (x_i - \bar x)^2}\sqrt{\sum_{i=1}^n (y_i - \bar y)^2}} \ \in [-1, 1]\,,$$

where $\bar x := \frac{1}{n} \sum_{i=1}^n x_i$ is the mean of a sample $x$.

## Preparation

In [None]:
import numpy as np

# Hamming weight (number of set bits) of every byte value 0..255.
HW = [format(value, "b").count("1") for value in range(256)]


def hw(n):
    """Return the Hamming weight of a byte via the ``HW`` lookup table.

    ``n`` is either an integer index into ``HW`` or a one-character string,
    which is converted to its code point first.
    """
    index = ord(n) if isinstance(n, str) else n
    return HW[index]


# Element-wise variant of ``hw`` for arrays.
hw_vec = np.vectorize(hw)

In the later attack we will need to compute the correlation on a pointwise basis. We therefore define `pearson_pointwise` as a function that takes an $(n, m)$-matrix `traces` and an $n$-sized vector `intermediates` and computes an $m$-sized vector containing the Pearson correlation coefficient of `intermediates` and `traces[:, i]` for all $i = 1, \dots, m$.

In [None]:
def pearson_pointwise(traces, intermediates):
    """Correlate ``intermediates`` with every sample point of ``traces``.

    ``traces`` is an (n, m) matrix with one trace per row and
    ``intermediates`` is an n-sized vector. The result is an m-sized vector
    whose i-th entry is the Pearson correlation coefficient between
    ``intermediates`` and the column ``traces[:, i]``.
    """
    centered_traces = traces - np.mean(traces, axis=0)
    centered_values = intermediates - np.mean(intermediates)

    # Covariance-like numerator, one value per sample point (column).
    numerator = np.sum(centered_traces * centered_values[:, np.newaxis], axis=0)

    # Per-column norm of the traces and the single norm of the intermediates.
    trace_norms = np.sqrt(np.sum(centered_traces ** 2, axis=0))
    value_norm = np.sqrt(np.sum(centered_values ** 2))
    return numerator / (trace_norms * value_norm)

In [None]:
import numpy as np

def pearson(x, y):
    """Return the sample Pearson correlation coefficient of ``x`` and ``y``.

    Args:
        x: Sequence or array of numbers.
        y: Sequence or array of numbers with the same length as ``x``.

    Returns:
        The correlation coefficient; it is NaN when either input has zero
        variance, because the denominator is then zero.
    """
    x_diff = np.asarray(x) - np.mean(x)
    y_diff = np.asarray(y) - np.mean(y)
    # np.sum reduces in native code; the builtin sum() would iterate over the
    # array element by element in Python. axis=0 keeps the builtin's
    # "reduce along the first axis" behaviour.
    return np.sum(x_diff * y_diff, axis=0) / np.sqrt(
        np.sum(x_diff ** 2, axis=0) * np.sum(y_diff ** 2, axis=0)
    )

## Capture

In [None]:
import random
import tqdm
import tqdm.notebook

# Capture parameters: samples per trace and number of traces to record.
trace_samples = 500
trace_nums = 1000

# Record one power trace for each random 10-byte attempt.
attempts = []
traces = []
for _ in tqdm.notebook.tqdm(range(trace_nums)):
    attempt = bytes(random.randint(0, 255) for _ in range(10))
    trace, _accepted = capture(attempt, samples=trace_samples)
    traces.append(trace)
    attempts.append(attempt)

# Convert to arrays with one row per capture; attempts become rows of
# integer byte values.
attempts = np.array([list(sent) for sent in attempts])
traces = np.array(traces)

## Attack

In [None]:
import itertools

def attack_cpa_bestof(attempts, traces, charlist='abcdefghijklmnopqrstuvwxyz',
                      length=8, keep=4):
    """Recover the password with a correlation attack plus a small brute force.

    For every password position and every candidate character, the Hamming
    weight of ``attempt_byte ^ candidate`` is correlated with the traces at
    each sample point. The candidates with the highest peak correlation are
    kept per position, and all combinations of them are tried on the target.

    Args:
        attempts: Integer array with one row per capture; column ``idx`` holds
            the byte that was sent at password position ``idx``.
        traces: Array with one row per capture holding the matching trace.
        charlist: Candidate characters to test for each position.
        length: Number of password positions to attack.
        keep: Number of best-scoring candidates kept per position.

    Returns:
        The first candidate password accepted by the target, or ``None`` when
        no combination of the kept candidates is accepted.
    """
    bestfits = []
    for idx in range(length):
        column = attempts[:, idx]
        # nanmax ignores sample points whose correlation is undefined (for
        # example a constant trace column); the builtin max() gives
        # order-dependent results once a NaN is present.
        scored = sorted(
            (
                (
                    np.nanmax(np.abs(
                        pearson_pointwise(traces, hw_vec(column ^ ord(guess)))
                    )),
                    guess,
                )
                for guess in charlist
            ),
            reverse=True,
        )
        top = scored[:keep]
        bestfits.append(top)
        print(idx, top)

    candidates = [[guess for _, guess in top] for top in bestfits]
    # itertools.product has no len(), so give tqdm the total explicitly.
    total = 1
    for options in candidates:
        total *= len(options)

    for combination in tqdm.notebook.tqdm(itertools.product(*candidates), total=total):
        password = ''.join(combination)
        if capture(password)[1]:
            return password
    return None

attack_cpa_bestof(attempts, traces)

## Plot correlations

In [None]:
# Map correlation magnitudes in [0, 1] onto the Viridis palette.
colormap = LinearColorMapper(palette='Viridis256', low=0, high=1)

chars = [letter for letter in 'abcdefghijklmnopqrstuvwxyz']

# One heat map per password position: rows are the guessed characters,
# columns are the trace sample points, colour is the absolute correlation.
for idx in range(8):
    pearsons = []
    for guess in chars:
        model = hw_vec(attempts[:, idx] ^ ord(guess))
        pearsons.append(abs(pearson_pointwise(traces, model)))

    # Reshape the (char x point) table into long form for plotting.
    df = pd.DataFrame(pearsons, index=chars).stack().reset_index()
    df.columns = ['char', 'point', 'value']

    p = figure(
        title=f'Correlations for guessing position {idx}',
        tooltips=[("char", "@char"), ("corr", "@value")],
        y_range=chars,
        height=400,
        sizing_mode='stretch_width',
    )
    p.rect(
        x='point',
        y='char',
        width=1,
        height=1,
        source=df,
        fill_color={'field': 'value', 'transform': colormap},
        line_color=None,
    )
    show(p)