This notebook computes the test vector leakage assessment (TVLA) results.

Below are descriptions of the input parameters:

- input_path: Path to the dataset file used for the TVLA calculations. Unlike other uses of an input path, this must include the file name itself; i.e., pass the full path up to and including the `.npz` file.
- target_byte: Index of the byte on which the analysis is performed (the attack byte).
- start_idx: Starting index of the attack window.
- end_idx: End index of the attack window (exclusive: samples `start_idx` through `end_idx - 1` are used).
- n: Number of power traces to use. Ensure the value is equal to the number of power traces in the `.npz` input file.
- path_to_save_tvla: Path to save the tvla results to.

In [1]:
import os, sys
from math import sqrt

# setting the utilities required for loading the data
sys.path.append('../utilities/')
from loadDataUtility import *

%load_ext autotime

In [2]:
# Parameters controlling which dataset is analysed and where the results are written.
data_params = {
    # Path to the input file (must include the file name itself).
    "input_path": '../../TripletPower-data/xmega-unmasked/PC1_CB1_TDX1_K1_U_20k/PC1_CB1_TDX1_K1_U_20k_0317.npz',
    # Byte on which analysis is to be performed.
    "target_byte": 0,
    # Attack window. These values depend on the architecture being tested.
    "start_idx": 1800,
    "end_idx": 2800,
    # This value should be equivalent to the number of power traces in the input file.
    "n": 20000,
    # Path to save the csv file containing time and t value.
    "path_to_save_tvla": "../results/unmasked-aes-xmega/test-vector-leakage-assessment/PC1_CB1_TDX1_K1_U_20k/",
}

# The directory to save the files is created (including missing parents).
# exist_ok=True keeps the cell re-runnable and avoids the race between a separate
# isdir() check and the directory creation.
model_dir_path = data_params["path_to_save_tvla"]
os.makedirs(model_dir_path, exist_ok=True)

In [3]:
def data_info(data):
    """
    Print a short summary of a loaded dataset.

    `data` is indexed with the keys 'power_trace', 'plain_text' and 'key'.
    The shapes of the first two entries and the value of the third are printed.
    """
    # Pull every entry out first so a missing key fails before anything is printed.
    traces = data['power_trace']
    plaintexts = data['plain_text']
    ground_truth_key = data['key']

    print('shape of the power traces: ', traces.shape)
    print('shape of the plaintext : ', plaintexts.shape)
    print('Ground Truth for the key : ', ground_truth_key)

In [4]:
def load_data_tvla(params):
    """
    Load the dataset and prepare the traces and labels used for the tvla computation.

    Parameters
    ----------
    params : dict
        Configuration; the keys "input_path", "target_byte", "start_idx",
        "end_idx" and "n" are read.

    Returns
    -------
    tuple
        (power_traces, labels, key_byte_value) as produced by
        gen_features_and_labels_256_tvla, with power_traces and labels
        truncated to their first params["n"] entries.

    Notes
    -----
    If the input file cannot be opened (OSError), a message is printed and
    sys.exit() is called.
    Assumes np.load returns an archive object that supports the context-manager
    protocol (as it does for .npz files), so that the file is closed once the
    arrays have been extracted.
    """
    print('preparing data ...')
    target_byte = params['target_byte']
    start_idx, end_idx = params["start_idx"], params["end_idx"]
    file_name = params["input_path"]

    try:
        train_data_whole_pack = np.load(file_name)
    except OSError:
        print("could not access {}".format(file_name))
        sys.exit()

    # The archive keeps the underlying file open until it is closed explicitly;
    # the with-block releases it as soon as the arrays have been read.
    with train_data_whole_pack:
        data_info(train_data_whole_pack)

        print('-'*80)
        print('processing data...')
        power_traces, labels, key_byte_value = gen_features_and_labels_256_tvla(
            train_data_whole_pack, target_byte, start_idx, end_idx)

    # Only the first n traces (and their labels) are kept.
    power_traces = power_traces[:params["n"], :]
    labels = labels[:params["n"]]

    print('reshaped power traces: ', power_traces.shape)
    print('shape of the labels: ', labels.shape)

    return power_traces, labels, key_byte_value

In [5]:
def gen_features_and_labels_256_tvla(data, input_target_byte, start_index, end_index):
    """
    Build the windowed power traces and the per-trace labels for the dataset.

    For every plaintext row, the label is the result of aes_internal applied to
    the plaintext byte at `input_target_byte` and the key byte at the same index.
    The power traces are cut to the columns start_index:end_index.

    Per the original author's note, this function is similar to, but differs
    somewhat from, the ones in the Step 2.1 notebook and in the NICV notebook.

    Returns (power_traces, labels, key_byte_value).
    """
    traces = data['power_trace']
    plaintexts = data['plain_text']
    key = data['key']

    # The key byte at the target position is used both to derive the labels
    # and, later on, to split the power traces.
    key_byte_value = key[input_target_byte]

    print('generating features and labels for the key byte value: ', key_byte_value)

    labels = np.array([
        aes_internal(int(plaintexts[row][input_target_byte]), key_byte_value)
        for row in range(plaintexts.shape[0])
    ])

    # Make sure column slicing is possible even if the traces are not an ndarray yet.
    if not isinstance(traces, np.ndarray):
        traces = np.array(traces)

    return traces[:, start_index:end_index], labels, key_byte_value

In [6]:
def calculate_tvla_values(arr):
    '''
    Return the three statistics of one set needed for the test vector leakage
    assessment: (mean, sample variance, number of elements).
    '''
    mean = np.mean(arr)
    sample_variance = np.var(arr, ddof=1)  # ddof=1: divide by (n - 1)
    size = len(arr)
    return mean, sample_variance, size

In [7]:
def save_test_vector_leakage_assessment(power_traces, t_vals, str_target_byte, str_key_byte, path_to_save_tvla, start_idx=None):
    '''
    This function saves the tvla results into a csv file.

    Parameters:
        power_traces: not used by this function; kept so existing calls keep working.
        t_vals: iterable of t-values, one per time sample.
        str_target_byte: target byte as a string (used in the file name).
        str_key_byte: key byte value as a string (used in the file name).
        path_to_save_tvla: directory in which the csv file is written.
        start_idx: index of the first time sample of the attack window. When
            omitted, data_params["start_idx"] from the notebook configuration is used.

    The csv file has the columns "time" and "t-value"; the time of the first
    row is start_idx + 1 and increases by one per row.
    '''
    if start_idx is None:
        # Backward-compatible default: fall back to the notebook-level configuration.
        start_idx = data_params["start_idx"]

    # The file name is of the format: "target-byte-x-byte-value-y"
    # The thought is that the parent directories will provide the necessary information as to what this file name represents
    f_name = "target-byte-" + str_target_byte + "-byte-value-" + str_key_byte
    tvla_file_path = os.path.join(path_to_save_tvla, f_name + '.csv')

    # Each row pairs the time (incremented by 1) with the corresponding t-value.
    # The time axis is derived from the number of t-values, so an empty result
    # still produces a valid (header-only) csv file.
    t_vals = list(t_vals)
    first_time = start_idx + 1
    tvla_df = pd.DataFrame({
        "time": list(range(first_time, first_time + len(t_vals))),
        "t-value": t_vals,
    })
    tvla_df.to_csv(tvla_file_path, index=False)
    print("Test vector leakage assessment results sucessfully saved to csv file: {}".format(tvla_file_path))

In [8]:
def print_testing_test_vector_leakage_assessment_results(mean, variance, size, list_name, name):
    '''
    Print the name, elements, mean, variance and size of one set used in the
    tvla calculation (debugging aid).
    '''
    report_lines = (
        ("\tFor set: {}", name),
        ("\t\tSet Elements: {}", list_name),
        ("\t\tThe mean is: {}", mean),
        ("\t\tThe variance is: {}", variance),
        ("\t\tThe size is: {}", size),
    )
    for template, value in report_lines:
        print(template.format(value))

In [9]:
def compute_test_vector_leakage_assessment(power_traces, labels, key_byte_value, debug=False):
    '''
    This function computes the test vector leakage assessments.

    For every column (time sample) of power_traces, the rows are split into two
    sets: Q0 holds the rows whose label equals key_byte_value, Q1 holds all
    other rows. The t value of the column is

        t = (mean(Q0) - mean(Q1)) / sqrt(var(Q0)/|Q0| + var(Q1)/|Q1|)

    with the mean, variance and size taken from calculate_tvla_values.

    Parameters:
        power_traces: 2-D array, one row per trace and one column per time sample.
        labels: one label per trace; a flat array or a column of shape (rows, 1).
        key_byte_value: label value that selects the members of Q0.
        debug: if True, the per-column statistics are printed and nothing is saved;
               if False, the t values are saved through
               save_test_vector_leakage_assessment using the notebook-level data_params.

    Returns nothing.
    '''
    num_traces, num_samples = np.shape(power_traces)[0], np.shape(power_traces)[1]

    # Set membership depends only on the labels, not on the time sample, so the
    # split is computed once here instead of once per column. ravel() lets a
    # column-shaped (rows, 1) label array be used as a flat mask; extra labels
    # beyond the number of traces are ignored.
    in_q0 = np.ravel(np.asarray(labels) == key_byte_value)[:num_traces]
    in_q1 = ~in_q0

    t_vals = []
    for j in range(num_samples): # Each column (time sample) of the power_traces array is analyzed.
        curr_power_traces_col = power_traces[:, j]
        Q0 = curr_power_traces_col[in_q0] # Samples whose label equals key_byte_value.
        Q1 = curr_power_traces_col[in_q1] # All remaining samples.
        u0, v0, n0 = calculate_tvla_values(Q0)
        u1, v1, n1 = calculate_tvla_values(Q1)
        t = (u0 - u1)/(sqrt((v0/n0)+(v1/n1))) # The t value is calculated and appended to a list of t_vals.
        t_vals.append(t) # This list contains t_vals for every time sample.
        if debug: # If debug is enabled, additional information will be printed to the screen.
            print("Round {}".format(j+1))
            # list(...) keeps the printed form of the set elements a Python list.
            print_testing_test_vector_leakage_assessment_results(u0, v0, n0, list(Q0), "Q0")
            print_testing_test_vector_leakage_assessment_results(u1, v1, n1, list(Q1), "Q1")
            print("\tThe test vector leakage result is: {}".format(t))
    if not(debug): # If debug is disabled, the results will be saved to a file
        print("Saving test vector leakage assessment results to csv file...")
        save_test_vector_leakage_assessment(power_traces, t_vals,
                                            str(data_params["target_byte"]), str(key_byte_value), data_params["path_to_save_tvla"])

In [10]:
debug = False # Var allows debugging of "toy" examples if necessary.

if not debug:
    # Regular run: the traces, labels and key byte come from the configured dataset.
    power_traces, labels, key_byte_value = load_data_tvla(data_params)
else:
    # The below code represents the toy example provided in the original document.
    # This block was created for testing purposes.

    # For debugging purposes, one of the elements of the below list must be passed
    # as the key_byte_value.
    key_byte_values = [0x00, 0x01, 0x02]
    key_byte_value = key_byte_values[0]
    # Six toy traces with four time samples each.
    power_traces = np.array([[2, 3, 4, 5],
                             [6, 4, 6, 8],
                             [1, 3, 4, 5],
                             [5, 3, 4, 5],
                             [3, 3, 5, 6],
                             [3, 2, 2, 3]])
    # One label per toy trace, stored as a column.
    labels = np.array([[0x00], [0x01], [0x02], [0x00], [0x01], [0x02]])

compute_test_vector_leakage_assessment(power_traces, labels, key_byte_value, debug)

preparing data ...
shape of the power traces:  (20000, 5000)
shape of the plaintext :  (20000, 16)
Ground Truth for the key :  [149  25 127 102 176 109 110  33 103 138 250 143 135 169 100 229]
--------------------------------------------------------------------------------
processing data...
generating features and labels for the key byte value:  149
reshaped power traces:  (20000, 1000)
shape of the labels:  (20000,)
Saving test vector leakage assessment results to csv file...
Test vector leakage assessment results sucessfully saved to csv file: ../results/unmasked-aes-xmega/test-vector-leakage-assessment/PC1_CB1_TDX1_K1_U_20k/target-byte-0-byte-value-149.csv
