This notebook computes the normalized inter-class variance (NICV) results. It is very similar to the notebook used to compute TVLA.

Below are descriptions of the input parameters:

- input_path: Path to the dataset (.npz file) that will be used for the NICV calculations. Unlike other uses of an input path parameter, this must include the file name itself, i.e., ensure the full path up to and including the file is passed.
- target_byte: The attack byte is designated as target byte.
- start_idx: Starting index of the attack window.
- end_idx: Last index of the attack window.
- n: Number of power traces to use. Ensure the value is equal to the number of power traces in the .npz input file.
- path_to_save_nicv: Path to save the NICV results to.

In [1]:
import os, sys
from math import sqrt

# setting the utilities required for loading the data
sys.path.append('../utilities/')
from loadDataUtility import *

%load_ext autotime

In [2]:
# Parameters of this NICV run; see the parameter descriptions at the top of the notebook.
data_params = {
    "input_path": '../../TripletPower-data/stm32f-unmasked/PC2_CB2_TDS3_K0_U_200k/train_same_key.npz', # path to the input file (file name included)
    "target_byte": 5, # byte on which analysis is to be performed
    "start_idx": 1200, # first index of the attack window; these values depend on the architecture being tested
    "end_idx": 2200, # end of the attack window (used as the stop of a slice)
    "n": 200000, # This value should be equivalent to the number of power traces in the input file
    "path_to_save_nicv": "../results/unmasked-aes-stm32f/normalized-inter-class-variance/PC2_CB2_TDS3_K0_U_200k/" # path to save csv file containing time and nicv value
}

# The directory to save the files is created.
# exist_ok=True makes this a single call that is safe to re-run and does not
# race with another process creating the same directory between a check and
# the creation. A regular file at this path still raises FileExistsError.
model_dir_path = data_params["path_to_save_nicv"]
os.makedirs(model_dir_path, exist_ok=True)

In [3]:
def data_info(data):
    """
    Print a short summary of a loaded dataset.

    `data` is indexed with the keys 'power_trace', 'plain_text' and 'key'.
    The shapes of the first two and the value of the third are printed.
    Nothing is returned.
    """
    # All three fields are fetched first, so a missing key fails before any output.
    traces = data['power_trace']
    texts = data['plain_text']
    secret_key = data['key']

    # (caption, field, whether to show the field's shape instead of the field itself)
    report = (
        ('shape of the power traces: ', traces, True),
        ('shape of the plaintext : ', texts, True),
        ('Ground Truth for the key : ', secret_key, False),
    )
    for caption, field, show_shape in report:
        print(caption, field.shape if show_shape else field)

In [4]:
def load_data_nicv(params):
    """
    Load the dataset named in `params` and prepare the inputs for NICV.

    Parameters
    ----------
    params : dict
        Must provide "input_path" (file passed to np.load), "target_byte",
        "start_idx", "end_idx" and "n".

    Returns
    -------
    (power_traces, labels)
        The outputs of gen_features_and_labels_256_nicv, each truncated to
        at most the first params["n"] entries.

    Raises
    ------
    SystemExit
        With a non-zero status if the input file cannot be opened (OSError).
    """
    print('preparing data ...')
    target_byte = params['target_byte']
    start_idx, end_idx = params["start_idx"], params["end_idx"]
    file_name = params["input_path"]

    try:
        train_data_whole_pack = np.load(file_name)
    except OSError:
        print("could not access {}".format(file_name))
        # Exit with a failure status; a bare sys.exit() reports success.
        sys.exit(1)

    # np.load on an .npz archive returns an NpzFile that keeps the file open.
    # Using it as a context manager closes the handle once the arrays have
    # been read; members are loaded into memory on access, so the arrays
    # extracted inside the block remain valid afterwards.
    with train_data_whole_pack:
        data_info(train_data_whole_pack)

        print('-'*80)
        print('processing data...')
        power_traces, labels = gen_features_and_labels_256_nicv(train_data_whole_pack,
                                                                target_byte,
                                                                start_idx, end_idx)

    # Keep only the first n traces (a slice, so a larger n keeps everything).
    n_traces = params["n"]
    power_traces = power_traces[:n_traces, :]
    labels = labels[:n_traces]

    print('reshaped power traces: ', power_traces.shape)
    print('shape of the labels: ', labels.shape)

    return power_traces, labels

In [5]:
def gen_features_and_labels_256_nicv(data, input_target_byte, start_index, end_index):
    """
    Build the (power traces, labels) pair used for the NICV computation.

    Although similar, this function differs somewhat from the one present in the
    Step 2.1 notebook and from the corresponding function in the TVLA notebook.

    `data` is indexed with 'power_trace', 'plain_text' and 'key'. One label per
    plaintext row is produced by calling aes_internal on that row's target byte
    and the target byte of the key (aes_internal comes from the utilities
    import; presumably the AES intermediate value -- confirm there). The power
    traces are cut down to the columns start_index:end_index.

    Returns the windowed power traces and the labels as a numpy array.
    """
    traces = data['power_trace']
    plaintexts = data['plain_text']
    full_key = data['key']

    # The same key byte is used for every trace.
    key_byte_value = full_key[input_target_byte]
    print('generating features and labels for the key byte value: ', key_byte_value)

    labels = np.array([
        aes_internal(plaintexts[row][input_target_byte], key_byte_value)
        for row in range(plaintexts.shape[0])
    ])

    # Only convert when the traces are not already a numpy array.
    traces = traces if isinstance(traces, np.ndarray) else np.array(traces)

    return traces[:, start_index:end_index], labels

In [6]:
def calculate_nicv_values(labels_arr, Y_var):
    '''
    This function computes the NICV value for one time sample.

    Parameters
    ----------
    labels_arr : 2D array, one row per label. Within a row, non-zero entries are
        the power-trace values observed for that label; zeros mark "no value"
        (so a genuine sample equal to 0 is indistinguishable from an empty slot).
    Y_var : variance of all power-trace values at this time sample. It is used
        as the divisor without a zero check.

    Returns
    -------
    The sample variance (ddof=1) of the per-label means divided by Y_var.
    Labels without any non-zero entry are left out: giving them a made-up mean
    of 0 would distort the variance of the class means whenever a label is
    absent (e.g. in small toy examples). NaN is returned when fewer than two
    labels have data, since the sample variance is undefined in that case.
    '''
    class_means = []
    for row in np.asarray(labels_arr): # Each row (power traces with a specific label) is iterated through.
        observed = row[row != 0] # The non-zero elements of the current row.
        if observed.size: # Only labels that actually occurred contribute a mean.
            class_means.append(np.average(observed))

    if len(class_means) < 2:
        return np.float64(np.nan)

    Z_var = np.var(class_means, ddof=1) # The variance of the class means is calculated.
    return Z_var / Y_var # NICV is returned

In [7]:
def save_NICV(power_traces, NICV_vals, str_target_byte, path_to_save_nicv):
    '''
    Write the NICV results to "<path_to_save_nicv>/target-byte-<str_target_byte>.csv".

    The file has the columns "time" and "nicv-value". Time starts at 1 and runs
    up to the number of columns of power_traces; each time is paired with the
    NICV value at the same position in NICV_vals.
    '''
    # Only the target byte goes into the file name; the parent directories are
    # expected to carry the rest of the context.
    csv_path = os.path.join(path_to_save_nicv, "target-byte-" + str_target_byte + '.csv')

    # (time, NICV value) pairs, with time counted from 1.
    n_samples = np.shape(power_traces)[1]
    rows = zip(range(1, n_samples + 1), NICV_vals)

    table = pd.DataFrame(rows)
    table.to_csv(csv_path, index=False, header=["time", "nicv-value"])
    print("Normalized Inter-Class Variance results sucessfully saved to csv file: {}".format(csv_path))

The notes below explain a few details of the following code block that are too verbose for a code comment:

labels_arr:
- An array of size 256 x n is instantiated with zeros, where n is equal to the number of power traces. Each row corresponds to a specific label and each column corresponds to a specific power trace (e.g., labels_arr[x][y] holds the value of the y-th power trace if that trace's label is x, and remains 0 otherwise). Theoretically, it's possible for every power trace to have the same label, which is why the number of columns is equal to the number of power traces.
- This array is updated by inserting elements into positions that overwrite the initial zeros.
- This array is reinitialized for each column (time sample), as its values are specific to the current column being examined.

inside the "for j..." loop:
- We already know the label of each power trace (row) from labels[j]. So, we can simply insert the current power trace's value into labels_arr at index [labels[j]][j], where labels[j] is the label of the j-th power trace (it selects the row of labels_arr) and j is the index of that power trace (it selects the column).
- E.g. the FIRST power trace of the current column has a value of 9 and a label of 5. At index [5,0] of labels_arr, 9 will be inserted (5 is the label, 0 is the index of the current power trace, 9 is the value).
- The second power trace has a value of 7 and a label of 5. At index [5,1] of labels_arr, 7 will be inserted.

In [8]:
def compute_normalized_inter_class_variance(power_traces, labels, debug=False,
                                            num_classes=256, target_byte=None,
                                            path_to_save_nicv=None):
    '''
    This function computes the normalized inter-class variance (NICV) of every
    time sample (column) of power_traces and, unless debug is set, saves the
    results to a csv file.

    Parameters
    ----------
    power_traces : 2D array, one row per power trace and one column per time sample.
    labels : integer label of each power trace; labels[j] belongs to power_traces[j].
        Every label must be a valid row index into an array with num_classes rows.
    debug : if True, the NICV of every column is printed and nothing is saved.
    num_classes : number of rows of the per-label array (default 256). For a toy
        example this can be set to the number of distinct labels.
    target_byte, path_to_save_nicv : used for the name and location of the csv
        file. When left as None they are read from the notebook-level
        data_params dictionary, which keeps existing calls working.

    Returns
    -------
    None. The results are printed (debug) or written through save_NICV.
    '''
    power_traces = np.asarray(power_traces)
    label_idx = np.asarray(labels)
    n_traces, n_samples = power_traces.shape
    trace_idx = np.arange(n_traces)

    NICV_vals = []
    for i in range(n_samples): # Each column (time) of the power_traces array is analyzed.
        curr_power_traces_col = power_traces[:, i]
        var_curr_power_traces_col = np.var(curr_power_traces_col, ddof=1) # The variance of the current column is calculated for NICV.

        # Row = label, column = trace index. This single indexed assignment is
        # equivalent to looping j over all traces and executing
        # labels_arr[labels[j]][j] = curr_power_traces_col[j], without a
        # Python-level iteration per trace.
        labels_arr = np.zeros((num_classes, n_traces))
        labels_arr[label_idx, trace_idx] = curr_power_traces_col

        NICV = calculate_nicv_values(labels_arr, var_curr_power_traces_col)
        NICV_vals.append(NICV)

        if debug: # If debug is enabled, additional information will be printed to the screen.
            print("Round {}".format(i+1))
            print("\tThe nicv result is: {}".format(NICV))

    if not debug:
        # Fall back to the notebook configuration only for values the caller did not supply.
        if target_byte is None:
            target_byte = data_params["target_byte"]
        if path_to_save_nicv is None:
            path_to_save_nicv = data_params["path_to_save_nicv"]
        print("Saving normalized inter-class variance results to csv file...")
        save_NICV(power_traces, NICV_vals, str(target_byte), path_to_save_nicv)

In [9]:
debug = False # Set to True to run the small toy example below instead of the real dataset.

if not debug:
    power_traces, labels = load_data_nicv(data_params)
else:
    # The below code represents the toy example provided in the original document.
    # This block was created for testing purposes.
    # Six traces of four time samples each; only three label values occur.
    key_byte_values = [0x00, 0x01, 0x02]
    key_byte_value = key_byte_values[0]
    power_traces = np.array([
        [2, 3, 4, 5],
        [6, 4, 6, 8],
        [1, 3, 4, 5],
        [5, 3, 4, 5],
        [3, 3, 5, 6],
        [3, 2, 2, 3],
    ])
    # The labels cycle through the three values twice: 0, 1, 2, 0, 1, 2.
    labels = np.array(key_byte_values * 2)
compute_normalized_inter_class_variance(power_traces, labels, debug)

preparing data ...
shape of the power traces:  (200000, 5000)
shape of the plaintext :  (200000, 16)
Ground Truth for the key :  [ 43 126  21  22  40 174 210 166 171 247  21 136   9 207  79  60]
--------------------------------------------------------------------------------
processing data...
generating features and labels for the key byte value:  174
reshaped power traces:  (200000, 1000)
shape of the labels:  (200000,)
Saving test vector leakage assessment results to csv file...
Normalized Inter-Class Variance results sucessfully saved to csv file: ../results/unmasked-aes-stm32f/normalized-inter-class-variance/PC2_CB2_TDS3_K0_U_200k/target-byte-5.csv
