## Preparing SV Dataset

In [2]:
%matplotlib inline

import numpy as np
import matplotlib.pylab as plt

np.set_printoptions(formatter={'int':hex})

import os
files = os.listdir('sv_traces/')

### reads an sv_trace file into 2 structures:
### pcs: a list of 1001 16-bit pc values
### mem: a list of 1001 65542-byte memory vectors
def read_trace(sv_file):
    with open(sv_file, 'rb') as f:
        lines = f.read()
        lines = np.frombuffer(lines, dtype=np.uint8)
        lines = lines.reshape(1001,-1)
        pcs = lines[:, 0:2]
        mem = lines[:, 2:]
        pcs = [np.uint16(i[1]<<8 | i[0]) for i in pcs]
        return mem, pcs
    
### reads trace data into a dataset of mem-to-pc mappings, where
### mem_current: full memory trace - final memory instance &
### pcs_next: full pcs trace - first pc instance
def read_dataset(file):
    mem_trace = dict_traces[file][0]
    mem_current = mem_trace[0:len(mem_trace)-1]
    pcs_trace = dict_traces[file][1]
    pcs_next = pcs_trace[1:]
    return mem_current, pcs_next

### dict_traces[file][0]: full memory trace of <file>
### dict_traces[file][1]: full pc trace of <file>
dict_traces = {}
for file in files[0:20]:
    dict_traces[file] = read_trace(os.path.join('sv_traces/', file))
    
### dict_dataset[file][0]: predecessor-memory trace of <file>
### dict_dataset[file][1]: successor-pc trace of <file>
dict_dataset = {}
for trace in dict_traces:
    dict_dataset[trace] = read_dataset(trace)

In [3]:
dict_traces

{'167:149:9.svbinttrc': (array([[0x0, 0x0, 0x0, ..., 0x2, 0x0, 0x0],
         [0x0, 0x0, 0x0, ..., 0x2, 0x0, 0x0],
         [0x0, 0x0, 0x0, ..., 0x2, 0x0, 0x0],
         ...,
         [0x7e, 0x0, 0x0, ..., 0x2, 0x0, 0x0],
         [0x14, 0x0, 0x0, ..., 0x2, 0x0, 0x0],
         [0x14, 0x0, 0x0, ..., 0x2, 0x0, 0x0]], dtype=uint8),
  [512,
   513,
   515,
   517,
   519,
   520,
   521,
   522,
   523,
   524,
   525,
   526,
   527,
   528,
   515,
   517,
   519,
   520,
   521,
   522,
   523,
   524,
   525,
   526,
   527,
   528,
   515,
   517,
   519,
   520,
   521,
   522,
   523,
   524,
   525,
   526,
   527,
   528,
   515,
   517,
   519,
   520,
   521,
   522,
   523,
   524,
   525,
   526,
   527,
   528,
   515,
   517,
   519,
   520,
   521,
   522,
   523,
   524,
   525,
   526,
   527,
   528,
   515,
   517,
   519,
   520,
   521,
   522,
   523,
   524,
   525,
   526,
   527,
   528,
   515,
   517,
   519,
   520,
   521,
   522,
   523,
   524,
   525,
   52

In [4]:
dict_dataset

{'167:149:9.svbinttrc': (array([[0x0, 0x0, 0x0, ..., 0x2, 0x0, 0x0],
         [0x0, 0x0, 0x0, ..., 0x2, 0x0, 0x0],
         [0x0, 0x0, 0x0, ..., 0x2, 0x0, 0x0],
         ...,
         [0x7e, 0x0, 0x0, ..., 0x2, 0x0, 0x0],
         [0x7e, 0x0, 0x0, ..., 0x2, 0x0, 0x0],
         [0x14, 0x0, 0x0, ..., 0x2, 0x0, 0x0]], dtype=uint8),
  [513,
   515,
   517,
   519,
   520,
   521,
   522,
   523,
   524,
   525,
   526,
   527,
   528,
   515,
   517,
   519,
   520,
   521,
   522,
   523,
   524,
   525,
   526,
   527,
   528,
   515,
   517,
   519,
   520,
   521,
   522,
   523,
   524,
   525,
   526,
   527,
   528,
   515,
   517,
   519,
   520,
   521,
   522,
   523,
   524,
   525,
   526,
   527,
   528,
   515,
   517,
   519,
   520,
   521,
   522,
   523,
   524,
   525,
   526,
   527,
   528,
   515,
   517,
   519,
   520,
   521,
   522,
   523,
   524,
   525,
   526,
   527,
   528,
   515,
   517,
   519,
   520,
   521,
   522,
   523,
   524,
   525,
   526,
   52

## Defining Neural Net

In [5]:
import torch
import torch.nn as nn
import torch.optim as optim

In [6]:
def format_label(l, width=16):
    val = bin(l)[2:]
    N = len(val)
    return [0.] * (width-N) + [float(i) for i in val]

In [7]:
features, labels = dict_dataset[list(dict_dataset.keys())[0]]
features = features - features.mean(axis=0)

net = nn.Sequential(
    nn.Linear(features.shape[1], 2**12),
    nn.ReLU(), #rectified linear unit
    nn.Linear(2**12, 2**12),
    nn.ReLU(),
    nn.Linear(2**12, 2**12),
    nn.ReLU(),
    nn.Linear(2**12, 16),
    nn.Sigmoid()
)

criterion = nn.BCELoss()

optimizer = optim.Adam(net.parameters(), lr=1e-2)

def train(n_epochs, net, features, labels, criterion, optimizer, freq=1, debug=False):
    for i in range(n_epochs): #number of passes over full dataset
        #step 1: get features and use net to make predictions
        preds = net(torch.from_numpy(features).float())
        #print(preds)
        #print(preds.shape)
        
        #step 2: compute loss/error
        labels_torch = torch.tensor([format_label(l) for l in labels])
        #print(labels_torch)
        #print(labels[10], labels_torch[10])

        if debug: print('\n------')
        if debug: print(preds)
        loss = criterion(preds, labels_torch)
        if i % freq == 0:
            print('epoch:', i, 'loss:', loss)

        #step 3: backprop to update weights
        # compute gradients/derivatives - backprop
        # use gradients to update weights - gradient descent - w = w - 0.1 * deriv. loss w.r.t. w

        optimizer.zero_grad() #set previous buffers to zero
        loss.backward() #backprop
        optimizer.step() #update weights        
        
    return net

### Training NN

In [8]:
net = train(100, net, features, labels, criterion, optimizer, debug=False)

epoch: 0 loss: tensor(0.6922, grad_fn=<BinaryCrossEntropyBackward0>)
epoch: 1 loss: tensor(11.4625, grad_fn=<BinaryCrossEntropyBackward0>)
epoch: 2 loss: tensor(11.4625, grad_fn=<BinaryCrossEntropyBackward0>)
epoch: 3 loss: tensor(11.4625, grad_fn=<BinaryCrossEntropyBackward0>)
epoch: 4 loss: tensor(11.4625, grad_fn=<BinaryCrossEntropyBackward0>)


KeyboardInterrupt: 