## Preparing SV Dataset

In [1]:
%matplotlib inline

import numpy as np
import matplotlib.pylab as plt

# Print integer array elements in hexadecimal (matches the byte/pc dumps below).
np.set_printoptions(formatter={'int':hex})

import os
# os.listdir returns entries in arbitrary order; sort them so that the
# files[0:20] subset used below is the same on every run and machine.
files = sorted(os.listdir('sv_traces/'))

### reads an sv_trace file into 2 structures, returned as (mem, pcs):
### mem: a (n_steps, row_bytes) uint8 array, one full state-vector row per step
### pcs: a list of n_steps 16-bit pc values
def read_trace(sv_file, n_steps=1001):
    """Load a binary state-vector trace.

    The file is interpreted as ``n_steps`` equally sized rows of bytes. The
    first two bytes of every row hold the program counter, low byte first.

    Parameters
    ----------
    sv_file : str
        Path of the trace file to read.
    n_steps : int, optional
        Number of rows stored in the file (default 1001).

    Returns
    -------
    mem : numpy.ndarray
        uint8 array of shape (n_steps, row_bytes). Rows are the complete file
        rows, so the two pc bytes are still present in columns 0 and 1.
    pcs : list of numpy.uint16
        The pc value decoded from each row.

    Raises
    ------
    ValueError
        If the file size is not a multiple of ``n_steps`` or rows are shorter
        than the two pc bytes.
    """
    with open(sv_file, 'rb') as f:
        raw = np.frombuffer(f.read(), dtype=np.uint8)

    if n_steps <= 0 or raw.size == 0 or raw.size % n_steps != 0:
        raise ValueError(
            'trace of %d bytes cannot be split into %d equal rows' % (raw.size, n_steps))
    mem = raw.reshape(n_steps, -1)
    if mem.shape[1] < 2:
        raise ValueError('each trace row needs at least 2 bytes for the pc')

    # Widen to uint16 *before* shifting: shifting a uint8 by 8 stays uint8
    # under NumPy 2 promotion rules and would silently drop the high byte.
    pc_low = mem[:, 0].astype(np.uint16)
    pc_high = mem[:, 1].astype(np.uint16)
    pcs = list((pc_high << 8) | pc_low)
    return mem, pcs
    
### builds the mem-to-pc mapping of one loaded trace, pairing every memory
### state with the pc that follows it:
### mem_current: the memory trace without its final row
### pcs_next: the pc trace without its first entry
def read_dataset(file):
    entry = dict_traces[file]
    memory_rows = entry[0]
    pc_values = entry[1]
    # Dropping the last memory row and the first pc aligns row k with pc k+1.
    return memory_rows[:-1], pc_values[1:]

### load the first 20 listed trace files
### dict_traces[file] is the (memory trace, pc trace) pair returned by read_trace
dict_traces = {}
for file in files[:20]:
    trace_path = os.path.join('sv_traces/', file)
    dict_traces[file] = read_trace(trace_path)

### dict_dataset[file] is the (predecessor-memory trace, successor-pc trace)
### pair returned by read_dataset
dict_dataset = {}
for trace in dict_traces.keys():
    dict_dataset[trace] = read_dataset(trace)

In [2]:
# Display the loaded traces: {file: (memory array, list of pcs)}.
dict_traces

{'167:149:9.svbinttrc': (array([[0x0, 0x2, 0x0, ..., 0x2, 0x0, 0x0],
         [0x1, 0x2, 0x0, ..., 0x2, 0x0, 0x0],
         [0x3, 0x2, 0x0, ..., 0x2, 0x0, 0x0],
         ...,
         [0x3, 0x2, 0x7e, ..., 0x2, 0x0, 0x0],
         [0x5, 0x2, 0x14, ..., 0x2, 0x0, 0x0],
         [0x7, 0x2, 0x14, ..., 0x2, 0x0, 0x0]], dtype=uint8),
  [512,
   513,
   515,
   517,
   519,
   520,
   521,
   522,
   523,
   524,
   525,
   526,
   527,
   528,
   515,
   517,
   519,
   520,
   521,
   522,
   523,
   524,
   525,
   526,
   527,
   528,
   515,
   517,
   519,
   520,
   521,
   522,
   523,
   524,
   525,
   526,
   527,
   528,
   515,
   517,
   519,
   520,
   521,
   522,
   523,
   524,
   525,
   526,
   527,
   528,
   515,
   517,
   519,
   520,
   521,
   522,
   523,
   524,
   525,
   526,
   527,
   528,
   515,
   517,
   519,
   520,
   521,
   522,
   523,
   524,
   525,
   526,
   527,
   528,
   515,
   517,
   519,
   520,
   521,
   522,
   523,
   524,
   525,
   52

In [3]:
# Display the derived dataset: {file: (memory rows minus the last, pcs minus the first)}.
dict_dataset

{'167:149:9.svbinttrc': (array([[0x0, 0x2, 0x0, ..., 0x2, 0x0, 0x0],
         [0x1, 0x2, 0x0, ..., 0x2, 0x0, 0x0],
         [0x3, 0x2, 0x0, ..., 0x2, 0x0, 0x0],
         ...,
         [0x10, 0x2, 0x7e, ..., 0x2, 0x0, 0x0],
         [0x3, 0x2, 0x7e, ..., 0x2, 0x0, 0x0],
         [0x5, 0x2, 0x14, ..., 0x2, 0x0, 0x0]], dtype=uint8),
  [513,
   515,
   517,
   519,
   520,
   521,
   522,
   523,
   524,
   525,
   526,
   527,
   528,
   515,
   517,
   519,
   520,
   521,
   522,
   523,
   524,
   525,
   526,
   527,
   528,
   515,
   517,
   519,
   520,
   521,
   522,
   523,
   524,
   525,
   526,
   527,
   528,
   515,
   517,
   519,
   520,
   521,
   522,
   523,
   524,
   525,
   526,
   527,
   528,
   515,
   517,
   519,
   520,
   521,
   522,
   523,
   524,
   525,
   526,
   527,
   528,
   515,
   517,
   519,
   520,
   521,
   522,
   523,
   524,
   525,
   526,
   527,
   528,
   515,
   517,
   519,
   520,
   521,
   522,
   523,
   524,
   525,
   526,
   5

## Defining Neural Net

In [4]:
import torch
import torch.nn as nn
import torch.optim as optim

In [5]:
def format_label(l, width=16):
    """Encode a non-negative integer as a fixed-width list of bit floats.

    Parameters
    ----------
    l : int
        Value to encode (any integer type accepted by ``bin``).
    width : int, optional
        Number of bits in the result (default 16).

    Returns
    -------
    list of float
        Exactly ``width`` entries of 0.0 / 1.0, most significant bit first.

    Raises
    ------
    ValueError
        If ``l`` is negative or does not fit in ``width`` bits. Without this
        check an oversized value would yield a list longer than ``width``.
    """
    if l < 0:
        raise ValueError('label must be non-negative, got %r' % (l,))
    bits = bin(l)[2:]  # strip the '0b' prefix
    if len(bits) > width:
        raise ValueError('label %r does not fit in %d bits' % (l, width))
    return [float(bit) for bit in bits.zfill(width)]

In [6]:
# Stack the (memory, next-pc) pairs of every loaded trace into one dataset.
# One concatenate copies each trace once; np.append in a loop re-copies the
# growing array for every file. It also makes `labels` an array even when
# only one trace is loaded.
features = np.concatenate([mem for mem, _ in dict_dataset.values()], axis=0)
labels = np.concatenate([pcs for _, pcs in dict_dataset.values()], axis=0)

In [7]:
# Row counts of the stacked dataset: num_files * 1000 rows per file.
print(len(features))
print(len(labels))  # should equal the feature row count

20000
20000


In [8]:
print(len(dict_dataset))  # number of loaded trace files
print(1000 * len(dict_dataset))  # expected total number of dataset points

20
20000


In [59]:
# Train on the first trace only; this rebinds the stacked features/labels
# built earlier. Mean-centering (features - features.mean(axis=0)) is disabled.
first_file = list(dict_dataset)[0]
features, labels = dict_dataset[first_file]
# temp: training subset aliases; slice rows here (e.g. [9:15]) when debugging
features_temp, labels_temp = features, labels
# end temp
# end temp

# Fully connected network: one input per feature column, three ReLU hidden
# layers (4096, 1024, 1024 units) and 16 sigmoid outputs, one per pc bit.
hidden_sizes = [2**12, 2**10, 2**10]
layers = []
in_size = features.shape[1]
for out_size in hidden_sizes:
    layers.append(nn.Linear(in_size, out_size))
    layers.append(nn.ReLU())  # rectified linear unit
    in_size = out_size
layers.append(nn.Linear(in_size, 16))
layers.append(nn.Sigmoid())
net = nn.Sequential(*layers)

# Binary cross-entropy, applied independently to each of the 16 output bits.
criterion = nn.BCELoss()

optimizer = optim.Adam(net.parameters(), lr=1e-4)

def train(n_epochs, net, features, labels, criterion, optimizer, freq=1, debug=False):
    """Run full-batch gradient training and return the trained network.

    Every epoch is a single optimizer step computed on the whole dataset.

    Parameters
    ----------
    n_epochs : int
        Number of passes (optimizer steps) over the full dataset.
    net : torch.nn.Module
        Model to train; updated in place.
    features : numpy.ndarray
        Input rows, converted with ``torch.from_numpy(...).float()``.
    labels : iterable of int
        Target values; each one is expanded to a bit vector by ``format_label``.
    criterion : callable
        Loss function called as ``criterion(preds, targets)``.
    optimizer : torch.optim.Optimizer
        Optimizer bound to ``net``'s parameters.
    freq : int, optional
        Print the loss every ``freq`` epochs (default 1).
    debug : bool, optional
        If True, also print the raw predictions each epoch.

    Returns
    -------
    torch.nn.Module
        The same ``net`` object that was passed in.
    """
    # Inputs and targets do not change between epochs, so build the tensors
    # once instead of re-converting the whole dataset on every iteration.
    inputs = torch.from_numpy(features).float()
    targets = torch.tensor([format_label(l) for l in labels])

    for i in range(n_epochs):  # number of passes over full dataset
        # step 1: forward pass
        preds = net(inputs)

        if debug:
            print('\n------')
            print(preds)

        # step 2: compute loss/error
        loss = criterion(preds, targets)
        if i % freq == 0:
            print('epoch:', i, 'loss:', loss)

        # step 3: backprop and gradient step
        optimizer.zero_grad()  # clear gradients left from the previous step
        loss.backward()        # backprop
        optimizer.step()       # update weights

    return net

In [60]:
# Show the training labels (next-pc values) currently selected.
labels_temp

[513,
 515,
 517,
 519,
 520,
 521,
 522,
 523,
 524,
 525,
 526,
 527,
 528,
 515,
 517,
 519,
 520,
 521,
 522,
 523,
 524,
 525,
 526,
 527,
 528,
 515,
 517,
 519,
 520,
 521,
 522,
 523,
 524,
 525,
 526,
 527,
 528,
 515,
 517,
 519,
 520,
 521,
 522,
 523,
 524,
 525,
 526,
 527,
 528,
 515,
 517,
 519,
 520,
 521,
 522,
 523,
 524,
 525,
 526,
 527,
 528,
 515,
 517,
 519,
 520,
 521,
 522,
 523,
 524,
 525,
 526,
 527,
 528,
 515,
 517,
 519,
 520,
 521,
 522,
 523,
 524,
 525,
 526,
 527,
 528,
 515,
 517,
 519,
 520,
 521,
 522,
 523,
 524,
 525,
 526,
 527,
 528,
 515,
 517,
 519,
 520,
 521,
 522,
 523,
 524,
 525,
 526,
 527,
 528,
 515,
 517,
 519,
 520,
 521,
 522,
 523,
 524,
 525,
 526,
 527,
 528,
 515,
 517,
 519,
 520,
 521,
 522,
 523,
 524,
 525,
 526,
 527,
 528,
 515,
 517,
 519,
 520,
 521,
 522,
 523,
 524,
 525,
 526,
 527,
 528,
 515,
 517,
 519,
 520,
 521,
 522,
 523,
 524,
 525,
 526,
 527,
 528,
 515,
 517,
 519,
 520,
 521,
 522,
 523,
 524,
 525,
 526

### Training NN

In [61]:
# Swap in a fresh Adam optimizer with a larger step size (1e-3), then train
# for 30 full-batch epochs on the selected subset.
optimizer = optim.Adam(net.parameters(), lr=1e-3)
net = train(
    n_epochs=30,
    net=net,
    features=features_temp,
    labels=labels_temp,
    criterion=criterion,
    optimizer=optimizer,
    debug=False,
)

epoch: 0 loss: tensor(0.6610, grad_fn=<BinaryCrossEntropyBackward0>)
epoch: 1 loss: tensor(0.3051, grad_fn=<BinaryCrossEntropyBackward0>)
epoch: 2 loss: tensor(1.3845, grad_fn=<BinaryCrossEntropyBackward0>)
epoch: 3 loss: tensor(0.5389, grad_fn=<BinaryCrossEntropyBackward0>)
epoch: 4 loss: tensor(0.2762, grad_fn=<BinaryCrossEntropyBackward0>)
epoch: 5 loss: tensor(0.2896, grad_fn=<BinaryCrossEntropyBackward0>)
epoch: 6 loss: tensor(0.2592, grad_fn=<BinaryCrossEntropyBackward0>)
epoch: 7 loss: tensor(0.2656, grad_fn=<BinaryCrossEntropyBackward0>)
epoch: 8 loss: tensor(0.2362, grad_fn=<BinaryCrossEntropyBackward0>)
epoch: 9 loss: tensor(0.2004, grad_fn=<BinaryCrossEntropyBackward0>)
epoch: 10 loss: tensor(0.1929, grad_fn=<BinaryCrossEntropyBackward0>)
epoch: 11 loss: tensor(0.1989, grad_fn=<BinaryCrossEntropyBackward0>)
epoch: 12 loss: tensor(0.1934, grad_fn=<BinaryCrossEntropyBackward0>)
epoch: 13 loss: tensor(0.1955, grad_fn=<BinaryCrossEntropyBackward0>)
epoch: 14 loss: tensor(0.1907,

In [12]:
# L1 distance between rows 5 and 0. Cast before subtracting: the rows are
# uint8, so a raw difference wraps around modulo 256 and np.abs cannot undo it.
np.abs(features[5, :].astype(np.float64) - features[0, :].astype(np.float64)).sum()

434.0

In [13]:
# L1 distance between the first two training rows. Cast before subtracting:
# the rows are uint8, so a raw difference wraps around modulo 256.
np.abs(features_temp[0, :].astype(np.float64) - features_temp[1, :].astype(np.float64)).sum()

4.0

In [14]:
# Network output for training row 0: 16 sigmoid activations, one per pc bit.
net(torch.from_numpy(features_temp[0]).float())

tensor([1.0883e-06, 9.8613e-07, 9.3474e-07, 9.9733e-07, 9.8962e-07, 1.0277e-06,
        1.0000e+00, 1.0398e-06, 1.0926e-06, 9.3731e-07, 1.1336e-06, 8.9215e-07,
        9.6516e-07, 4.9837e-01, 4.1206e-01, 1.0000e+00],
       grad_fn=<SigmoidBackward0>)

In [15]:
# Network output for training row 1, for comparison with row 0 above.
net(torch.from_numpy(features_temp[1]).float())

tensor([1.1826e-06, 1.0709e-06, 1.0165e-06, 1.0828e-06, 1.0776e-06, 1.1155e-06,
        1.0000e+00, 1.1316e-06, 1.1868e-06, 1.0203e-06, 1.2330e-06, 9.7203e-07,
        1.0505e-06, 5.0247e-01, 4.0923e-01, 1.0000e+00],
       grad_fn=<SigmoidBackward0>)

In [16]:
# Draw a random 0/1 value per output bit using the predicted probabilities.
# The result is stochastic for bits whose probability is not close to 0 or 1.
# NOTE(review): indexes `features`, not `features_temp` like the cells above.
torch.bernoulli(net(torch.from_numpy(features[0]).float()))

tensor([0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 1., 0., 1.],
       grad_fn=<BernoulliBackward0>)

In [17]:
# 16-element 0/1 target encoding of the first label, most significant bit first.
format_label(labels_temp[0])

[0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 1.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 1.0,
 1.0]

In [18]:
# 16-element 0/1 target encoding of the second label, most significant bit first.
format_label(labels_temp[1])

[0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 1.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 1.0,
 0.0,
 1.0]

In [55]:
# NOTE(review): the recorded output has only 6 labels, so it presumably comes
# from a run with the [9:15] debugging slice enabled — confirm before relying on it.
labels_temp

[525, 526, 527, 528, 515, 517]

In [57]:
# First two bytes of training row 1: the pc bytes (low byte, then high byte).
features_temp[1][0:2]

array([0xd, 0x2], dtype=uint8)

In [58]:
# 12 bytes of training row 1 starting at column 524+8.
# NOTE(review): the +8 presumably skips header bytes that precede the memory
# image, making this the memory at address 524 — TODO confirm the row layout.
features_temp[1][524+8:524+8+12]


array([0xea, 0xea, 0xea, 0xea, 0x4c, 0x3, 0x2, 0x0, 0x0, 0x0, 0x0, 0x0],
      dtype=uint8)

In [46]:
# Hex form of pc value 526, for comparison with the hex-formatted byte dumps.
hex(526)

'0x20e'