In [1]:
%matplotlib inline

import numpy as np
import matplotlib.pylab as plt

np.set_printoptions(formatter={'int':hex})

import os
files = os.listdir('sv_traces/')

In [2]:
### reads an sv_trace file into 2 structures:
### mem: one memory vector (row of bytes) per recorded step
### pcs: one 16-bit pc value per recorded step
def read_trace(sv_file, n_steps=1001):
    """Read a binary sv_trace file and split it into memory and pc traces.

    The file is interpreted as ``n_steps`` equally sized records of raw bytes.
    In every record the first two bytes hold the pc (low byte first) and the
    remaining bytes are the memory vector for that step.

    Parameters
    ----------
    sv_file : str or path-like
        Path of the trace file to read.
    n_steps : int, optional
        Number of records stored in the file (default 1001).

    Returns
    -------
    mem : numpy.ndarray of uint8, shape (n_steps, record_size - 2)
        Memory bytes of every step (read-only view of the file contents).
    pcs : list of numpy.uint16
        The pc value of every step.

    Raises
    ------
    ValueError
        If the file size is not a positive multiple of ``n_steps`` or a
        record is too short to contain the 2-byte pc.
    """
    with open(sv_file, 'rb') as f:
        raw = f.read()

    records = np.frombuffer(raw, dtype=np.uint8)
    if n_steps <= 0 or records.size == 0 or records.size % n_steps != 0:
        raise ValueError(
            f'{sv_file}: {records.size} bytes cannot be split into {n_steps} equal records'
        )
    records = records.reshape(n_steps, -1)
    if records.shape[1] < 2:
        raise ValueError(f'{sv_file}: records are too short to hold a 2-byte pc')

    # Widen to uint16 BEFORE shifting: shifting a uint8 left by 8 overflows to 0
    # under NumPy 2 scalar promotion and would silently drop the high pc byte.
    low_byte = records[:, 0].astype(np.uint16)
    high_byte = records[:, 1].astype(np.uint16)
    pcs = list((high_byte << np.uint16(8)) | low_byte)

    mem = records[:, 2:]
    return mem, pcs

In [3]:
### dict_traces maps a trace file name to the pair returned by read_trace:
###   dict_traces[file][0] -> full memory trace of <file>
###   dict_traces[file][1] -> full pc trace of <file>
### only the first 20 entries of the directory listing are loaded
dict_traces = dict()
for file in files[:20]:
    print(file)
    dict_traces[file] = read_trace(os.path.join('sv_traces/', file))

167:149:9.svbinttrc
121:187:13.svbinttrc
144:101:13.svbinttrc
91:117:3.svbinttrc
308:221:8.svbinttrc
387:118:13.svbinttrc
391:23:13.svbinttrc
103:2:11.svbinttrc
387:177:6.svbinttrc
221:53:2.svbinttrc
347:229:9.svbinttrc
245:222:0.svbinttrc
317:48:5.svbinttrc
151:96:12.svbinttrc
232:123:4.svbinttrc
276:162:5.svbinttrc
21:209:8.svbinttrc
322:139:3.svbinttrc
150:162:8.svbinttrc
117:148:3.svbinttrc


In [4]:
dict_traces

{'167:149:9.svbinttrc': (array([[0x0, 0x0, 0x0, ..., 0x2, 0x0, 0x0],
         [0x0, 0x0, 0x0, ..., 0x2, 0x0, 0x0],
         [0x0, 0x0, 0x0, ..., 0x2, 0x0, 0x0],
         ...,
         [0x7e, 0x0, 0x0, ..., 0x2, 0x0, 0x0],
         [0x14, 0x0, 0x0, ..., 0x2, 0x0, 0x0],
         [0x14, 0x0, 0x0, ..., 0x2, 0x0, 0x0]], dtype=uint8),
  [512,
   513,
   515,
   517,
   519,
   520,
   521,
   522,
   523,
   524,
   525,
   526,
   527,
   528,
   515,
   517,
   519,
   520,
   521,
   522,
   523,
   524,
   525,
   526,
   527,
   528,
   515,
   517,
   519,
   520,
   521,
   522,
   523,
   524,
   525,
   526,
   527,
   528,
   515,
   517,
   519,
   520,
   521,
   522,
   523,
   524,
   525,
   526,
   527,
   528,
   515,
   517,
   519,
   520,
   521,
   522,
   523,
   524,
   525,
   526,
   527,
   528,
   515,
   517,
   519,
   520,
   521,
   522,
   523,
   524,
   525,
   526,
   527,
   528,
   515,
   517,
   519,
   520,
   521,
   522,
   523,
   524,
   525,
   52

In [5]:
### reads trace data into a dataset of mem-to-pc mappings, where
### mem_current: full memory trace without its final memory instance &
### pcs_next: full pcs trace without its first pc instance
def read_dataset(file, traces=None):
    """Pair every memory snapshot with the pc of the following step.

    Parameters
    ----------
    file : hashable
        Key of the trace to convert (a trace file name in this notebook).
    traces : mapping, optional
        Mapping ``key -> (mem_trace, pcs_trace)``. Defaults to the
        notebook-level ``dict_traces``.

    Returns
    -------
    mem_current : sequence
        ``mem_trace`` with its last entry dropped (predecessor memories).
    pcs_next : sequence
        ``pcs_trace`` with its first entry dropped (successor pcs), so that
        ``mem_current[i]`` lines up with ``pcs_next[i]``.
    """
    if traces is None:
        traces = dict_traces  # fall back to the traces loaded earlier in the notebook

    entry = traces[file]
    mem_trace = entry[0]
    pcs_trace = entry[1]

    mem_current = mem_trace[:-1]  # the last memory has no successor pc
    pcs_next = pcs_trace[1:]      # the first pc has no predecessor memory
    return mem_current, pcs_next

In [6]:
### dict_dataset maps a trace file name to the pair returned by read_dataset:
###   dict_dataset[file][0] -> predecessor-memory trace of <file>
###   dict_dataset[file][1] -> successor-pc trace of <file>
dict_dataset = dict()
for trace in dict_traces.keys():
    dict_dataset[trace] = read_dataset(trace)

In [7]:
dict_dataset['167:149:9.svbinttrc'][0][0] # mem at instant 0
#dict_dataset['167:149:9.svbinttrc'][1][0] # pc at instant 1

array([0x0, 0x0, 0x0, ..., 0x2, 0x0, 0x0], dtype=uint8)

In [8]:
dict_dataset['167:149:9.svbinttrc'][1][0] # pc at instant 1

513

## Example Neural Net

In [9]:
list(dict_dataset.keys()) # sv files

['167:149:9.svbinttrc',
 '121:187:13.svbinttrc',
 '144:101:13.svbinttrc',
 '91:117:3.svbinttrc',
 '308:221:8.svbinttrc',
 '387:118:13.svbinttrc',
 '391:23:13.svbinttrc',
 '103:2:11.svbinttrc',
 '387:177:6.svbinttrc',
 '221:53:2.svbinttrc',
 '347:229:9.svbinttrc',
 '245:222:0.svbinttrc',
 '317:48:5.svbinttrc',
 '151:96:12.svbinttrc',
 '232:123:4.svbinttrc',
 '276:162:5.svbinttrc',
 '21:209:8.svbinttrc',
 '322:139:3.svbinttrc',
 '150:162:8.svbinttrc',
 '117:148:3.svbinttrc']

In [10]:
list(dict_dataset.keys())[0] # 0th sv file

'167:149:9.svbinttrc'

In [11]:
dict_dataset[list(dict_dataset.keys())[0]] # 0th dataset: mem_current -> pcs_next (in 0th sv trace)

(array([[0x0, 0x0, 0x0, ..., 0x2, 0x0, 0x0],
        [0x0, 0x0, 0x0, ..., 0x2, 0x0, 0x0],
        [0x0, 0x0, 0x0, ..., 0x2, 0x0, 0x0],
        ...,
        [0x7e, 0x0, 0x0, ..., 0x2, 0x0, 0x0],
        [0x7e, 0x0, 0x0, ..., 0x2, 0x0, 0x0],
        [0x14, 0x0, 0x0, ..., 0x2, 0x0, 0x0]], dtype=uint8),
 [513,
  515,
  517,
  519,
  520,
  521,
  522,
  523,
  524,
  525,
  526,
  527,
  528,
  515,
  517,
  519,
  520,
  521,
  522,
  523,
  524,
  525,
  526,
  527,
  528,
  515,
  517,
  519,
  520,
  521,
  522,
  523,
  524,
  525,
  526,
  527,
  528,
  515,
  517,
  519,
  520,
  521,
  522,
  523,
  524,
  525,
  526,
  527,
  528,
  515,
  517,
  519,
  520,
  521,
  522,
  523,
  524,
  525,
  526,
  527,
  528,
  515,
  517,
  519,
  520,
  521,
  522,
  523,
  524,
  525,
  526,
  527,
  528,
  515,
  517,
  519,
  520,
  521,
  522,
  523,
  524,
  525,
  526,
  527,
  528,
  515,
  517,
  519,
  520,
  521,
  522,
  523,
  524,
  525,
  526,
  527,
  528,
  515,
  517,
  519,

In [12]:
features, labels = dict_dataset[list(dict_dataset.keys())[0]]

In [13]:
features

array([[0x0, 0x0, 0x0, ..., 0x2, 0x0, 0x0],
       [0x0, 0x0, 0x0, ..., 0x2, 0x0, 0x0],
       [0x0, 0x0, 0x0, ..., 0x2, 0x0, 0x0],
       ...,
       [0x7e, 0x0, 0x0, ..., 0x2, 0x0, 0x0],
       [0x7e, 0x0, 0x0, ..., 0x2, 0x0, 0x0],
       [0x14, 0x0, 0x0, ..., 0x2, 0x0, 0x0]], dtype=uint8)

In [14]:
labels[0:10]

[513, 515, 517, 519, 520, 521, 522, 523, 524, 525]

In [15]:
assert(len(features)==len(labels))

#### Step 1: Can we construct a net that reads in a feature vector and outputs a PC?

In [16]:
import torch
import torch.nn as nn
import torch.optim as optim

In [21]:
features.shape[1]

65542

In [22]:
layer = nn.Linear(features.shape[1], 3)

In [23]:
features[0]

array([0x0, 0x0, 0x0, ..., 0x2, 0x0, 0x0], dtype=uint8)

numpy <-> pytorch

ndarrays <-> tensors

ndarray.sum() <-> tensor.sum()

no GPU <-> GPU

In [24]:
type(features[0])

numpy.ndarray

In [25]:
type(torch.from_numpy(features[0]))

  type(torch.from_numpy(features[0]))


torch.Tensor

In [26]:
layer(features[0]) #error

TypeError: linear(): argument 'input' (position 1) must be Tensor, not numpy.ndarray

In [27]:
layer(torch.from_numpy(features[0]).float())

tensor([0.7168, 0.6429, 1.4577], grad_fn=<AddBackward0>)

In [28]:
nn.ReLU()(layer(torch.from_numpy(features[0]).float()))

tensor([0.7168, 0.6429, 1.4577], grad_fn=<ReluBackward0>)

#### Option 1:
Multi-layer perceptron (simplest architecture)

2 intermediate layers (not input layer, not output layer)

Output: 16 bits

The 16-bit PC could be represented in several ways; each choice has an implication for the output layer of the net:
* 1 integer - 1 value, between 0 and 2**16 -1 
* 2 bytes - 2 values, each between 0 and 255
* 4 hex values - 4 values, each between 0 and 15
* 16 bit values - 16 sigmoid values

For 1 hex value:
- classification way - one output per unique value (16) - 16 probabilities (*)
- regression way - 1 output per hex - no probs - it can predict 13.5, 19, -5.3

revisit this

In [29]:
# Multi-layer perceptron: memory vector -> 4096 -> 4096 -> 1024 -> 16 outputs.
net = nn.Sequential(
    nn.Linear(in_features=features.shape[1], out_features=4096),
    nn.ReLU(),  # rectified linear unit
    nn.Linear(in_features=4096, out_features=4096),
    nn.ReLU(),
    nn.Linear(in_features=4096, out_features=1024),
    nn.ReLU(),
    nn.Linear(in_features=1024, out_features=16),
    nn.Sigmoid(),  # squashes each of the 16 outputs into (0, 1)
)

In [30]:
net(torch.from_numpy(features[0]).float())

tensor([0.5472, 0.4342, 0.4739, 0.5293, 0.5150, 0.4598, 0.5138, 0.4838, 0.4699,
        0.4620, 0.5112, 0.4592, 0.4927, 0.5064, 0.5214, 0.4837],
       grad_fn=<SigmoidBackward0>)

We now have a neural net that can predict 16 bits (as probabilities) from each input vector.

### Training

Step 1: each label has to be converted to a sequence of bits

In [31]:
def format_label(l, width=16):
    """Encode a non-negative integer as a fixed-width list of float bits.

    Parameters
    ----------
    l : int
        Value to encode; any integer-like object accepted by ``bin`` works.
    width : int, optional
        Number of bits in the result (default 16).

    Returns
    -------
    list of float
        ``width`` values, each 0.0 or 1.0, most significant bit first.

    Raises
    ------
    TypeError
        If ``l`` is not integer-like (raised by ``bin``).
    ValueError
        If ``l`` is negative or needs more than ``width`` bits.
    """
    binary = bin(l)
    if binary.startswith('-'):
        raise ValueError(f'label must be non-negative, got {l}')

    digits = binary[2:]  # strip the '0b' prefix
    if len(digits) > width:
        # Without this check the result would silently be longer than `width`.
        raise ValueError(f'label {l} does not fit in {width} bits')

    return [float(bit) for bit in digits.zfill(width)]

In [100]:
#step 1: get features and use net to make predictions
preds = net(torch.from_numpy(features).float())
print(preds)
print(preds.shape)

tensor([[0.4782, 0.4933, 0.5220,  ..., 0.4952, 0.5061, 0.4896],
        [0.4782, 0.4933, 0.5220,  ..., 0.4952, 0.5061, 0.4896],
        [0.4783, 0.4933, 0.5220,  ..., 0.4952, 0.5060, 0.4896],
        ...,
        [0.4874, 0.4969, 0.5334,  ..., 0.4988, 0.5154, 0.4934],
        [0.4874, 0.4969, 0.5334,  ..., 0.4988, 0.5154, 0.4934],
        [0.4796, 0.4969, 0.5294,  ..., 0.4987, 0.5077, 0.4920]],
       grad_fn=<SigmoidBackward0>)
torch.Size([1000, 16])


In [111]:
# step 2: measure how far the predictions are from the labels (error/loss)
criterion = nn.BCELoss()

# every integer pc label becomes one row of float bits produced by format_label
labels_torch = torch.tensor(list(map(format_label, labels)))
print(labels_torch)
print(labels[10], labels_torch[10])

print('\n------')
print(preds)
loss = criterion(preds, labels_torch)
print(loss)

tensor([[0., 0., 0.,  ..., 0., 0., 1.],
        [0., 0., 0.,  ..., 0., 1., 1.],
        [0., 0., 0.,  ..., 1., 0., 1.],
        ...,
        [0., 0., 0.,  ..., 0., 1., 1.],
        [0., 0., 0.,  ..., 1., 0., 1.],
        [0., 0., 0.,  ..., 1., 1., 1.]])
526 tensor([0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 1., 1., 1., 0.])

------
tensor([[0.4782, 0.4933, 0.5220,  ..., 0.4952, 0.5061, 0.4896],
        [0.4782, 0.4933, 0.5220,  ..., 0.4952, 0.5061, 0.4896],
        [0.4783, 0.4933, 0.5220,  ..., 0.4952, 0.5060, 0.4896],
        ...,
        [0.4874, 0.4969, 0.5334,  ..., 0.4988, 0.5154, 0.4934],
        [0.4874, 0.4969, 0.5334,  ..., 0.4988, 0.5154, 0.4934],
        [0.4796, 0.4969, 0.5294,  ..., 0.4987, 0.5077, 0.4920]],
       grad_fn=<SigmoidBackward0>)
tensor(0.6780, grad_fn=<BinaryCrossEntropyBackward0>)


In [79]:
criterion = nn.BCELoss()

In [98]:
criterion(torch.tensor(1e-44), torch.tensor(1.)) #-\ln(first argument) - log likelihood

tensor(100.)

In [118]:
list(net.parameters())[1].shape

torch.Size([4096])

In [119]:
#step 3: backprop to update weights
# 1) compute gradients/derivatives of the loss w.r.t. every weight - backprop
# 2) use the gradients to update the weights; plain gradient descent would be
#    w = w - step_size * d(loss)/dw (e.g. step_size = 0.1), whereas the
#    optimizer created here is Adam with lr=1e-2
optimizer = optim.Adam(net.parameters(), lr=1e-2)

optimizer.zero_grad() # clear gradients accumulated by any earlier backward pass
loss.backward() # backprop: fill the .grad of every parameter
optimizer.step() # apply one weight update using those gradients

In [120]:
preds = net(torch.from_numpy(features).float())

In [122]:
loss = criterion(preds, labels_torch)

In [123]:
print(loss)

tensor(11.4625, grad_fn=<BinaryCrossEntropyBackward0>)


In [124]:
print(preds)
print(labels_torch)

tensor([[0., 0., 0.,  ..., 1., 0., 1.],
        [0., 0., 0.,  ..., 1., 0., 1.],
        [0., 0., 0.,  ..., 1., 0., 1.],
        ...,
        [0., 0., 0.,  ..., 1., 0., 1.],
        [0., 0., 0.,  ..., 1., 0., 1.],
        [0., 0., 0.,  ..., 1., 0., 1.]], grad_fn=<SigmoidBackward0>)
tensor([[0., 0., 0.,  ..., 0., 0., 1.],
        [0., 0., 0.,  ..., 0., 1., 1.],
        [0., 0., 0.,  ..., 1., 0., 1.],
        ...,
        [0., 0., 0.,  ..., 0., 1., 1.],
        [0., 0., 0.,  ..., 1., 0., 1.],
        [0., 0., 0.,  ..., 1., 1., 1.]])


At this stage:

* know data can be passed through net 

Don't know:
* is the problem learnable?

* is the net architecture appropriate?

* is the data representation appropriate?

* is optimization well-behaved?

In [None]:
# Fresh multi-layer perceptron: memory vector -> 4096 -> 4096 -> 1024 -> 16 outputs.
net = nn.Sequential(
    nn.Linear(in_features=features.shape[1], out_features=4096),
    nn.ReLU(),  # rectified linear unit
    nn.Linear(in_features=4096, out_features=4096),
    nn.ReLU(),
    nn.Linear(in_features=4096, out_features=1024),
    nn.ReLU(),
    nn.Linear(in_features=1024, out_features=16),
    nn.Sigmoid(),
)

# features/labels of the first trace stored in dict_dataset
features, labels = dict_dataset[list(dict_dataset)[0]]

# binary cross-entropy between predicted bit probabilities and target bits
criterion = nn.BCELoss()

optimizer = optim.Adam(net.parameters(), lr=1e-2)


'''
To do:
features, labels shouldn't be input arguments

create one giant numpy array with each row and corresponding column

-> translate this into a torch tensor
-> use a torch dataloader to randomly (uniformly) sample B rows

'''
def train(n_epochs, net, features, labels, criterion, optimizer):
    """Train ``net`` with full-batch updates for ``n_epochs`` passes.

    Parameters
    ----------
    n_epochs : int
        Number of passes over the full dataset (one weight update per pass).
    net : torch.nn.Module
        Model mapping a batch of feature rows to one prediction row each.
    features : numpy.ndarray or torch.Tensor
        One feature row per sample; converted to a float tensor.
    labels : sequence of int
        One integer label per sample; each is encoded with ``format_label``.
    criterion : callable
        Loss called as ``criterion(preds, targets)``.
    optimizer : torch.optim.Optimizer
        Optimizer already bound to ``net.parameters()``. It is used as given,
        so its hyper-parameters and internal state persist across epochs.

    Returns
    -------
    torch.nn.Module
        The same ``net`` object, updated in place.
    """
    # Loop-invariant conversions: do them once instead of once per epoch.
    inputs = torch.as_tensor(features).float()
    targets = torch.tensor([format_label(l) for l in labels])

    for epoch in range(n_epochs):  # number of passes over full dataset
        # step 1: use net to make predictions
        preds = net(inputs)

        # step 2: compute loss/error
        loss = criterion(preds, targets)

        # step 3: backprop, then let the caller's optimizer update the weights
        optimizer.zero_grad()  # set previous gradient buffers to zero
        loss.backward()        # backprop
        optimizer.step()       # update weights

        print(f'epoch {epoch}: loss = {loss.item():.4f}')

    return net

In [None]:
def _to_batch_tensors(features, labels):
    """Turn one raw batch into (float inputs, float bit targets)."""
    inputs = torch.as_tensor(features).float()
    targets = torch.tensor([format_label(int(l)) for l in labels])
    return inputs, targets


def _mean_loss(net, loader, criterion):
    """Average per-batch loss over ``loader`` without tracking gradients."""
    was_training = net.training
    net.eval()
    batch_losses = []
    with torch.no_grad():
        for features, labels in loader:
            inputs, targets = _to_batch_tensors(features, labels)
            batch_losses.append(criterion(net(inputs), targets).item())
    net.train(was_training)  # restore whatever mode the caller had set
    if not batch_losses:
        return float('nan')
    return sum(batch_losses) / len(batch_losses)


def train_final(n_epochs, net, dl_train, dl_test, criterion, optimizer, print_freq=100):
    """Mini-batch training loop with periodic evaluation on a test loader.

    Parameters
    ----------
    n_epochs : int
        Number of passes over ``dl_train``.
    net : torch.nn.Module
        Model to train; switched to training mode.
    dl_train, dl_test : iterable
        Iterables yielding ``(features, labels)`` batches, where ``features``
        is array- or tensor-like and ``labels`` holds one integer per row.
    criterion : callable
        Loss called as ``criterion(preds, targets)``.
    optimizer : torch.optim.Optimizer
        Optimizer already bound to ``net.parameters()``.
    print_freq : int, optional
        Evaluate and print the test loss every ``print_freq`` weight updates
        (default 100). A falsy value disables evaluation.

    Returns
    -------
    torch.nn.Module
        The same ``net`` object, updated in place.
    """
    net = net.train()
    counter = 0  # weight updates performed so far, across all epochs

    for epoch in range(n_epochs):  # number of passes over full dataset

        for idx, (features, labels) in enumerate(dl_train):  # 1 pass over full dataset

            # step 1: get features and use net to make predictions
            inputs, targets = _to_batch_tensors(features, labels)
            preds = net(inputs)

            # step 2: compute loss/error
            loss = criterion(preds, targets)

            # step 3: backprop to update weights
            optimizer.zero_grad()  # set previous gradient buffers to zero
            loss.backward()        # backprop
            optimizer.step()       # update weights

            counter += 1
            if print_freq and counter % print_freq == 0:
                test_loss = _mean_loss(net, dl_test, criterion)
                print(f'Test Loss = {test_loss}')

    return net

### example of a dataloader

In [126]:
from torch.utils.data import Dataset, DataLoader

In [128]:
X = np.random.uniform(size=(1000, 3))
y = np.random.randint(5, size=1000)

In [146]:
class MyDataset(Dataset):
    """Minimal map-style dataset over a feature array and a label array.

    ``X`` and ``y`` are stored exactly as passed in; conversion to torch
    tensors happens per item, inside ``__getitem__``.
    """

    def __init__(self, X, y):
        # Keep references to the caller's arrays (no copy, no conversion here).
        self.X = X
        self.y = y

    def __len__(self):
        # Number of samples = size of the first axis of X.
        n_samples = self.X.shape[0]
        return n_samples

    def __getitem__(self, idx):
        # np.array wraps the selected label so torch.from_numpy accepts it
        # even when indexing yields a NumPy scalar.
        sample = torch.from_numpy(self.X[idx])
        target = torch.from_numpy(np.array(self.y[idx]))
        return (sample, target)

In [147]:
mydata = MyDataset(X, y)

In [148]:
mydata[4]

(tensor([0.9971, 0.6187, 0.8452], dtype=torch.float64), tensor(3))

In [149]:
mydataloader = DataLoader(mydata, batch_size=32)

In [153]:
it = iter(mydataloader)

In [154]:
a,b = next(it)

In [156]:
a.shape

torch.Size([32, 3])

In [157]:
b.shape

torch.Size([32])

In [159]:
for idx, (features, labels) in enumerate(mydataloader):
    print(idx, features.shape, labels.shape)

0 torch.Size([32, 3]) torch.Size([32])
1 torch.Size([32, 3]) torch.Size([32])
2 torch.Size([32, 3]) torch.Size([32])
3 torch.Size([32, 3]) torch.Size([32])
4 torch.Size([32, 3]) torch.Size([32])
5 torch.Size([32, 3]) torch.Size([32])
6 torch.Size([32, 3]) torch.Size([32])
7 torch.Size([32, 3]) torch.Size([32])
8 torch.Size([32, 3]) torch.Size([32])
9 torch.Size([32, 3]) torch.Size([32])
10 torch.Size([32, 3]) torch.Size([32])
11 torch.Size([32, 3]) torch.Size([32])
12 torch.Size([32, 3]) torch.Size([32])
13 torch.Size([32, 3]) torch.Size([32])
14 torch.Size([32, 3]) torch.Size([32])
15 torch.Size([32, 3]) torch.Size([32])
16 torch.Size([32, 3]) torch.Size([32])
17 torch.Size([32, 3]) torch.Size([32])
18 torch.Size([32, 3]) torch.Size([32])
19 torch.Size([32, 3]) torch.Size([32])
20 torch.Size([32, 3]) torch.Size([32])
21 torch.Size([32, 3]) torch.Size([32])
22 torch.Size([32, 3]) torch.Size([32])
23 torch.Size([32, 3]) torch.Size([32])
24 torch.Size([32, 3]) torch.Size([32])
25 torch.S