In [6]:
import torch
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader, TensorDataset
from collections import defaultdict

In [23]:
with open("test1.out") as f:
    addrs = f.readlines()
hits, addrs = zip(*(l.rstrip().split(' ') for l in addrs))
hits = [int(hit) for hit in hits]
addrs = [int(addr, 16) for addr in addrs]

In [24]:
hits[:10], addrs[:10]

([0, 0, 0, 0, 3, 3, 0, 3, 0, 3],
 [536870744,
  536870752,
  805385384,
  805385392,
  805385376,
  805385400,
  536870728,
  536870720,
  536870712,
  536870704])

In [112]:
# y (target sequence) is binary hit
y = np.asarray(hits)
y = (y > 0).astype(np.int_)
# standard normalization to put vectors in the same range
#y = (y - y.mean()) / (y.std())
# X (features) is address (minus the lowest seen)
# TODO: ideally we'd want PCs or to separate address space
X = np.asarray(addrs)
# normalize X
X = (X - X.mean()) / (X.std())

In [113]:
X[:10]

array([-1.05364059, -1.05364053,  0.94539084,  0.9453909 ,  0.94539079,
        0.94539096, -1.05364071, -1.05364077, -1.05364083, -1.05364089])

In [114]:
y[:10]

array([0, 0, 0, 0, 1, 1, 0, 1, 0, 1])

In [115]:
y[16]

np.int64(0)

In [116]:
def build_seqs(X, y, n):
    assert len(X) == len(y)
    xs = []
    ys = []
    for i in range(len(X)-n):
        x_sample = X[i:(i+n)]
        y_sample = y[i+n]
        xs.append(x_sample)
        ys.append(y_sample)
    return np.array(xs), np.array(ys)

In [117]:
Xs, ys = build_seqs(X, y, 15)

In [118]:
Xs[0]

array([-1.05364059, -1.05364053,  0.94539084,  0.9453909 ,  0.94539079,
        0.94539096, -1.05364071, -1.05364077, -1.05364083, -1.05364089,
        0.9453859 , -1.05364089,  0.94514165,  0.94514207,  0.94514249])

In [119]:
ys[0]

np.int64(0)

In [120]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

In [121]:
trainX = torch.tensor(Xs[:, :, None], dtype=torch.float32).to(device)
trainY = torch.tensor(ys[:, None], dtype=torch.float32).to(device)

In [122]:
trainX

tensor([[[-1.0536],
         [-1.0536],
         [ 0.9454],
         ...,
         [ 0.9451],
         [ 0.9451],
         [ 0.9451]],

        [[-1.0536],
         [ 0.9454],
         [ 0.9454],
         ...,
         [ 0.9451],
         [ 0.9451],
         [-1.0536]],

        [[ 0.9454],
         [ 0.9454],
         [ 0.9454],
         ...,
         [ 0.9451],
         [-1.0536],
         [-1.0536]],

        ...,

        [[-1.0537],
         [ 0.9454],
         [-1.0537],
         ...,
         [-1.0537],
         [-1.0537],
         [-1.0537]],

        [[ 0.9454],
         [-1.0537],
         [ 0.9450],
         ...,
         [-1.0537],
         [-1.0537],
         [-1.0537]],

        [[-1.0537],
         [ 0.9450],
         [ 0.9450],
         ...,
         [-1.0537],
         [-1.0537],
         [ 0.9454]]], device='cuda:0')

In [123]:
trainY

tensor([[0.],
        [0.],
        [0.],
        ...,
        [1.],
        [1.],
        [1.]], device='cuda:0')

In [124]:
trainX.shape

torch.Size([4999985, 15, 1])

In [125]:
class LSTMModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, layer_dim, output_dim):
        super(LSTMModel, self).__init__()
        self.hidden_dim = hidden_dim
        self.layer_dim = layer_dim
        self.lstm = nn.LSTM(input_dim, hidden_dim, layer_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x, h0=None, c0=None):
        if h0 is None or c0 is None:
            h0 = torch.zeros(self.layer_dim, x.size(0), self.hidden_dim).to(x.device)
            c0 = torch.zeros(self.layer_dim, x.size(0), self.hidden_dim).to(x.device)
        
        out, (hn, cn) = self.lstm(x, (h0, c0))
        out = self.fc(out[:, -1, :])
        return out, hn, cn

In [126]:
model = LSTMModel(input_dim=1, hidden_dim=100, layer_dim=1, output_dim=1).to(device)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

In [127]:
batch_size = 16384
dataset = TensorDataset(trainX, trainY)
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True, drop_last=True)

In [None]:
num_epochs = 10
h0, c0 = None, None

for epoch in range(num_epochs):
    model.train()
    optimizer.zero_grad()

    for i, batch in enumerate(dataloader):
        x_batch, y_batch = batch
        outputs, h0, c0 = model(x_batch, h0, c0)

        accuracy = ((outputs > 0.5) == y_batch).sum() / x_batch.shape[0]
        loss = criterion(outputs, y_batch)
        loss.backward()
        optimizer.step()
    
        h0 = h0.detach()
        c0 = c0.detach()
        if i % 10 == 0:
            print(f"Epoch {epoch}, Batch {i}, Batch Loss: {loss.item():.4f}, Batch Accuracy {accuracy}")
    
    #print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

Epoch 0, Batch 0, Batch Loss: 2.1020, Batch Accuracy 0.95220947265625
Epoch 0, Batch 10, Batch Loss: 1.6311, Batch Accuracy 0.9525146484375
Epoch 0, Batch 20, Batch Loss: 0.9525, Batch Accuracy 0.952392578125
Epoch 0, Batch 30, Batch Loss: 0.2766, Batch Accuracy 0.94952392578125
Epoch 0, Batch 40, Batch Loss: 0.0541, Batch Accuracy 0.9522705078125
Epoch 0, Batch 50, Batch Loss: 0.4869, Batch Accuracy 0.04791259765625
Epoch 0, Batch 60, Batch Loss: 1.4118, Batch Accuracy 0.051025390625
Epoch 0, Batch 70, Batch Loss: 2.4502, Batch Accuracy 0.0484619140625
Epoch 0, Batch 80, Batch Loss: 3.0851, Batch Accuracy 0.04656982421875
Epoch 0, Batch 90, Batch Loss: 2.9042, Batch Accuracy 0.04669189453125
Epoch 0, Batch 100, Batch Loss: 1.9665, Batch Accuracy 0.04510498046875
Epoch 0, Batch 110, Batch Loss: 0.8310, Batch Accuracy 0.04681396484375
Epoch 0, Batch 120, Batch Loss: 0.1280, Batch Accuracy 0.95355224609375
Epoch 0, Batch 130, Batch Loss: 0.1571, Batch Accuracy 0.95086669921875
Epoch 0, B