In [1]:
import torch
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader, TensorDataset
from collections import defaultdict

In [2]:
with open("mem_stream.1") as f:
    addrs = f.readlines()
pairs = [[int(c, 16) for c in l.rstrip().split('\t')] for l in addrs]

In [3]:
pairs

[[1, 536870744],
 [0, 536870752],
 [1, 805385384],
 [1, 805385392],
 [1, 805385376],
 [1, 805385400],
 [1, 536870728],
 [1, 536870720],
 [1, 536870712],
 [1, 536870704],
 [0, 805384720],
 [0, 536870704],
 [1, 805351912],
 [1, 805351968],
 [1, 805352024],
 [1, 536870688],
 [1, 536870648],
 [1, 536870656],
 [1, 536870664],
 [1, 536870672],
 [0, 536870640],
 [1, 536870640],
 [0, 805384708],
 [0, 805384704],
 [0, 805384704],
 [1, 805384704],
 [1, 536870616],
 [1, 536870624],
 [1, 536870608],
 [0, 536870608],
 [1, 536870584],
 [1, 536870592],
 [1, 536870600],
 [1, 536870576],
 [0, 805384728],
 [0, 536870576],
 [0, 536870560],
 [1, 536870560],
 [0, 805384568],
 [1, 805384568],
 [0, 536870560],
 [1, 536870536],
 [1, 536870544],
 [1, 536870552],
 [1, 536870528],
 [1, 536870504],
 [1, 536870512],
 [1, 536870496],
 [0, 536870528],
 [1, 536870488],
 [1, 536870496],
 [1, 536870504],
 [1, 536870512],
 [1, 536870520],
 [0, 536870480],
 [1, 536870480],
 [0, 536870432],
 [1, 536870440],
 [1, 536870448

In [4]:
aggregated = defaultdict(list)
for miss, addr in pairs:
    aggregated[addr].append(miss)

In [5]:
aggregated

defaultdict(list,
            {536870744: [1],
             536870752: [0],
             805385384: [1, 0],
             805385392: [1],
             805385376: [1],
             805385400: [1],
             536870728: [1],
             536870720: [1],
             536870712: [1],
             536870704: [1, 0],
             805384720: [0, 1],
             805351912: [1],
             805351968: [1],
             805352024: [1],
             536870688: [1, 1],
             536870648: [1, 1],
             536870656: [1, 1],
             536870664: [1, 1],
             536870672: [1, 1],
             536870640: [0, 1, 0, 0, 0, 1],
             805384708: [0, 0],
             805384704: [0, 0, 1, 0, 0, 1],
             536870616: [1, 1],
             536870624: [1, 1],
             536870608: [1, 0, 0, 0, 1, 0, 0, 0],
             536870584: [1, 1],
             536870592: [1, 1],
             536870600: [1, 1],
             536870576: [1, 0, 1, 0],
             805384728: [0,
           

In [6]:
y, X = zip(*pairs)

In [7]:
def create_sequences(data, seq_length):
    xs = []
    ys = []
    for i in range(len(data)-seq_length):
        x = data[i:(i+seq_length)]
        y = data[i+seq_length]
        xs.append(x)
        ys.append(y)
    return np.array(xs), np.array(ys)

In [8]:
Xs, ys = create_sequences(X, 10)

In [9]:
ys

array([805384720, 536870704, 805351912, ..., 536869072, 805385440,
       536869084], shape=(4999990,))

In [10]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

In [11]:
trainX = torch.tensor(Xs[:, :, None], dtype=torch.float32).to(device)
trainY = torch.tensor(ys[:, None], dtype=torch.float32).to(device)

In [12]:
trainY

tensor([[8.0538e+08],
        [5.3687e+08],
        [8.0535e+08],
        ...,
        [5.3687e+08],
        [8.0539e+08],
        [5.3687e+08]], device='cuda:0')

In [13]:
trainX.shape

torch.Size([4999990, 10, 1])

In [14]:
class LSTMModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, layer_dim, output_dim):
        super(LSTMModel, self).__init__()
        self.hidden_dim = hidden_dim
        self.layer_dim = layer_dim
        self.lstm = nn.LSTM(input_dim, hidden_dim, layer_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x, h0=None, c0=None):
        if h0 is None or c0 is None:
            h0 = torch.zeros(self.layer_dim, x.size(0), self.hidden_dim).to(x.device)
            c0 = torch.zeros(self.layer_dim, x.size(0), self.hidden_dim).to(x.device)
        
        out, (hn, cn) = self.lstm(x, (h0, c0))
        out = self.fc(out[:, -1, :])
        return out, hn, cn

In [15]:
model = LSTMModel(input_dim=1, hidden_dim=100, layer_dim=1, output_dim=1).to(device)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

In [16]:
batch_size = 2048
dataset = TensorDataset(trainX, trainY)
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=False)

In [17]:
num_epochs = 100
h0, c0 = None, None

for epoch in range(num_epochs):
    model.train()
    optimizer.zero_grad()

    for i, batch in enumerate(dataloader):
        x_batch, y_batch = batch
        outputs, h0, c0 = model(x_batch, h0, c0)

        loss = criterion(outputs, y_batch)
        loss.backward()
        optimizer.step()
    
        h0 = h0.detach()
        c0 = c0.detach()
        if i % 1000 == 0:
            print(f"Epoch {epoch}, Batch {i}, Loss: {loss.item():.4f}")
    
    #print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

Epoch 0, Batch 0, Loss: 572127095942021120.0000
Epoch 0, Batch 1000, Loss: 430885756383264768.0000
Epoch 0, Batch 2000, Loss: 430006147081043968.0000


RuntimeError: Expected hidden[0] size (1, 822, 100), got [1, 2048, 100]