In this notebook, we load our data (from files containing load sequences) and use it to train an RNN. We slide a window of size k over the load history to produce sequences, and use each sequence to predict the RRPV for the last address in that sequence. We train the model against the actual RRPV, which is calculated using the full load history.

In [None]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from torch.autograd import Variable

from sklearn.model_selection import train_test_split

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import random
import bisect
import math

In [None]:
# Mount Google Drive at /content/drive; the cells below read and write files
# under drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

In [None]:
# Load the data: every non-blank line holds one load address in hexadecimal,
# and a blank line closes the current batch of loads.
batches = []
files = ("drive/MyDrive/loads.txt", "drive/MyDrive/loads_concat.txt") # files containing load sequences
for file in files:
    with open(file) as raw_data:
        curr_batch = []
        for line in raw_data:
            stripped = line.strip()
            if stripped:
                curr_batch.append(int(stripped, 16))
            else:
                batches.append(curr_batch)
                curr_batch = []
        # Keep the trailing batch when the file does not end with a blank
        # line; otherwise its loads would be silently dropped.
        if curr_batch:
            batches.append(curr_batch)

# Flatten the batches, in order, into one long list of loads.
long_sequence = [addr for batch in batches for addr in batch]

# 1D sequence of loads
original_input = np.array(long_sequence)
print(len(original_input))

In [None]:
# Index every address by the positions at which it is loaded. Positions are
# appended in increasing order, so each per-address list is already sorted.
occurrence_map = {}
for position, address in enumerate(original_input):
    occurrence_map.setdefault(address, []).append(position)

In [None]:
def find_occurrence_distance(addr, location):
    """Return the distance from `location` to the next load of `addr`.

    The positions of `addr` are looked up in the module-level
    `occurrence_map`. The result is the difference between the first
    recorded position strictly greater than `location` and `location`
    itself, or -1 when no such later position exists.
    """
    positions = occurrence_map[addr]
    # bisect_right lands just past any entry equal to `location`.
    next_idx = bisect.bisect_right(positions, location)
    if next_idx < len(positions):
        return positions[next_idx] - location
    return -1

In [None]:
def normalize(num_loads, never_reused_distance=130000):
    """Map a reuse distance to a training target in the range (0, 1].

    Args:
        num_loads: Number of loads until the address is loaded again
            (expected to be >= 1), or -1 when it is never loaded again.
        never_reused_distance: Distance substituted for the -1 sentinel.

    Returns:
        1 / (1 + log10(distance)). A larger value indicates a sooner
        access; a distance of 1 maps to exactly 1.0.

    Raises:
        ValueError: From math.log10 when the distance is 0 or negative
            (other than the -1 sentinel).
    """
    if num_loads == -1:
        num_loads = never_reused_distance
    return 1 / (1 + math.log10(num_loads))

In [None]:
input_seq = []
target_seq = []
target_seq_raw = []

input_engineered = []

k = 100
# Slide a window of size k over the loads, advancing one load per step, so
# that every position with at least k loads of history is considered once.
for seq_start in range(len(original_input) - k + 1):
    seq_end = seq_start + k - 1

    # pull last k loads; the final one is the address being predicted
    load_history = original_input[seq_start:seq_end + 1]
    load_addr = load_history[-1]

    # history is encoded - 1 if value == target address, 0 otherwise
    load_history = [1 if x == load_addr else 0 for x in load_history]

    # remove values with no history (the only match is the target itself),
    # not useful for training
    if sum(load_history) == 1:
        continue

    # use occurrence distance as target; -1 means the address is never loaded
    # again, which normalize() maps to its own target value. The window is
    # advanced exactly once per iteration in either case, so the sample that
    # follows a never-reused load is no longer skipped.
    next_occurrence = find_occurrence_distance(load_addr, seq_end)

    input_seq.append(load_history)
    target_seq.append(normalize(next_occurrence))
    target_seq_raw.append(next_occurrence)

input_seq = np.array(input_seq)
# one target per row, as a column vector
target_seq = np.array(target_seq).reshape(len(target_seq),1)
print(target_seq)
print(target_seq.shape)

In [None]:
# Prefer the GPU when CUDA is available, otherwise fall back to the CPU.
is_cuda = torch.cuda.is_available()
device = torch.device("cuda" if is_cuda else "cpu")
print(device)

In [None]:
class Model(nn.Module):
    """RNN followed by a fully connected read-out layer.

    Args:
        input_size: Number of features per RNN step.
        output_size: Number of values the linear layer emits per step.
        hidden_dim: Size of the RNN hidden state.
        n_layers: Number of stacked RNN layers.
    """

    def __init__(self, input_size, output_size, hidden_dim, n_layers):
        super().__init__()

        # Kept for reshaping outputs and building the initial hidden state
        self.hidden_dim = hidden_dim
        self.n_layers = n_layers

        # RNN Layer
        self.rnn = nn.RNN(input_size, hidden_dim, n_layers, batch_first=True)
        # Fully connected layer
        self.fc = nn.Linear(hidden_dim, output_size)

    def forward(self, x):
        """Run the RNN from a zero hidden state and apply the linear layer.

        Args:
            x: Either a 2-D tensor (steps, input_size) or a 3-D tensor
                (batch, steps, input_size).
                NOTE(review): per the PyTorch nn.RNN documentation a 2-D
                input is one unbatched sequence, so the rows of a 2-D
                mini-batch are consumed as consecutive time steps.

        Returns:
            A tuple (out, hidden): `out` has one row per RNN step (all
            batch entries flattened together) and `output_size` columns;
            `hidden` is the final hidden state returned by the RNN.
        """
        # A batched (3-D) input needs a hidden state with a batch dimension.
        batch_size = x.size(0) if x.dim() == 3 else None
        hidden = self.init_hidden(batch_size)

        # Passing in the input and hidden state into the model and obtaining outputs
        out, hidden = self.rnn(x, hidden)

        # Flatten every step into a row so it fits the fully connected layer
        out = out.contiguous().view(-1, self.hidden_dim)
        out = self.fc(out)

        return out, hidden

    def init_hidden(self, batch_size=None):
        """Return an all-zero initial hidden state.

        Args:
            batch_size: When None, the state has shape
                (n_layers, hidden_dim) for unbatched input; otherwise
                (n_layers, batch_size, hidden_dim).

        Returns:
            A zero tensor created with the dtype and device of the model's
            own parameters, so no module-level `device` variable is needed.
        """
        if batch_size is None:
            shape = (self.n_layers, self.hidden_dim)
        else:
            shape = (self.n_layers, batch_size, self.hidden_dim)
        reference = self.fc.weight
        return torch.zeros(shape, dtype=reference.dtype, device=reference.device)

In [None]:
# Training hyperparameters
n_epochs = 10
lr = 0.01

# Build the network (one input feature per window position, a single output
# value) and move it to the device selected earlier.
model = Model(input_size=k, output_size=1, hidden_dim=10, n_layers=1).to(device)

# Mean-squared-error loss, optimized with Adam
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)



In [None]:
# Split into training and testing data without shuffling (the first 90% of
# the samples train the model, the rest test it), converting each part to a
# float32 tensor on the way.
X_train, X_test, y_train, y_test = (
    torch.from_numpy(part).to(torch.float32)
    for part in train_test_split(
        input_seq, target_seq, random_state=20, shuffle=False, train_size=0.9
    )
)

# Normalize the inputs only (default arguments of torch.nn.functional.normalize);
# the targets are left untouched.
X_train = torch.nn.functional.normalize(X_train)
X_test = torch.nn.functional.normalize(X_test)

In [None]:
# Pair each input row with its target value.
train = TensorDataset(X_train, y_train)
test = TensorDataset(X_test, y_test)

# Batches are served in their original order (no shuffling).
train_loader = DataLoader(dataset=train, batch_size=100, shuffle=False)
test_loader = DataLoader(dataset=test, batch_size=10000, shuffle=False)

In [None]:
# Training Run
for epoch in range(1, n_epochs + 1):

    for features, target in train_loader:
        # Plain tensors already take part in autograd, so the deprecated
        # Variable wrapper is not needed; just move the batch to the device.
        features = features.to(device)
        target = target.to(device)

        optimizer.zero_grad() # Clears gradients left over from the previous step
        output, hidden = model(features)
        loss = criterion(output, target.view(-1, 1))
        loss.backward() # Does backpropagation and calculates gradients
        optimizer.step() # Updates the weights accordingly

    # Reports the loss of the last mini-batch of the epoch
    print('Epoch: {}/{}.............'.format(epoch, n_epochs), end=' ')
    print("Loss: {:.4f}".format(loss.item()))

In [None]:
#validate

# Gather targets and predictions from every test batch; keeping only the
# values of the final loop iteration would evaluate just the last batch.
all_targets = []
all_outputs = []
with torch.no_grad():  # no gradients are needed for evaluation
    for features, target in test_loader:
        features = features.to(device)
        prediction = model(features)[0]  # model returns (output, hidden)
        all_targets.append(target.cpu().numpy().reshape(-1))
        all_outputs.append(prediction.cpu().numpy().reshape(-1))

target = np.concatenate(all_targets)
outputs = np.concatenate(all_outputs)
plt.scatter(target,outputs)

# Reference line y = x over the range covered by both targets and predictions
identity_line = np.linspace(max(min(target), min(outputs)),min(max(target), max(outputs)))
plt.plot(identity_line, identity_line, color="black", linewidth=1.0)

print(outputs)
print(np.corrcoef(target, outputs))

In [None]:
# Saves the model's state_dict (its parameters, not the Model class itself)
# to a Drive file that is used by the cache module for prediction.
torch.save(model.state_dict(), "drive/MyDrive/model")