In [None]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torch.utils.data as data
import matplotlib.pyplot as plt
import altair as alt
import math
import time
import os
from torch.utils.data.dataset import Dataset
from sklearn.model_selection import train_test_split
import pandas as pd

In [None]:
# Colab-only setup: mount Google Drive so the /content/drive/... paths used
# by later cells resolve.
from google.colab import drive
# Skip the mount when /content/drive already exists (e.g. the cell is re-run).
if not os.path.exists("/content/drive"):
  drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# ALTAIR SAVE
# Installs altair_saver plus a chromium driver with notebook shell commands.
# NOTE(review): neither install is version-pinned, so a fresh runtime may
# resolve different versions than the ones shown in the recorded output.
!pip install altair_saver
!apt-get install chromium-chromedriver
import altair_saver

# Folder on the mounted Drive, presumably where exported plots are written
# (not referenced in the visible cells -- confirm against the plotting cells).
plotsPath = "/content/drive/MyDrive/U/4t/CBI/sketchModelling/images/"

Collecting altair_saver
[?25l  Downloading https://files.pythonhosted.org/packages/71/d6/2c2f1b5aa281557674eb85505d75a937a9edd6ef033b463b0d8693ed3839/altair_saver-0.5.0-py3-none-any.whl (89kB)
[K     |████████████████████████████████| 92kB 3.6MB/s 
[?25hCollecting selenium
[?25l  Downloading https://files.pythonhosted.org/packages/80/d6/4294f0b4bce4de0abf13e17190289f9d0613b0a44e5dd6a7f5ca98459853/selenium-3.141.0-py2.py3-none-any.whl (904kB)
[K     |████████████████████████████████| 911kB 7.1MB/s 
Collecting altair-viewer
[?25l  Downloading https://files.pythonhosted.org/packages/04/a4/c3ddcd67e7929109f40b4a6d8afc56f358fc3231569ff22207e8befc8912/altair_viewer-0.3.0-py3-none-any.whl (562kB)
[K     |████████████████████████████████| 563kB 13.8MB/s 
[?25hCollecting altair-data-server>=0.4.0
  Downloading https://files.pythonhosted.org/packages/e7/a3/0e7651adce146c17eea516ffcb530f7ee769671e59395bc10838eca827db/altair_data_server-0.4.1-py3-none-any.whl
Installing collected packages:

In [None]:
# !pip install -i https://test.pypi.org/simple/ skcm==0.0.1
# from sktm.sketches import EH

In [None]:
# Seed PyTorch's global random number generator (model_experiment re-seeds
# with its own `seed` argument at the start of every run).
torch.manual_seed(888)

<torch._C.Generator at 0x7efdf7c50b58>

In [None]:
class electricityDataSet(Dataset):
  """Map-style dataset built from a DataFrame that has a 'class' column.

  Every column other than 'class' is treated as a feature. The target is
  stored one-hot: [1, 0] when the class value equals 0, [0, 1] otherwise.
  """

  def __init__(self, pdDataFrame):
    # features: everything except the column literally named 'class'
    featureMask = pdDataFrame.columns != 'class'
    self.data = pdDataFrame.loc[:, featureMask].to_numpy()

    # one-hot targets, one row per sample
    isClassZero = [bool(value == 0) for value in pdDataFrame['class'].values]
    self.labels = np.array([[1, 0] if zero else [0, 1] for zero in isClassZero])

  def __len__(self):
    return len(self.data)

  def __getitem__(self, index):
    # both pieces are returned as float tensors (features, one-hot target)
    features = torch.FloatTensor(self.data[index])
    target = torch.FloatTensor(self.labels[index])
    return features, target

In [None]:
# Raw electricity stream, read from the mounted Drive.
elec = pd.read_csv('/content/drive/MyDrive/U/4t/CBI/sketchModelling/data/rawStreams/electricity/electricity.csv')
# Encode the target as an integer: 'UP' becomes 1, every other value 0.
elec['class'] = elec['class'].map(lambda trend: int(trend == 'UP'))

# let us use only the price in nsw and victoria
# elec = elec.loc[:,['nswprice', 'vicprice', 'class']]

In [None]:
trainPct = 0.7
valPct = 0.15
# Chronological split (no shuffling): the validation and test parts are
# the "future" relative to the training part.
trainEnd = round(trainPct*len(elec))
valEnd = round((trainPct+valPct)*len(elec))
elecTrain = elec.iloc[:trainEnd,:]
elecVal = elec.iloc[trainEnd:valEnd,:]
elecTest = elec.iloc[valEnd:,:]

In [None]:
# Hyper-parameters shared by every experiment below. Later cells overwrite
# individual entries (e.g. 'learning_rate', 'hidden_size', 'num_epochs').
hparams = {
    'batch_size':32,
    'num_epochs':10,
    'test_batch_size':32,
    'hidden_size':4,
    'num_classes':2,
    'num_inputs':784,
    'learning_rate':1e-3,
    'log_interval':100,
    'input_size':8,
    # work on the GPU when the machine has one, otherwise on the CPU
    'device':'cuda' if torch.cuda.is_available() else 'cpu',
}

In [None]:
# Default (electricity) DataLoaders for the train / validation / test splits.
# Order is kept (shuffle=False) because the data is a time-ordered stream.
trainLoaderElec, valLoaderElec, testLoaderElec = (
    torch.utils.data.DataLoader(electricityDataSet(split),
                                batch_size=hparams['batch_size'],
                                shuffle=False)
    for split in (elecTrain, elecVal, elecTest)
)

In [None]:
class RNN(nn.Module):
    """
    Vanilla RNN followed by a fully-connected layer and a softmax.

    Args:
        seq_len: number of time steps each sample is reshaped to.
        num_classes: size of the output probability vector.
        input_size: number of features per time step.
        hidden_size: width of the recurrent hidden state.
        num_layers: number of stacked recurrent layers.

    forward() returns one row of class probabilities per (sample, time step)
    pair, i.e. a tensor of shape [batch_size * seq_len, num_classes].
    """
    def __init__(self, seq_len, num_classes, input_size, hidden_size, num_layers):
        super().__init__()
        self.seq_len = seq_len
        self.num_layers = num_layers
        self.input_size = input_size
        self.num_classes = num_classes
        self.hidden_size = hidden_size
        # num_layers must be forwarded: the initial hidden state built in
        # _init_hidden has num_layers as its first dimension, so the two
        # have to agree.
        self.rnn = nn.RNN(input_size, hidden_size, num_layers=num_layers, batch_first=True)
        self.linear = nn.Linear(hidden_size, num_classes)
        self.Softmax = nn.Softmax(dim=1)

    def forward(self, x):
        batch_size = x.size(0)
        # batch_first=True: the RNN expects [batch_size, seq_len, input_size]
        x = x.view(batch_size, self.seq_len, self.input_size)
        # Put the initial hidden state wherever the input lives instead of
        # reading a global configuration dictionary.
        hidden = self._init_hidden(batch_size).to(x.device)

        # rnn_out: [batch_size, seq_len, hidden_size]. The final hidden state
        # is not needed because nn.RNN already unrolled every time step.
        rnn_out, _ = self.rnn(x, hidden)
        linear_out = self.linear(rnn_out.reshape(-1, self.hidden_size))
        return self.Softmax(linear_out)

    def _init_hidden(self, batch_size):
        """
        Return an all-zero initial hidden state of shape
        [num_layers, batch_size, hidden_size].
        """
        return torch.zeros(self.num_layers, batch_size, self.hidden_size)

In [None]:
def get_nn_nparams(net):
  """Return the total number of scalar parameters in `net`.

  Counts every tensor yielded by net.parameters(), trainable or not.
  Reference: https://discuss.pytorch.org/t/how-do-i-check-the-number-of-parameters-of-a-model/4325/6
  """
  # numel() is the product of a tensor's dimensions (1 for a 0-d tensor)
  return sum(param.numel() for param in net.parameters())


# (1) obtain the maximum for each predicted element in the batch to get the class (it is the maximum index of the num_classes array per batch sample) (look at torch.argmax in the PyTorch documentation)
# (2) compare the predicted class index with the index in its corresponding neighbor within label_batch 
# (3) sum up the number of affirmative comparisons and return the summation
def correct_predictions(predicted_batch, label_batch):
  """Count the samples whose predicted class matches the labelled class.

  Args:
    predicted_batch: tensor with one row of class scores per sample.
    label_batch: tensor with one row (e.g. one-hot) per sample.

  Returns:
    The number of rows where the argmax of the prediction equals the argmax
    of the label, as a float. An empty batch yields 0.0.
  """
  # pair rows up the same way zip() would: stop at the shorter batch
  n_samples = min(len(predicted_batch), len(label_batch))
  if n_samples == 0:
    return 0.0

  # One vectorised argmax per batch instead of a Python loop over samples.
  predicted = predicted_batch[:n_samples].reshape(n_samples, -1).argmax(dim=1)
  expected = label_batch[:n_samples].reshape(n_samples, -1).argmax(dim=1)
  matches = predicted == expected.to(predicted.device)
  return float(matches.sum().item())

In [None]:
def train_epoch(train_loader, network, optimizer, loss_fn, hparams, epoch):
  """Train `network` for one pass over `train_loader`.

  Args:
    train_loader: iterable of (data, target) batches exposing `.dataset`.
    network: model to optimise; it is switched to training mode.
    optimizer: optimizer already bound to the network parameters.
    loss_fn: callable `loss_fn(output, target)` returning a scalar tensor.
    hparams: dictionary providing 'device' and 'log_interval'.
    epoch: epoch number, only used in the progress message.

  Returns:
    (avg_loss, train_acc): the exponential moving average of the batch loss
    (None when the loader yields no batch) and the accuracy in percent over
    the whole dataset.
  """
  # Activate the train=True flag inside the model
  network.train()

  device = hparams['device']
  avg_loss = None
  avg_weight = 0.1
  acc = 0

  # For each batch
  for batch_idx, (data, target) in enumerate(train_loader):
      data, target = data.to(device), target.to(device)
      optimizer.zero_grad()

      output = network(data)
      loss = loss_fn(output, target)
      loss.backward()

      batch_loss = loss.item()
      # Compare against None explicitly: a moving average of exactly 0.0 is
      # a legitimate value and must not restart the average.
      if avg_loss is None:
        avg_loss = batch_loss
      else:
        avg_loss = avg_weight * batch_loss + (1 - avg_weight) * avg_loss

      optimizer.step()

      # compute number of correct predictions in the batch
      acc += correct_predictions(output, target)

      if batch_idx % hparams['log_interval'] == 0:
          print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
              epoch, batch_idx * len(data), len(train_loader.dataset),
              100. * batch_idx / len(train_loader), batch_loss))

  # Accuracy over the whole dataset, in percent
  train_acc = 100. * acc / len(train_loader.dataset)
  print('Train accuracy: {:.6f}'.format(train_acc))

  return avg_loss, train_acc

In [None]:
def val_epoch(val_loader, network, hparams, loss_fn):
    """Evaluate `network` on every batch of `val_loader` without gradients.

    `loss_fn` is called as `loss_fn(output, target, reduction='sum')`; the
    summed loss is divided by the dataset size. Returns
    (average loss, accuracy in percent) and prints a one-line summary.
    """
    # Evaluation mode (deactivates the train=True flag inside the model)
    network.eval()

    target_device = hparams['device']
    summed_loss, hits = 0, 0

    with torch.no_grad():
        for features, labels in val_loader:
            features = features.to(target_device)
            labels = labels.to(target_device)

            # flatten each sample before feeding it to the network
            predictions = network(features.view(features.shape[0], -1))

            # accumulate the summed batch loss and the number of hits
            summed_loss += loss_fn(predictions, labels, reduction='sum').item()
            hits += correct_predictions(predictions, labels)

    n_samples = len(val_loader.dataset)
    val_loss = summed_loss / n_samples
    val_acc = 100. * hits / n_samples
    print('\nVal set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        val_loss, hits, n_samples, val_acc))
    return val_loss, val_acc

In [None]:
def model_experiment(model, hparams, modelPath, experimentName, seed, train_function, 
                     val_function, save_models=False, trainLoader = trainLoaderElec, 
                     valLoader = valLoaderElec, testLoader = testLoaderElec):
  """Train a binary classifier and report the test accuracy of its best epoch.

  Args:
    model: network to train; it is moved to hparams['device'].
    hparams: dictionary providing 'device', 'learning_rate' and 'num_epochs'
      (plus whatever train_function / val_function read from it).
    modelPath: directory prefix (ending in a separator) for checkpoints.
    experimentName: checkpoint file name without the '.pth' extension.
    seed: seed applied to the CPU and CUDA generators before training.
    train_function: called as train_function(trainLoader, model, optimizer,
      loss_fn, hparams, epoch).
    val_function: called as val_function(loader, model, hparams, loss_fn) and
      expected to return (loss, accuracy).
    save_models: when True, write a checkpoint every time the validation
      accuracy improves.
    trainLoader, valLoader, testLoader: loaders to use. The defaults are
      bound when this function is defined, so rebinding the global loader
      names later has no effect; pass the loaders explicitly instead.

  Returns:
    (best_val_acc, test_accuracy, avg_training_time), where the test accuracy
    is measured with the weights of the epoch that had the best validation
    accuracy and avg_training_time is the mean training seconds per epoch.

  Side effect: on return `model` holds the best-epoch weights.
  """
  import copy

  torch.manual_seed(seed)
  torch.cuda.manual_seed(seed)

  model = model.to(hparams['device'])

  optimizer = optim.RMSprop(model.parameters(), lr = hparams['learning_rate'])
  loss_fn = F.binary_cross_entropy

  print(model)
  print('Num params: ', get_nn_nparams(model))

  best_val_acc = -np.inf
  # Snapshot of the weights at the best validation epoch. A copy is required:
  # keeping a reference to `model` would follow every later update.
  best_state = None

  total_time = 0

  # For each epoch
  for epoch in range(1, hparams['num_epochs'] + 1):
    startTime = time.time()
    train_function(trainLoader, model, optimizer, loss_fn, hparams, epoch)
    endTime = time.time()

    # only the training pass is timed
    total_time += (endTime - startTime)

    val_loss, val_accuracy = val_function(valLoader, model, hparams, loss_fn)

    if val_accuracy > best_val_acc:
      best_val_acc = val_accuracy
      best_state = copy.deepcopy(model.state_dict())
      if save_models:
        checkpointFile = modelPath + experimentName + '.pth'
        checkpointDir = os.path.dirname(checkpointFile)
        if checkpointDir:
          # torch.save does not create missing directories
          os.makedirs(checkpointDir, exist_ok=True)
        checkpoint = {'model_state_dict': model.state_dict(), 'optimizer_state_dict': optimizer.state_dict()}
        torch.save(checkpoint, checkpointFile)

  print("Test accuracy of best model:")

  # use best checkpoint based on val accuracy to compute test accuracy
  if best_state is not None:
    model.load_state_dict(best_state)
  test_loss, test_accuracy = val_function(testLoader, model, hparams, loss_fn)

  # guard against num_epochs == 0
  avg_training_time = total_time / hparams['num_epochs'] if hparams['num_epochs'] else 0.0

  return best_val_acc, test_accuracy, avg_training_time

## Exponential Histogram

In [None]:
class EHRNN(nn.Module):
    """RNN classifier that also feeds windowed statistics of its hidden state
    to the output layer.

    The hidden state of each sample is average-pooled down to `numEHs`
    values. Every pooled value is pushed into one VarEH per entry of
    `EHlengths`, and the mean and/or variance estimates read back from those
    histograms are given to the final linear layer, optionally together with
    the hidden state itself.

    VarEH is defined outside the visible cells; this class only relies on
    `VarEH(length, eps=..., maxValue=1)`, `add(value)`,
    `get_mean_estimate()` and `get_var_estimate()`.
    NOTE(review): maxValue=1 presumably reflects the tanh-bounded hidden
    state -- confirm against the VarEH implementation.

    Args:
        seq_len: time steps each sample is reshaped to. When it is larger
            than 1 the hidden state of the last step is used.
        num_classes: size of the output probability vector.
        input_size: number of features per time step.
        hidden_size: width of the recurrent hidden state.
        num_layers: number of stacked recurrent layers.
        EHeps: `eps` argument passed to every VarEH.
        EHlengths: one histogram is kept per length and pooled value.
        useMean: include the mean estimates.
        useVariance: include the variance estimates.
        inputToLinear: 'all' (hidden state + estimates) or 'estimates'.

    The histograms are updated sample by sample in batch order, so the
    estimates attached to a sample never include later samples. The
    estimates are plain numbers: no gradient flows through them.
    """

    def __init__(self, seq_len, num_classes, input_size, hidden_size, 
                 num_layers, EHeps, EHlengths, useMean=True, 
                 useVariance=False, inputToLinear='all'):
        # todo try batchnorm in hidden states

        super().__init__()

        if not useMean and not useVariance:
            raise Exception("At least one type of estimate must be used.")
        if inputToLinear not in ('all', 'estimates'):
            raise Exception("Input to linear must be either 'all' or 'estimates'")

        self.seq_len = seq_len
        self.num_layers = num_layers
        self.input_size = input_size
        self.num_classes = num_classes
        self.hidden_size = hidden_size
        # num_layers is forwarded so that it matches the first dimension of
        # the initial hidden state built in _init_hidden
        self.rnn = nn.RNN(input_size, hidden_size, num_layers=num_layers, batch_first=True)
        self.Softmax = nn.Softmax(dim=1)
        self.EHeps = EHeps
        self.EHlengths = EHlengths
        self.useVariance = useVariance
        self.useMean = useMean
        self.inputToLinear = inputToLinear

        # each hidden state is pooled with a kernel of floor(sqrt(hidden_size));
        # every pooled element then gets its own set of EHs.
        self.avgKernelSize = int(np.floor(np.sqrt(hidden_size)))
        # output length formula from
        # https://pytorch.org/docs/stable/generated/torch.nn.AvgPool1d.html
        self.numEHs = math.floor(((self.hidden_size - self.avgKernelSize) / self.avgKernelSize) + 1)
        self.avgPool = nn.AvgPool1d(kernel_size=self.avgKernelSize)

        self.numberOfEstimates = 2 if useMean and useVariance else 1

        # EHs[i][j] is the EH over pooled element i with length EHlengths[j]
        self.reset_EHs()

        # linear: its input size depends on the hidden size, on how many EHs
        # are maintained and on how many estimates are queried from each
        numEstimateFeatures = self.numEHs * len(EHlengths) * self.numberOfEstimates
        if self.inputToLinear == 'all':
            self.linear = nn.Linear(hidden_size + numEstimateFeatures, num_classes)
        else:
            self.linear = nn.Linear(numEstimateFeatures, num_classes)

    def reset_EHs(self):
        """Replace every histogram with a freshly constructed one."""
        self.EHs = [[VarEH(length, eps=self.EHeps, maxValue=1) for length in self.EHlengths]
                    for _ in range(self.numEHs)]

    def forward(self, x):
        """Return class probabilities, one row per sample."""
        linearInput = self.hidden_states(x)
        linear_out = self.linear(linearInput)
        return self.Softmax(linear_out)

    def hidden_states(self, x):
        """Build the input of the linear layer for a batch.

        Returns a [batch_size, F] tensor where F is the number of estimate
        features, plus hidden_size when inputToLinear == 'all'.
        """
        batch_size = x.size(0)
        # batch_first=True: the RNN expects [batch_size, seq_len, input_size]
        x = x.view(batch_size, self.seq_len, self.input_size)
        # place the initial state on the input's device rather than reading
        # a global configuration dictionary
        hidden = self._init_hidden(batch_size).to(x.device)

        # rnn_out: [batch_size, seq_len, hidden_size]
        rnn_out, _ = self.rnn(x, hidden)

        # Index the time axis explicitly. A bare torch.squeeze would also
        # drop the batch axis whenever the batch holds a single sample.
        last_hidden = rnn_out[:, -1, :]                                # B x H
        pooled = self.avgPool(last_hidden.unsqueeze(1)).squeeze(1)     # B x numEHs

        # Single transfer to Python floats instead of one .item() per element.
        pooled_values = pooled.detach().cpu().tolist()

        # Feed the samples one at a time and query right after each one, so
        # the estimates of a sample never look into the future.
        per_sample_estimates = []
        for sample_values in pooled_values:
            for value, histograms in zip(sample_values, self.EHs):
                for histogram in histograms:
                    histogram.add(value)
            per_sample_estimates.append(self._current_estimates())

        numEstimateFeatures = self.numEHs * len(self.EHlengths) * self.numberOfEstimates
        estimates = torch.tensor(per_sample_estimates,
                                 dtype=last_hidden.dtype,
                                 device=x.device).view(batch_size, numEstimateFeatures)

        if self.inputToLinear == 'all':
            # B x (H + numEHs * len(EHlengths) * numberOfEstimates)
            return torch.cat((last_hidden, estimates), 1)
        # B x (numEHs * len(EHlengths) * numberOfEstimates)
        return estimates

    def _current_estimates(self):
        """Query every histogram; when both statistics are enabled the order
        is mean then variance for each histogram."""
        estimates = []
        for histograms in self.EHs:
            for histogram in histograms:
                if self.useMean:
                    estimates.append(histogram.get_mean_estimate())
                if self.useVariance:
                    estimates.append(histogram.get_var_estimate())
        return estimates

    def _init_hidden(self, batch_size):
        """
        Return an all-zero initial hidden state of shape
        [num_layers, batch_size, hidden_size].
        """
        return torch.zeros(self.num_layers, batch_size, self.hidden_size)

In [None]:
def EH_train_epoch(train_loader, network, optimizer, loss_fn, hparams, epoch, taskType='classification'):
  """Train an EH-based network for one epoch, starting from empty EHs.

  Args:
    train_loader: iterable of (data, target) batches exposing `.dataset`.
    network: model exposing `EHeps`, `EHlengths` and `numEHs`; its `EHs`
      attribute is replaced with fresh VarEH objects before training.
    optimizer: optimizer already bound to the network parameters.
    loss_fn: callable `loss_fn(output, target)` returning a scalar tensor.
    hparams: dictionary providing 'device' and 'log_interval'.
    epoch: epoch number, only used in the progress message.
    taskType: correct predictions are only counted for 'classification'.

  Returns:
    (avg_loss, train_acc): the exponential moving average of the batch loss
    (None when the loader yields no batch) and the accuracy in percent
    (0.0 when taskType is not 'classification').
  """
  # Activate the train=True flag inside the model
  network.train()

  # reset EHs so that statistics from a previous epoch do not leak into this one
  network.EHs = [[VarEH(length, eps=network.EHeps, maxValue=1) for length in network.EHlengths]
                 for _ in range(network.numEHs)]

  device = hparams['device']
  avg_loss = None
  avg_weight = 0.1
  acc = 0

  # For each batch
  for batch_idx, (data, target) in enumerate(train_loader):
      data, target = data.to(device), target.to(device)
      optimizer.zero_grad()

      output = network(data)
      loss = loss_fn(output, target)
      loss.backward()

      batch_loss = loss.item()
      # Compare against None explicitly: a moving average of exactly 0.0 is
      # a legitimate value and must not restart the average.
      if avg_loss is None:
        avg_loss = batch_loss
      else:
        avg_loss = avg_weight * batch_loss + (1 - avg_weight) * avg_loss

      if taskType == 'classification':
        # compute number of correct predictions in the batch
        acc += correct_predictions(output, target)

      optimizer.step()

      if batch_idx % hparams['log_interval'] == 0:
          print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
              epoch, batch_idx * len(data), len(train_loader.dataset),
              100. * batch_idx / len(train_loader), batch_loss), )

  # Accuracy over the whole dataset, in percent
  train_acc = 100. * acc / len(train_loader.dataset)
  print('Train accuracy: {:.6f}'.format(train_acc))

  return avg_loss, train_acc

In [None]:
def EH_val_epoch(val_loader, network, hparams, loss_fn):
    """Evaluate an EH-based network on `val_loader`, starting from empty EHs.

    `network.EHs` is rebuilt from `network.EHlengths`, `network.EHeps` and
    `network.numEHs` before the first batch. `loss_fn` is called as
    `loss_fn(output, target, reduction='sum')`; the summed loss is divided by
    the dataset size. Returns (average loss, accuracy in percent).
    """
    # Evaluation mode (deactivates the train=True flag inside the model)
    network.eval()

    # fresh EHs: one list of histograms per pooled element
    fresh_EHs = []
    for _ in range(network.numEHs):
        fresh_EHs.append([VarEH(window, eps=network.EHeps, maxValue=1)
                          for window in network.EHlengths])
    network.EHs = fresh_EHs

    target_device = hparams['device']
    summed_loss, hits = 0, 0

    with torch.no_grad():
        for features, labels in val_loader:
            features = features.to(target_device)
            labels = labels.to(target_device)

            # flatten each sample before feeding it to the network
            predictions = network(features.view(features.shape[0], -1))

            # accumulate the summed batch loss and the number of hits
            summed_loss += loss_fn(predictions, labels, reduction='sum').item()
            hits += correct_predictions(predictions, labels)

    n_samples = len(val_loader.dataset)
    val_loss = summed_loss / n_samples
    val_acc = 100. * hits / n_samples
    print('\nValidation set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        val_loss, hits, n_samples, val_acc))
    return val_loss, val_acc

### Experiments for EHRNN Performance over various datasets

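To change datasets, build new `trainLoader`, `valLoader` and `testLoader` objects and pass them to `model_experiment` through its `trainLoader`, `valLoader` and `testLoader` arguments; if they are omitted, `model_experiment` falls back to the electricity loaders that existed when it was defined.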

### ELECTRICITY

In [None]:
# Search-grid configuration for the EHRNN experiments.
# NOTE: this cell assigns the same names several times, one section per
# grid; only the LAST assignment of each name is in effect when the cell
# finishes. The earlier sections are overwritten by the later ones.

# --- Section 1: full grid (overwritten below) -------------------------------
hiddenSizes = [16, 32, 64]
batchSizes = [32, 64]
# each entry is [mean, var]: 1 enables that estimate, 0 disables it
summaryToUse = [[0,1], [1,0], [1,1]]

# first row are short term, second long term, third short and long term.
# fourth row are extras (individuals and combo with 48)
EHLengths = [[4], [4, 8], [4, 8, 16], [4, 8, 16, 32], [4, 8, 16, 32, 64],
             [32], [32, 64], [32, 64, 128], [32, 64, 128, 256], [32, 64, 128, 256, 512],
             [128], [4, 128], [4, 16, 128], [4, 16, 32, 128], [4, 16, 32, 128, 256],
             [32, 48, 128], [64], [8], [16], [256], [48]]

inputsToLinear = ['all', 'estimates']

hparams['learning_rate'] = 0.01

resultsName = 'optiResScaledVar.csv'
# experimentName is not reassigned later in this cell, so this value stays
experimentName = 'EHRNN_ScaledVar'

###########################################################################

# --- Section 2: hidden-size sweep using both estimates (overwritten below) --
hiddenSizes = [4, 8, 16, 32, 64, 128]
batchSizes = [32]
# [mean, var]
summaryToUse = [[1,1]]

##########################################################################
# --- Section 3: mean-only and variance-only runs (values in effect) ---------
hiddenSizes = [32]
batchSizes = [32]
# [mean, var]
summaryToUse = [[1,0], [0,1]]

# one single-length configuration per entry
EHLengths = [[4], [8], [16], [32], [48], [64], [128], [256]]

inputsToLinear = ['all']

hparams['learning_rate'] = 0.01

# We are first going to optimize using both the mean and the variance
# and then, with the best model we are going to find the performance of just
# using the mean or the variance or just using the estimates as input to the
# fully connected layer

resultsName = 'optiResScaledVar_MeanVar.csv'



In [None]:
# EHRNN experiment on the electricity stream: sweep over the EH epsilon.
#
# We fix learning rate after some experiments that determine that overall, for
# all other parameters fixed, a starting lr = 0.01 performs good.
# Optimizer is always RMSProp.

hiddenSizes = [32]
batchSizes = [32]
# each entry is [mean, var]: 1 enables that estimate, 0 disables it
summaryToUse = [[1,1]]

# a single EH of length 48 per pooled element
EHLengths = [[48]]

inputsToLinear = ['all']

hparams['learning_rate'] = 0.01

# Everything except epsilon is fixed to a single value in this cell, so the
# runs below only differ in the EH epsilon.

resultsName = 'optiResScaledVar_epsilons.csv'
experimentName = 'EHRNN_ScaledVar'

modelPath = '/content/drive/MyDrive/U/4t/CBI/sketchModelling/models/' + experimentName + '/'

# Create the output folder up front: otherwise the first to_csv() would fail
# only after a complete training run.
os.makedirs(modelPath, exist_ok=True)

try:
  # try reading previous results
  results = pd.read_csv(modelPath+resultsName, index_col=0)
  dfColumns = results.columns
except (FileNotFoundError, pd.errors.EmptyDataError):
  # or generate new results if none existed before
  dfColumns = ['hiddenSize', 'batchSize', 'EHLengths', 'NEpochs', 'EHeps', 'lr', 
               'summarization', 'inputToLinear', 'nParams', 'Best Val Accuracy', 
               'Test Accuracy', 'avgEpochTime' , 'experimentID']
  results = pd.DataFrame(columns=dfColumns)

# Experiment counter; the first run of this cell gets ID suffix i + 1.
# NOTE(review): 168 presumably continues the numbering of earlier runs stored
# in the results file -- confirm before re-running.
i = 168
saveModels = False
epsilons=[0.01, 0.3, 0.05, 0.1, 0.2, 0.4, 0.6]

hparams['num_epochs'] = 15

for hiddenSize in hiddenSizes:
  for batchSize in batchSizes:
    # Reload electricity with different batch sizes
    trainLoader = torch.utils.data.DataLoader(electricityDataSet(elecTrain), 
                                              batch_size=batchSize, 
                                              shuffle=False)
    valLoader = torch.utils.data.DataLoader(electricityDataSet(elecVal), 
                                            batch_size=batchSize, 
                                            shuffle=False)
    testLoader = torch.utils.data.DataLoader(electricityDataSet(elecTest), 
                                             batch_size=batchSize, 
                                             shuffle=False)
    for EHLength in EHLengths:
      for inputToLinear in inputsToLinear:
        for summaries in summaryToUse:
          for eps in epsilons:

            usingMean, usingVar = summaries
            if usingMean and usingVar:
              summarisation = 'VarAndMean'
            elif usingMean:
              summarisation = 'Mean'
            elif usingVar:
              summarisation = 'Var'
            else:
              raise ValueError("summaryToUse entries must enable the mean, the variance or both")

            print(hiddenSize, batchSize, str(EHLength), hparams['num_epochs'],
                    eps, hparams['learning_rate'], summarisation, inputToLinear)

            i = i + 1
            experimentID = experimentName + str(i)
            hparams['hidden_size'] = hiddenSize
            hparams['batch_size'] = batchSize

            ehrnn = EHRNN(seq_len=1, 
                          num_classes=hparams['num_classes'], 
                          input_size=hparams['input_size'], 
                          hidden_size=hparams['hidden_size'], 
                          num_layers=1,
                          EHeps=eps,
                          EHlengths=EHLength,
                          useMean=usingMean,
                          useVariance=usingVar,
                          inputToLinear=inputToLinear)
            
            nparams = get_nn_nparams(ehrnn)
            
            # `row` rather than `data`, which is the alias of torch.utils.data
            row = [hiddenSize, batchSize, str(EHLength), hparams['num_epochs'],
                   eps, hparams['learning_rate'], summarisation, inputToLinear,
                   nparams]

            # The loaders must be passed explicitly: model_experiment would
            # otherwise use its default loaders and ignore batchSize.
            bestValidationAcc, testAcc, avgEpochTrainingTime = model_experiment(
                ehrnn, hparams, modelPath, experimentID, 888, EH_train_epoch,
                EH_val_epoch, saveModels,
                trainLoader=trainLoader, valLoader=valLoader, testLoader=testLoader)

            row.append(bestValidationAcc)
            row.append(testAcc)
            row.append(avgEpochTrainingTime)
            row.append(experimentID)

            rowDf = pd.DataFrame([row],columns=dfColumns)
            results = pd.concat([results, rowDf])
            # save partial results in case of failure
            results.to_csv(modelPath+resultsName)
      
# TODO EHRNN vs VanillaRNN in terms of time, memory and accuracy
# (look at the best results in 'optiResScaledVar') and repeat with those configs
# for the vanilla RNN (perhaps with different EHRNN configs to account for more
# or fewer EHs and their size)

32 32 [48] 15 0.01 0.01 VarAndMean all
EHRNN(
  (rnn): RNN(8, 32, batch_first=True)
  (Softmax): Softmax(dim=1)
  (avgPool): AvgPool1d(kernel_size=(5,), stride=(5,), padding=(0,))
  (linear): Linear(in_features=44, out_features=2, bias=True)
)
Num params:  1434
Train accuracy: 69.547260

Validation set: Average loss: 1.1546, Accuracy: 4687.0/6797 (69%)

Train accuracy: 76.994136

Validation set: Average loss: 1.0533, Accuracy: 4984.0/6797 (73%)

Train accuracy: 80.364462

Validation set: Average loss: 0.9491, Accuracy: 5225.0/6797 (77%)

Train accuracy: 83.242954

Validation set: Average loss: 1.1445, Accuracy: 4897.0/6797 (72%)

Train accuracy: 84.447317

Validation set: Average loss: 1.0355, Accuracy: 5112.0/6797 (75%)

Train accuracy: 85.598083

Validation set: Average loss: 1.0473, Accuracy: 5155.0/6797 (76%)

Train accuracy: 86.430418

Validation set: Average loss: 0.7872, Accuracy: 5547.0/6797 (82%)

Train accuracy: 87.272211

Validation set: Average loss: 0.8528, Accuracy: 5456.

### Experiments for comparison of EHRNN vs vanilla RNN

In [None]:
# INPUT TO NETWORK IS PLAIN ELECTRICITY NORMALIZED
# Vanilla RNN baseline: sweep over the hidden size on the unmodified features.

hiddenSizes = [4, 8, 16, 32, 64, 128]
batchSizes = [32]
learningRates = [0.01]
hparams['num_epochs'] = 15

resultsName = 'optiResVanillaRNN_pureInput.csv'
experimentName = 'vanillaRNN_pure_input'

modelPath = '/content/drive/MyDrive/U/4t/CBI/sketchModelling/models/' + experimentName + '/'

# Create the output folder up front: otherwise the first to_csv() would fail
# only after a complete training run.
os.makedirs(modelPath, exist_ok=True)

try:
  # try reading previous results
  results = pd.read_csv(modelPath+resultsName, index_col=0)
  dfColumns = results.columns
except (FileNotFoundError, pd.errors.EmptyDataError):
  # or generate new results if none existed before
  dfColumns = ['hiddenSize', 'batchSize', 'NEpochs', 'lr', 'nParams', 'Best Val Accuracy', 
               'Test Accuracy', 'avgEpochTime' , 'experimentID']
  results = pd.DataFrame(columns=dfColumns)

# experiment counter; the first run of this cell gets ID suffix 1
i = 0
saveModels = False

for batchSize in batchSizes:
  trainLoader = torch.utils.data.DataLoader(electricityDataSet(elecTrain), 
                                            batch_size=batchSize, 
                                            shuffle=False)
  valLoader = torch.utils.data.DataLoader(electricityDataSet(elecVal), 
                                          batch_size=batchSize, 
                                          shuffle=False)
  testLoader = torch.utils.data.DataLoader(electricityDataSet(elecTest), 
                                            batch_size=batchSize, 
                                            shuffle=False)
  for hiddenSize in hiddenSizes:    
    for learningRate in learningRates:

        i = i + 1
        experimentID = experimentName + str(i)
        hparams['learning_rate'] = learningRate
        hparams['hidden_size'] = hiddenSize
        hparams['batch_size'] = batchSize

        print(hiddenSize, batchSize, hparams['num_epochs'],
              hparams['learning_rate'])

        rnn = RNN(seq_len=1, 
                      num_classes=hparams['num_classes'], 
                      input_size=hparams['input_size'], 
                      hidden_size=hparams['hidden_size'], 
                      num_layers=1)
        
        nparams = get_nn_nparams(rnn)
        
        # `row` rather than `data`, which is the alias of torch.utils.data
        row = [hiddenSize, batchSize, hparams['num_epochs'],
               hparams['learning_rate'], nparams]

        # The loaders must be passed explicitly: model_experiment would
        # otherwise use its default loaders and ignore batchSize.
        bestValidationAcc, testAcc, avgEpochTrainingTime = model_experiment(
            rnn, hparams, modelPath, experimentID, 888, train_epoch,
            val_epoch, saveModels,
            trainLoader=trainLoader, valLoader=valLoader, testLoader=testLoader)

        row.append(bestValidationAcc)
        row.append(testAcc)
        row.append(avgEpochTrainingTime)
        row.append(experimentID)

        rowDf = pd.DataFrame([row],columns=dfColumns)
        results = pd.concat([results, rowDf])
        # save partial results in case of failure
        results.to_csv(modelPath+resultsName)


4 32 15 0.01
RNN(
  (rnn): RNN(8, 4, batch_first=True)
  (linear): Linear(in_features=4, out_features=2, bias=True)
  (Softmax): Softmax(dim=1)
)
Num params:  66
Train accuracy: 66.457532

Val set: Average loss: 1.2804, Accuracy: 3845.0/6797 (57%)

Train accuracy: 73.330601

Val set: Average loss: 1.2813, Accuracy: 4141.0/6797 (61%)

Train accuracy: 75.556466

Val set: Average loss: 1.3045, Accuracy: 4245.0/6797 (62%)

Train accuracy: 77.063497

Val set: Average loss: 1.3310, Accuracy: 4290.0/6797 (63%)

Train accuracy: 78.179583

Val set: Average loss: 1.3425, Accuracy: 4330.0/6797 (64%)

Train accuracy: 78.892112

Val set: Average loss: 1.3392, Accuracy: 4404.0/6797 (65%)

Train accuracy: 79.216848

Val set: Average loss: 1.3358, Accuracy: 4452.0/6797 (65%)

Train accuracy: 79.528974

Val set: Average loss: 1.3362, Accuracy: 4485.0/6797 (66%)

Train accuracy: 79.781197

Val set: Average loss: 1.3389, Accuracy: 4499.0/6797 (66%)

Train accuracy: 79.986128

Val set: Average loss: 1.342

### Vanilla RNN with modified input 
(added features representing means and variances of different windows)

In [None]:
# Window-length combinations for which a pre-processed csv is expected.
windows = [[4], [4, 8], [4, 8, 16], [4, 8, 16, 32], [4, 8, 16, 32, 64],
            [32], [32, 64], [32, 64, 128], [32, 64, 128, 256], [32, 64, 128, 256, 512],
            [48]]

# generating csv names. Data has been modified with utils from sckm
# Each name is 'AppendedMeanVar' followed by the window lengths joined with
# underscores, e.g. [4, 8] -> 'AppendedMeanVar4_8'.
fileNames = ['AppendedMeanVar' + '_'.join(str(winLength) for winLength in window)
             for window in windows]

In [None]:
# INPUT TO NETWORK IS ELECTRICITY WITH MEAN AND VARIANCE MULTIRESOLUTION FEATURES
#
# Grid search of the vanilla RNN over (input file, batch size, hidden size,
# learning rate). One row per run is appended to a results CSV, which is
# rewritten after every run so an interrupted session keeps its partial results.

hiddenSizes = [16, 32, 64]
batchSizes = [32, 64]
learningRates = [0.1, 0.01, 0.001]
hparams['num_epochs'] = 15

# just using the estimates as input to the fully connected layer

resultsName = 'optiResVanillaRNN_multiresInput.csv'
experimentName = 'vanillaRNN_multires_input'

modelPath = '/content/drive/MyDrive/U/4t/CBI/sketchModelling/models/' + experimentName + '/'
# the results CSV is written here after the first run; make sure the folder exists
os.makedirs(modelPath, exist_ok=True)

try:
  # try reading previous results
  results = pd.read_csv(modelPath+resultsName, index_col=0)
  dfColumns = results.columns
except (FileNotFoundError, pd.errors.EmptyDataError):
  # or generate new results if none existed before. Only "no usable file" is
  # handled here: any other error (bad path permissions, a corrupt CSV, a
  # keyboard interrupt) must surface instead of silently discarding old results.
  dfColumns = ['hiddenSize', 'batchSize', 'NEpochs', 'lr', 'nParams', 'Best Val Accuracy',
               'Test Accuracy', 'avgEpochTime' , 'windowsOfInput', 'experimentID']
  results = pd.DataFrame(columns=dfColumns)

# running counter used to build a unique experimentID per run
i = 0
saveModels = False

for fileName in fileNames:
  # Load data
  elecWindowed = pd.read_csv('/content/drive/MyDrive/U/4t/CBI/sketchModelling/data/processedStreams/electricity/' + fileName + '.csv', index_col=0)
  # binary target: 'UP' -> 1, anything else -> 0
  elecWindowed['class'] = elecWindowed['class'].apply(lambda t: 1 if t == 'UP' else 0)
  trainPct = 0.7
  valPct = 0.15
  # split into test and train, having in mind that validation and test is the future
  # (chronological split, no shuffling: first 70% train, next 15% val, last 15% test)
  elecWinTrain = elecWindowed.iloc[:round(trainPct*len(elecWindowed)),:]
  elecWinVal = elecWindowed.iloc[round(trainPct*len(elecWindowed)):round((trainPct+valPct)*len(elecWindowed)),:]
  elecWinTest = elecWindowed.iloc[round((trainPct+valPct)*len(elecWindowed)):,:]

  # every column except the target is an input feature
  hparams['input_size'] = len(elecWindowed.columns) - 1

  for batchSize in batchSizes:
    # shuffle=False keeps the stream order in all three loaders
    trainLoader = torch.utils.data.DataLoader(electricityDataSet(elecWinTrain),
                                              batch_size=batchSize,
                                              shuffle=False)
    valLoader = torch.utils.data.DataLoader(electricityDataSet(elecWinVal),
                                            batch_size=batchSize,
                                            shuffle=False)
    testLoader = torch.utils.data.DataLoader(electricityDataSet(elecWinTest),
                                             batch_size=batchSize,
                                             shuffle=False)
    for hiddenSize in hiddenSizes:
      for learningRate in learningRates:

        i = i + 1
        experimentID = experimentName + str(i)
        hparams['learning_rate'] = learningRate
        hparams['hidden_size'] = hiddenSize
        hparams['batch_size'] = batchSize

        print(hiddenSize, batchSize, hparams['num_epochs'],
              hparams['learning_rate'])

        rnn = RNN(seq_len=1,
                  num_classes=hparams['num_classes'],
                  input_size=hparams['input_size'],
                  hidden_size=hparams['hidden_size'],
                  num_layers=1)

        nparams = get_nn_nparams(rnn)

        # row for the results table, filled in the same order as dfColumns
        data = [hiddenSize, batchSize, hparams['num_epochs'],
                hparams['learning_rate'], nparams]

        # NOTE(review): model_experiment is defined elsewhere in the notebook and
        # is presumably reading trainLoader/valLoader/testLoader as globals - confirm.
        bestValidationAcc, testAcc, avgEpochTrainingTime = model_experiment(rnn, hparams, modelPath,
                                            experimentID, 888, train_epoch,
                                            val_epoch, saveModels)

        data.append(bestValidationAcc)
        data.append(testAcc)
        data.append(avgEpochTrainingTime)
        data.append(fileName)
        data.append(experimentID)

        dataDf = pd.DataFrame([data],columns=dfColumns)
        results = pd.concat([results, dataDf])
        # save partial results in case of failure
        results.to_csv(modelPath + resultsName)


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Train accuracy: 81.102213

Val set: Average loss: 0.8781, Accuracy: 5462.0/6797 (80%)

Train accuracy: 82.003909

Val set: Average loss: 0.8453, Accuracy: 5490.0/6797 (81%)

Train accuracy: 82.672300

Val set: Average loss: 0.8193, Accuracy: 5508.0/6797 (81%)

Train accuracy: 83.167287

Val set: Average loss: 0.7981, Accuracy: 5524.0/6797 (81%)

Train accuracy: 83.627593

Val set: Average loss: 0.7806, Accuracy: 5545.0/6797 (82%)

Train accuracy: 83.930260

Val set: Average loss: 0.7662, Accuracy: 5570.0/6797 (82%)

Train accuracy: 84.336970

Val set: Average loss: 0.7534, Accuracy: 5593.0/6797 (82%)

Train accuracy: 84.658554

Val set: Average loss: 0.7421, Accuracy: 5612.0/6797 (83%)

Train accuracy: 84.885554

Val set: Average loss: 0.7320, Accuracy: 5628.0/6797 (83%)


Val set: Average loss: 0.6010, Accuracy: 5919.0/6797 (87%)

16 32 15 0.001
RNN(
  (rnn): RNN(56, 16, batch_first=True)
  (linear): Linear(in_features=1

## PLOT: Time series nature of hidden states

In [None]:
# Time series nature of the hidden states:
# configuration for the "temporal_nature" experiment, whose hidden states are
# plotted in the cells below.
experimentName = "temporal_nature"

# folder for this experiment; the checkpoint is later loaded from
# modelPath + experimentID + '.pth'
modelPath = '/content/drive/MyDrive/U/4t/CBI/sketchModelling/models/' + experimentName + '/'
experimentID = "1"

# mutates the shared hparams dict: every later cell sees hidden_size == 2
# until something overwrites it again
hparams['hidden_size'] = 2

In [None]:
# Build the EHRNN used for the hidden-state plots.
# NOTE(review): EHRNN is defined elsewhere in the notebook; its keyword arguments
# mirror those of EHRNN_R below, so the per-argument notes assume the same meaning.
eps = 0.05
ehrnn = EHRNN(seq_len=1, 
              num_classes=hparams['num_classes'], 
              input_size=hparams['input_size'], 
              hidden_size=hparams['hidden_size'], 
              num_layers=1,
              EHeps=eps,
              # two window lengths per pooled hidden element; the plots below label them '48' and '128'
              EHlengths=[48, 128],
              # query both the mean and the variance estimate from every histogram
              useMean=True,
              useVariance=True,
              # 'all': hidden states and estimates are both fed to the linear layer
              inputToLinear='all')

In [None]:
# Train/evaluate the EHRNN with the EH-specific epoch functions. save_models=True
# because the next cell reloads the checkpoint from modelPath + experimentID + '.pth'.
# NOTE(review): 888 matches the value given to torch.manual_seed at the top of the
# notebook, so it is presumably the seed argument of model_experiment - confirm.
bestValidationAcc, testAcc, avgEpochTrainingTime = model_experiment(ehrnn, hparams, modelPath, 
                                            experimentID, 888, EH_train_epoch, 
                                            EH_val_epoch, save_models=True)

In [None]:
# load the best model and obtain hidden states for training and validation
device = hparams['device']
# map_location lets a checkpoint saved on GPU be restored on a CPU-only runtime
ehrnn.load_state_dict(torch.load(modelPath + experimentID + '.pth', map_location=device)['model_state_dict'])
ehrnn.eval()

# reset EHs so the estimates start from an empty window instead of continuing
# from whatever the training/validation passes left in them
ehrnn.EHs = [[VarEH(length, eps=ehrnn.EHeps, maxValue=1) for length in ehrnn.EHlengths] for _ in range(ehrnn.numEHs)]

# shuffle=False: the exponential histograms are order dependent
elecLoader = torch.utils.data.DataLoader(electricityDataSet(elec), batch_size=hparams['batch_size'], shuffle=False)

# one array per batch, stacked once at the end (avoids re-copying the growing
# array on every iteration)
hiddenBatches = []
with torch.no_grad():
    for data, target in elecLoader:

        # Load data and feed it through the neural network
        data, target = data.to(device), target.to(device)
        data = data.view(data.shape[0], -1)
        output = ehrnn.hidden_states(data)
        # each row of the output contains: H0, H1, (Mean0_0, Var0_0, Mean0_1, Var0_1), 
        # (Mean1_0, Var1_0, Mean1_1, Var1_1)
        # .cpu() is required before .numpy() when the model runs on a GPU and is
        # a no-op on CPU
        hiddenBatches.append(output.cpu().numpy())

allHidden = np.concatenate(hiddenBatches, axis=0)

In [None]:
# Line plot of one hidden unit over the first n observations.
plotsName = "hidden_states"

# lift Altair's default row limit so larger frames can be plotted
alt.data_transformers.enable('default', max_rows=None)
# number of leading observations shown in this and the following plots
# (the chart titles hard-code "First 100", keep them in sync if n changes)
n = 100

# Hidden
# column 1 of allHidden is H1 according to the row layout noted where allHidden is built
hidden1 = pd.DataFrame(allHidden[:n,1], columns=['hidden_state'])
# explicit observation number used as the x channel
hidden1['index'] = hidden1.index

# last expression of the cell: the chart is displayed; saving is left disabled
alt.Chart(hidden1, title='First 100 hidden states, 2nd neuron').mark_line().encode(
    x = alt.X('index:Q', title='Observation'),
    y = alt.Y('hidden_state:Q', title='Hidden state'),
    color=alt.value("#D11010")
)#.save(plotsPath + plotsName + "/hidden_state_1.png", scale_factor=2.0)

In [None]:
# Mean window estimation (48 and 128)
# Columns 6 and 8 of allHidden are taken as the mean estimates for the two window
# lengths; the frame is reshaped to long form (index, WL, mean) for plotting.
meanDf = (
    pd.DataFrame(allHidden[:n,[6, 8]], columns=['48', '128'])
    .assign(index=lambda frame: frame.index)
    .melt(id_vars=['index'], value_vars=['48', '128'], value_name='mean', var_name="WL")
)

In [None]:
# Two layers on shared axes: the windowed mean estimates (dashed, one colour per
# window length) and the hidden state itself (solid red).
# resolve_scale(color='independent') keeps the fixed red of the second layer out
# of the 'WL' colour legend of the first.
# NOTE(review): the y title of the mean layer is 'Hidden state' although it
# encodes the 'mean' field - confirm this is intended for the shared axis.
meanPlot = alt.layer(alt.Chart(meanDf, title='First 100 hidden states, 2nd neuron').mark_line(strokeDash=[3,1]).encode(
    x = alt.X('index:Q', title='Observation'),
    y = alt.Y('mean:Q', title='Hidden state'),
    color = alt.Color('WL', title="W.L. (mean)")
), alt.Chart(hidden1, title='First 100 hidden values').mark_line().encode(
    x = alt.X('index:Q', title='Observation'),
    y = alt.Y('hidden_state:Q', title='Hidden state'),
    color=alt.value("#D11010")
)).resolve_scale(color='independent')

# meanPlot.save(plotsPath + plotsName + "/mean_state_1.png", scale_factor=2.0)
meanPlot

In [None]:
# Variance window estimation (48 and 128)
# Columns 7 and 9 of allHidden are taken as the variance estimates for the two
# window lengths; the frame is reshaped to long form (index, WL, variance).
varDf = (
    pd.DataFrame(allHidden[:n,[7, 9]], columns=['48', '128'])
    .assign(index=lambda frame: frame.index)
    .melt(id_vars=['index'], value_vars=['48', '128'], value_name='variance', var_name="WL")
)

In [None]:
# Hidden state (solid red) layered with the windowed variance estimates (dashed).
# resolve_scale(y='independent') gives each layer its own y axis, and the left
# axis is tinted red so it can be matched with the red hidden-state line.
varPlot = (alt.layer(alt.Chart(hidden1, title='First 100 hidden states, 2nd neuron').mark_line().encode(
    x = alt.X('index:Q', title='Observation'),
    y = alt.Y('hidden_state:Q', title='Hidden state'),
    color=alt.value("#D11010")
), alt.Chart(varDf, title='First 100 hidden values').mark_line(strokeDash=[3,1]).encode(
    x = alt.X('index:Q', title='Observation'),
    y = alt.Y('variance:Q', title='Variance'),
    color = alt.Color('WL', title="W.L. (Variance)")
)).resolve_scale(y='independent')
  .configure_axisLeft(labelColor='#D11010', titleColor='#D11010'))

# varPlot.save(plotsPath + plotsName + "/var_state_1.png", scale_factor=2.0)
varPlot

## PLOTS OF COMPARISON OF MODELS

In [None]:
# Load the EHRNN (scaled variance) grid-search results.
modelPath = '/content/drive/MyDrive/U/4t/CBI/sketchModelling/models/'
data = pd.read_csv(modelPath + 'EHRNN_ScaledVar/optiResScaledVar.csv', index_col=0)
# renumber the rows 1..N
data.index = np.arange(1, len(data) + 1)
# column names without spaces so they can be used in Altair shorthands
data.columns = data.columns.str.replace(" ", "_", regex=False)
data.head()

Unnamed: 0,hiddenSize,batchSize,EHLengths,NEpochs,EHeps,lr,summarization,inputToLinear,nParams,Best_Val_Accuracy,Test_Accuracy,avgEpochTime,experimentID
1,16,32,[4],15,0.05,0.01,VarAndMean,all,466,65.690746,52.567309,5.653085,EHRNN_ScaledVar1
2,16,32,[4],15,0.05,0.01,VarAndMean,estimates,434,62.92482,58.864205,5.078737,EHRNN_ScaledVar2
3,16,32,"[4, 8]",15,0.05,0.01,VarAndMean,all,482,70.383993,61.085773,6.935832,EHRNN_ScaledVar3
4,16,32,"[4, 8]",15,0.05,0.01,VarAndMean,estimates,450,63.204355,62.277475,6.446488,EHRNN_ScaledVar4
5,16,32,"[4, 8, 16]",15,0.05,0.01,VarAndMean,all,498,70.854789,70.545829,8.312449,EHRNN_ScaledVar5


### PLOT: ACCURACY VS EHLENGTH BY HIDDEN SIZE, ALL INPUTS TO LINEAR, BS 32, MEAN AND VAR

In [None]:
# work on a copy of the results without their last 6 rows
data1 = data.iloc[0:len(data.index)-6,:].copy()

def rep(length):
  """Turn a single-length string such as '[48]' into the integer 48."""
  return int(length.replace("[","").replace("]",""))

# only use one EH over each hidden state
# (rows whose EHLengths string has no comma hold exactly one window length)
oneHist = ~data1['EHLengths'].str.contains(",", regex=False).to_numpy()
data1.loc[oneHist, "EHLengths"] = data1.loc[oneHist, "EHLengths"].map(rep).tolist()

In [None]:
# Boolean row masks over data1 (also reused by the following plot cells)
# all inputs to the linear layer
allInputs = (data1['inputToLinear'] == "all").to_numpy()

# Batch Size 32
batchSize32 = (data1['batchSize'] == 32).to_numpy()

# validation accuracy against the (single) EH length, one line per hidden size
valAccu_hiddenSize = (
    alt.Chart(data1.loc[oneHist & allInputs & batchSize32,
                        ['EHLengths', 'hiddenSize', 'Best_Val_Accuracy']],
              title='Validation accuracy vs E.H. lengths')
    .mark_line()
    .encode(
        x = alt.X('EHLengths:Q', title="Exp. Hist. Length"),
        y = alt.Y('Best_Val_Accuracy:Q', title="Validation Accuracy (%)",
                  scale = alt.Scale(domain=[40, 100])),
        color=alt.Color('hiddenSize:N', scale=alt.Scale(scheme="lighttealblue"))
    )
    .properties(width=200, height=250)
)

plotsName = "val_accuracy"
# valAccu_hiddenSize.save(plotsPath + plotsName + "/EHlength_byHidden.png", scale_factor=2.0)
valAccu_hiddenSize

### PLOT: ACCURACY VS EHLENGTH BY LINEAR INPUT, HIDDEN SIZE 32, BS 32, MEAN AND VAR

In [None]:

# rows of data1 trained with hidden size 32 (reused by the next plot cell)
hiddenSize32 = (data1['hiddenSize'] == 32).to_numpy()

# validation accuracy against the (single) EH length, one line per kind of
# input given to the linear layer
valAccu_inputLin = (
    alt.Chart(data1.loc[oneHist & hiddenSize32 & batchSize32,
                        ['EHLengths', 'inputToLinear', 'Best_Val_Accuracy']],
              title='Validation accuracy vs E.H. lengths')
    .mark_line()
    .encode(
        x = alt.X('EHLengths:Q', title="Exp. Hist. Length"),
        y = alt.Y('Best_Val_Accuracy:Q', title="Validation Accuracy (%)",
                  scale = alt.Scale(domain=[40, 100])),
        color=alt.Color('inputToLinear:N', scale=alt.Scale(scheme="lighttealblue"))
    )
    .properties(width=200, height=250)
)

plotsName = "val_accuracy"
# valAccu_inputLin.save(plotsPath + plotsName + "/EHlength_byInput.png", scale_factor=2.0)
valAccu_inputLin

### PLOT: ACCURACY VS EHLENGTH BY EH CONFIGURATION, HIDDEN SIZE 32, BS 32, ALL INPUTS, MEAN AND VAR



In [None]:
# Bar chart of validation accuracy per EH configuration (EHLengths kept as strings).
# Same row range as data1 (results without the last 6 rows), so the boolean masks
# hiddenSize32 / batchSize32 / allInputs built from data1 in earlier cells line up
# positionally with data2. Those cells must have been run first.
data2 = data.iloc[0:len(data.index)-6,:]
# order of first appearance, used to sort the x axis
EHlengths = data2.loc[:,'EHLengths'].unique()

valAccu_EHconfig = alt.Chart(data2.loc[hiddenSize32 & batchSize32 & allInputs, 
                                     ['EHLengths', 'Best_Val_Accuracy']], 
                            title='Validation accuracy vs E.H. configuration').mark_bar(size=14).encode(
    x = alt.X('EHLengths:O', title="Exp. Hist. Lengths", sort=EHlengths),
    y = alt.Y('Best_Val_Accuracy:Q', title="Validation Accuracy (%)",
              scale = alt.Scale(domain=[40, 100]))
).properties(
    width=400,
    height=250
).configure_axisX(labelAngle=-85)

# NOTE(review): this bare expression is not the last one in the cell, so it
# displays nothing; only the final line renders the chart.
valAccu_EHconfig

plotsName = "val_accuracy"
# valAccu_EHconfig.save(plotsPath + plotsName + "/EHs_byConfig.png", scale_factor=2.0)
valAccu_EHconfig

### PLOT: ACCURACY EHRNN AND VANILLA RNN BY HIDDEN SIZE, BS 32, LR 0.01, ALL INPUTS TO LINEAR; EH Size 48

In [None]:
# Load the vanilla RNN (pure input) grid-search results.
dataVRNN = pd.read_csv(modelPath + 'vanillaRNN_pure_input/optiResVanillaRNN_pureInput.csv', index_col=0)
# renumber the rows 1..N
dataVRNN.index = np.arange(1, len(dataVRNN) + 1)
# column names without spaces so they can be used in Altair shorthands
dataVRNN.columns = dataVRNN.columns.str.replace(" ", "_", regex=False)

dataVRNN['Best_Val_Accuracy'] = dataVRNN['Best_Val_Accuracy'].round(2)
dataVRNN.head()

Unnamed: 0,hiddenSize,batchSize,NEpochs,lr,nParams,Best_Val_Accuracy,Test_Accuracy,avgEpochTime,experimentID
1,16,32,15,0.1,450,62.03,52.214212,2.666887,vanillaRNN_pure_input1
2,16,32,15,0.01,450,64.97,62.512873,2.679104,vanillaRNN_pure_input2
3,16,32,15,0.001,450,62.04,66.441077,2.653533,vanillaRNN_pure_input3
4,32,32,15,0.1,1410,38.16,47.785788,2.723342,vanillaRNN_pure_input4
5,32,32,15,0.01,1410,65.26,64.60203,2.686265,vanillaRNN_pure_input5


In [None]:
# vanilla RNN runs with learning rate 0.01 and batch size 32
lr001 = (dataVRNN['lr'] == 0.01).to_numpy()
bSize32 = (dataVRNN['batchSize'] == 32).to_numpy()
accVanilla = dataVRNN.loc[bSize32 & lr001, ['hiddenSize', 'Best_Val_Accuracy']]
# the first three matching rows are dropped before plotting
accVanilla = accVanilla.iloc[3:]
accVanilla['Model'] = 'VanillaRNN'

In [None]:
# EHRNN runs with a single EH of length 48, batch size 32, all inputs to linear
EH48 = (data['EHLengths'] == '[48]').to_numpy()
bSize32EHRNN = (data['batchSize'] == 32).to_numpy()
allInputsData = (data['inputToLinear'] == 'all').to_numpy()
accEHRNN = (
    data.loc[bSize32EHRNN & EH48 & allInputsData, ['hiddenSize', 'Best_Val_Accuracy']]
    .drop_duplicates()
)
accEHRNN['Model'] = 'EHRNN'

In [None]:
# Validation accuracy against hidden size, one line per model type.
# The two selections are stacked row-wise; their 'Model' column drives the colour.
valAccu_models = alt.Chart(pd.concat([accVanilla, accEHRNN], axis=0), 
                            title='Validation accuracy vs hidden size').mark_line().encode(
    x = alt.X('hiddenSize:Q', title="Hidden size"),
    y = alt.Y('Best_Val_Accuracy:Q', title="Validation Accuracy (%)",
              scale = alt.Scale(domain=[40, 100])),
    color=alt.Color('Model:O')
).properties(
    width=200,
    height=250
)

# sub-folder of plotsPath that the comparison figures are saved under
plotsName = "ehrnn_vs_rnn"
# valAccu_models.save(plotsPath + plotsName + "/HSize_byModelType_accu.png", scale_factor=2.0)
valAccu_models

### PLOT: MEAN EPOCH TIME EHRNN AND VANILLA RNN BY HIDDEN SIZE, BS 32, LR 0.01, ALL INPUTS TO LINEAR; EH Size 48

In [None]:
# Same row selections as for the accuracy comparison, but keeping avgEpochTime.
# NOTE(review): accVanilla / accEHRNN are overwritten here and now hold epoch
# times, not accuracies; the accuracy cells must be re-run before re-plotting them.
accVanilla = dataVRNN.loc[bSize32 & lr001, ['hiddenSize', 'avgEpochTime']]
# the first three matching rows are dropped, as in the accuracy selection
accVanilla = accVanilla.iloc[3:,]
accVanilla['Model'] = 'VanillaRNN'

accEHRNN = data.loc[bSize32EHRNN & EH48 & allInputsData, ['hiddenSize', 'avgEpochTime']].drop_duplicates()
accEHRNN['Model'] = 'EHRNN'

In [None]:
# Mean training-epoch time against hidden size, one line per model type.
epochTime_models = alt.Chart(pd.concat([accVanilla, accEHRNN], axis=0), 
                            title='Mean tr. epoch time vs hidden size').mark_line().encode(
    x = alt.X('hiddenSize:Q', title="Hidden size"),
    y = alt.Y('avgEpochTime:Q', title="Mean tr. epoch time (s)"),
    color=alt.Color('Model:O')
).properties(
    width=200,
    height=250
)

# sub-folder of plotsPath that the comparison figures are saved under
plotsName = "ehrnn_vs_rnn"
# epochTime_models.save(plotsPath + plotsName + "/HSize_byModelType_time.png", scale_factor=2.0)
epochTime_models

In [None]:
# Save the accuracy and epoch-time charts side by side as one PNG.
# Uses whatever valAccu_models / epochTime_models / plotsName currently hold, so
# run it right after the two cells that build those charts.
alt.hconcat(valAccu_models, epochTime_models).save(plotsPath + plotsName + "/double_ehrnn_vanilla.png", scale_factor=2.0)

### PLOT: ACCURACY + NUMPARAMS EHRNN AND VANILLA RNN W & W/O MOD INPUT BY HIDDEN SIZE, BS 32, LR 0.01, ALL INPUTS TO LINEAR; MEAN AND VARIANCE

In [None]:
modelPath = '/content/drive/MyDrive/U/4t/CBI/sketchModelling/models/'

# vanilla RNN trained on the input with appended mean/variance features
data1 = pd.read_csv(modelPath + 'vanillaRNN_multires_input/optiResVanillaRNN_multiresInput.csv', index_col=0)
data1.index = np.arange(1, len(data1) + 1)
data1.columns = data1.columns.str.replace(" ", "_", regex=False)

# EHRNN results, reloaded so this section starts from the unmodified table
data = pd.read_csv(modelPath + 'EHRNN_ScaledVar/optiResScaledVar.csv', index_col=0)
data.index = np.arange(1, len(data) + 1)
data.columns = data.columns.str.replace(" ", "_", regex=False)
data.head()

Unnamed: 0,hiddenSize,batchSize,EHLengths,NEpochs,EHeps,lr,summarization,inputToLinear,nParams,Best_Val_Accuracy,Test_Accuracy,avgEpochTime,experimentID
1,16,32,[4],15,0.05,0.01,VarAndMean,all,466,65.690746,52.567309,5.653085,EHRNN_ScaledVar1
2,16,32,[4],15,0.05,0.01,VarAndMean,estimates,434,62.92482,58.864205,5.078737,EHRNN_ScaledVar2
3,16,32,"[4, 8]",15,0.05,0.01,VarAndMean,all,482,70.383993,61.085773,6.935832,EHRNN_ScaledVar3
4,16,32,"[4, 8]",15,0.05,0.01,VarAndMean,estimates,450,63.204355,62.277475,6.446488,EHRNN_ScaledVar4
5,16,32,"[4, 8, 16]",15,0.05,0.01,VarAndMean,all,498,70.854789,70.545829,8.312449,EHRNN_ScaledVar5


In [None]:
import re

def parse_string(s):
  """Convert a processed-stream file name into the string form of its window list.

  The digits following the first 'Var' in ``s`` and every further
  '_'-separated token are read as integers, e.g.
  'AppendedMeanVar4_8' -> '[4, 8]' and 'AppendedMeanVar48' -> '[48]'.
  Raises ValueError when one of those tokens is not an integer.
  """
  # everything up to the first underscore still carries the name prefix
  head, *tail = s.split('_')
  firstSizeStart = s.find('Var') + 3
  sizes = [int(head[firstSizeStart:])]
  sizes.extend(int(token) for token in tail)
  return str(sizes)

In [None]:
# Replace the file names by their window lists ('AppendedMeanVar4_8' -> '[4, 8]').
# Not idempotent: running this cell twice feeds already-parsed strings to parse_string.
data1.loc[:,'windowsOfInput'] = data1.loc[:,'windowsOfInput'].apply(parse_string)
data1.head()

Unnamed: 0,hiddenSize,batchSize,NEpochs,lr,nParams,Best_Val_Accuracy,Test_Accuracy,avgEpochTime,windowsOfInput,experimentID
1,16,32,15,0.1,642,61.836104,52.214212,2.705341,[4],vanillaRNN_multires_input1
2,16,32,15,0.01,642,67.559217,65.264087,2.682042,[4],vanillaRNN_multires_input2
3,16,32,15,0.001,642,60.776813,65.396498,2.689171,[4],vanillaRNN_multires_input3
4,32,32,15,0.1,1794,38.163896,47.785788,2.770886,[4],vanillaRNN_multires_input4
5,32,32,15,0.01,1794,64.984552,65.013977,2.690676,[4],vanillaRNN_multires_input5


In [None]:
# row masks over the multires-input RNN results
lrs = (data1['lr'] == 0.01).to_numpy()
bs = (data1['batchSize'] == 32).to_numpy()
hs = (data1['hiddenSize'] == 32).to_numpy()

# give both tables the same name for their window-length column
data1 = data1.rename(columns={'windowsOfInput': 'summaryLengths'})
data = data.rename(columns={'EHLengths': 'summaryLengths'})

windows = data['summaryLengths'].unique()
windows1 = data1['summaryLengths'].unique()

# window configurations present in both experiments
inCommon = list(set(windows) & set(windows1))

# label used for the legend
data1['model'] = 'RNN_mod_input'
data['model'] = 'EHRNN'

data1['summaryLengths'] = data1['summaryLengths'].astype(str)
data['summaryLengths'] = data['summaryLengths'].astype(str)

In [None]:
# Rows whose window configuration exists in both experiments. NumPy boolean
# arrays are used instead of plain lists because `list & Series` logical ops are
# deprecated in pandas.
rowsData1 = data1['summaryLengths'].isin(inCommon).to_numpy()
rowsData = data['summaryLengths'].isin(inCommon).to_numpy()

plotColumns = ['Best_Val_Accuracy', 'nParams', 'summaryLengths', 'model']

# same hyper-parameters on both sides: hidden size 32, batch size 32
# (lr 0.01 for the RNN, all inputs to the linear layer for the EHRNN)
ehrnnRows = (rowsData
             & (data['hiddenSize'] == 32).to_numpy()
             & (data['batchSize'] == 32).to_numpy()
             & (data['inputToLinear'] == 'all').to_numpy())

toPlot = pd.concat([data1.loc[rowsData1 & lrs & bs & hs, plotColumns],
                    data.loc[ehrnnRows, plotColumns]],
                   axis=0)
# NOTE(review): the last stacked row is dropped on purpose in the original
# analysis; the reason is not recorded in the notebook.
toPlot = toPlot.iloc[:len(toPlot.index)-1,:]

In [None]:
# Layered scatter per window configuration: validation accuracy (default point
# shape) and parameter count (triangles), each with its own y axis and colour legend.
# NOTE(review): this reuses the name epochTime_models for an accuracy chart,
# overwriting the epoch-time chart built earlier; re-running the hconcat save
# cell after this one would save the wrong figure.
epochTime_models = alt.Chart(toPlot,
                             title='Model accuracy and #param. by window sizes').mark_point(filled=True, size=70).encode(
    x = alt.X('summaryLengths:O', title="Window sizes", sort=windows),
    y = alt.Y('Best_Val_Accuracy:Q', title="Validation Accuracy (%)",
              scale = alt.Scale(domain=[40, 100])),
    color = alt.Color('model:O', title='Model (% accuracy)', scale=alt.Scale(scheme='category10'))
).properties(
    width=400,
    height=250
)

# second layer: number of parameters of the same runs
b = alt.Chart(toPlot, title='Model accuracy and #param. by window sizes').mark_point(filled=True, size=70, shape='triangle').encode(
    x = alt.X('summaryLengths:O', title="Window sizes", sort=windows),
    y = alt.Y('nParams:Q', title="#parameters", scale=alt.Scale(domain=[1000, 6000], )),
    color = alt.Color('model:O', title='Model (# param.)', scale=alt.Scale(scheme="category10"))
).properties(
    width=400,
    height=250
)

# independent colour and y scales so each layer keeps its own legend and axis
plot = (alt.layer(epochTime_models, b)
    .configure_axisX(labelAngle=-85)
    .configure_axisY(grid=True)
    .resolve_scale(color='independent',y='independent'))

plotsName = "ehrnn_vs_rnn"
# plot.save(plotsPath + plotsName + "/accu_params_bysize1.png", scale_factor=2.0)
plot

In [None]:
# Variant of the previous figure: the model is encoded by point shape, and the
# metric by a fixed colour (blue = accuracy, red = #parameters) that is repeated
# on the matching y axis.
# NOTE(review): epochTime_models and b are overwritten again here; neither holds
# an epoch-time chart after this cell.
epochTime_models = alt.Chart(toPlot,
                             title='Model accuracy and #param. by window sizes').mark_point(size=70).encode(
    x = alt.X('summaryLengths:O', title="Window sizes", sort=windows),
    y = alt.Y('Best_Val_Accuracy:Q', title="Validation Accuracy (%)",
              scale = alt.Scale(domain=[40, 100])),
    shape = alt.Shape('model:O', title='Model (% accuracy)'),
    color = alt.value('#3657F0')
).properties(
    width=400,
    height=250
)

# second layer: number of parameters of the same runs
b = alt.Chart(toPlot, title='Model accuracy and #param. by window sizes').mark_point(size=70).encode(
    x = alt.X('summaryLengths:O', title="Window sizes", sort=windows),
    y = alt.Y('nParams:Q', title="#parameters", scale=alt.Scale(domain=[1000, 6000])),
    shape = alt.Shape('model:O',title='Model (# param.)'),
    color = alt.value('#F41C1C')
).properties(
    width=400,
    height=250
)

# left axis tinted like the accuracy points, right axis like the parameter points
plot = (alt.layer(epochTime_models, b)
    .configure_axisX(labelAngle=-85)
    .configure_axisY(grid=True)
    .configure_axisLeft(labelColor='#3657F0', titleColor='#3657F0')
    .configure_axisRight(labelColor='#F41C1C', titleColor='#F41C1C')
    .resolve_scale(y='independent', shape='independent'))

plotsName = "ehrnn_vs_rnn"
# plot.save(plotsPath + plotsName + "/accu_params_bysize2.png", scale_factor=2.0)
plot

### PLOT: ACCURACY VS EPSILON (MEMORY UTILITY)

In [None]:
modelPath = '/content/drive/MyDrive/U/4t/CBI/sketchModelling/models/'

# EHRNN results for the sweep over EHeps
data = pd.read_csv(modelPath + 'EHRNN_ScaledVar/optiResScaledVar_epsilons.csv', index_col=0)
# renumber the rows 1..N
data.index = np.arange(1, len(data) + 1)
# column names without spaces so they can be used in Altair shorthands
data.columns = data.columns.str.replace(" ", "_", regex=False)

In [None]:
# Validation accuracy as a function of the EH relative-error parameter (EHeps).
# No titles, sizes or explicit encoding types are set; Altair infers the types.
plot = alt.Chart(data).mark_line().encode(
    x=alt.X('EHeps'),
    y=alt.Y('Best_Val_Accuracy')
)
# saving is disabled: no target path has been chosen for this figure
# plot.save()
plot

### PLOT: ACCURACY VS EHLENGTHS BY TYPE OF ESTIMATIONS, HS 32, BS 32, eps 0.05, LR 0.01, ALL INPUTS

In [None]:
modelPath = '/content/drive/MyDrive/U/4t/CBI/sketchModelling/models/'

# EHRNN results by summarization type
data = pd.read_csv(modelPath + 'EHRNN_ScaledVar/optiResScaledVar_MeanVar.csv', index_col=0)
# renumber the rows 1..N
data.index = np.arange(1, len(data) + 1)
# column names without spaces so they can be used in Altair shorthands
data.columns = data.columns.str.replace(" ", "_", regex=False)
# every row holds a single window length such as '[48]'; strip the brackets and
# store it as an integer so it can go on a quantitative axis
data['EHLengths'] = (
    data['EHLengths']
    .str.replace("[", "", regex=False)
    .str.replace("]", "", regex=False)
    .astype(int)
)
data.head()

Unnamed: 0,hiddenSize,batchSize,EHLengths,NEpochs,EHeps,lr,summarization,inputToLinear,nParams,Best_Val_Accuracy,Test_Accuracy,avgEpochTime,experimentID
1,32,32,4,15,0.05,0.01,VarAndMean,all,1434,58.967191,48.889216,6.66684,EHRNN_ScaledVar169
2,32,32,8,15,0.05,0.01,VarAndMean,all,1434,62.968957,48.41842,6.712105,EHRNN_ScaledVar170
3,32,32,16,15,0.05,0.01,VarAndMean,all,1434,70.295719,79.358541,6.766945,EHRNN_ScaledVar171
4,32,32,32,15,0.05,0.01,VarAndMean,all,1434,82.904222,88.15654,6.817182,EHRNN_ScaledVar172
5,32,32,48,15,0.05,0.01,VarAndMean,all,1434,90.025011,86.582316,6.782772,EHRNN_ScaledVar173


In [None]:
# Validation accuracy against EH length, one line per summarization type.
summaryType = alt.Chart(data, title='W.L. vs. Accuracy by summary type').mark_line().encode(
    x = alt.X('EHLengths:Q', title="Exp. Hist. Length",sort='x'),
    y = alt.Y('Best_Val_Accuracy:Q', title="Validation Accuracy (%)"),
    color=alt.Color('summarization:O', scale=alt.Scale(scheme='category10'))
).properties(
    width=200,
    height=250
)

# unlike the other plot cells, this one actually writes the PNG
# (to plotsPath + "ehrnn/"), then displays the chart
plotsName = "ehrnn"
summaryType.save(plotsPath + plotsName + "/accuracy_summaryType.png", scale_factor=2.0)
summaryType

## EHRNN FOR REGRESSION

In [None]:
class EHRNN_R(nn.Module):
    """RNN for regression whose linear head also sees windowed statistics of the hidden state.

    The hidden state of each input row is average-pooled, every pooled element is
    pushed into one exponential histogram (``VarEH``) per window length, and the
    mean and/or variance estimates queried right after each row are fed to the
    final linear layer, either together with the hidden state
    (``inputToLinear='all'``) or on their own (``inputToLinear='estimates'``).

    The histograms live in ``self.EHs`` as plain Python objects: they are updated on
    every call of ``hidden_states`` (training or evaluation), are not part of the
    ``state_dict`` and rows are consumed in batch order.

    Args:
        seq_len: sequence length each input row is viewed as (the pooling and the
            final concatenation assume 1).
        num_values: number of outputs of the linear layer.
        input_size: number of input features.
        hidden_size: size of the RNN hidden state.
        num_layers: used for the shape of the initial hidden state.
        EHeps: ``eps`` handed to every ``VarEH``.
        EHlengths: window lengths; one ``VarEH`` per pooled element and length.
        useMean / useVariance: which estimates are queried (at least one).
        inputToLinear: ``'all'`` or ``'estimates'``.

    Raises:
        Exception: if no estimate type is selected or ``inputToLinear`` is invalid.
    """

    def __init__(self, seq_len, num_values, input_size, hidden_size,
                 num_layers, EHeps, EHlengths, useMean=True,
                 useVariance=False, inputToLinear='all'):

        super().__init__()
        self.seq_len = seq_len
        self.num_layers = num_layers
        self.input_size = input_size
        self.num_values = num_values
        self.hidden_size = hidden_size
        self.rnn = nn.RNN(input_size, hidden_size, batch_first=True)
        # kept for compatibility; forward() does not apply it
        self.Softmax = nn.Softmax(dim=1)
        self.EHeps = EHeps
        self.EHlengths = EHlengths
        self.useVariance = useVariance
        self.useMean = useMean
        self.inputToLinear = inputToLinear

        # each hidden size will be reduced to size sqrt(hidden_size). Then,
        # each element in it will go to an EH.
        self.avgKernelSize = int(np.floor(np.sqrt(hidden_size)))
        # from https://pytorch.org/docs/stable/generated/torch.nn.AvgPool1d.html
        self.numEHs = math.floor(((self.hidden_size - self.avgKernelSize) / self.avgKernelSize) + 1)
        self.avgPool = nn.AvgPool1d(kernel_size=self.avgKernelSize)

        if not useMean and not useVariance:
            raise Exception("At least one type of estimate must be used.")
        self.numberOfEstimates = 2 if useMean and useVariance else 1

        # EHs[i][j] is EH over pooled element i of some length EHlengths[j]
        self.EHs = [[VarEH(length, eps=EHeps, maxValue=1) for length in EHlengths] for _ in range(self.numEHs)]

        # linear: its input size depends on hidden size, how many EH we maintain
        # and how many estimates we query
        numEstimates = self.numEHs * len(EHlengths) * self.numberOfEstimates
        if self.inputToLinear == 'all':
            self.linear = nn.Linear(hidden_size + numEstimates, num_values)
        elif self.inputToLinear == 'estimates':
            self.linear = nn.Linear(numEstimates, num_values)
        else:
            raise Exception("Input to linear must be either 'all' or 'estimates'")

    def forward(self, x):
        """Return the linear layer applied to ``hidden_states(x)``."""
        linearInput = self.hidden_states(x)
        return self.linear(linearInput)

    def hidden_states(self, x):
        """Return the input of the linear layer for the batch ``x``.

        Each row is ``[hidden state, estimates]`` for ``inputToLinear='all'`` and
        ``[estimates]`` otherwise. Estimates are ordered by pooled element, then by
        window length, with the mean before the variance when both are used.
        Updates ``self.EHs`` as a side effect.
        """
        batch_size = x.size()[0]
        # follow the device of the input instead of a notebook-level global
        device = x.device
        # assuming batch_first = True for RNN cells
        hidden = self._init_hidden(batch_size).to(device)
        x = x.view(batch_size, self.seq_len, self.input_size)

        # rnn_out: B x 1 x H (seq_len is 1); the final hidden cell is not needed
        rnn_out, _ = self.rnn(x, hidden)

        # B x 1 x numEHs -> B x numEHs. Only the sequence dimension is squeezed:
        # a dimensionless squeeze would also drop the batch dimension for a batch
        # of one row (and the EH dimension when numEHs == 1).
        rnn_pooled = self.avgPool(rnn_out).squeeze(1)

        # add hidden states to EHs, querying the estimates after each row so as
        # to not look into the future.
        estimateRows = []
        for point in rnn_pooled:
            for i, element in enumerate(point):
                value = element.item()
                for eh in self.EHs[i]:
                    eh.add(value)
            estimateRows.append(self._query_estimates())

        numEstimates = self.numEHs * len(self.EHlengths) * self.numberOfEstimates
        # built once from all rows instead of growing a tensor row by row
        allMeans = torch.tensor(estimateRows, dtype=rnn_out.dtype).view(batch_size, numEstimates)
        allMeans = allMeans.to(device)

        if self.inputToLinear == 'all':
            # linearInput: B x (H + numEHs * len(EHlengths) * numberOfEstimates)
            linearInput = torch.cat((rnn_out.squeeze(1), allMeans), 1)
            linearInput = linearInput.view(batch_size, self.hidden_size + numEstimates)
        else:
            # linearInput: B x (numEHs * len(EHlengths) * numberOfEstimates)
            linearInput = allMeans

        return linearInput

    def _query_estimates(self):
        """Collect the current estimates of every EH as a flat list."""
        estimates = []
        for i in range(self.numEHs):
            for eh in self.EHs[i]:
                if self.useMean:
                    estimates.append(eh.get_mean_estimate())
                if self.useVariance:
                    estimates.append(eh.get_var_estimate())
        return estimates

    def _init_hidden(self, batch_size):
        """
        Initialize hidden cell states, assuming
        batch_first = True for RNN cells
        """
        return torch.zeros(self.num_layers, batch_size, self.hidden_size)

In [None]:
from collections import deque
from math import ceil, floor, log2


class ExactWindow:
    """Sliding window over the last ``n`` elements that answers statistics exactly."""

    def __init__(self, n):
        self.maxElems = n
        # newest element on the left, oldest on the right
        self.buffer = deque()
        self.nElems = 0

    def add(self, element):
        """Insert ``element`` as the newest item, evicting the oldest on overflow."""
        self.buffer.appendleft(element)
        self.nElems += 1
        if len(self.buffer) <= self.maxElems:
            return
        self.buffer.pop()
        self.nElems -= 1

    def n_elems(self):
        """Number of elements currently held."""
        return self.nElems

    def sum(self):
        """Sum of the elements currently held."""
        return sum(self.buffer)

    def mean(self):
        """Arithmetic mean of the window (ZeroDivisionError when it is empty)."""
        total = sum(self.buffer)
        return total / self.nElems

    def variance(self):
        """Sample variance (n - 1 denominator); 0 for fewer than two elements."""
        count = len(self.buffer)
        if count <= 1:
            return 0
        centre = self.mean()
        squaredDeviation = 0
        for value in self.buffer:
            squaredDeviation += (value - centre) ** 2
        return squaredDeviation / float(count - 1)

    def empty(self):
        """True when no element is held."""
        return self.nElems == 0

class Bucket:
    """Plain record holding a timestamp and the number of elements the bucket represents."""

    def __init__(self, timestamp, nElems):
        self.timestamp, self.nElems = timestamp, nElems

class VarBucket(Bucket):

    """ Bucket that additionally stores the mean and the variance of its elements. """

    def __init__(self, timestamp, value):
        # value=None builds an empty bucket (0 elements, mean 0); any other
        # value builds a bucket holding exactly that one element
        isEmpty = value is None
        super().__init__(timestamp, 0 if isEmpty else 1)
        self.bucketMean = 0 if isEmpty else value
        # a bucket with at most one element has no spread
        self.var = 0


class Counter:

    """ Wraparound step counter: starts at 0, counts up to ``upperLimit`` and
    then restarts at 1. """

    def __init__(self, upperLimit):
        self.upperLimit = upperLimit
        self.step = 0

    def increment(self):
        """ Advance one step; after ``upperLimit`` the next value is 1. """
        self.step = self.step + 1 if self.step < self.upperLimit else 1

    def dist_between_ticks(self, tick1, tick2):

        """ Number of steps needed to go from 'tick1' forward to 'tick2', assuming
        at most one wraparound happened in between. When 'tick2' is smaller than
        'tick1' the counter is taken to have wrapped, so ``upperLimit`` is added. """

        if tick2 >= tick1:
            return tick2 - tick1
        return self.upperLimit - tick1 + tick2


class VarEH(object):

    # todo add reset function, eh summary function

    """ A variation of the original EH structure that keeps track of the variance (k-medians with k = 1) to some eps
    of relative error. Although less space-efficient than the original, it still is sublinear to window length and
    provides approximations in constant time. Moreover, it is more flexible in terms of the functions it supports and
    can work with real numbers. Amortized update time is O(1) given that the |max. value| of the data is known a priori.

    A consequence of this structure is the ability to also keep track of the mean. Note that although the guarantees for
    the maximum relative error for this mean estimate are not presented in the paper, they hold experimentally
    in the majority of cases.

    In this documentation we adopt the notation of the paper, where buckets are referred to as B_i, 1 <= i <= m,
    B_1 being the most recent one and B_m being the oldest one. A suffix bucket B_i* contains the statistics of all
    elements arrived after the most recent element of the bucket B_i.

    In 'self.buckets' the oldest bucket B_m is at index 0 and the most recent bucket B_1 is at index -1.
    The 'var' field of a bucket holds the sum of squared deviations from the bucket mean (it is only divided by
    the number of elements when an estimate is returned). """

    def __init__(self, n, eps, maxValue=None):
        """ n: window length; eps: target relative error; maxValue: optional bound on |value|, used to space
        out merges. Without it, a merge pass is attempted on every arrival. """
        self.n = n
        self.k = 9 / (eps ** 2)
        self.buckets = deque([])
        self.lastSuffix = VarBucket(0, None)
        self.interSuffix = None
        # timestamps cycle through 1..n+1: enough to tell apart the n window positions plus the position
        # of a bucket that has just expired
        self.timeCounter = Counter(n+1)

        # number of arrivals between two merge passes; never below 1, also when log2 would be <= 0 or undefined
        self.stepsBetweenMerges = 1
        if maxValue is not None:
            valueRange = n * (maxValue ** 2)
            if valueRange > 1:
                self.stepsBetweenMerges = max(1, int(round((1 / eps) * log2(valueRange))))
        # elements processed since last merge
        self.stepsSinceLastMerge = 0

    def add(self, value):

        """ Process a new element arrival, updating the statistics of the structure. If the EH is empty, just insert it.
            If there's at least one element:
                1. Insert the value (either in the most recent bucket or in a new one)
                2. Update B_m* (lastSuffix), unless the value joined B_m itself
                3. If the last bucket is expired, delete it and update B_m* so that it represents B_(m-1)*
                4. If 'stepsBetweenMerges' elements have arrived after the last bucket merge, merge buckets again. """

        self.timeCounter.increment()
        self.stepsSinceLastMerge += 1

        if not self.buckets:
            self.buckets.append(VarBucket(self.timeCounter.step, value))
            return

        newest = self.buckets[-1]
        if value == newest.bucketMean:
            # a value equal to the bucket mean changes neither the mean nor the squared deviations
            newest.nElems += 1
            newest.timestamp = self.timeCounter.step
            # B_m* only holds elements that arrived after B_m: skip it when the value joined B_m itself
            if len(self.buckets) > 1:
                self.insert_into_last_suffix(value)
        else:
            self.buckets.append(VarBucket(self.timeCounter.step, value))
            self.insert_into_last_suffix(value)

        # delete expired bucket, check on counter's wraparound property
        if self.get_timestamp_position(self.buckets[0].timestamp) > self.n:
            self.pop_from_last_suffix()
            self.buckets.popleft()

        # merge every self.stepsBetweenMerges steps to ensure amortized time O(1). '>=' rather than '==':
        # the first arrival returns early above, so the count may already be past the threshold here
        if self.stepsSinceLastMerge >= self.stepsBetweenMerges:
            self.merge_buckets()
            self.stepsSinceLastMerge = 0

    def insert_into_last_suffix(self, element):
        """ Updates the statistics of the suffix bucket B_m* (in reference) such that it now takes
        into account another element. """

        suffix = self.lastSuffix
        # order of operations crucial: the variance update needs the mean from before the insertion
        newNElems = suffix.nElems + 1

        suffix.var += suffix.nElems * ((suffix.bucketMean - element) ** 2) / newNElems

        suffix.bucketMean = (suffix.bucketMean * suffix.nElems + element) / newNElems

        suffix.nElems = newNElems

    def pop_from_last_suffix(self):

        """ Updates the statistics of the suffix bucket B_m* (in reference) such that it does not take
         into account the second oldest bucket (self.buckets[1]) anymore: it now represents B_(m-1)*.
         Must be called while that bucket is still at index 1. """

        suffix = self.lastSuffix
        leaving = self.buckets[1]
        newNElems = suffix.nElems - leaving.nElems

        if newNElems == 0:
            suffix.bucketMean = 0
            suffix.var = 0
            suffix.nElems = 0
        else:
            # order of operations crucial: the variance update needs the mean from after the removal
            suffix.bucketMean = (suffix.bucketMean * suffix.nElems -
                                 leaving.bucketMean * leaving.nElems) / newNElems

            remaining = (suffix.var - leaving.var -
                         ((newNElems * leaving.nElems) / suffix.nElems) *
                         ((suffix.bucketMean - leaving.bucketMean) ** 2))
            # floating point cancellation can leave a tiny negative remainder
            suffix.var = max(0.0, remaining)
            suffix.nElems = newNElems

    def merge_buckets(self):

        """ Merges buckets following the procedure specified in the paper. Given V_(i,i-1) the variance of the
        combination of buckets B_i and B_(i-1), V_(i-1)* the variance of the suffix bucket B_(i-1)* and k=9 * (1/eps^2):

            while there exists i > 2:
                find the smallest i that satisfies k * V_(i,i-1) <= V_(i-1)*
                merge buckets B_i and B_(i-1)

        This implementation does a single pass from the most recent buckets to the oldest ones.
        Note that V_(i-1)* is computed incrementally in 'self.interSuffix'. """

        if len(self.buckets) <= 2:
            return

        # the running suffix starts as the most recent bucket alone
        self.interSuffix = VarBucket(0, None)
        self.update_inter_suffix(len(self.buckets) - 1)

        # this implementation has the most recent buckets to the end of the structure self.buckets, hence i is
        # traversed decreasingly. The candidate pair is (buckets[i], buckets[i + 1]).
        i = len(self.buckets) - 3
        while i >= 0:
            j = i + 1
            older, newer = self.buckets[i], self.buckets[j]
            newNElems = older.nElems + newer.nElems
            newVar = self.compute_new_variance(older, newer, newNElems)

            if self.k * newVar <= self.interSuffix.var:
                if i == 0:
                    # merging the two oldest buckets, so 'newer' leaves the suffix B_m*
                    self.pop_from_last_suffix()
                older.bucketMean = self.compute_new_mean(older, newer, newNElems)
                older.nElems = newNElems
                older.var = newVar
                older.timestamp = newer.timestamp
                del self.buckets[j]
                # the running suffix already describes everything newer than the merged bucket:
                # nothing must be added to it
            else:
                # no merge: 'newer' becomes part of the suffix of the next candidate pair
                self.update_inter_suffix(j)
            i -= 1

    def update_inter_suffix(self, index):

        """ Extends the running suffix bucket 'self.interSuffix' so that it also contains the statistics of the
        bucket in position 'index'. When called, it assumes that the elements of that bucket have not been taken
        into account yet. """

        bucket = self.buckets[index]
        newNElems = self.interSuffix.nElems + bucket.nElems
        # variance first: it needs the suffix mean from before the extension
        self.interSuffix.var = self.compute_new_variance(bucket, self.interSuffix, newNElems)

        self.interSuffix.bucketMean = self.compute_new_mean(bucket, self.interSuffix, newNElems)
        self.interSuffix.nElems = newNElems

    @staticmethod
    def compute_new_mean(bucket1, bucket2, nElems):

        """ Computes the mean of the combination of two buckets; 'nElems' is their total number of elements. """

        return (bucket1.bucketMean * bucket1.nElems + bucket2.bucketMean * bucket2.nElems) / nElems

    @staticmethod
    def compute_new_variance(bucket1, bucket2, nElems):

        """ Computes the variance term (sum of squared deviations) of the combination of two buckets;
        'nElems' is their total number of elements. """

        return (bucket1.var + bucket2.var + ((bucket1.nElems * bucket2.nElems) / nElems) *
                ((bucket1.bucketMean - bucket2.bucketMean) ** 2))

    def get_timestamp_position(self, timestamp):

        """ Gets position of an element in the EH (from 1 to n) based on its timestamp. Position 1 is the
        most recent arrival. """

        return self.timeCounter.dist_between_ticks(timestamp, self.buckets[-1].timestamp) + 1

    def _active_in_oldest(self):

        """ Number of elements of the oldest bucket B_m that are still inside the window: the window
        positions left for it, capped by the number of elements the bucket really holds (the cap only
        matters while fewer than n elements have arrived). """

        oldest = self.buckets[0]
        return min(self.n + 1 - self.get_timestamp_position(oldest.timestamp), oldest.nElems)

    def get_var_estimate(self):

        """ Returns an estimate of the (sample) variance within the window; 0.0 when the window holds a single
        element. Raises IndexError if the sketch is empty. """

        numEst = self._active_in_oldest()
        total = numEst + self.lastSuffix.nElems
        if total <= 1:
            return 0.0
        # as in the paper, half of the variance term of the (possibly partially expired) oldest bucket is used
        return (self.buckets[0].var / 2 + self.lastSuffix.var +
                ((numEst * self.lastSuffix.nElems) / total) *
                ((self.buckets[0].bucketMean - self.lastSuffix.bucketMean) ** 2)) / (total - 1)

    def get_mean_estimate(self):

        """ Returns an estimate of the mean within the window. Raises IndexError if the sketch is empty. """

        numEst = self._active_in_oldest()
        return (((numEst * self.buckets[0].bucketMean) +
                 (self.lastSuffix.nElems * self.lastSuffix.bucketMean)) /
                (numEst + self.lastSuffix.nElems))

    def empty(self):

        """ Tells if there are no buckets in the sketch. """

        return not self.buckets

In [None]:
# Theoretical use of memory in EH vs linear

def mem_usage(eps=0.05, adjust=False, max_window=10000):
  """ Builds a long-format table with the theoretical memory usage of an EH against a linear buffer.

  For every window size N in 1..max_window three curves are computed:
    'linear'            N
    'EH (upper bound)'  (1 / eps^2) * log2(N)
    'EH (lower bound)'  (1 / eps) * log2(N)

  eps:        relative error of the EH.
  adjust:     if True, both EH bounds are capped at the linear usage (the naive solution is O(N)).
  max_window: largest window size included (integer).

  Returns a DataFrame with columns 'window_size', 'Structure' and 'memory_usage'. """
  windowSizes = np.linspace(1, max_window, max_window)
  # vectorised base-2 logarithm of every window size
  logWindow = np.log2(windowSizes)

  memDf = pd.DataFrame({
      'linear': windowSizes,
      # asymptotic bounds
      'EH (upper bound)': (1 / (eps ** 2)) * logWindow,
      'EH (lower bound)': (1 / eps) * logWindow,
      'window_size': windowSizes,
  })

  if adjust:
    # adjust bounds where memory usage of EH is higher (naive solution is O(N))
    for bound in ('EH (upper bound)', 'EH (lower bound)'):
      memDf[bound] = np.minimum(memDf[bound], memDf['linear'])

  return pd.melt(memDf, id_vars=['window_size'],
                 value_vars=['EH (upper bound)', 'EH (lower bound)', 'linear'],
                 var_name='Structure', value_name='memory_usage')

In [None]:
# lift Altair's row limit: each table below has 30000 rows
alt.data_transformers.enable('default', max_rows=None)

memDf005 = mem_usage()
memDf01 = mem_usage(eps=0.1)

# one line chart per eps value, shown side by side
panels = [
    alt.Chart(frame, title=title).mark_line().encode(
        x = 'window_size:Q',
        y = 'memory_usage:Q',
        color = 'Structure'
    )
    for frame, title in ((memDf005, 'Memory usage, eps=0.05'),
                         (memDf01, 'Memory usage, eps=0.1'))
]
plot = alt.hconcat(*panels)

plotsName = 'memory_usage'
plot.save(plotsPath + plotsName + "/theo_memory_usage.png", scale_factor=2.0)
plot

In [None]:
# BATCHNORM

class EHRNN_BN(nn.Module):
    """ RNN classifier whose linear layer is fed with EH estimates of the hidden states.

    The hidden state of the last time step is average-pooled into 'numEHs' values. Each pooled value is
    streamed into one VarEH per window length in 'EHlengths'; the mean and/or variance estimates of all the
    EHs are batch-normalised and passed to the linear layer, either together with the hidden state
    (inputToLinear='all') or on their own (inputToLinear='estimates').

    The EHs are plain Python objects created once in the constructor and updated on every forward call, sample
    by sample in batch order; the estimates are read back as Python floats, so no gradient flows through them. """

    def __init__(self, seq_len, num_classes, input_size, hidden_size,
                 num_layers, EHeps, EHlengths, useMean=True,
                 useVariance=False, inputToLinear='all'):
        # todo try batchnorm in hidden states

        super().__init__()

        if not useMean and not useVariance:
            raise ValueError("At least one type of estimate must be used.")
        if inputToLinear not in ('all', 'estimates'):
            raise ValueError("Input to linear must be either 'all' or 'estimates'")

        self.seq_len = seq_len
        self.num_layers = num_layers
        self.input_size = input_size
        self.num_classes = num_classes
        self.hidden_size = hidden_size
        # num_layers is forwarded so that the RNN agrees with the initial hidden state built in _init_hidden
        self.rnn = nn.RNN(input_size, hidden_size, num_layers=num_layers, batch_first=True)
        self.Softmax = nn.Softmax(dim=1)
        self.EHeps = EHeps
        self.EHlengths = EHlengths
        self.useVariance = useVariance
        self.useMean = useMean
        self.inputToLinear = inputToLinear
        self.numberOfEstimates = 2 if useMean and useVariance else 1

        # each hidden state is pooled with a kernel (and stride) of floor(sqrt(hidden_size)). Then,
        # each pooled element goes to its own set of EHs.
        self.avgKernelSize = int(np.floor(np.sqrt(hidden_size)))
        # output length of the pooling, from https://pytorch.org/docs/stable/generated/torch.nn.AvgPool1d.html
        self.numEHs = math.floor(((self.hidden_size - self.avgKernelSize) / self.avgKernelSize) + 1)
        self.avgPool = nn.AvgPool1d(kernel_size=self.avgKernelSize)

        # EHs[i][j] is EH over pooled element i of some length EHlengths[j]
        self.EHs = [[VarEH(length, eps=EHeps, maxValue=1) for length in EHlengths]
                    for _ in range(self.numEHs)]

        # For each pooled hidden state, keep the mean, variance or both of various
        # resolutions
        numFeatures = self.numEHs * self.numberOfEstimates * len(EHlengths)
        self.batchnorm = nn.BatchNorm1d(numFeatures)

        # linear: its input size depends on hidden size, how many EH we maintain
        # and how many estimates we query
        if self.inputToLinear == 'all':
            self.linear = nn.Linear(hidden_size + numFeatures, num_classes)
        else:
            self.linear = nn.Linear(numFeatures, num_classes)

    def forward(self, x):
        """ x: tensor that can be viewed as (batch, seq_len, input_size). Returns the class probabilities,
        of shape (batch, num_classes). """
        batch_size = x.size()[0]
        x = x.view(batch_size, self.seq_len, self.input_size)
        # the initial state follows the input instead of depending on a global device setting
        hidden = self._init_hidden(batch_size, device=x.device, dtype=x.dtype)

        # rnn_out: B x seq_len x H. Only the hidden state of the last time step is used.
        rnn_out, _ = self.rnn(x, hidden)
        lastHidden = rnn_out[:, -1, :]  # B x H

        # pool over the hidden dimension. Only the dummy axis is squeezed, so a batch of one sample
        # keeps its batch axis.
        pooled = self.avgPool(lastHidden.unsqueeze(1)).squeeze(1)  # B x numEHs
        pooledValues = pooled.detach().cpu().tolist()

        # add hidden states to EHs sample by sample, querying the estimates each time so as to not look
        # into the future.
        rows = []
        for point in pooledValues:
            for i, element in enumerate(point):
                for eh in self.EHs[i]:
                    eh.add(element)
            rows.append(self._query_estimates())

        allEstimates = torch.tensor(rows, dtype=rnn_out.dtype, device=x.device)
        allEstimates = allEstimates.view(batch_size, self.numEHs *
                                         len(self.EHlengths) * self.numberOfEstimates)

        allEstimates = self.batchnorm(allEstimates)

        if self.inputToLinear == 'all':
            # B x (H + self.numEHs * len(EHlengths) * self.numberOfEstimates)
            linear_out = self.linear(torch.cat((lastHidden, allEstimates), 1))
        else:
            # B x (self.numEHs * len(EHlengths) * self.numberOfEstimates)
            linear_out = self.linear(allEstimates)

        return self.Softmax(linear_out)

    def _query_estimates(self):
        """ Returns the current estimates of every EH as a flat list, ordered by pooled element and then by
        window length; when both statistics are used the mean comes right before the variance. """
        estimates = []
        for ehRow in self.EHs:
            for eh in ehRow:
                if self.useMean:
                    estimates.append(eh.get_mean_estimate())
                if self.useVariance:
                    estimates.append(eh.get_var_estimate())
        return estimates

    def _init_hidden(self, batch_size, device=None, dtype=None):
        """
        Initialize hidden cell states with zeros, of shape
        (num_layers, batch_size, hidden_size)
        """
        return torch.zeros(self.num_layers, batch_size, self.hidden_size,
                           device=device, dtype=dtype)

In [None]:
# Trying EHRNN with batchnorm

# We fix learning rate after some experiments that determine that overall, for
# all other parameters fixed, a starting lr = 0.01 performs well.
# Optimizer is always RMSProp.

hiddenSizes = [16, 32, 64]
batchSizes = [32, 64]
# [mean, var]
summaryToUse = [[0,1], [1,0], [1,1]]

# first row are short term, second long term, third short and long term.
# fourth row are extras (individuals and combo with 48)
EHLengths = [[4], [4, 8], [4, 8, 16], [4, 8, 16, 32], [4, 8, 16, 32, 64],
             [32], [32, 64], [32, 64, 128], [32, 64, 128, 256], [32, 64, 128, 256, 512],
             [128], [4, 128], [4, 16, 128], [4, 16, 32, 128], [4, 16, 32, 128, 256],
             [32, 48, 128], [64], [8], [16], [256], [48]]

inputsToLinear = ['all', 'estimates']

hparams['learning_rate'] = 0.01

# We are first going to optimize using both the mean and the variance
# and then, with the best model we are going to find the performance of just
# using the mean or the variance or just using the estimates as input to the
# fully connected layer

resultsName = 'optiEHRNN_BN.csv'
experimentName = 'EHRNN_BN'

modelPath = '/content/drive/MyDrive/U/4t/CBI/sketchModelling/models/' + experimentName + '/'
# make sure partial results can be written after the first (long) experiment
os.makedirs(modelPath, exist_ok=True)

try:
  # try reading previous results
  results = pd.read_csv(modelPath+resultsName, index_col=0)
  dfColumns = results.columns
except (OSError, pd.errors.EmptyDataError):
  # or generate new results if none existed before. Any other failure (e.g. a malformed file) is left to
  # propagate, so that an existing results file is never silently overwritten.
  dfColumns = ['hiddenSize', 'batchSize', 'EHLengths', 'NEpochs', 'EHeps', 'lr',
               'summarization', 'inputToLinear', 'nParams', 'Best Val Accuracy',
               'Test Accuracy', 'avgEpochTime' , 'experimentID']
  results = pd.DataFrame(columns=dfColumns)

# experiment IDs continue after this number
i = 168
usingMean = True
usingVar = True
saveModels = False
eps=0.05

hparams['num_epochs'] = 15

# label of the statistics in use; it does not change between experiments
if usingMean and usingVar:
  summarisation = 'VarAndMean'
elif usingMean:
  summarisation = 'Mean'
elif usingVar:
  summarisation = 'Var'
else:
  raise ValueError("At least one of usingMean / usingVar must be True")

for hiddenSize in hiddenSizes:
  for batchSize in batchSizes:
    # Reload electricity with different batch sizes
    trainLoader = torch.utils.data.DataLoader(electricityDataSet(elecTrain),
                                              batch_size=batchSize,
                                              shuffle=False)
    valLoader = torch.utils.data.DataLoader(electricityDataSet(elecVal),
                                            batch_size=batchSize,
                                            shuffle=False)
    testLoader = torch.utils.data.DataLoader(electricityDataSet(elecTest),
                                             batch_size=batchSize,
                                             shuffle=False)
    for EHLength in EHLengths:
      for inputToLinear in inputsToLinear:

        print(hiddenSize, batchSize, str(EHLength), hparams['num_epochs'],
                eps, hparams['learning_rate'], summarisation, inputToLinear)

        i = i + 1
        experimentID = experimentName + str(i)
        hparams['hidden_size'] = hiddenSize
        hparams['batch_size'] = batchSize

        ehrnn_bn = EHRNN_BN(seq_len=1,
                      num_classes=hparams['num_classes'],
                      input_size=hparams['input_size'],
                      hidden_size=hparams['hidden_size'],
                      num_layers=1,
                      EHeps=eps,
                      EHlengths=EHLength,
                      useMean=usingMean,
                      useVariance=usingVar,
                      inputToLinear=inputToLinear)

        nparams = get_nn_nparams(ehrnn_bn)

        # named 'experimentRow' rather than 'data' so that the 'torch.utils.data as data' import alias
        # stays usable in later cells
        experimentRow = [hiddenSize, batchSize, str(EHLength), hparams['num_epochs'],
                         eps, hparams['learning_rate'], summarisation, inputToLinear,
                         nparams]

        bestValidationAcc, testAcc, avgEpochTrainingTime = model_experiment(ehrnn_bn, hparams, modelPath,
                                            experimentID, 888, EH_train_epoch,
                                            EH_val_epoch, saveModels)

        experimentRow.extend([bestValidationAcc, testAcc, avgEpochTrainingTime, experimentID])

        dataDf = pd.DataFrame([experimentRow],columns=dfColumns)
        results = pd.concat([results, dataDf])
        # save partial results in case of failure
        results.to_csv(modelPath+resultsName)

16 32 [4] 15 0.05 0.01 VarAndMean all


NameError: ignored