In [1]:
import pandas as pd
import numpy as np
from torchtext import data
import torch
import torch.nn as nn

In [None]:
# Mount Google Drive (Colab only) so the dataset files become reachable.
from google.colab import drive
drive.mount('/content/drive')
# NOTE(review): these paths are relative to the current working directory, not to
# the '/content/drive' mount point - confirm the notebook is run from the right folder.
# NOTE(review): df_train / df_test are not referenced again in the visible cells;
# the torchtext TabularDataset below re-reads the same CSV files.
df_train = pd.read_csv('Datasets/IMDB/Train.csv', engine='python')
df_test = pd.read_csv('Datasets/IMDB/Test.csv', engine='python')

# include_lengths=True makes batch.text a (tokens, lengths) pair, which the
# training/evaluation functions unpack before calling the model.
TEXT = data.Field(tokenize = 'spacy',include_lengths = True)
# Labels are produced as float tensors.
LABEL = data.LabelField(dtype = torch.float)

In [3]:
# Map the CSV columns, in order, onto the torchtext fields defined above.
tv_datafields = [("text", TEXT),("label", LABEL)]
# Build the train/test datasets from Train.csv / Test.csv, skipping the header row.
train_data,  test_data = data.TabularDataset.splits(path='Datasets/IMDB/',
                                        train="Train.csv",
                                        test="Test.csv", format="csv",
                                        skip_header=True, fields=tv_datafields)

In [4]:
# Vocabulary cap passed to TEXT.build_vocab (max_size) below.
MAX_VOCAB_SIZE = 25000
# Batch size used by the BucketIterator for both splits.
BATCH_SIZE = 64
# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [5]:
# Build the text vocabulary from the training split only, attach the
# "glove.6B.100d" pretrained vectors, and initialise tokens that have no
# pretrained vector with torch.Tensor.normal_.
TEXT.build_vocab(train_data, 
                 max_size = MAX_VOCAB_SIZE, 
                 vectors = "glove.6B.100d", 
                 unk_init = torch.Tensor.normal_)
# Build the label vocabulary from the training split.
LABEL.build_vocab(train_data)

.vector_cache/glove.6B.zip: 862MB [06:28, 2.22MB/s]                           
100%|█████████▉| 399943/400000 [00:17<00:00, 22571.27it/s]

In [6]:
# Batch both splits with a BucketIterator. Batches are sorted internally by token
# count (sort_within_batch + sort_key); the models call pack_padded_sequence with
# its default arguments, which presumably relies on this ordering - TODO confirm.
train_iterator,  test_iterator = data.BucketIterator.splits(
    (train_data,  test_data), 
    batch_size = BATCH_SIZE,
    sort_within_batch = True,
    sort_key = lambda x: len(x.text),
    device = device)

In [7]:
class RNN(nn.Module):
    """LSTM sentence classifier: embedding -> (bi)LSTM -> dropout -> linear head.

    Args:
        vocab_size: number of rows in the embedding table.
        embedding_dim: size of each token embedding.
        hidden_dim: LSTM hidden size per direction.
        output_dim: number of outputs of the final linear layer.
        n_layers: number of stacked LSTM layers.
        bidirectional: whether the LSTM reads the sequence in both directions.
        dropout: dropout probability (embeddings, LSTM inter-layer, final hidden).
        pad_idx: vocabulary index of the padding token.
    """

    def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim, n_layers, 
                 bidirectional, dropout, pad_idx):
        
        super().__init__()
        
        self.bidirectional = bool(bidirectional)
        
        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx = pad_idx)
        
        self.rnn = nn.LSTM(embedding_dim, 
                           hidden_dim, 
                           num_layers=n_layers, 
                           bidirectional=self.bidirectional, 
                           # inter-layer dropout has no effect (and only warns) with one layer
                           dropout=dropout if n_layers > 1 else 0.0)
        
        # The head consumes one final hidden state per direction, so its input
        # width must follow the number of directions instead of assuming two.
        num_directions = 2 if self.bidirectional else 1
        self.fc = nn.Linear(hidden_dim * num_directions, output_dim)
        
        self.dropout = nn.Dropout(dropout)
        
    def forward(self, text, text_lengths):
        """Return raw scores of shape [batch size, output_dim].

        Args:
            text: token indices, [sent len, batch size].
            text_lengths: unpadded length of every sequence in the batch.
        """
        
        #text = [sent len, batch size]
        
        embedded = self.dropout(self.embedding(text))
        
        #embedded = [sent len, batch size, emb dim]
        
        # pack_padded_sequence needs tensor lengths on the CPU
        if torch.is_tensor(text_lengths):
            text_lengths = text_lengths.cpu()
        
        #pack sequence so the LSTM skips the padding positions
        packed_embedded = nn.utils.rnn.pack_padded_sequence(embedded, text_lengths)
        
        # Only the final hidden state is used; the per-step outputs and the cell
        # state are discarded, so the packed output is never unpacked.
        _, (hidden, _) = self.rnn(packed_embedded)
        
        #hidden = [num layers * num directions, batch size, hid dim]
        
        if self.bidirectional:
            #concat the final forward (hidden[-2,:,:]) and backward (hidden[-1,:,:]) hidden layers
            final_hidden = torch.cat((hidden[-2,:,:], hidden[-1,:,:]), dim = 1)
        else:
            #single direction: the last layer's final state is hidden[-1,:,:]
            final_hidden = hidden[-1,:,:]
        
        #final_hidden = [batch size, hid dim * num directions]
        
        return self.fc(self.dropout(final_hidden))



In [8]:
# Hyperparameters for the baseline classifier.
INPUT_DIM = len(TEXT.vocab)   # embedding rows = vocabulary size
EMBEDDING_DIM = 100           # the copied pretrained vectors are 100-d (shape printed below)
HIDDEN_DIM = 256
OUTPUT_DIM = 1                # single output per review
N_LAYERS = 2
BIDIRECTIONAL = True
DROPOUT = 0.5
PAD_IDX = TEXT.vocab.stoi[TEXT.pad_token]

# NOTE(review): the model is constructed before torch.manual_seed(12) is called
# in a later cell, so the random weight initialisation here is not seeded.
model = RNN(INPUT_DIM, 
            EMBEDDING_DIM, 
            HIDDEN_DIM, 
            OUTPUT_DIM, 
            N_LAYERS, 
            BIDIRECTIONAL, 
            DROPOUT, 
            PAD_IDX)

In [9]:
def count_parameters(model):
    """Return the total number of elements across the model's trainable parameters."""
    trainable = 0
    for param in model.parameters():
        # frozen parameters (requires_grad == False) are not counted
        if param.requires_grad:
            trainable += param.numel()
    return trainable

print(f'The model has {count_parameters(model):,} trainable parameters')
# Pretrained vectors attached to the vocabulary by TEXT.build_vocab.
pretrained_embeddings = TEXT.vocab.vectors

print(pretrained_embeddings.shape)
# Overwrite the randomly initialised embedding table in place with the pretrained
# vectors. As the last expression of the cell, the returned tensor is echoed.
model.embedding.weight.data.copy_(pretrained_embeddings)

The model has 4,810,857 trainable parameters
torch.Size([25002, 100])


tensor([[-1.5590,  0.3448, -0.1751,  ..., -0.1506,  1.0337, -0.1511],
        [-0.1991,  0.8435,  1.0862,  ...,  1.6896, -0.7945, -0.0032],
        [-0.0382, -0.2449,  0.7281,  ..., -0.1459,  0.8278,  0.2706],
        ...,
        [-0.1632, -0.5877,  0.6399,  ..., -0.0234,  0.0447,  0.8404],
        [-0.2156,  1.0275,  0.0914,  ..., -0.3935,  0.8041, -0.6058],
        [ 0.3393,  2.1684,  0.0894,  ..., -0.3708, -0.9544, -0.4810]])

In [10]:
UNK_IDX = TEXT.vocab.stoi[TEXT.unk_token]

# Reset the embedding rows of the unknown and padding tokens to all zeros,
# replacing whatever values were copied in from the pretrained vectors.
model.embedding.weight.data[UNK_IDX] = torch.zeros(EMBEDDING_DIM)
model.embedding.weight.data[PAD_IDX] = torch.zeros(EMBEDDING_DIM)

print(model.embedding.weight.data)

tensor([[ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [-0.0382, -0.2449,  0.7281,  ..., -0.1459,  0.8278,  0.2706],
        ...,
        [-0.1632, -0.5877,  0.6399,  ..., -0.0234,  0.0447,  0.8404],
        [-0.2156,  1.0275,  0.0914,  ..., -0.3935,  0.8041, -0.6058],
        [ 0.3393,  2.1684,  0.0894,  ..., -0.3708, -0.9544, -0.4810]])


In [11]:
# Adam with its default hyperparameters over all parameters of the baseline model.
optimizer = torch.optim.Adam(model.parameters())

In [12]:
# Binary cross-entropy on raw logits (the sigmoid is applied inside the loss).
criterion = nn.BCEWithLogitsLoss()
# NOTE(review): the seed is set after `model` was already constructed in an
# earlier cell, so it does not cover the model's weight initialisation.
torch.manual_seed(12)
# Move the model and the loss module to the selected device.
model = model.to(device)
criterion = criterion.to(device)
def binary_accuracy(preds, y):
    """
    Fraction of correct predictions in a batch (8 right out of 10 gives 0.8, not 8).

    `preds` holds raw logits; they are passed through a sigmoid and rounded to
    the nearest integer before being compared element-wise with `y`.
    """
    predicted_labels = torch.sigmoid(preds).round()
    # cast the boolean matches to float so they can be summed and divided
    hits = predicted_labels.eq(y).float()
    return hits.sum() / len(hits)
def train(model, iterator, optimizer, criterion):
    """Run one optimisation pass over `iterator`.

    Each batch must expose `text` as a (tokens, lengths) pair and `label`.
    Returns (mean batch loss, mean batch accuracy), averaged over len(iterator).
    """
    model.train()

    running_loss = 0
    running_acc = 0

    for batch in iterator:
        optimizer.zero_grad()

        tokens, lengths = batch.text
        # lengths are moved to the CPU before being handed to the model
        logits = model(tokens, lengths.to('cpu')).squeeze(1)

        batch_loss = criterion(logits, batch.label)
        batch_acc = binary_accuracy(logits, batch.label)

        batch_loss.backward()
        optimizer.step()

        running_loss += batch_loss.item()
        running_acc += batch_acc.item()

    n_batches = len(iterator)
    return running_loss / n_batches, running_acc / n_batches

In [13]:
def evaluate(model, iterator, criterion):
    """Score `model` on `iterator` without updating its weights.

    The model is switched to eval mode and gradients are disabled.
    Returns (mean batch loss, mean batch accuracy), averaged over len(iterator).
    """
    model.eval()

    running_loss = 0
    running_acc = 0

    with torch.no_grad():
        for batch in iterator:
            tokens, lengths = batch.text
            # lengths are moved to the CPU before being handed to the model
            logits = model(tokens, lengths.to('cpu')).squeeze(1)

            batch_loss = criterion(logits, batch.label)
            batch_acc = binary_accuracy(logits, batch.label)

            running_loss += batch_loss.item()
            running_acc += batch_acc.item()

    n_batches = len(iterator)
    return running_loss / n_batches, running_acc / n_batches

In [14]:
import time
def epoch_time(start_time, end_time):
    """Split the elapsed time between two timestamps (in seconds) into whole
    minutes and the remaining whole seconds; fractions are truncated."""
    elapsed = end_time - start_time
    whole_minutes = int(elapsed / 60)
    leftover_seconds = int(elapsed - (whole_minutes * 60))
    return whole_minutes, leftover_seconds

In [15]:
N_EPOCHS = 10
# NOTE(review): best_valid_loss is initialised here but never read or updated
# inside this loop.
best_valid_loss = float('inf')
for epoch in range(N_EPOCHS):

    start_time = time.time()
    
    # One optimisation pass over the training split, then a scoring pass over the
    # test split. NOTE(review): the test split is evaluated every epoch; there is
    # no separate validation split in the visible cells.
    train_loss, train_acc = train(model, train_iterator, optimizer, criterion)
    test_loss, test_acc = evaluate(model, test_iterator, criterion)

    end_time = time.time()

    epoch_mins, epoch_secs = epoch_time(start_time, end_time)

    print(f'Epoch: {epoch+1:02} | Epoch Time: {epoch_mins}m {epoch_secs}s')
    print(f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%')
    print(f'\tTest Loss: {test_loss:.3f} | Test Acc: {test_acc*100:.2f}%')

100%|█████████▉| 399943/400000 [00:30<00:00, 22571.27it/s]

Epoch: 01 | Epoch Time: 1m 29s
	Train Loss: 0.552 | Train Acc: 70.76%
	Test Loss: 0.390 | Test Acc: 81.39%
Epoch: 02 | Epoch Time: 1m 35s
	Train Loss: 0.319 | Train Acc: 86.81%
	Test Loss: 0.261 | Test Acc: 89.77%
Epoch: 03 | Epoch Time: 1m 35s
	Train Loss: 0.257 | Train Acc: 89.82%
	Test Loss: 0.235 | Test Acc: 90.86%
Epoch: 04 | Epoch Time: 1m 34s
	Train Loss: 0.220 | Train Acc: 91.44%
	Test Loss: 0.227 | Test Acc: 90.92%
Epoch: 05 | Epoch Time: 1m 35s
	Train Loss: 0.195 | Train Acc: 92.54%
	Test Loss: 0.247 | Test Acc: 90.66%
Epoch: 06 | Epoch Time: 1m 35s
	Train Loss: 0.173 | Train Acc: 93.53%
	Test Loss: 0.220 | Test Acc: 91.73%
Epoch: 07 | Epoch Time: 1m 35s
	Train Loss: 0.153 | Train Acc: 94.36%
	Test Loss: 0.211 | Test Acc: 92.11%
Epoch: 08 | Epoch Time: 1m 35s
	Train Loss: 0.139 | Train Acc: 95.00%
	Test Loss: 0.216 | Test Acc: 92.19%
Epoch: 09 | Epoch Time: 1m 35s
	Train Loss: 0.123 | Train Acc: 95.57%
	Test Loss: 0.219 | Test Acc: 92.13%
Epoch: 10 | Epoch Time: 1m 35s
	Train

In [16]:
class Delta_RNN(nn.Module):
    """LSTM encoder with a multi-output linear head (one output per quantile).

    Args:
        vocab_size: number of rows in the embedding table.
        embedding_dim: size of each token embedding.
        hidden_dim: LSTM hidden size per direction.
        output_dim: accepted for signature compatibility with ``RNN``; the head
            width is controlled by ``n_quantiles`` and this value is not used.
        n_layers: number of stacked LSTM layers.
        bidirectional: whether the LSTM reads the sequence in both directions.
        dropout: dropout probability (embeddings, LSTM inter-layer, final hidden).
        pad_idx: vocabulary index of the padding token.
        n_quantiles: number of outputs of the head; defaults to 9, the width
            that was previously hard-coded.
    """

    def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim, n_layers, 
                 bidirectional, dropout, pad_idx, n_quantiles = 9):
        
        super().__init__()
        
        self.bidirectional = bool(bidirectional)
        
        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx = pad_idx)
        
        self.rnn = nn.LSTM(embedding_dim, 
                           hidden_dim, 
                           num_layers=n_layers, 
                           bidirectional=self.bidirectional, 
                           # inter-layer dropout has no effect (and only warns) with one layer
                           dropout=dropout if n_layers > 1 else 0.0)
        
        # One final hidden state per direction feeds the head.
        num_directions = 2 if self.bidirectional else 1
        self.fc_1 = nn.Linear(hidden_dim * num_directions, n_quantiles)
        
        self.dropout = nn.Dropout(dropout)
        
    def forward(self, text, text_lengths):
        """Return the head outputs, shape [batch size, n_quantiles].

        Args:
            text: token indices, [sent len, batch size].
            text_lengths: unpadded length of every sequence in the batch.
        """
        # The encoder path lives in penultimate() so the two methods cannot drift apart.
        return self.fc_1(self.penultimate(text, text_lengths))

    def penultimate(self, text, text_lengths):
        """Return the dropped-out final hidden state that feeds ``fc_1``,
        shape [batch size, hid dim * num directions]."""
        
        #text = [sent len, batch size]
        
        embedded = self.dropout(self.embedding(text))
        
        #embedded = [sent len, batch size, emb dim]
        
        # pack_padded_sequence needs tensor lengths on the CPU
        if torch.is_tensor(text_lengths):
            text_lengths = text_lengths.cpu()
        
        #pack sequence so the LSTM skips the padding positions
        packed_embedded = nn.utils.rnn.pack_padded_sequence(embedded, text_lengths)
        
        # Only the final hidden state is needed; per-step outputs and the cell
        # state are discarded.
        _, (hidden, _) = self.rnn(packed_embedded)
        
        #hidden = [num layers * num directions, batch size, hid dim]
        
        if self.bidirectional:
            #concat the final forward (hidden[-2,:,:]) and backward (hidden[-1,:,:]) hidden layers
            final_hidden = torch.cat((hidden[-2,:,:], hidden[-1,:,:]), dim = 1)
        else:
            #single direction: the last layer's final state is hidden[-1,:,:]
            final_hidden = hidden[-1,:,:]
        
        #and apply dropout
        return self.dropout(final_hidden)
    


In [20]:
# Relevant functions

# Quantile levels, one per model output column; index 4 is the 0.5 level.
all_qs = [0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9]
# NOTE(review): the name is rebound from a Python list to a tensor on `device`.
all_qs = torch.Tensor(all_qs).to(device)
# NOTE(review): mean_is, std_is and epsilon are not referenced by the visible
# cells (train_delta passes the literals 0 and 1 to customLoss instead).
mean_is = 0
std_is = 1
# Weight of the term in customLoss that penalises decreasing adjacent outputs.
penalty = 1
epsilon = 0.00

def cumLaplaceDistribution(y_pred,mean,standard_deviation,all_qs):
  """Piecewise exponential CDF evaluated at `y_pred`.

  Where y_pred < mean the result is  q * exp((1 - q) * (y_pred - mean) / sd);
  where y_pred >= mean it is  1 - (1 - q) * exp(-q * (y_pred - mean) / sd),
  with q = all_qs and sd = standard_deviation. Both exponents are clamped to
  be at most 0 before exponentiation.
  """
  # branch used below the mean
  below_exponent = ((1-all_qs) * (y_pred - mean))/standard_deviation
  below_exponent.clamp_(max = 0)
  below_branch = all_qs * torch.exp(below_exponent)

  # branch used at or above the mean
  above_exponent = (-1.0 * all_qs * (y_pred - mean))/standard_deviation
  above_exponent.clamp_(max = 0)
  above_branch = 1 - ((1-all_qs) * torch.exp(above_exponent))

  # 1.0 where y_pred >= mean, 0.0 where y_pred < mean; starts as a copy of
  # y_pred broadcast to the shape of `mean`
  selector = torch.div(y_pred,torch.ones_like(mean))
  selector = selector.masked_fill(y_pred >= mean, 1.0)
  selector = selector.masked_fill(y_pred < mean, 0.0)

  return ((1 - selector) * below_branch) + (selector * above_branch)

def logLikelihoodLoss(y_true,y_pred,mean,standard_deviation,all_qs,reg_hook = False):
  """Binary log-likelihood built on cumLaplaceDistribution evaluated at 0.

  The distribution is centred on `y_pred`; the `mean` argument is accepted but
  not used. With reg_hook=True the per-element log-likelihood is returned
  as-is; otherwise its negated mean is returned.
  """
  prob_at_zero = cumLaplaceDistribution(0.0,mean = y_pred,standard_deviation = standard_deviation,all_qs = all_qs)
  # keep both logarithms finite
  prob_at_zero.clamp_(min = 1e-7,max = 1 - 1e-7)
  per_element = (y_true * torch.log(1 - prob_at_zero)) + ((1 - y_true) * torch.log(prob_at_zero))
  if reg_hook:
    return per_element
  return - 1 * torch.mean(per_element)

def customLoss(y_true, y_pred, mean, standard_deviation, all_qs, penalty):
    """Average per-quantile log-likelihood loss plus an ordering penalty.

    Args:
        y_true: labels; only column 0 is used (indexed as y_true[:, 0]).
        y_pred: model outputs, one column per entry of `all_qs`.
        mean, standard_deviation: forwarded unchanged to logLikelihoodLoss.
        all_qs: iterable of quantile levels, aligned with the columns of y_pred.
        penalty: weight of the ordering term.

    Returns:
        Scalar tensor: mean of the per-quantile losses plus
        penalty * mean(max(0, y_pred[:, i] - y_pred[:, i + 1])).
    """
    ind_losses = [
        logLikelihoodLoss(y_true[:,0], y_pred[:,i], mean, standard_deviation, q)
        for i, q in enumerate(all_qs)
    ]

    # Positive wherever a column is smaller than the column to its left.
    adjacent_gaps = y_pred[:,1:] - y_pred[:,:-1]
    if adjacent_gaps.numel() == 0:
        # A single output column has no neighbours; the mean of an empty
        # tensor would be NaN and poison the loss.
        ordering_penalty = 0.0
    else:
        # Take dtype/device from the predictions instead of the global `device`.
        zero = torch.zeros(1, dtype=adjacent_gaps.dtype, device=adjacent_gaps.device)
        ordering_penalty = penalty * torch.mean(torch.max(zero, -1.0 * adjacent_gaps))

    return torch.mean(torch.stack(ind_losses)) + ordering_penalty

def customTestPred(y_pred,mean,standard_deviation,all_qs,batch_size = 1):
  """Turn predicted locations into hard 0/1 labels and the scores behind them.

  For each of the first `batch_size` entries of `mean`, the score is one minus
  the piecewise exponential CDF (centred on that entry) evaluated at `y_pred`;
  the label is 1 when the score is >= 0.5, else 0.

  Args:
      y_pred: point at which the distribution is evaluated (callers pass 0).
      mean: predicted locations; read with .item() when batch_size == 1 and
          indexed as mean[xx] otherwise.
      standard_deviation: scale applied to (y_pred - mean).
      all_qs: quantile level tensor; each score is read with .item(), so it
          must yield a single element.
      batch_size: number of entries to score. Values below 1 yield empty
          outputs (previously this raised UnboundLocalError).

  Returns:
      (labels, scores): two float tensors on `device`, each of shape (n, 1).
  """
  acc = []
  cdfs = []
  val = (y_pred - mean)/standard_deviation 
  single = (batch_size == 1)
  for xx in range(batch_size):
    # A single sample is read as a Python scalar; a batch is read by index.
    if single:
      mean_xx, val_xx = mean.item(), val.item()
    else:
      mean_xx, val_xx = mean[xx], val[xx]

    if(y_pred < mean_xx):
      score = 1 - (all_qs * torch.exp((1 - all_qs) * val_xx))
    elif(y_pred >= mean_xx):
      score = 1 - (1 - ((1-all_qs) * torch.exp(-1 * all_qs * val_xx)))
    else:
      # neither comparison holds (e.g. NaN): the entry is skipped, as before
      continue

    cdfs.append(score.item())
    acc.append([1] if score.item() >= 0.5 else [0])
  return torch.Tensor(acc).to(device).reshape(-1,1),torch.Tensor(cdfs).to(device).reshape(-1,1)

def customTestPred_new(op,batch_size = 1):
  """Score the first `batch_size` entries of `op` with a fixed level of 0.5.

  For a negative entry v the score is (1 - tau) * exp(tau * v); for a
  non-negative entry it is 1 - tau * exp(-tau * v), with tau = 0.5. The label
  is 1 when the score is >= 0.5, else 0.

  Returns (labels, scores) as float tensors on `device`, each of shape (n, 1).
  """
  tau = torch.Tensor([0.5])
  labels = []
  scores = []
  for xx in range(batch_size):
    ip_val = op[xx]
    if(ip_val < 0):
      predval = (1-tau)*torch.exp(tau*ip_val)
    elif(ip_val >= 0):
      predval = 1-tau*torch.exp(-1*tau*op[xx])
    else:
      # neither comparison holds: nothing is recorded for this entry
      continue
    scores.append(predval.item())
    labels.append([1] if predval.item() >= 0.5 else [0])
  return torch.Tensor(labels).to(device).reshape(-1,1),torch.Tensor(scores).to(device).reshape(-1,1)

def acc_Q(train_preds,train_labels):
    """Fraction of labels matched by the hard predictions that customTestPred
    derives from `train_preds` (evaluated at 0, scale 1, level 0.5)."""
    preds_col = np.array(train_preds).reshape(-1,1)
    labels_col = np.array(train_labels).reshape(-1,1)
    hard_preds,_ = customTestPred(0,preds_col,standard_deviation = 1,all_qs = torch.Tensor([0.5]),batch_size = preds_col.shape[0])
    # count the rows where the predicted class equals the label
    matches = sum(1 for pred, label in zip(hard_preds,labels_col) if pred.item() == label[0])
    return matches/labels_col.shape[0]

def acc_tests(test_preds,test_labels):
    """Accuracy of the median-quantile predictions against `test_labels`.

    This was a line-for-line copy of acc_Q; it now delegates so the two
    entry points cannot drift apart.
    """
    return acc_Q(test_preds,test_labels)

def regular_acc(test_preds, test_labels):
    """Accuracy of scores thresholded at 0.5: a score below 0.5 counts as
    class 0, anything else as class 1."""
    preds_col = np.array(test_preds).reshape(-1,1)
    labels_col = np.array(test_labels).reshape(-1,1)
    hits = sum(
        1
        for pred, label in zip(preds_col,labels_col)
        if label[0] == (0 if pred.item() < 0.5 else 1)
    )
    return hits/labels_col.shape[0]

def cdf(mean, all_qs,epsilon = 0,standard_deviation = 1 ,y_pred= 0):
  """One minus the piecewise exponential CDF (centred on `mean`) at `y_pred`.

  Where y_pred < mean the result is  1 - q * exp((1 - q) * (y_pred - mean) / sd);
  where y_pred >= mean it is  1 - (1 - (1 - q) * exp(-q * (y_pred - mean) / sd)),
  with q = all_qs and sd = standard_deviation. Both exponents are clamped to
  be at most 0. `epsilon` is accepted but not used.
  """
  # branch used below the mean
  below_exponent = ((1-all_qs) * (y_pred - mean))/standard_deviation
  below_exponent.clamp_(max = 0)
  below_branch = 1 - (all_qs * torch.exp(below_exponent))

  # branch used at or above the mean
  above_exponent = (-1.0 * all_qs * (y_pred - mean))/standard_deviation
  above_exponent.clamp_(max = 0)
  above_branch = 1 - (1 - ((1-all_qs) * torch.exp(above_exponent)))

  # 1.0 where y_pred >= mean, 0.0 where y_pred < mean
  selector = torch.div(y_pred,torch.ones_like(mean))
  selector = selector.masked_fill(y_pred >= mean, 1.0)
  selector = selector.masked_fill(y_pred < mean, 0.0)

  return ((1 - selector) * below_branch) + (selector * above_branch)


In [25]:
# Hyperparameters for the quantile model (same values as the baseline).
INPUT_DIM = len(TEXT.vocab)
EMBEDDING_DIM = 100
HIDDEN_DIM = 256
OUTPUT_DIM = 1   # accepted by Delta_RNN, whose head width is fixed inside the class
N_LAYERS = 2
BIDIRECTIONAL = True
DROPOUT = 0.5
PAD_IDX = TEXT.vocab.stoi[TEXT.pad_token]

# Seed BEFORE building the model so the random weight initialisation is
# reproducible. Previously the seed was set after construction, leaving the
# initial weights dependent on whatever RNG state earlier cells left behind.
torch.manual_seed(12)

d_model = Delta_RNN(INPUT_DIM, 
            EMBEDDING_DIM, 
            HIDDEN_DIM, 
            OUTPUT_DIM, 
            N_LAYERS, 
            BIDIRECTIONAL, 
            DROPOUT, 
            PAD_IDX)

# NOTE(review): target_acc is not referenced by the visible cells.
target_acc = 0.90
d_model = d_model.to(device)
# Rebinds `optimizer` (previously attached to the baseline model) to d_model.
optimizer = torch.optim.Adam(d_model.parameters())

def count_parameters(model):
    """Return the total number of elements across the model's trainable parameters."""
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

print(f'The model has {count_parameters(d_model):,} trainable parameters')
pretrained_embeddings = TEXT.vocab.vectors

print(pretrained_embeddings.shape)
# Overwrite the embedding table in place with the pretrained vectors.
d_model.embedding.weight.data.copy_(pretrained_embeddings)

UNK_IDX = TEXT.vocab.stoi[TEXT.unk_token]

# Reset the unknown and padding token rows to all zeros.
d_model.embedding.weight.data[UNK_IDX] = torch.zeros(EMBEDDING_DIM)
d_model.embedding.weight.data[PAD_IDX] = torch.zeros(EMBEDDING_DIM)

print(d_model.embedding.weight.data)

def binary_accuracy(preds, y):
    """
    Per-batch accuracy as a fraction (8 correct out of 10 gives 0.8, not 8).

    Same computation as the definition of this name in the earlier cell:
    sigmoid of the logits, rounded, compared element-wise with `y`.
    """
    probabilities = torch.sigmoid(preds)
    matches = (torch.round(probabilities) == y).float()  # float so it can be divided
    n_items = len(matches)
    return matches.sum() / n_items


def train_delta(model, optimizer, iterator, epochs):
    """Run one optimisation pass with customLoss and return training accuracy.

    Args:
        model: network returning one column per quantile level in `all_qs`.
        optimizer: optimizer stepping the model's parameters.
        iterator: batches exposing `text` as (tokens, lengths) and `label`.
        epochs: accepted for call compatibility; not used.

    Returns:
        acc_Q accuracy of column 4 of the outputs (the 0.5 level of `all_qs`),
        collected during the pass, against the batch labels.
    """
    train_preds_Q = []
    train_labels = []
    model.train()

    for batch in iterator:

        optimizer.zero_grad()

        text, text_lengths = batch.text
        labels = batch.label
        op_qs = model(text, text_lengths.to('cpu'))
        # customLoss reads the labels as a single column, with location 0 and scale 1.
        lossQ = customLoss(labels.reshape(-1,1),op_qs,0,1,all_qs,penalty)

        lossQ.backward()

        optimizer.step()

        # One host transfer per batch instead of one .item() call per sample.
        train_preds_Q.extend(op_qs[:,4].detach().reshape(-1).tolist())
        train_labels.extend(labels.reshape(-1).tolist())

    return acc_Q(train_preds_Q,train_labels)


def test_quantiles(model,loader,epochs):
  """Score the model on `loader` and return its accuracy.

  Args:
      model: network returning one column per quantile level in `all_qs`.
      loader: batches exposing `text` as (tokens, lengths) and `label`.
      epochs: accepted for call compatibility; not used.

  Returns:
      acc_tests accuracy of column 4 of the outputs (the 0.5 level of
      `all_qs`) against the batch labels.
  """
  model.eval()
  test_preds_Q = []
  test_labels = []
  with torch.no_grad():
    for batch in loader:
      labels = batch.label
      text, text_lengths = batch.text
      op_qs = model(text, text_lengths.to('cpu'))
      # One host transfer per batch instead of one .item() call per sample.
      test_preds_Q.extend(op_qs[:,4].reshape(-1).tolist())
      test_labels.extend(labels.reshape(-1).tolist())
  return acc_tests(test_preds_Q,test_labels)

The model has 4,814,961 trainable parameters
torch.Size([25002, 100])
tensor([[ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [-0.0382, -0.2449,  0.7281,  ..., -0.1459,  0.8278,  0.2706],
        ...,
        [-0.1632, -0.5877,  0.6399,  ..., -0.0234,  0.0447,  0.8404],
        [-0.2156,  1.0275,  0.0914,  ..., -0.3935,  0.8041, -0.6058],
        [ 0.3393,  2.1684,  0.0894,  ..., -0.3708, -0.9544, -0.4810]],
       device='cuda:0')


In [26]:
N_EPOCHS = 10
# NOTE(review): best_valid_loss is initialised here but never read or updated
# inside this loop.
best_valid_loss = float('inf')
for epoch in range(N_EPOCHS):

    start_time = time.time()
    
    # One optimisation pass over the training split, then a scoring pass over the
    # test split. The epoch index is passed through, but neither function uses it.
    train_acc = train_delta(d_model, optimizer, train_iterator,epoch)
    test_acc = test_quantiles(d_model, test_iterator, epoch)

    end_time = time.time()

    epoch_mins, epoch_secs = epoch_time(start_time, end_time)

    print(f'Epoch: {epoch+1:02} | Epoch Time: {epoch_mins}m {epoch_secs}s')
    print(f'\tTrain Acc: {train_acc*100:.2f}%')
    print(f'\tTest Acc: {test_acc*100:.2f}%')

Epoch: 01 | Epoch Time: 1m 51s
	Train Acc: 58.55%
	Test Acc: 70.88%
Epoch: 02 | Epoch Time: 1m 51s
	Train Acc: 76.39%
	Test Acc: 71.72%
Epoch: 03 | Epoch Time: 1m 51s
	Train Acc: 85.84%
	Test Acc: 88.92%
Epoch: 04 | Epoch Time: 1m 51s
	Train Acc: 90.02%
	Test Acc: 89.26%
Epoch: 05 | Epoch Time: 1m 51s
	Train Acc: 91.55%
	Test Acc: 91.00%
Epoch: 06 | Epoch Time: 1m 51s
	Train Acc: 92.72%
	Test Acc: 90.74%
Epoch: 07 | Epoch Time: 1m 51s
	Train Acc: 93.69%
	Test Acc: 91.52%
Epoch: 08 | Epoch Time: 1m 51s
	Train Acc: 94.34%
	Test Acc: 91.86%
Epoch: 09 | Epoch Time: 1m 51s
	Train Acc: 95.08%
	Test Acc: 91.38%
Epoch: 10 | Epoch Time: 1m 51s
	Train Acc: 95.48%
	Test Acc: 90.14%


In [27]:
# Bucket every example by `delta`: the smallest offset d in 1..4 for which the
# outputs at columns 4-d and 4+d lie strictly on opposite sides of 0, or 5 when
# no such offset exists. Index 0 of both lists is never incremented because
# d = 0 compares the median column with itself.
total_per_delta = [0,0,0,0,0,0]
wrong_per_delta = [0,0,0,0,0,0]
d_model.eval()


def _tally_delta_buckets(model, iterator, totals, wrongs, median_idx=4, max_delta=5):
    """Add per-delta example counts (`totals`) and misclassification counts
    (`wrongs`) for every batch of `iterator`; both lists are updated in place."""
    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        for batch in iterator:
            text, text_lengths = batch.text
            quantile_rows = model(text, text_lengths.to('cpu')).tolist()
            batch_labels = batch.label.reshape(-1).tolist()
            for row, label in zip(quantile_rows, batch_labels):
                delta = 0
                while (delta < max_delta
                       and not (row[median_idx - delta] < 0 < row[median_idx + delta])):
                    delta += 1
                totals[delta] += 1
                median = row[median_idx]
                # The sign of the median column is the predicted class.
                if (median <= 0 and label == 1) or (median > 0 and label == 0):
                    wrongs[delta] += 1


# The training and test passes share one helper. The copy-pasted test loop used
# to collect the labels inside the per-row loop, appending each batch's labels
# batch_size times. The dead `pflag` debug printing has been dropped.
_tally_delta_buckets(d_model, train_iterator, total_per_delta, wrong_per_delta)

print(total_per_delta)
print(wrong_per_delta)

# The test split is accumulated into the same counters.
_tally_delta_buckets(d_model, test_iterator, total_per_delta, wrong_per_delta)

# print(total_per_delta)
# print(wrong_per_delta)

[0, 715, 827, 1100, 1959, 40399]
[0, 316, 258, 292, 334, 471]


In [29]:
# Summary table over the delta buckets 1..5 (labelled 0.10 .. 0.50).
print("Delta      |", end="")
for i in range(1,6):
  print("{:.2f}".format(i/10), end= ' | ')
print()

# Misclassification rate inside each bucket; an empty bucket prints "nan"
# instead of raising ZeroDivisionError.
print("Misc. Rate |", end="")
for i in range(1,6):
  bucket_total = total_per_delta[i]
  misc_rate = wrong_per_delta[i]/bucket_total if bucket_total else float('nan')
  print("{:.2f}".format(misc_rate), end= ' | ')
print()

# Share of examples not yet accounted for by the buckets before column i.
print("Ret. Rate  |", end="")
current_sum = 0
total = sum(total_per_delta)
for i in range(1,6):
  retained = total - current_sum
  ret_rate = retained/total if total else float('nan')
  print("{:.2f}".format(ret_rate), end= ' | ')
  current_sum += total_per_delta[i]

Delta      |0.10 | 0.20 | 0.30 | 0.40 | 0.50 | 
Misc. Rate |0.45 | 0.33 | 0.28 | 0.18 | 0.02 | 
Ret. Rate  |1.00 | 0.98 | 0.96 | 0.94 | 0.89 | 