In [0]:
import torch

In [0]:
from google.colab import drive
drive.mount('/content/drive')

In [0]:
cd drive/My Drive

In [0]:
cd MSRP

# **IMPORTING LIBRARIES**

In [0]:
from data import Data
from sklearn.metrics import f1_score
import sklearn

In [0]:
import nltk
from nltk.corpus import stopwords
nltk.download('stopwords')
import numpy as np

In [0]:
use_cuda = torch.cuda.is_available()

In [0]:
data_file = "./dataset/train.tsv"
data_test_file = "./dataset/test.tsv"
training_ratio = 0.9999999
max_len = 20
tracking_pair = False
hidden_size = 50
batch_size = 16
num_iters = 10
learning_rate = 0.001

# DATA

In [0]:
data = Data(data_file,data_test_file,training_ratio,max_len)

In [0]:
len(data.word2index)

In [0]:
print('Number of training samples        :', len(data.x_train))
print('Number of validation samples      :', len(data.x_val))
print('Maximum sequence length           :', max_len)

# Embeddings

In [0]:
embd_file = "./glove-global-vectors-for-word-representation/glove.6B.100d.txt"

In [0]:
from embedding_helper2 import Get_Embedding

In [0]:
embedding = Get_Embedding(embd_file, data.word2index)
embedding_size = embedding.embedding_matrix.shape[1]

In [0]:
embedding_size

In [0]:
len(embedding.embedding_matrix[7])

In [0]:
len(embedding.embedding_matrix)

In [0]:
import torch.nn as nn
from torch import Tensor
from torch import optim
import torch.nn.functional as F

In [0]:
def commonWords(sen_1, sen_2):
  d = np.empty(len(data.word2index), dtype=int)
  for i in range(len(d)):
    d[i] = -1
    
  listPairs = []
  list1 = []
  list2 = []
  for i in range(len(sen_1)):
    d[sen_1[i]] = i
    
  for i in range(len(sen_2)):
    if d[sen_2[i]] > 1 and sen_2[i] > 0 :
      list1.append(d[sen_2[i]])
      list2.append(i)
    
  list1 = list(dict.fromkeys(list1))
  list2 = list(dict.fromkeys(list2))
  
  listPairs.append(list1)
  listPairs.append(list2)
  return listPairs

In [0]:
def max_pool(e_list):
  e_list = np.array(e_list)
  
  for i in range(len(e_list)):
    e_list[i] = e_list[i].data.cpu().numpy()
  mp = []
  for i in range(100):
    m = e_list[0][i]
    for j in range(len(e_list)):
      m = max(m, e_list[j][i])
    mp.append(m)
      
  #print("Length of mp = " + str(len(mp)))
  return torch.cuda.FloatTensor(mp)

# GAN MODEL

In [0]:
def weights_init(m):
  classname = m.__class__.__name__
  if classname.find('Linear') != -1:
    nn.init.xavier_normal_(m.weight.data, 1.0, 0.02).cuda()
    nn.init.constant_(m.bias.data, 0).cuda()

In [0]:
learning_rate_G = 0.001
learning_rate_D = 0.001
learning_rate_F = 0.001

In [0]:
class Generator(nn.Module):
    def __init__(self):
        super(Generator, self).__init__()
        self.use_cuda = torch.cuda.is_available()
        self.main = nn.Sequential(
            nn.Linear(100, 100),
            nn.Tanh()
        )
    def forward(self, input):
        return self.main(input).cuda()

In [0]:
netG = Generator()
if use_cuda: netG = netG.cuda()
netG.apply(weights_init)
print(netG)

In [0]:
class Discriminator(nn.Module):
    def __init__(self):
      super(Discriminator, self).__init__()
      self.use_cuda = torch.cuda.is_available()
      self.main = nn.Sequential(
        nn.Linear(100, 2),
        nn.Softmax(dim = 1)
      ) 
    def forward(self, input):
      return self.main(input).cuda()

In [0]:
netD = Discriminator()
if use_cuda: netD = netD.cuda()
netD.apply(weights_init)
print(netD)

In [0]:
real_label = torch.tensor([0,1])
fake_label = torch.tensor([1,0])
optimizerD = optim.Adam(netD.parameters(), lr=learning_rate_G)
optimizerG = optim.Adam(netG.parameters(), lr=learning_rate_D)

In [0]:
class Dropout_layer(nn.Module):
  def __init__(self):
    super(Dropout_layer, self).__init__()
    self.d = nn.Dropout(p=0.1)
    
  def forward(self, input):
    return self.d(input).cuda()

In [0]:
dropout_layer = Dropout_layer()
if use_cuda: dropout_layer = dropout_layer.cuda()
print(dropout_layer)

In [0]:
class Final_layer(nn.Module):
    def __init__(self):
        super(Final_layer, self).__init__()
        self.use_cuda = torch.cuda.is_available()
        self.main = nn.Sequential(
            nn.Linear(101, 2),
            nn.Softmax(dim = 1)
        )
    def forward(self, input):
        return self.main(input).cuda()

In [0]:
net_final = Final_layer()
if use_cuda: net_final = net_final.cuda()
net_final.apply(weights_init)
print(net_final)

In [0]:
optimizer_final = optim.Adam(net_final.parameters(), lr = learning_rate_F)

# MALSTM MODEL

In [0]:
class Manhattan_LSTM(nn.Module):
    def __init__(self, hidden_size, embedding, train_embedding = False):
        super(Manhattan_LSTM, self).__init__()
        self.use_cuda = torch.cuda.is_available()
        self.hidden_size = hidden_size
        
        self.embedding = nn.Embedding(embedding.shape[0], embedding.shape[1])
        self.embedding.weight = nn.Parameter(embedding)
        self.input_size = embedding.shape[1]
        
        self.embedding.weight.requires_grad = train_embedding
        
        self.lstm_1 = nn.LSTM(self.input_size, self.hidden_size, num_layers=1, bidirectional=True)
        self.lstm_2 = nn.LSTM(self.input_size, self.hidden_size, num_layers=1, bidirectional=True)
        
    def exponent_neg_manhattan_distance(self, x1, x2):
        return torch.exp(-torch.sum(torch.abs(x1 - x2), dim=1))
    
    def forward(self, input, hidden):
        
        #print(input[0])
        #print(input[1])
        
        ip0 = input[0].t()
        ip1 = input[1].t()
        
        commonList = []
        
        for i in range(batch_size):
            listPairs = commonWords(ip0[i], ip1[i])
            commonList.append(listPairs)
    
        commonList = np.array(commonList)
        
        #print(commonList)
        input_len = len(input[1])
        
        embedded_1 = self.embedding(input[0])
        embedded_2 = self.embedding(input[1])
        
        bs = embedded_1.size()[1]
        outputs_1, hidden_1 = self.lstm_1(embedded_1, hidden)
        outputs_2, hidden_2 = self.lstm_1(embedded_2, hidden)
        
        max_pool_1 = F.adaptive_avg_pool1d(outputs_1.permute(1,2,0),1).view(batch_size,-1)
        max_pool_2 = F.adaptive_avg_pool1d(outputs_2.permute(1,2,0),1).view(batch_size,-1)
        
        att_weights = torch.bmm(max_pool_1.view(batch_size, 1, 100), outputs_2.view(batch_size, 100, input_len)).view(batch_size, input_len)
        
        att_softmax = torch.zeros([batch_size, input_len])
        for i in range(batch_size):
          att_softmax[i] = F.softmax(att_weights[i], dim = 0)
        
        new_pool = torch.bmm(att_softmax.view(batch_size, 1, input_len), outputs_2.view(batch_size, input_len, 100).cpu()).view(batch_size, 100).cuda()
        
        ehs_1 = []
        for i in range(batch_size):
            e_list = []
            for j in range(len(commonList[i][0])):
                x = commonList[i][0][j]
              
                e_list.append(outputs_1[x][i])
            if len(e_list) > 0:
                mp1 = max_pool(e_list)
            else:
                mp1 = torch.zeros(100)
              
            ehs_1.append(mp1.cuda())
        
        
        ehs_2 = []
        for i in range(batch_size):
            e_list = []
            for j in range(len(commonList[i][1])):
                x = commonList[i][1][j]
              
                e_list.append(outputs_2[x][i])
            if len(e_list) > 0:
                mp2 = max_pool(e_list)
            else:
                mp2 = torch.zeros(100)
              
            ehs_2.append(mp2.cuda())
        
        '''ths_1 = torch.zeros(batch_size, 200)
        for i in range(batch_size):
            ths_1[i] = torch.cat((max_pool_1[i], ehs_1[i]),0)
          
        ths_2 = torch.zeros(batch_size, 200)
        for i in range(batch_size):
            ths_2[i] = torch.cat((max_pool_2[i], ehs_2[i]),0)'''
            
            
        elitehs_1 = torch.zeros(batch_size, 100)
        for i in range(batch_size):
            elitehs_1[i] = ehs_1[i]
          
        elitehs_2 = torch.zeros(batch_size, 100)
        for i in range(batch_size):
            elitehs_2[i] = ehs_2[i]
          
        #ths_1.cuda()
        #ths_2.cuda()
        elitehs_1.cuda()
        elitehs_2.cuda()
        #similarity_scores = self.exponent_neg_manhattan_distance(ths_1.cuda(), ths_2.cuda())
        similarity_scores = self.exponent_neg_manhattan_distance(max_pool_1, new_pool)
        
        return similarity_scores, elitehs_1, elitehs_2
    
    def init_weights(self):
        for name_1, param_1 in self.lstm_1.named_parameters():
            if 'bias' in name_1:
                nn.init.constant_(param_1, 0.0)
            elif 'weight' in name_1:
                nn.init.xavier_normal_(param_1)

        lstm_1 = self.lstm_1.state_dict()
        lstm_2 = self.lstm_2.state_dict()

        for name_1, param_1 in lstm_1.items():
            # Backwards compatibility for serialized parameters.
            if isinstance(param_1, torch.nn.Parameter):
                param_1 = param_1.data

            lstm_2[name_1].copy_(param_1)

    def init_hidden(self, batch_size):
        # Hidden dimensionality : 2 (h_0, c_0) x Num. Layers * Num. Directions x Batch Size x Hidden Size
        result = torch.zeros(2, 2, batch_size, self.hidden_size)
        result = tuple(result)

        if self.use_cuda: 
            result = (result[0].cuda(), result[1].cuda())
            return result
        else: return result

In [0]:
model = Manhattan_LSTM(hidden_size, embedding.embedding_matrix, train_embedding=False)
if use_cuda: model = model.cuda()
model.init_weights()

In [0]:
import time
import random
from torch import optim
import torch.nn.utils.rnn as rnn

In [0]:
x_train = data.x_train
x_val = data.x_val
y_train = data.y_train
y_val = data.y_val
x_test = data.x_test
y_test = data.y_test
train_samples = len(x_train)
val_samples = len(x_val)
test_samples = len(x_test)
test_samples

In [0]:
criterion = nn.BCELoss()
print_every = 1
print_loss_total = 0.0
train_loss = 0.0
val_loss = 0.0
max_acc = 0.73

In [0]:
model_trainable_parameters = tuple(filter(lambda p: p.requires_grad, model.parameters()))
model_optimizer = optim.Adam(model_trainable_parameters, lr=learning_rate)

In [0]:
hidden = model.init_hidden(batch_size)
len(hidden[0][0][0])

In [0]:
from helper import Helper
help_fn = Helper()

In [62]:
#run to load the base model
model.load_state_dict(torch.load("./model_weights_base.pt"))
model.eval()
model.train()
netG.load_state_dict(torch.load("./netG_weights_base.pt"))
netG.eval()
netG.train()
netD.load_state_dict(torch.load("./netD_weights_base.pt"))
netD.eval()
netD.train()
net_final.load_state_dict(torch.load("./netfinal_weights_base.pt"))
net_final.eval()
net_final.train()

Final_layer(
  (main): Sequential(
    (0): Linear(in_features=101, out_features=2, bias=True)
    (1): Softmax()
  )
)

In [0]:
start = time.time()
print('Beginning Model Training.\n')
batch_size = 16

for epoch in range(0, num_iters):
    model_loss = 0.0
    gen_loss = 0.0
    dis_loss = 0.0
    fin_loss = 0.0
    for i in range(0, train_samples, batch_size):
        input_variables = x_train[i:i+batch_size]
        similarity_scores = y_train[i:i+batch_size]
        
        sequences_1 = [sequence[0] for sequence in input_variables]
        sequences_2 = [sequence[1] for sequence in input_variables]
        batch_size = len(sequences_1)
        
        # Make a tensor for the similarity scores
        
        sim_scores_2d = torch.zeros([batch_size, 2])
        for j in range(batch_size):
          if similarity_scores[j] == 0:
            sim_scores_2d[j] = fake_label
          else:
            sim_scores_2d[j] = real_label
            
        sim_scores_2d = sim_scores_2d.cuda()

        temp = rnn.pad_sequence(sequences_1 + sequences_2)
        sequences_1 = temp[:, :batch_size]
        sequences_2 = temp[:, batch_size:]

        model_optimizer.zero_grad()
        loss_s = 0.0
        
        optimizerG.zero_grad()
        loss_g= 0.0
        
        optimizerD.zero_grad()
        loss_d = 0.0
        
        optimizer_final.zero_grad()
        loss_f = 0.0

        # Initialise the hidden state and pass through the maLSTM
        hidden = model.init_hidden(batch_size)
        output_scores, ehs1, ehs2 = model([sequences_1, sequences_2], hidden)
        
        output_scores = output_scores.view(-1)
        
        loss_s += criterion(output_scores, similarity_scores)
        
        ehs1 = ehs1.cuda()
        ehs2 = ehs2.cuda()
        
        
        # Generator
        gen_feature = netG(ehs2)
        
        # 1. Discriminator for the real class
        discrimm_classes = netD(ehs1)
        labels = torch.zeros(batch_size, 2)
        for j in range(batch_size):
          labels[j] = real_label
          
        labels = labels.cuda()
        
        loss_d += criterion(discrimm_classes, labels)
        
        
        # 2. Discriminator for the fake class
        discrimm_classes = netD(gen_feature)
        labels = torch.zeros(batch_size, 2)
        for j in range(batch_size):
          labels[j] = fake_label
          
        labels = labels.cuda()
          
        loss_d += criterion(discrimm_classes, labels)
        
        #print(discrimm_classes)
        
        # Update generator loss
        loss_g += criterion(discrimm_classes, sim_scores_2d)
        
        d_feature = dropout_layer(gen_feature)
        
        cat_feature = torch.zeros([batch_size, len(d_feature[0])+1])
        for j in range(batch_size):
          for k in range(100):
            cat_feature[j][k] = d_feature[j][k]
          cat_feature[j][100] = output_scores[j]
          
        
        cat_feature = cat_feature.cuda()
        
        final_labels = net_final(cat_feature)
        
        loss_f += criterion(final_labels, sim_scores_2d)
        
        com_loss = loss_d + (0.5*loss_g) + loss_f + loss_s
        
        com_loss.backward()
        
        model_optimizer.step()
        optimizerG.step()
        optimizerD.step()
        optimizer_final.step()
        
        
        fin_loss += loss_f
        model_loss += loss_s
        gen_loss += loss_g
        dis_loss += loss_d
    
        train_loss = com_loss
        print_loss_total += com_loss
        
        
    '''if epoch % 5:
        learning_rate *= 0.5
        model_optimizer = optim.Adam(model_trainable_parameters, lr=learning_rate)
        optimizer_final = optim.Adam(net_final.parameters(), lr = learning_rate)
        optimizerD = optim.Adam(netD.parameters(), lr=learning_rate)
        optimizerG = optim.Adam(netG.parameters(), lr=learning_rate)
        '''
    
    a_scores = []
    p_scores = []
    corr = 0
    for i in range(0, test_samples, batch_size):
        input_variables = x_test[i:i+batch_size]
        actual_scores = y_test[i:i+batch_size]

        sequences_1 = [sequence[0] for sequence in input_variables]
        sequences_2 = [sequence[1] for sequence in input_variables]
        batch_size = len(sequences_1)
        
        sim_scores_2d = torch.zeros([batch_size, 2])
        for j in range(batch_size):
          if similarity_scores[j] == 0:
            sim_scores_2d[j] = fake_label
          else:
            sim_scores_2d[j] = real_label
            
        sim_scores_2d = sim_scores_2d.cuda()

        temp = rnn.pad_sequence(sequences_1 + sequences_2)
        sequences_1 = temp[:, :batch_size]
        sequences_2 = temp[:, batch_size:]

        loss = 0.0
        hidden = model.init_hidden(batch_size)
        output_scores, ehs1, ehs2 = model([sequences_1, sequences_2], hidden)
        
        output_scores = output_scores.view(-1)
        
        ehs2 = ehs2.cuda() 
        gen_feature = netG(ehs2)
        
        d_feature = dropout_layer(gen_feature)
        
        cat_feature = torch.zeros([batch_size, len(d_feature[0])+1])
        for j in range(batch_size):
          for k in range(100):
            cat_feature[j][k] = d_feature[j][k]
          cat_feature[j][100] = output_scores[j]
          
        cat_feature = cat_feature.cuda()
        
        final_labels = net_final(cat_feature)
        
        
        for j in range(0, batch_size):
          acts = actual_scores[j].data.cpu().numpy()
          preds = final_labels[j].data.cpu().numpy()
          a_scores.append(acts)

          if preds[0] >= 0.5 and acts == 0:
            corr = corr+1
            p_scores.append(0)
          elif preds[1] >= 0.5 and acts == 1:
            corr = corr+1
            p_scores.append(1)
          elif preds[0] >=0.5:
            p_scores.append(0)
          else:
            p_scores.append(1)
          
    
    if epoch % print_every == 0:
        print_loss_avg = print_loss_total / print_every
        print_loss_total = 0
        print('%s (%d) %.4f' % (help_fn.time_slice(start, (epoch+1) / num_iters), epoch, print_loss_avg))
        print("LSTM loss    " + str(model_loss.data.cpu().numpy()) + "    Gen loss    " + str(gen_loss.data.cpu().numpy()) + "    Dis loss    " + str(dis_loss.data.cpu().numpy()) + "    Fin loss    " + str(fin_loss.data.cpu().numpy()))
        print(" Test Accuracy    " + str(corr/len(a_scores)) + "    f1 score    " + str(f1_score(p_scores, a_scores)))
        
        acc = corr/len(a_scores)
        
        if acc > max_acc :
          max_acc = acc
          torch.save(model.state_dict(), "./model_weights.pt")
          torch.save(netG.state_dict(), "./netG_weights.pt" )
          torch.save(netD.state_dict(), "./netD_weights.pt")
          torch.save(net_final.state_dict(), "./netfinal_weights.pt")
          print("Model Saved!")

In [0]:
torch.save(model.state_dict(), "./model_weights_base.pt")
torch.save(netG.state_dict(), "./netG_weights_base.pt")
torch.save(netD.state_dict(), "./netD_weights_base.pt")
torch.save(net_final.state_dict(), "./netfinal_weights_base.pt")
print("Model Saved!")

Model Saved!


In [0]:
print(max_acc)

0.73


In [0]:
#run to load the best model
model.load_state_dict(torch.load("./model_weights.pt"))
model.eval()
model.train()
netG.load_state_dict(torch.load("./netG_weights.pt"))
netG.eval()
netG.train()
netD.load_state_dict(torch.load("./netD_weights.pt"))
netD.eval()
netD.train()
net_final.load_state_dict(torch.load("./netfinal_weights.pt"))
net_final.eval()
net_final.train()

Final_layer(
  (main): Sequential(
    (0): Linear(in_features=101, out_features=2, bias=True)
    (1): Softmax()
  )
)

In [0]:
learning_rate = 0.0003
#model_optimizer = optim.Adam(model_trainable_parameters, lr=learning_rate)
optimizer_final = optim.Adam(net_final.parameters(), lr = learning_rate)
optimizerD = optim.Adam(netD.parameters(), lr=learning_rate)
optimizerG = optim.Adam(netG.parameters(), lr=learning_rate)

In [0]:
"Only generator-discriminator training"
start = time.time()
print('Beginning Model Training.\n')
batch_size = 16

for epoch in range(0, num_iters):
    model_loss = 0.0
    gen_loss = 0.0
    dis_loss = 0.0
    fin_loss = 0.0
    for i in range(0, train_samples, batch_size):
        input_variables = x_train[i:i+batch_size]
        similarity_scores = y_train[i:i+batch_size]
        
        sequences_1 = [sequence[0] for sequence in input_variables]
        sequences_2 = [sequence[1] for sequence in input_variables]
        batch_size = len(sequences_1)
        
        # Make a tensor for the similarity scores
        
        sim_scores_2d = torch.zeros([batch_size, 2])
        for j in range(batch_size):
          if similarity_scores[j] == 0:
            sim_scores_2d[j] = fake_label
          else:
            sim_scores_2d[j] = real_label
            
        sim_scores_2d = sim_scores_2d.cuda()

        temp = rnn.pad_sequence(sequences_1 + sequences_2)
        sequences_1 = temp[:, :batch_size]
        sequences_2 = temp[:, batch_size:]

        if model_optimizer: model_optimizer.zero_grad()
        loss_s = 0.0
        
        if optimizerG: optimizerG.zero_grad()
        loss_g= 0.0
        
        if optimizerD: optimizerD.zero_grad()
        loss_d = 0.0
        
        if optimizer_final: optimizer_final.zero_grad()
        loss_f = 0.0

        # Initialise the hidden state and pass through the maLSTM
        hidden = model.init_hidden(batch_size)
        output_scores, ehs1, ehs2 = model([sequences_1, sequences_2], hidden)
        
        output_scores = output_scores.view(-1)
        
        loss_s += criterion(output_scores, similarity_scores)
        
        ehs1 = ehs1.cuda()
        ehs2 = ehs2.cuda()
        
        
        # Generator
        gen_feature = netG(ehs2)
        
        # 1. Discriminator for the real class
        discrimm_classes = netD(ehs1)
        labels = torch.zeros(batch_size, 2)
        for j in range(batch_size):
          labels[j] = real_label
          
        labels = labels.cuda()
        
        loss_d += criterion(discrimm_classes, labels)
        
        
        # 2. Discriminator for the fake class
        discrimm_classes = netD(gen_feature)
        labels = torch.zeros(batch_size, 2)
        for j in range(batch_size):
          labels[j] = fake_label
          
        labels = labels.cuda()
          
        loss_d += criterion(discrimm_classes, labels)
        
        #print(discrimm_classes)
        
        # Update generator loss
        loss_g += criterion(discrimm_classes, sim_scores_2d)
        
        d_feature = dropout_layer(gen_feature)
        
        cat_feature = torch.zeros([batch_size, len(d_feature[0])+1])
        for j in range(batch_size):
          for k in range(100):
            cat_feature[j][k] = d_feature[j][k]
          cat_feature[j][100] = output_scores[j]
          
        
        cat_feature = cat_feature.cuda()
        
        final_labels = net_final(cat_feature)
        
        loss_f += criterion(final_labels, sim_scores_2d)
        
        com_loss = loss_d + (0.5*loss_g) + loss_f
        
        com_loss.backward()
        #model_optimizer.step()
        optimizerG.step()
        optimizerD.step()
        optimizer_final.step()
        
        
        fin_loss += loss_f
        model_loss += loss_s
        gen_loss += loss_g
        dis_loss += loss_d
    
        train_loss = com_loss
        print_loss_total += com_loss
        
        
    '''if epoch % 5:
        learning_rate *= 0.5
        model_optimizer = optim.Adam(model_trainable_parameters, lr=learning_rate)
        optimizer_final = optim.Adam(net_final.parameters(), lr = learning_rate)
        optimizerD = optim.Adam(netD.parameters(), lr=learning_rate)
        optimizerG = optim.Adam(netG.parameters(), lr=learning_rate)
        '''
    
    a_scores = []
    p_scores = []
    corr = 0
    for i in range(0, test_samples, batch_size):
        input_variables = x_test[i:i+batch_size]
        actual_scores = y_test[i:i+batch_size]

        sequences_1 = [sequence[0] for sequence in input_variables]
        sequences_2 = [sequence[1] for sequence in input_variables]
        batch_size = len(sequences_1)
        
        sim_scores_2d = torch.zeros([batch_size, 2])
        for j in range(batch_size):
          if similarity_scores[j] == 0:
            sim_scores_2d[j] = fake_label
          else:
            sim_scores_2d[j] = real_label
            
        sim_scores_2d = sim_scores_2d.cuda()

        temp = rnn.pad_sequence(sequences_1 + sequences_2)
        sequences_1 = temp[:, :batch_size]
        sequences_2 = temp[:, batch_size:]

        loss = 0.0
        hidden = model.init_hidden(batch_size)
        output_scores, ehs1, ehs2 = model([sequences_1, sequences_2], hidden)
        
        output_scores = output_scores.view(-1)
        
        ehs2 = ehs2.cuda() 
        gen_feature = netG(ehs2)
        
        d_feature = dropout_layer(gen_feature)
        
        cat_feature = torch.zeros([batch_size, len(d_feature[0])+1])
        for j in range(batch_size):
          for k in range(100):
            cat_feature[j][k] = d_feature[j][k]
          cat_feature[j][100] = output_scores[j]
          
        cat_feature = cat_feature.cuda()
        
        final_labels = net_final(cat_feature)
        
        
        for j in range(0, batch_size):
          acts = actual_scores[j].data.cpu().numpy()
          preds = final_labels[j].data.cpu().numpy()
          a_scores.append(acts)

          if preds[0] >= 0.5 and acts == 0:
            corr = corr+1
            p_scores.append(0)
          elif preds[1] >= 0.5 and acts == 1:
            corr = corr+1
            p_scores.append(1)
          elif preds[0] >=0.5:
            p_scores.append(0)
          else:
            p_scores.append(1)
          
    
    if epoch % print_every == 0:
        print_loss_avg = print_loss_total / print_every
        print_loss_total = 0
        print('%s (%d) %.4f' % (help_fn.time_slice(start, (epoch+1) / num_iters), epoch, print_loss_avg))
        print("LSTM loss    " + str(model_loss.data.cpu().numpy()) + "    Gen loss    " + str(gen_loss.data.cpu().numpy()) + "    Dis loss    " + str(dis_loss.data.cpu().numpy()) + "    Fin loss    " + str(fin_loss.data.cpu().numpy()))
        print(" Test Accuracy    " + str(corr/len(a_scores)) + "    f1 score    " + str(f1_score(p_scores, a_scores)))
        
        acc = corr/len(a_scores)
        
        if acc > max_acc :
          max_acc = acc
          torch.save(model.state_dict(), "./model_weights.pt")
          torch.save(netG.state_dict(), "./netG_weights.pt")
          torch.save(netD.state_dict(), "./netD_weights.pt")
          torch.save(net_final.state_dict(), "./netfinal_weights.pt")
          print("Model Saved!")

Beginning Model Training.

3m 16s (- 29m 25s) (0) 1423.5413
LSTM loss    109.96675    Gen loss    272.357    Dis loss    67.64587    Fin loss    108.48346
 Test Accuracy    0.7275362318840579    f1 score    0.8183925811437404
6m 19s (- 25m 16s) (1) 550.9258
LSTM loss    193.8971    Gen loss    483.91293    Dis loss    120.182976    Fin loss    188.78645
 Test Accuracy    0.7194202898550724    f1 score    0.8062449959967974
9m 5s (- 21m 11s) (2) 1633.8693
LSTM loss    572.37994    Gen loss    1454.5221    Dis loss    360.27142    Fin loss    546.337
 Test Accuracy    0.7089855072463768    f1 score    0.7744833782569632
