<a href="https://colab.research.google.com/github/dhruvdcoder/HyperA/blob/master/Baseline/baseline_lstm.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import torch
from torchtext import data,datasets

In [0]:
device = torch.device("cuda:0")

In [0]:
inputs = data.Field(tokenize='spacy')
answers = data.Field(sequential=False)

In [0]:
train, dev, test = datasets.MultiNLI.splits(inputs, answers)

In [0]:
inputs.build_vocab(train, dev, test)
inputs.vocab.load_vectors('glove.6B.100d')
answers.build_vocab(train)

train_iter, dev_iter, test_iter = data.BucketIterator.splits((train, dev, test), batch_size=128, device=device)

.vector_cache/glove.6B.zip: 862MB [01:32, 13.1MB/s]                           
 99%|█████████▉| 397614/400000 [00:15<00:00, 26161.32it/s]

In [0]:
import time
import torch.optim as optim

def train(model):
  criterion = nn.CrossEntropyLoss()
  optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-5)

  iterations = 0
  start = time.time()
  best_dev_acc = -1
  header = '  Time Epoch Iteration Progress    (%Epoch)   Loss   Dev/Loss     Accuracy  Dev/Accuracy'
  dev_log_template = ' '.join('{:>6.0f},{:>5.0f},{:>9.0f},{:>5.0f}/{:<5.0f} {:>7.0f}%,{:>8.6f},{:8.6f},{:12.4f},{:12.4f}'.split(','))
  log_template =     ' '.join('{:>6.0f},{:>5.0f},{:>9.0f},{:>5.0f}/{:<5.0f} {:>7.0f}%,{:>8.6f},{},{:12.4f},{}'.split(','))
  print(header)

  for epoch in range(50):
    train_iter.init_epoch()
    n_correct, n_total = 0, 0
    for batch_idx, batch in enumerate(train_iter):

        # switch model to training mode, clear gradient accumulators
        model.train(); optimizer.zero_grad()

        iterations += 1

        # forward pass
        answer = model(batch)

        # calculate accuracy of predictions in the current batch
        n_correct += (torch.max(answer, 1)[1].view(batch.label.size()) == batch.label).sum().item()
        n_total += batch.batch_size
        train_acc = 100. * n_correct/n_total

        # calculate loss of the network output with respect to training labels
        loss = criterion(answer, batch.label)

        # backpropagate and update optimizer learning rate
        loss.backward(); optimizer.step()

        if iterations % 1000 == 0:

          # print progress message
          print(log_template.format(time.time()-start, epoch, iterations, 1+batch_idx, len(train_iter), 
                                    100. * (1+batch_idx) / len(train_iter), loss.item(), ' '*8, n_correct/n_total*100, ' '*12))

    # switch model to evaluation mode
    model.eval(); dev_iter.init_epoch()

    # calculate accuracy on validation set
    n_dev_correct, dev_loss = 0, 0
    with torch.no_grad():
      for dev_batch_idx, dev_batch in enumerate(dev_iter):
        answer = model(dev_batch)
        n_dev_correct += (torch.max(answer, 1)[1].view(dev_batch.label.size()) == dev_batch.label).sum().item()
        dev_loss = criterion(answer, dev_batch.label)
    dev_acc = 100. * n_dev_correct / len(dev)

    print(dev_log_template.format(time.time()-start,
        epoch, iterations, 1+batch_idx, len(train_iter),
        100. * (1+batch_idx) / len(train_iter), loss.item(), dev_loss.item(), train_acc, dev_acc))

In [0]:
import torch.nn as nn

class Encoder(nn.Module):
  def __init__(self,config):
    super(Encoder, self).__init__()
    self.config = config
    input_size = config['d_embed']
#     self.rnn = nn.RNN(input_size=input_size, hidden_size=config['d_hidden'],num_layers=config['n_layers'])
    self.rnn = nn.LSTM(input_size=input_size, hidden_size=config['d_hidden'],num_layers=config['n_layers'])
  
  def forward(self,inputs):
    #bsz = inputs.size()[1] 
    #h0 = c0 = inputs.new_zeros(config['n_layers'], bsz, config['d_hidden'])
#     outputs, hidden = self.rnn(inputs)
    outputs, (hidden,cell) = self.rnn(inputs)
    return outputs[-1]

class MultiNLIClassifier(nn.Module):
  def __init__(self,config):
    super(MultiNLIClassifier, self).__init__()
    self.config = config
    self.embed = nn.Embedding(config['n_embed'], config['d_embed'])
    self.encoder = Encoder(config)
#     self.relu = nn.ReLU()
    self.out = nn.Linear(2*config['d_hidden'], config['d_out'])
  
  def forward(self,batch):
    pre_emb = self.embed(batch.premise)
    hyp_emb = self.embed(batch.hypothesis)
    if self.config['freeze_emb']:
      pre_emb =pre_emb.detach()
      hyp_emb =hyp_emb.detach()
    prem = self.encoder(pre_emb)
    hypo = self.encoder(hyp_emb)
    logits = self.out(torch.cat([prem,hypo],1))

    return logits

In [0]:
import torch.nn as nn

class Encoder(nn.Module):
  def __init__(self,config):
    super(Encoder, self).__init__()
    self.config = config
    #self.rnn = nn.RNN(input_size=config['d_embed'], hidden_size=config['d_hidden'],num_layers=config['n_layers'])
    self.rnn = nn.LSTM(input_size=config['d_embed'], hidden_size=config['d_hidden'],num_layers=config['n_layers'])
  
  def forward(self,inputs):
    #bsz = inputs.size()[1]
    #outputs, hidden = self.rnn(inputs)
    outputs, (hidden,cell) = self.rnn(inputs)
    return outputs
  
class MultiNLIClassifier(nn.Module):
  def __init__(self,config):
    super(MultiNLIClassifier, self).__init__()
    self.config = config
    self.embed = nn.Embedding(config['n_embed'], config['d_embed'])
    self.encoder = Encoder(config)
    
#     self.tanh = nn.Tanh()
#     self.proj_p = nn.Linear(config['d_hidden'], config['d_hidden'])
#     self.proj_h = nn.Linear(config['d_hidden'], config['d_hidden'])
#     self.W = nn.Parameter(torch.randn(config['d_hidden'], 1))
#     self.register_parameter('W', self.W)
#     self.Wp = nn.Linear(config['d_hidden'], config['d_hidden'])
#     self.Wh = nn.Linear(config['d_hidden'], config['d_hidden'])

    
    
    self.out = nn.Linear(2*config['d_hidden'], config['d_out'])
  
  def forward(self,batch):
#     print(batch.premise)
    pre_emb = self.embed(batch.premise)
    hyp_emb = self.embed(batch.hypothesis)
    if self.config['freeze_emb']:
      pre_emb =pre_emb.detach()
      hyp_emb =hyp_emb.detach()
    prem = self.encoder(pre_emb).transpose(0,1)
    hypo = self.encoder(hyp_emb)[-1].unsqueeze(2)
    
#     M = self.tanh(self.proj_p(prem)+self.proj_h(hypo[None,:,:]))
#     alpha = nn.functional.softmax(torch.bmm(M, self.W.unsqueeze(0).expand(prem.size(0), *self.W.size())).squeeze(-1))
#     r = torch.bmm(prem.permute(1,2,0),alpha.transpose(0,1).unsqueeze(2)).squeeze(2)
#     h = self.tanh(self.Wp(r)+self.Wh(hypo))
#     logits = self.out(h)
    
    #Attention
    M = torch.bmm(prem,hypo)
    alpha = nn.functional.softmax(M,1)
    r = torch.bmm(prem.transpose(1,2),alpha)
    logits = self.out(torch.cat([r.squeeze(2),hypo.squeeze(2)],1))
    
    return logits

In [0]:
config = {}
config['n_embed'] = len(inputs.vocab)
config['d_embed'] = 100
config['d_hidden'] = 300
config['d_out'] = len(answers.vocab)
config['n_layers'] = 2
config['freeze_emb'] = 0

model = MultiNLIClassifier(config)
print(model)
model.embed.weight.data.copy_(inputs.vocab.vectors)
model.to(device)
train(model)

MultiNLIClassifier(
  (embed): Embedding(93537, 100)
  (encoder): Encoder(
    (rnn): LSTM(100, 300, num_layers=2)
  )
  (out): Linear(in_features=600, out_features=4, bias=True)
)
  Time Epoch Iteration Progress    (%Epoch)   Loss   Dev/Loss     Accuracy  Dev/Accuracy
    65     0      1000  1000/3068       33% 1.079349               34.4898             
   130     0      2000  2000/3068       65% 0.920456               41.7711             
   195     0      3000  3000/3068       98% 0.935929               46.1693             
   200     0      3068  3068/3068      100% 0.910801 0.993797      46.4041      49.4142
   261     1      4000   932/3068       30% 0.932720               58.5418             
   326     1      5000  1932/3068       63% 0.837321               58.7583             
   391     1      6000  2932/3068       96% 0.896943               58.8538             
   401     1      6136  3068/3068      100% 0.794908 0.948965      58.8749      53.3164
   458     2      7000   8