This first block imports all the required dependencies and loads our models pretrained weights

In [1]:
!pip install omegaconf
!pip install hydra-core

import torch
import json
import omegaconf
import random
import tqdm

roberta = torch.hub.load('pytorch/fairseq', 'roberta.large.mnli')
roberta.register_classification_head('entailment', num_classes=2)
roberta.cuda()



Using cache found in /root/.cache/torch/hub/pytorch_fairseq_master


RobertaHubInterface(
  (model): RobertaModel(
    (encoder): RobertaEncoder(
      (sentence_encoder): TransformerSentenceEncoder(
        (dropout_module): FairseqDropout()
        (embed_tokens): Embedding(50265, 1024, padding_idx=1)
        (embed_positions): LearnedPositionalEmbedding(514, 1024, padding_idx=1)
        (layers): ModuleList(
          (0): TransformerSentenceEncoderLayer(
            (dropout_module): FairseqDropout()
            (activation_dropout_module): FairseqDropout()
            (self_attn): MultiheadAttention(
              (dropout_module): FairseqDropout()
              (k_proj): Linear(in_features=1024, out_features=1024, bias=True)
              (v_proj): Linear(in_features=1024, out_features=1024, bias=True)
              (q_proj): Linear(in_features=1024, out_features=1024, bias=True)
              (out_proj): Linear(in_features=1024, out_features=1024, bias=True)
            )
            (self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwi

This is our function for loading in and preprocessing our data

In [2]:
def createDataset(path):
    with open(path) as f:
        data = json.load(f)
        conversations = []
        hypothesis = []
        entailment = []
        for example in data:
            entailment.append(int(example["entailment"]))
            convo = []
            for dialog in example["items"][0]["items"]:
                if(dialog['speaker'] == "B"):
                    convo += ("SpeakerB says ", dialog['text'])
                if(dialog['speaker'] == "A"):
                    convo += ("SpeakerA says ", dialog['text'])
            convo = ' '.join(map(str, convo)) 
            conversations.append(convo)
            hypothesis.append(str(example["items"][1]["text"]))
            
    tokens = []
    maxX = 0 
    
    for i in range(len(conversations)):
        tokens.append(roberta.encode(hypothesis[i], conversations[i]))
        if(len(tokens[i]) > maxX):
            maxX = len(tokens[i])
            
    x = torch.ones((len(tokens),maxX),dtype=torch.long)
    for i in range(len(conversations)):
        x[i,:len(tokens[i])] = torch.tensor(tokens[i])
                
    y = torch.tensor(entailment)
    return x, y

This is our training loop, Which trains on all the data and shows the accuracy for the first 100 shuffled datapoints 

In [3]:
x,y = createDataset("dev_set.json")


shuffleIDX=torch.randperm(x.shape[0])

x = x[shuffleIDX]
y = y[shuffleIDX]

x_val = x[:100].cuda()
y_val = y[:100].cuda()

x_train = x[100:].cuda()
y_train = y[100:].cuda()

x_train = torch.chunk(x_train, 70, dim=0)
y_train = torch.chunk(y_train, 70, dim=0)

loss_function = torch.nn.CrossEntropyLoss()
params = [v for n,v in roberta.named_parameters() if '.entailment.' in n]
optimizer = torch.optim.AdamW(params,lr=1e-5)

epochs =  5

for j in range(1,epochs+1):
    roberta.eval()
    correct = 0
    for i in range(x_val.shape[0]):
      if (y_val[i] == torch.argmax(roberta.predict('entailment', x_val[i]),dim=1)):
        correct += 1
    print("Epoch ",str(j),"/",str(epochs) + " //-----// accuracy :", str(correct/x_val.shape[0]))
    roberta.train()

    for x_batch,y_batch in zip(x_train,y_train):

      # Forward pass
      roberta.zero_grad()
      scores = roberta.predict('entailment',x_batch)
      loss = loss_function(scores, y_batch)
      
      # Backward pass
      optimizer.zero_grad()
      loss.backward()
      optimizer.step()
      #print('\tLoss:%s ' % (str(loss.item())))

# Your code ends here
print("roBERTa Trained")



Epoch  1 / 5 //-----// accuracy : 0.65
Epoch  2 / 5 //-----// accuracy : 0.7
Epoch  3 / 5 //-----// accuracy : 0.7
Epoch  4 / 5 //-----// accuracy : 0.7
Epoch  5 / 5 //-----// accuracy : 0.71
roBERTa Trained


This cell will output the accuracy of 

In [4]:
x = x.cuda()
y = y.cuda()
roberta.eval()
correct = 0
for i in range(x.shape[0]):
  if (y[i] == torch.argmax(roberta.predict('entailment', x[i]),dim=1)):
    correct += 1
print("Accuracy :", str(correct/x.shape[0]))

Accuracy : 0.675


In [5]:
def TestModel(path,model):
  ids = []
  with open(path) as f:
      data = json.load(f)
      conversations = []
      hypothesis = []
      entailment = []
      for example in data:
          ids.append(example["id"])
          convo = []
          for dialog in example["items"][0]["items"]:
              if(dialog['speaker'] == "B"):
                  convo += ("SpeakerB says ", dialog['text'])
              if(dialog['speaker'] == "A"):
                  convo += ("SpeakerA says ", dialog['text'])
          convo = ' '.join(map(str, convo)) 
          conversations.append(convo)
          hypothesis.append(str(example["items"][1]["text"]))

  tokens = []

  maxX = 0
  for i in range(len(conversations)):
      tokens.append(roberta.encode(hypothesis[i], conversations[i]))
      if(len(tokens[i]) > maxX):
          maxX = len(tokens[i])
          
  x = torch.ones((len(tokens),maxX),dtype=torch.long)
  for i in range(len(conversations)):
      x[i,:len(tokens[i])] = torch.tensor(tokens[i])
                  
  x = x.cuda()

  model.eval()
  predictions = []
  for i in range(x.shape[0]):
    temp = {}
    temp["id"] = ids[i]    
    temp["pred"] = torch.argmax(roberta.predict('entailment', x[i]),dim=1).item()    
    predictions.append(temp)
  with open("ConvEnt_24_preds.json", "w") as outfile:  
      json.dump(predictions, outfile)
  print("Testing Done")

In [6]:
##PATH_TO_TEST_SET is the path to the file containing testing dataset will output file named "ConvEnt_24_preds.json"
##Sometimes it takes a few minutes for the output file to show up

PATH_TO_TEST_SET = "/content/dev_set.json"

TestModel(PATH_TO_TEST_SET,roberta)



Testing Done
