# Logical Reasoning using Transformers with Dependency Parsed Input



In [None]:
# Mount Google Drive under /content/drive so files stored there can be read by later cells.
from google.colab import drive
drive.mount('/content/drive')

MessageError: ignored

Setting up GPU

In [None]:
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Select the Runtime > "Change runtime type" menu to enable a GPU accelerator, ')
  print('and then re-execute this cell.')
else:
  print(gpu_info)



Thu Dec  9 23:49:31 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 495.44       Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla P100-PCIE...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   40C    P0    32W / 250W |   4897MiB / 16280MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

Load Data

In [None]:
import os
import json
#import torch
from tqdm import tqdm
import numpy as np
import pandas as pd

# Project root on the mounted Google Drive; the dataset paths below are built from it.
drive_path = '/content/drive/MyDrive/FinalProject/'

# Each model input is the context and question concatenated with one candidate
# answer; the intended output is a single confidence for that candidate.

def load_data(path, test, num_choices=4):
    """Flatten a dependency-annotated question file into one row per answer choice.

    The JSON file at ``path`` must contain the column-oriented mappings
    ``context``, ``question``, ``dep_context``, ``answers`` and ``dep_answers``
    (plus ``label`` for labelled splits), each keyed by the stringified example
    index ``"0"``, ``"1"``, ...

    For every example and every answer choice, one row is produced whose
    ``prompt`` is::

        [CLS]<context>[SEP]<question>[DEP]<dep_context>[SEP]<answer>[DEP]<dep_answer>

    Args:
        path: Path of the JSON file to read.
        test: When true, gold labels are not read and every row gets label ``[0]``.
        num_choices: Number of answer choices taken from each example
            (defaults to 4, the value that used to be hard-coded).

    Returns:
        A ``pandas.DataFrame`` with columns ``prompt`` (str) and ``label``
        (a one-element list, ``[1]`` for the gold answer and ``[0]`` otherwise),
        holding ``num_choices`` consecutive rows per example.

    Raises:
        KeyError: If an expected field or example index is missing.
        IndexError: If an example has fewer than ``num_choices`` answers.
    """
    # The context manager guarantees the file handle is released.
    with open(path) as handle:
        data = json.load(handle)

    # Collect plain records and build the frame once; appending to a DataFrame
    # row by row is quadratic in the number of rows.
    records = []
    for index in range(len(data['context'])):
        key = str(index)
        prompt = (
            '[CLS]' + data['context'][key]
            + '[SEP]' + data['question'][key]
            + '[DEP]' + data['dep_context'][key]
            + '[SEP]'
        )
        answers = data['answers'][key]
        dep_answers = data['dep_answers'][key]
        # Unlabelled (test) files may not carry a 'label' field, so only touch
        # it for labelled splits.
        gold = None if test else data['label'][key]

        for choice in range(num_choices):
            answer = answers[choice] + '[DEP]' + dep_answers[choice]
            is_gold = (not test) and choice == gold
            records.append({
                'prompt': prompt + answer,
                'label': [1] if is_gold else [0],
            })

    return pd.DataFrame(records, columns=['prompt', 'label'])
# One frame per split; only the test split is loaded without gold labels.
train_data = load_data(path=drive_path + 'reclor_data_with_dependencies/train.json', test=False)
val_data = load_data(path=drive_path + 'reclor_data_with_dependencies/val.json', test=False)
test_data = load_data(path=drive_path + 'reclor_data_with_dependencies/test.json', test=True)

  return array(a, dtype, copy=False, order=order)


In [None]:
# Install the `transformers` package, which supplies the RoBERTa tokenizer and model classes imported in later cells.
!pip install transformers



Initialize tokenizer

In [None]:
from transformers import RobertaTokenizer
# Load the roberta-base vocabulary but register bracketed marker strings as the
# special tokens; '[CLS]' and '[SEP]' are the literals load_data writes into
# every prompt.
tokenizer = RobertaTokenizer.from_pretrained(
    'roberta-base',
    unk_token='[UNK]',
    sep_token='[SEP]',
    pad_token='[PAD]',
    cls_token='[CLS]',
    bos_token='[CLS]',
)

# '[DEP]' separates raw text from its dependency parse in the prompts, so it is
# registered as an extra special token as well.
special_tokens_dict = dict(additional_special_tokens=['[DEP]'])
num_added_toks = tokenizer.add_special_tokens(special_tokens_dict)


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Tokenize the data

In [None]:
import torch
from tqdm import tqdm
from torch.utils.data import TensorDataset, Dataset, DataLoader, RandomSampler, SequentialSampler

# Token cap handed to the tokenizer as max_length; longer inputs are truncated.
max_len = 512
# Number of (prompt + answer) rows per DataLoader batch.
batch_size = 1

def preprocess(input, tokenizer, max_len, batch_size, data_class='train'):
    """Tokenize the prompts of a frame and wrap them in a ``DataLoader``.

    Args:
        input: DataFrame with a ``prompt`` column and, for labelled splits,
            a ``label`` column (as produced by ``load_data``).
        tokenizer: Callable that accepts a list of strings together with the
            ``padding``, ``max_length``, ``truncation`` and ``return_tensors``
            keywords and returns a mapping holding ``input_ids`` and
            ``attention_mask`` tensors.
        max_len: Maximum sequence length; longer prompts are truncated.
        batch_size: Number of rows per batch.
        data_class: ``'train'`` (rows are shuffled), ``'test'`` (no labels are
            attached) or any other value such as ``'val'`` (sequential order,
            labels attached).

    Returns:
        A ``DataLoader`` whose batches are ``(input_ids, attention_mask, labels)``
        for labelled splits and ``(input_ids, attention_mask)`` for ``'test'``.
    """
    encoded_input = tokenizer(
        input['prompt'].values.tolist(),
        padding=True,
        max_length=max_len,
        truncation=True,
        return_tensors="pt",
    )
    tensors = [encoded_input['input_ids'], encoded_input['attention_mask']]

    # Only labelled splits contribute a label tensor. Previously the 'test'
    # path reached TensorDataset with `labels` unbound and raised.
    if data_class != 'test':
        tensors.append(torch.tensor(input['label'].values.tolist()))
    dataset_tensor = TensorDataset(*tensors)

    # Shuffle only while training so evaluation order stays reproducible.
    if data_class == "train":
        sampler = RandomSampler(dataset_tensor)
    else:
        sampler = SequentialSampler(dataset_tensor)

    return DataLoader(dataset_tensor, sampler=sampler, batch_size=batch_size)

train_dataloader = preprocess(train_data, tokenizer, max_len, batch_size, data_class="train")
val_dataloader = preprocess(val_data, tokenizer, max_len, batch_size, data_class="val")
#test_dataloader = preprocess(test_data, tokenizer, max_len, batch_size, data_class="test")

Train and Eval

In [None]:
from transformers import RobertaConfig, RobertaForSequenceClassification, AdamW


def Eval(model, dataloader):
    """Print and return the classification accuracy of ``model`` on ``dataloader``.

    Args:
        model: Module whose forward call accepts ``input_ids`` plus the
            ``attention_mask`` and ``labels`` keywords and returns an object
            with a ``logits`` attribute of shape (batch, num_classes).
        dataloader: Iterable of ``(input_ids, attention_mask, labels)`` batches.

    Returns:
        The fraction of rows whose highest-scoring class equals the label
        (``nan`` when the dataloader yields no rows).
    """
    model.eval()
    # Run on whatever device the model lives on instead of assuming CUDA.
    device = next(model.parameters()).device
    predictions, true_labels = [], []

    # Inference only: no_grad must wrap the forward passes as a context manager,
    # otherwise every step still builds an autograd graph.
    with torch.no_grad():
        for batch in tqdm(dataloader):
            input_ids, attention_mask, labels = batch[0], batch[1], batch[2]
            outputs = model(
                input_ids.to(device),
                attention_mask=attention_mask.to(device),
                labels=labels.to(device),
            )

            # Softmax is monotonic, so the arg-max of the raw logits is the
            # top-1 class.
            predictions.extend(outputs.logits.argmax(dim=1).tolist())
            # Labels may arrive as (batch, 1); flatten them so each entry is a
            # plain int. Comparing an int prediction with a one-element list is
            # always False, which pinned the reported accuracy at 0.0.
            true_labels.extend(labels.reshape(-1).tolist())

            torch.cuda.empty_cache()
            gc.collect()

    num_correct = sum(
        1 for predicted, expected in zip(predictions, true_labels) if predicted == expected
    )
    # Avoid a ZeroDivisionError on an empty dataloader.
    accuracy = float(num_correct) / float(len(true_labels)) if true_labels else float('nan')

    print("\nAccuracy: %s" % accuracy)
    return accuracy

def Train(model, train_data, lr, n_epoch, dev_data):
    """Fine-tune ``model`` and evaluate it on the dev set after every epoch.

    Args:
        model: Module whose forward call accepts ``input_ids`` plus the
            ``attention_mask`` and ``labels`` keywords and returns an object
            with a ``loss`` attribute.
        train_data: Iterable of ``(input_ids, attention_mask, labels)`` batches.
        lr: Learning rate handed to the AdamW optimizer.
        n_epoch: Number of passes over ``train_data``.
        dev_data: Dataloader forwarded to ``Eval`` at the end of each epoch.
    """
    print("Start Training!")
    optimizer = AdamW(model.parameters(), lr=lr)
    # Run on whatever device the model lives on instead of assuming CUDA.
    device = next(model.parameters()).device

    for epoch in range(n_epoch):

        print(f"\nEpoch {epoch}")

        # Eval() switches the model to eval mode, so re-enable training mode
        # at the start of every epoch.
        model.train()
        tr_loss = 0
        nb_tr_steps = 0

        for batch in tqdm(train_data):
            optimizer.zero_grad()

            input_ids = batch[0].to(device)
            attention_mask = batch[1].to(device)
            labels = batch[2].to(device)

            outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
            # Under DataParallel the loss comes back with one entry per replica;
            # mean() reduces it and leaves a scalar loss unchanged.
            loss = outputs.loss.mean()
            loss.backward()
            optimizer.step()
            tr_loss += float(loss)

            torch.cuda.empty_cache()
            gc.collect()
            nb_tr_steps += 1

        # Report inside the epoch loop so every epoch is logged and evaluated.
        # Previously these lines ran once after the loop, so only the last
        # epoch was reported and n_epoch=0 raised a NameError.
        mean_loss = tr_loss / nb_tr_steps if nb_tr_steps else float('nan')
        print("Train loss on epoch {}: {}\n".format(epoch, mean_loss))

        # Dev set evaluation
        print("Evaluate on the dev set:")
        Eval(model, dev_data)

import gc

# Load the roberta-base configuration and weights into a sequence-classification model.
config = RobertaConfig.from_pretrained('roberta-base')
print(config)
model = RobertaForSequenceClassification.from_pretrained('roberta-base', config=config)
# Extra special tokens were added to the tokenizer, so the embedding matrix is
# resized to the tokenizer's current vocabulary size.
model.resize_token_embeddings(len(tokenizer))
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
n_gpu = torch.cuda.device_count()

learning_rate = 2e-5
num_epoch = 1

# Note: a bare `torch.no_grad()` statement has no effect (it only takes effect
# as a context manager or decorator), so none is issued here.

torch.cuda.empty_cache()
gc.collect()

# Place the model on the detected device; wrap it for multi-GPU use only when
# more than one GPU is visible.
model.to(device)
if n_gpu > 1:
    model = torch.nn.DataParallel(model)

Train(model, train_dataloader, learning_rate, num_epoch, val_dataloader)

RobertaConfig {
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "transformers_version": "4.13.0",
  "type_vocab_size": 1,
  "use_cache": true,
  "vocab_size": 50265
}



Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.bias', 'roberta.pooler.dense.weight', 'lm_head.dense.weight', 'roberta.pooler.dense.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.

Start Training!

Epoch 0


100%|██████████| 18552/18552 [1:21:58<00:00,  3.77it/s]


Train loss on epoch 0: 0.5700470470735396

Evaluate on the dev set:


100%|██████████| 2000/2000 [05:14<00:00,  6.35it/s]


Accuracy: 0.0





In [None]:
print("Evaluate on the dev set in test taking form:")
def EvalTest(model, dataloader):
    """Print and return the classification accuracy of ``model`` on ``dataloader``.

    NOTE(review): this mirrors ``Eval`` and scores every (prompt, answer) row
    independently. Picking the best of the candidate answers per question,
    which the "test taking form" banner suggests, is not implemented here.
    Confirm the intended behavior.

    Args:
        model: Module whose forward call accepts ``input_ids`` plus the
            ``attention_mask`` and ``labels`` keywords and returns an object
            with a ``logits`` attribute of shape (batch, num_classes).
        dataloader: Iterable of ``(input_ids, attention_mask, labels)`` batches.

    Returns:
        The fraction of rows whose highest-scoring class equals the label
        (``nan`` when the dataloader yields no rows).
    """
    model.eval()
    # Run on whatever device the model lives on instead of assuming CUDA.
    device = next(model.parameters()).device
    predictions, true_labels = [], []

    # Inference only: no_grad must wrap the forward passes as a context manager.
    with torch.no_grad():
        for batch in tqdm(dataloader):
            input_ids, attention_mask, labels = batch[0], batch[1], batch[2]
            outputs = model(
                input_ids.to(device),
                attention_mask=attention_mask.to(device),
                labels=labels.to(device),
            )

            # Softmax is monotonic, so the arg-max of the raw logits is the
            # top-1 class.
            predictions.extend(outputs.logits.argmax(dim=1).tolist())
            # Flatten (batch, 1) labels to plain ints; comparing an int with a
            # one-element list is always False and forced the accuracy to 0.0.
            true_labels.extend(labels.reshape(-1).tolist())

            torch.cuda.empty_cache()
            gc.collect()

    num_correct = sum(
        1 for predicted, expected in zip(predictions, true_labels) if predicted == expected
    )
    # Avoid a ZeroDivisionError on an empty dataloader.
    accuracy = float(num_correct) / float(len(true_labels)) if true_labels else float('nan')

    print("\nAccuracy: %s" % accuracy)
    return accuracy
EvalTest(model, val_dataloader)

Evaluate on the dev set:


100%|██████████| 500/500 [01:22<00:00,  6.07it/s]


Accuracy: 0.0





In [None]:
# Debug: show the attention mask (second tensor of each batch tuple) of the first validation batch.
next(iter(val_dataloader))[1]

tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
         1, 1, 1, 1, 1, 1, 1

In [None]:
# Debug: print the attention-mask shape of every training batch (one output line per batch).
for t in iter(train_dataloader):
  print(t[1].shape)