## **BOOLEAN QUESTION ANSWERING WITH BERT**


In [None]:
import json
import numpy as np 

import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.nn.functional as F

# ! pip install datasets transformers # uncomment to install if needed
import transformers
from datasets import Dataset, load_metric

### **DEFINE MODEL AND HYPERPARAMETERS**


In [None]:
class BoolQA(nn.Module):
    """Pretrained encoder with a two-way linear head for yes/no question answering.

    The second element of the encoder's tuple output (named `pooler_output`
    here) is passed through dropout and a linear layer that emits one logit
    per class (index 0 and index 1).
    """

    def __init__(self, bert_model, freeze_bert=False):
        """Load the pretrained encoder and build the classification head.

        Args:
            bert_model: Model name or path handed to
                `transformers.AutoModel.from_pretrained`.
            freeze_bert: When True, every encoder parameter has
                `requires_grad` switched off so that only the classification
                head receives gradient updates.
        """
        super(BoolQA, self).__init__()
        self.bert_layer = transformers.AutoModel.from_pretrained(bert_model)

        # Honour the flag: previously it was accepted but had no effect.
        if freeze_bert:
            for param in self.bert_layer.parameters():
                param.requires_grad = False

        # Read the head's input width from the encoder config instead of
        # hard-coding 1024, so checkpoints of other sizes also work.
        self.cls_layer = nn.Linear(self.bert_layer.config.hidden_size, 2)
        self.dropout = nn.Dropout(0.1)

    def forward(self, input_ids, attention_mask, token_type_ids):
        """Return the two class logits for each example in the batch.

        Args:
            input_ids: Token ids produced by the tokenizer.
            attention_mask: Mask produced by the tokenizer.
            token_type_ids: Segment ids produced by the tokenizer.

        Returns:
            Tensor of logits whose last dimension has size 2.
        """
        # Keyword arguments keep the call independent of the encoder's
        # positional parameter order.
        _, pooler_output = self.bert_layer(
            input_ids=input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            return_dict=False,
        )

        logits = self.cls_layer(self.dropout(pooler_output))

        return logits

In [None]:
class QATrainer(transformers.Trainer):
  """Trainer whose loss is the notebook-level `loss_func` applied to the model's two logits."""

  def compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=None):
    """Compute the binary loss for one batch.

    Args:
      model: Module called as `model(**inputs)`; it returns the class logits.
      inputs: Batch dict. Its 'labels' entry is removed before the forward
        pass and used as the target.
      return_outputs: When True, return `(loss, outputs)` instead of `loss`.
      num_items_in_batch: Unused. Accepted so the override also works with
        transformers releases that pass this argument.

    Returns:
      The loss, or a `(loss, outputs)` tuple when `return_outputs` is True.
    """
    labels = inputs.pop('labels')
    outputs = model(**inputs)

    # `loss_func` is a BCE-with-logits loss, which applies a sigmoid itself.
    # Feeding it softmax probabilities squashes the scores twice. Since
    # sigmoid(z1 - z0) == softmax(z)[1], the logit difference is the correct
    # input: it is the log-odds of class 1.
    positive_logit = outputs[..., 1] - outputs[..., 0]

    loss = loss_func(positive_logit, labels.to(positive_logit.dtype))
    return (loss, outputs) if return_outputs else loss

In [None]:
def preprocess_fn(data, max_length=512):
  """Tokenize a batch of examples for `Dataset.map`.

  Reads the module-level `key_1`, `key_2` and `tokenizer`. When `key_2` is
  None only the `key_1` column is tokenized; otherwise the two columns are
  tokenized as a pair.

  Args:
    data: Batch mapping column names to values.
    max_length: Maximum number of tokens kept per example. It is passed
      explicitly because `truncation=True` alone performs no truncation when
      the tokenizer has no predefined maximum length.

  Returns:
    Whatever `tokenizer` returns for the batch.
  """
  if key_2 is None:
    return tokenizer(data[key_1], truncation=True, max_length=max_length)
  else:
    return tokenizer(data[key_1], data[key_2], truncation=True, max_length=max_length)

In [None]:
# Checkpoint used for both the encoder weights and the tokenizer.
model_checkpoint = "deepset/bert-large-uncased-whole-word-masking-squad2"
# Per-device batch size for training and evaluation.
batch_size = 4
# Longest token sequence fed to the encoder (BERT's position-embedding limit).
max_seq_length = 512
# Binary cross-entropy on logits; it applies the sigmoid internally.
loss_func = nn.BCEWithLogitsLoss()

# This checkpoint ships no predefined maximum length, so `truncation=True`
# would silently do nothing; set the limit on the tokenizer explicitly.
tokenizer = transformers.AutoTokenizer.from_pretrained(
    model_checkpoint, model_max_length=max_seq_length
)

### **PRE-PROCESS DATA**



In [None]:
def _read_records(path):
  """Load one split's JSON records and coerce every 'label' to 1.0 / 0.0."""
  with open(path, 'r') as handle:
    records = json.load(handle)
  for record in records:
    record['label'] = 1. if record['label'] else 0.
  return records


def _as_dataset(records):
  """Pivot a list of record dicts into a column-wise `Dataset`, using the first record's keys."""
  columns = {}
  for field in records[0]:
    columns[field] = [record[field] for record in records]
  return Dataset.from_dict(columns)


# Raw records (labels already converted to floats) for both splits.
train_data = _read_records('music_QA_train.json')
dev_data = _read_records('music_QA_dev.json')

train_dataset = _as_dataset(train_data)
dev_dataset = _as_dataset(dev_data)
metric = load_metric('glue', 'qnli')

# Columns read by `preprocess_fn` when tokenizing.
key_1 = 'question'
key_2 = 'passage'

# Tokenize in batches and drop the raw text columns afterwards.
encoded_train_dataset = train_dataset.map(
    preprocess_fn,
    batched=True,
    remove_columns=['question', 'passage'],
)
encoded_dev_dataset = dev_dataset.map(
    preprocess_fn,
    batched=True,
    remove_columns=['question', 'passage'],
)

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=1848.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.





HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




### **Instantiate and train model**

In [None]:
# Training configuration: outputs go to the "Test" directory and evaluation
# runs at the end of every epoch.
args = transformers.TrainingArguments(
    output_dir="Test",
    evaluation_strategy="epoch",
    num_train_epochs=5,
    learning_rate=1e-5,
    weight_decay=0.01,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
)

# Encoder is left trainable (freeze_bert=False).
model = BoolQA(bert_model=model_checkpoint, freeze_bert=False)

trainer = QATrainer(
    model=model,
    args=args,
    train_dataset=encoded_train_dataset,
    eval_dataset=encoded_dev_dataset,
    tokenizer=tokenizer,
)

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=1340669807.0, style=ProgressStyle(descr…




Some weights of the model checkpoint at deepset/bert-large-uncased-whole-word-masking-squad2 were not used when initializing BertModel: ['qa_outputs.weight', 'qa_outputs.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


### **Train**

In [None]:
# Run the fine-tuning loop with the arguments and datasets given to `trainer`.
trainer.train()

### **Results**

In [None]:
# Unlabelled test records, read from the JSON file as-is.
with open('music_QA_test.json', 'r') as test_file:
    test_data = json.load(test_file)

In [None]:
# Predict a 0/1 label for every test record, one example at a time.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
model.to(device)
model.eval()  # switch dropout off; training leaves the model in train mode

answers = []
with torch.no_grad():  # inference only, so no autograd graph is needed
  for record in test_data:
    encoded = tokenizer(
        record['question'],
        record['passage'],
        truncation=True,
        max_length=512,  # explicit limit; truncation alone is a no-op for this tokenizer
        return_tensors='pt',
    ).to(device)
    logits = model(**encoded)
    # Softmax is monotonic, so the argmax of the logits is the predicted class.
    answers.append(logits.argmax(dim=-1).item())

In [None]:
import pandas as pd

# Pair each test record's id with the label predicted for it and write the
# two-column table to disk without the DataFrame index.
ids = [record['idx'] for record in test_data]
data = dict(idx=ids, label=answers)
df = pd.DataFrame(data)

df.to_csv('submissions.csv', index=False)

In [None]:
# Predict a 0/1 label for every training record, one example at a time.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
model.to(device)
model.eval()  # switch dropout off; training leaves the model in train mode

answers = []
with torch.no_grad():  # inference only, so no autograd graph is needed
  for record in train_data:
    encoded = tokenizer(
        record['question'],
        record['passage'],
        truncation=True,
        max_length=512,  # explicit limit; truncation alone is a no-op for this tokenizer
        return_tensors='pt',
    ).to(device)
    logits = model(**encoded)
    # Softmax is monotonic, so the argmax of the logits is the predicted class.
    answers.append(logits.argmax(dim=-1).item())

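Check accuracy on the training set, since labels for the test set are not provided. Note that accuracy measured on the data the model was trained on is an optimistic estimate of how well it generalizes.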

In [None]:
# Gold labels of the training records, in the same order as the predictions.
actual = [record['label'] for record in train_data]

In [None]:
# Every gold label needs exactly one prediction, otherwise the comparison
# below would be misaligned.
assert len(actual) == len(answers), "predictions and labels differ in length"

# Count the positions where the prediction matches the gold label.
correct = sum(1 for gold, predicted in zip(actual, answers) if gold == predicted)

# Accuracy over the actual number of examples (previously a hard-coded 419).
correct / len(actual)