In [2]:
import torch
from transformers import RobertaTokenizer, RobertaForMultipleChoice, AdamW, get_linear_schedule_with_warmup
from torch.utils.data import DataLoader, Dataset
from sklearn.metrics import accuracy_score
import json
import json_lines
import os
from tqdm import tqdm

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
class MultipleChoiceDataset(Dataset):
    def __init__(self, data):
        self.data = data

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        input_ids, attention_mask, label = self.data[idx]
        return {
            'input_ids': input_ids,
            'attention_mask': attention_mask,
            'labels': label
        }

In [13]:
answer_map = {'A':0,'B':1,'C':2,'D':3,'E':4}
def load_data(file_path):
    data = []
    with open(file_path, 'rb') as f: 
        for item in json_lines.reader(f):
            data.append(item)

    processed_data = []
    for item in data:
        question = item['question']['stem']
        options = [_['text'] for _ in item['question']['choices']]
        examples = []
        for option in options:
            text = question + " " + option
            encoded = tokenizer.encode_plus(
                text,
                truncation=True,
                max_length=512,
                padding='max_length',
                return_attention_mask=True,
                return_tensors='pt'
            )
            examples.append(encoded)
    
        input_ids = torch.stack([example['input_ids'] for example in examples]).squeeze()
        attention_mask = torch.stack([example['attention_mask'] for example in examples]).squeeze()

        label = torch.tensor(answer_map[item['answerKey']])

        processed_data.append((input_ids, attention_mask, label))

    return processed_data

In [14]:
model = RobertaForMultipleChoice.from_pretrained('roberta-large')
tokenizer = RobertaTokenizer.from_pretrained('roberta-large')

Some weights of RobertaForMultipleChoice were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'roberta.pooler.dense.bias', 'classifier.weight', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [15]:
train_data = load_data("data/rs_train.jsonl")
valid_data = load_data("data/rs_dev.jsonl")

A man is incarcerated in prison, and as his punishment he has to carry a one tonne bag of sand backwards and forwards across a field the size of a football pitch.  What is the one thing he can put in it to make it lighter? throw
A man is incarcerated in prison, and as his punishment he has to carry a one tonne bag of sand backwards and forwards across a field the size of a football pitch.  What is the one thing he can put in it to make it lighter? bit
A man is incarcerated in prison, and as his punishment he has to carry a one tonne bag of sand backwards and forwards across a field the size of a football pitch.  What is the one thing he can put in it to make it lighter? gallon
A man is incarcerated in prison, and as his punishment he has to carry a one tonne bag of sand backwards and forwards across a field the size of a football pitch.  What is the one thing he can put in it to make it lighter? mouse
A man is incarcerated in prison, and as his punishment he has to carry a one tonne ba

In [25]:
train_dataset = MultipleChoiceDataset(train_data)
valid_dataset = MultipleChoiceDataset(valid_data)

batch_size = 4

train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
valid_loader = DataLoader(valid_dataset, shuffle=False, batch_size=batch_size)

In [26]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)
optimizer = AdamW(model.parameters(), lr=1e-5)
epochs = 3
total_steps = len(train_loader) * epochs
scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=0, num_training_steps=total_steps)



In [27]:
for epoch in range(epochs):
    model.save_pretrained('model_{}_directory'.format(epoch))
    model.train()
    total_loss = 0
    average_loss = 0
    for index, batch in tqdm(enumerate(train_loader)):
        inputs = {key: val.to(device) for key, val in batch.items() if key != "labels"}
        labels = batch["labels"].to(device)
        outputs = model(**inputs, labels=labels)
        if index ==0:
            print(outputs)
        loss = outputs.loss
        total_loss += loss.item()
        average_loss += loss.item()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        optimizer.step()
        scheduler.step()
        if index%100==0:
            print("###{}####: Average loss: {}".format(index,average_loss / 100))
            average_loss = 0

    avg_train_loss = total_loss / len(train_loader)

    model.eval()
    preds = []
    true_labels = []
    for batch in tqdm(valid_loader):
        inputs = {key: val.to(device) for key, val in batch.items() if key != "labels"}
        labels = batch["labels"]
        with torch.no_grad():
            outputs = model(**inputs)
        logits = outputs.logits
        preds.extend(torch.argmax(logits, dim=1).detach().cpu().numpy())
        true_labels.extend(labels.numpy())

    acc = accuracy_score(true_labels, preds)
    print(f'Epoch: {epoch+1}, Train loss: {avg_train_loss}, Validation accuracy: {acc}')
    model.save_pretrained('model_{}_directory'.format(epoch))

0it [00:00, ?it/s]

MultipleChoiceModelOutput(loss=tensor(1.6510, device='cuda:0', grad_fn=<NllLossBackward0>), logits=tensor([[-0.0060, -0.0574, -0.1595, -0.1085, -0.1472],
        [-0.0915, -0.1951, -0.1928, -0.1051, -0.0818],
        [-0.2362, -0.0950, -0.3088, -0.2688, -0.2323],
        [ 0.0471, -0.0261,  0.1030,  0.0428, -0.1368]], device='cuda:0',
       grad_fn=<ViewBackward0>), hidden_states=None, attentions=None)


1it [00:01,  1.33s/it]

###0####: Average loss: 0.016509516239166258


101it [02:20,  1.40s/it]

###100####: Average loss: 1.496451369524002


201it [04:40,  1.39s/it]

###200####: Average loss: 1.0525232756137848


301it [06:59,  1.39s/it]

###300####: Average loss: 0.9923830005526543


401it [09:17,  1.39s/it]

###400####: Average loss: 0.8958130796253682


501it [11:36,  1.39s/it]

###500####: Average loss: 0.7830511924624443


601it [13:55,  1.39s/it]

###600####: Average loss: 0.7765758153051138


701it [16:13,  1.38s/it]

###700####: Average loss: 0.7270216728420928


801it [18:32,  1.39s/it]

###800####: Average loss: 0.7393367589978151


878it [20:18,  1.39s/it]
100%|██████████| 256/256 [02:03<00:00,  2.07it/s]


Epoch: 1, Train loss: 0.9099054373223484, Validation accuracy: 0.5935357492654261


0it [00:00, ?it/s]

MultipleChoiceModelOutput(loss=tensor(0.0423, device='cuda:0', grad_fn=<NllLossBackward0>), logits=tensor([[-3.3965, -4.6109, -5.1899, -3.7595,  4.5028],
        [-4.6873, -5.3800, -0.6399, -5.4474, -4.7167],
        [-3.6800, -4.4706,  2.3691,  4.4511, -5.8743],
        [-5.4509, -5.7377, -3.4823,  4.9380, -4.7760]], device='cuda:0',
       grad_fn=<ViewBackward0>), hidden_states=None, attentions=None)


1it [00:01,  1.37s/it]

###0####: Average loss: 0.000422726534307003


101it [02:19,  1.38s/it]

###100####: Average loss: 0.5509661382995545


201it [04:38,  1.38s/it]

###200####: Average loss: 0.5361963753920281


301it [06:56,  1.38s/it]

###300####: Average loss: 0.5459809051523916


401it [09:15,  1.38s/it]

###400####: Average loss: 0.572009013880379


501it [11:33,  1.38s/it]

###500####: Average loss: 0.42549232192061026


601it [13:52,  1.39s/it]

###600####: Average loss: 0.5884837852175951


701it [16:10,  1.39s/it]

###700####: Average loss: 0.41253565587307095


801it [18:29,  1.39s/it]

###800####: Average loss: 0.4220320232921949


878it [20:15,  1.38s/it]
100%|██████████| 256/256 [02:03<00:00,  2.07it/s]


Epoch: 2, Train loss: 0.5041741789562331, Validation accuracy: 0.6131243878550441


0it [00:00, ?it/s]

MultipleChoiceModelOutput(loss=tensor(0.0341, device='cuda:0', grad_fn=<NllLossBackward0>), logits=tensor([[-5.7757,  6.5004, -7.7905, -8.0654, -5.7249],
        [-3.6226, -0.0195,  2.1777, -1.3064, -4.3619],
        [ 7.8295, -8.3567, -3.4837, -7.8009, -8.4542],
        [-5.1161, -7.9450, -8.1659, -8.3911,  4.9216]], device='cuda:0',
       grad_fn=<ViewBackward0>), hidden_states=None, attentions=None)


1it [00:01,  1.37s/it]

###0####: Average loss: 0.00034146346151828764


101it [02:19,  1.39s/it]

###100####: Average loss: 0.2507724482954654


201it [04:38,  1.38s/it]

###200####: Average loss: 0.3621581985198327


301it [06:56,  1.38s/it]

###300####: Average loss: 0.2515381439379149


401it [09:15,  1.39s/it]

###400####: Average loss: 0.20269997735214018


501it [11:33,  1.39s/it]

###500####: Average loss: 0.24167063964106064


601it [13:52,  1.39s/it]

###600####: Average loss: 0.33257313119179455


701it [16:11,  1.39s/it]

###700####: Average loss: 0.4012812370620895


801it [18:29,  1.38s/it]

###800####: Average loss: 0.32189257025577717


878it [20:15,  1.38s/it]
100%|██████████| 256/256 [02:03<00:00,  2.07it/s]


Epoch: 3, Train loss: 0.2950253478745673, Validation accuracy: 0.6366307541625857
