### Imports

In [3]:
# %pip install -q transformers==4.26.1  # uncomment on a fresh runtime; version matches the captured install log

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.26.1-py3-none-any.whl (6.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.3/6.3 MB[0m [31m46.7 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.11.0
  Downloading huggingface_hub-0.12.1-py3-none-any.whl (190 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m190.3/190.3 KB[0m [31m20.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting tokenizers!=0.11.3,<0.14,>=0.11.1
  Downloading tokenizers-0.13.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.6/7.6 MB[0m [31m63.4 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: tokenizers, huggingface-hub, transformers
Successfully installed huggingface-hub-0.12.1 tokenizers-0.13.2 transformers-4.26.1


In [4]:
from transformers import AutoTokenizer, BertModel, GPT2LMHeadModel, GPT2Tokenizer
import torch.optim as optim

import torch
import math
import time
import sys
import json
import numpy as np


### Train Loop

In [1]:
def train_loop(model, linear, optimizer, tokenizer, train, num_choices, epochs):
    """Fine-tune ``model`` (and ``linear``, if the optimizer owns it) on multiple-choice data.

    Every answer choice is scored as an independent two-class problem: the
    hidden state at sequence position 0 is projected through ``linear`` to two
    logits, and the loss for one observation is the summed negative
    log-likelihood of each choice's 0/1 label.

    Args:
        model: Encoder invoked as ``model(**inputs)``; the result must expose
            ``last_hidden_state``. It is switched to training mode here.
        linear: Projection tensor of shape ``(hidden_size, 2)``. Tokenized
            inputs and labels are moved to this tensor's device.
        optimizer: Optimizer stepped once per observation. Only the parameters
            registered with it are updated.
        tokenizer: Callable that turns a list of strings into a batch object
            supporting ``.to(device)`` and ``**`` unpacking.
        train: Sequence of observations; each observation is a sequence of
            ``[context, label]`` pairs with ``label`` in ``{0, 1}``.
        num_choices: Number of ``[context, label]`` pairs read per observation.
        epochs: Number of full passes over ``train``.

    Prints the average per-observation loss after each epoch (``nan`` when
    ``train`` is empty).
    """
    target_device = linear.device
    train_len = len(train)

    # ``from_pretrained`` hands models back in eval mode, so dropout would stay
    # disabled during fine-tuning unless training mode is requested explicitly.
    model.train()

    for epoch in range(epochs):
        total_loss = 0.0

        for observation in train:
            contexts = [observation[choice_i][0] for choice_i in range(num_choices)]
            labels = torch.tensor(
                [observation[choice_i][1] for choice_i in range(num_choices)],
                dtype=torch.long,
                device=target_device,
            )

            inputs = tokenizer(contexts, max_length=256, padding="max_length", truncation=True, return_tensors="pt")
            inputs = inputs.to(target_device)

            optimizer.zero_grad()
            # The tokenizer output is a mapping of named tensors; unpack it so
            # each tensor reaches the matching keyword argument of the model.
            hidden = model(**inputs)

            logits = torch.matmul(hidden.last_hidden_state[:, 0, :], linear)
            # log_softmax avoids the log(0) that log(softmax(x)) can produce.
            log_probs = torch.log_softmax(logits, dim=1)
            # Select, for every choice, the log-probability of its own label.
            loss = -log_probs.gather(1, labels.unsqueeze(1)).sum()

            total_loss += loss.item()

            loss.backward()
            optimizer.step()

        average_loss = total_loss / train_len if train_len else float("nan")
        print(f"Epoch {epoch} Average Loss: {average_loss}")
            


SyntaxError: ignored

### Test Loop

In [None]:
def test_loop(model, linear, tokenizer, test, num_choices):
    test_len = len(test)
    running_accuracy = 0

    for test_i in range(test_len):
        observation = test[test_i]
        contexts = []
        labels = []
        mask = torch.zeros((4, 2), dtype=float).to(device) ##### if code doesnt work, change to numpy

        for choice_i in range(num_choices):
            context = observation[choice_i][0]
            label = observation[choice_i][1]
            contexts.append(context)
            labels.append(label)
            mask[choice_i][label] = 1
        
        inputs = tokenizer(contexts, max_length=256, padding="max_length", truncation=True, return_tensors="pt")
        inputs = inputs.to(device)

        hidden = model(inputs)

        logits = torch.matmul(hidden.last_hidden_state[:, 0, :], linear)
        probs = torch.Softmax(logits, dim=1)[:, 1]

        print(probs)

        pred = torch.argmax(probs)
        real = torch.argmax(labels)
        if pred == real:
            running_accuracy += 1
        
    average_accuracy = running_accuracy / test_len
    print(f"Average Accuracy: {average_accuracy}")


### Find Device

In [None]:
# Run on the GPU when CUDA is available; otherwise stay on the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

### Main

In [None]:
def _load_split(file_name, answers):
    """Read one JSONL split and expand every question into per-choice pairs.

    Args:
        file_name: Path to a JSONL file whose records carry ``fact1``,
            ``answerKey`` and ``question`` (with ``stem`` and ``choices``).
        answers: Ordered answer keys; the position of ``answerKey`` in this
            list identifies the correct choice.

    Returns:
        A list with one observation per record. Each observation is a list of
        ``[text, label]`` pairs, one per answer key, where ``label`` is 1 for
        the correct choice and 0 otherwise.
    """
    observations = []
    with open(file_name, encoding="utf-8") as json_file:
        for json_str in json_file:
            if not json_str.strip():
                # Tolerate blank lines such as a trailing newline at end of file.
                continue
            result = json.loads(json_str)

            base = result['fact1'] + ' [SEP] ' + result['question']['stem']
            ans = answers.index(result['answerKey'])
            choices = result['question']['choices']

            # A space separates the stem from the choice so the last word of
            # the stem and the first word of the choice are not fused.
            # NOTE(review): BERT tokenizers append their own [SEP]; the literal
            # trailing one here likely yields a duplicate - confirm before removing.
            observations.append([
                [base + ' ' + choices[j]['text'] + ' [SEP]', 1 if j == ans else 0]
                for j in range(len(answers))
            ])
    return observations


def main():
    """Fine-tune BERT with a two-class linear head and report dev/test accuracy.

    Loads the train/dev/test JSONL splits from the working directory, trains
    for 5 epochs with Adam (lr=3e-5), then evaluates on the dev and test
    splits. Relies on the notebook-level ``device``, ``train_loop`` and
    ``test_loop`` definitions.
    """
    torch.manual_seed(0)
    answers = ['A','B','C','D']
    num_choices = len(answers)

    train = _load_split('train_complete.jsonl', answers)
    valid = _load_split('dev_complete.jsonl', answers)
    test = _load_split('test_complete.jsonl', answers)

    tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
    model = BertModel.from_pretrained("bert-base-uncased").to(device)

    # Move the head to the target device first and only then mark it as
    # requiring gradients, so it stays a leaf tensor the optimizer can update.
    linear = torch.rand(model.config.hidden_size, 2).to(device).requires_grad_()

    # The head must be registered with the optimizer, otherwise it would keep
    # its random initial values for the whole run.
    optimizer = optim.Adam(list(model.parameters()) + [linear], lr=3e-5)

    train_loop(model, linear, optimizer, tokenizer, train, num_choices, 5)

    model.eval()

    test_loop(model, linear, tokenizer, valid, num_choices)
    test_loop(model, linear, tokenizer, test, num_choices)

In [None]:
# Entry point: load the data splits, fine-tune the classifier, and print dev/test accuracy.
main()