<a href="https://colab.research.google.com/github/Zarif123/497-DLNLP-HW2/blob/main/generate_ran_test_latest_choice.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Imports

In [98]:
!pip install transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [99]:
# Mount Google Drive inside the Colab VM so the checkpoint file below is reachable.
from google.colab import drive
drive.mount('/content/gdrive')

# Checkpoint file name and its full location on the mounted drive.
# `path` is a notebook-level global: train_loop writes the state dict to it
# (torch.save) and main reads it back (torch.load).
model_save_name = 'generate_20.pth'
path = F"/content/gdrive/My Drive/{model_save_name}"

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [100]:
from transformers import AutoTokenizer, BertModel, GPT2LMHeadModel, GPT2Tokenizer
import torch.optim as optim

import torch
import torch.nn as nn
import torch.nn.functional as F
import math
import time
import sys
import json
import numpy as np


### Train Loop

In [101]:
def train_loop(model, optimizer, tokenizer, train, num_choices, epochs, save_path=None):
    """Fine-tune a causal language model on the training strings, one string per step.

    Args:
        model: Model called as ``model(**inputs, labels=input_ids)``; element 0 of
            its output is used as the loss.
        optimizer: Optimizer built over ``model``'s parameters.
        tokenizer: Callable that turns a string into a batch of tensors
            (``return_tensors="pt"``) supporting ``.to(device)``.
        train: Sequence of training strings.
        num_choices: Unused; kept so existing callers keep working.
        epochs: Number of passes over ``train``.
        save_path: File the state dict is written to after every epoch.
            Defaults to the notebook-level ``path``.

    Returns:
        None. Progress and the average loss of each epoch are printed.
    """
    if save_path is None:
        save_path = path  # notebook-level checkpoint location

    # Send the inputs to whatever device the model already lives on.
    model_device = next(model.parameters()).device

    # from_pretrained() hands the model back in eval mode (dropout disabled);
    # training mode has to be switched on explicitly before fine-tuning.
    model.train()

    train_len = len(train)

    for epoch in range(epochs):
        print(f"Epoch {epoch + 1} ///////////////////////////////")

        total_loss = 0.0

        for train_i, observation in enumerate(train):
            inputs = tokenizer(observation, return_tensors="pt").to(model_device)

            optimizer.zero_grad()
            # Language-modelling objective: the labels are the input ids themselves.
            outputs = model(**inputs, labels=inputs['input_ids'])
            loss = outputs[0]
            loss.backward()
            optimizer.step()

            total_loss += loss.item()

            if train_i % 1000 == 0:
                print(train_i, "/", train_len)

        # An empty training set would otherwise raise ZeroDivisionError.
        average_loss = total_loss / train_len if train_len else 0.0
        print(f"Average Loss: {average_loss}")

        # Checkpoint after every epoch so an interrupted run keeps its progress.
        torch.save(model.state_dict(), save_path)
            


### Test Loop

In [102]:
def max_choice(tokens):
    """Locate the choice label ('A'-'D') that appears earliest in ``tokens``.

    The caller passes the decoded tokens in *reversed* order, so the earliest
    label here is the last one in the original text, and the returned value is
    that label's index in the original (un-reversed) list.

    Args:
        tokens: List of string tokens (reversed by the caller).

    Returns:
        ``len(tokens) - first_label_position - 1`` as an int, or
        ``-float('inf')`` when none of the labels occurs in ``tokens``.
    """
    # Only labels that are actually present take part in the minimum. Mapping a
    # missing label to -inf (as before) made the minimum -inf whenever *any*
    # label was absent, so the function returned +inf instead of a list index.
    positions = [tokens.index(label) for label in ('A', 'B', 'C', 'D') if label in tokens]
    if not positions:
        return -float('inf')  # sentinel the caller checks for "no prediction"
    return len(tokens) - min(positions) - 1

In [103]:
def test_loop(model, tokenizer, test):
    test_len = len(test)
    running_accuracy = 0

    for test_i in range(test_len):
        observation = test[test_i]
        
        inputs = tokenizer(observation, return_tensors="pt")
        inputs = inputs.to(device)
        outputs = model.generate(**inputs, max_length=len(inputs['input_ids'][0]) + 10, pad_token_id=tokenizer.eos_token_id)

        tokens = tokenizer.decode(outputs[0], skip_special_tokens=True).split(' ')
        
        pred_index = max_choice(list(reversed(tokens)))
        if pred_index == -float('inf'):
          continue
        pred = tokens[pred_index]
        real = observation[-1]
        # print('PRED: ', pred)
        # print('REAL: ', real)
        # print('OBS: ', observation)
        # print('TOKEN: ', tokens)
        if pred == real:
            running_accuracy += 1

        if test_i % 100 == 0:
                print(test_i, "/", test_len)
                if test_i != 0:
                  print(running_accuracy / test_i)
        
    average_accuracy = running_accuracy / test_len
    return average_accuracy
    # print(f"Average Accuracy: {average_accuracy}")


### Find Device

In [104]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

### Main

In [105]:
def _format_example(result, num_choices=4):
    """Flatten one parsed JSON record into '<fact> [SEP] <stem> [SEP] <choices>  [SEP] <answer>'."""
    question = result['question']
    choices = question['choices']

    base = result['fact1'] + ' [SEP] ' + question['stem'] + ' [SEP] '
    for j in range(num_choices):
        base = base + choices[j]['label'] + ' ' + choices[j]['text'] + ' '

    # The answer key is the last character of the string; test_loop reads it
    # from there as the gold label.
    return base + ' [SEP] ' + result['answerKey']


def _load_examples(file_name):
    """Read a JSON-lines file and return one formatted string per non-blank line."""
    with open(file_name, encoding='utf-8') as json_file:
        # Skip blank lines (e.g. a trailing newline) instead of failing in json.loads.
        return [_format_example(json.loads(line)) for line in json_file if line.strip()]


def main(is_training=False, is_zero_shot=False):
    """Fine-tune GPT-2 on the training split, or evaluate it on the dev and test splits.

    Args:
        is_training: When True, run train_loop on 'train_complete.jsonl'
            (5 epochs) and return. When False, evaluate instead.
        is_zero_shot: Only used when evaluating. When False, the fine-tuned
            weights are loaded from the notebook-level ``path`` first; when
            True, the pretrained 'gpt2' weights are evaluated as they are.

    Returns:
        None. The validation and test accuracies are printed.
    """
    torch.manual_seed(0)

    train = _load_examples('train_complete.jsonl')
    valid = _load_examples('dev_complete.jsonl')
    test = _load_examples('test_complete.jsonl')

    tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
    model = GPT2LMHeadModel.from_pretrained('gpt2')
    model = model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=3e-5)

    if is_training:
        train_loop(model, optimizer, tokenizer, train, 4, 5)
        return

    if not is_zero_shot:
        # map_location lets a checkpoint saved on a GPU runtime be loaded on a
        # CPU-only runtime (and the other way round).
        model.load_state_dict(torch.load(path, map_location=device))
    model.eval()

    av_valid_acc = test_loop(model, tokenizer, valid)
    print(f"Valid Average Accuracy: {av_valid_acc}")

    av_test_acc = test_loop(model, tokenizer, test)
    print(f"Test Average Accuracy: {av_test_acc}")

In [106]:
main()

0 / 500
100 / 500
0.39
200 / 500
0.385
300 / 500
0.42
400 / 500
0.435
Valid Average Accuracy: 0.43
0 / 500
100 / 500
0.28
200 / 500
0.34
300 / 500
0.37333333333333335
400 / 500
0.3925
Test Average Accuracy: 0.384
