<a href="https://colab.research.google.com/github/V1SHAL421/gamification_evaluation/blob/main/gamification_evaluation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
pip install accelerate -U

Collecting accelerate
  Downloading accelerate-0.29.2-py3-none-any.whl (297 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m297.4/297.4 kB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch>=1.10.0->accelerate)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch>=1.10.0->accelerate)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch>=1.10.0->accelerate)
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch>=1.10.0->accelerate)
  Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl (731.7 MB)
Collecting nvidia-cublas-cu12==12.1.3.1 (from torch>=1.10.0->accelerate)
  Using cached nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.w

In [2]:
# Upload data from file explorer
from google.colab import files
uploaded = files.upload()

Saving multiple_choice_questions.json to multiple_choice_questions.json


In [3]:
# AlBERT model training without gamification

import json
import os
import sys
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import AlbertTokenizer, AlbertForMultipleChoice, TrainingArguments, Trainer
from sklearn.model_selection import train_test_split
import random

tokenizer = AlbertTokenizer.from_pretrained('albert-base-v2') # Initialise tokenizer

data_json = list(uploaded.keys())[0] # Retrieve JSON file of data

correct_answers = 0 # Number of correct answers from evaluation phase
total_predictions = 0 # Total predictions from evaluation phase

with open(data_json, 'r', encoding='utf-8') as f:
    data = json.load(f) # Loads JSON data into variable

def preprocess_data(data, tokenizer, max_length=512):

    tokenized_inputs = []
    labels = []

    # For question, options and answer in data
    for data_question in data:
      # Assign question, options and answer to variables
        question = data_question['question_content']
        choices = [data_question['option_one'], data_question['option_two'], data_question['option_three'], data_question['option_four']]
        correct_answer_key = data_question['answer']

        # Initialize tokenized choices
        input_ids = []
        attention_masks = []
        # Map answer to number
        answer_key_index = {
            "one": 0,
            "two": 1,
            "three": 2,
            "four": 3
        }
        # Convert the correct answer key to an index
        correct_answer_index = answer_key_index[correct_answer_key[7:]]

        # Tokenize each choice with the question as a question-choice pair
        for choice in choices:
            inputs = tokenizer.encode_plus(
                question,
                choice,
                add_special_tokens=True,
                max_length=max_length,
                padding='max_length',
                truncation=True,
                return_attention_mask=True
            )
            input_ids.append(inputs['input_ids'])
            attention_masks.append(inputs['attention_mask'])

        # Convert lists to tensors
        input_ids = torch.tensor(input_ids)
        attention_masks = torch.tensor(attention_masks)

        # Append tokenized inputs for each example
        tokenized_inputs.append({
            'input_ids': input_ids,
            'attention_mask': attention_masks
        })

        # Append correct answer index into labels
        labels.append(correct_answer_index)

    return tokenized_inputs, labels

# Split data into training, validating and testing
train_val_data, test_data = train_test_split(data, test_size=0.2)
train_data, val_data = train_test_split(train_val_data, test_size=0.25)

# Retrieve inputs and labels for data
train_inputs, train_labels = preprocess_data(train_data, tokenizer)
valid_inputs, valid_labels = preprocess_data(val_data, tokenizer)
test_inputs, test_labels = preprocess_data(test_data, tokenizer)

# The class below converts inputs and labels into items compatible for training
class MCQDataset(Dataset):
    def __init__(self, inputs, labels):
        self.inputs = inputs
        self.labels = labels
        self.length = len(self.inputs) # Length of inputs

    def __len__(self):
        return self.length

    def __getitem__(self, idx):
      # Gets a certain item based off its index
        input_ids = self.inputs[idx]['input_ids']
        attention_mask = self.inputs[idx]['attention_mask']
        label = self.labels[idx]

        item = {
            'input_ids': input_ids,         # Tensor of shape (4, 512)
            'attention_mask': attention_mask, # Same as above
            'labels': torch.tensor(label)   # Ensuring the labels are tensors
        }

        return item

# Converts inputs and labels into items
train_dataset = MCQDataset(train_inputs, train_labels)
valid_dataset = MCQDataset(valid_inputs, valid_labels)
test_dataset = MCQDataset(test_inputs, test_labels)

# Initiate data loaders
train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=4)
test_loader = DataLoader(test_dataset, batch_size=4)

# Initiate model
model = AlbertForMultipleChoice.from_pretrained('albert-base-v2')

# Declare training arguments
training_args = TrainingArguments(
    output_dir='./results',          # output directory
    num_train_epochs=6,              # total number of training epochs
    per_device_train_batch_size=4,   # batch size per device during training
    per_device_eval_batch_size=4,    # batch size for evaluation
    warmup_steps=500,                # number of warmup steps for learning rate scheduler
    weight_decay=0.01,               # strength of weight decay
    logging_dir='./logs',            # directory for storing logs
    logging_steps=10,
    evaluation_strategy="epoch",     # evaluation is done at the end of each epoch
    fp16=True
)

# Declare trainer parameters
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=valid_dataset,
)

# Train
trainer.train()

# Evaluate the model
eval_results = trainer.evaluate(test_dataset)

# Ensure the model is in evaluation mode
model.eval()

# Disable gradient calculations
with torch.no_grad():
    for batch in test_loader:
        # Prepare input and labels
        input_ids = batch['input_ids'].to(model.device)
        attention_mask = batch['attention_mask'].to(model.device)
        labels = batch['labels'].to(model.device)

        # Model forward pass
        results = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)

        # Get the predictions
        logits = results.logits
        predictions = torch.argmax(logits, dim=1)

        # Update variables
        answer_match = (predictions == labels)
        num_matches = answer_match.sum().item()
        correct_answers += num_matches
        total_predictions += labels.size(0)


# Calculate accuracy
accuracy = correct_answers / total_predictions
print(f'Accuracy: {accuracy * 100:.2f}%')

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/760k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.31M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/684 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/47.4M [00:00<?, ?B/s]

Some weights of AlbertForMultipleChoice were not initialized from the model checkpoint at albert-base-v2 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Epoch,Training Loss,Validation Loss
1,1.2265,1.173419
2,1.068,1.069458
3,0.8725,0.96539
4,0.7801,1.098399
5,0.3622,1.242956
6,0.3732,1.43339


Accuracy: 60.98%


In [None]:
# AlBERT model training with gamification
import json
import os
import sys
import torch
from torch.utils.data import DataLoader
from transformers import AlbertTokenizer, AlbertForMultipleChoice, TrainingArguments, Trainer
from sklearn.model_selection import train_test_split
import random

tokenizer = AlbertTokenizer.from_pretrained('albert-base-v2')

correct_answers = 0 # Number of correct answers from evaluation phase
total_predictions = 0 # Total predictions from evaluation phase

with open(data_json, 'r', encoding='utf-8') as f:
    data = json.load(f) # Loads JSON data into variable


def preprocess_data(data, tokenizer, max_length=512):
    tokenized_inputs = []
    labels = []

    # For question, options and answer in data
    for data_question in data:
      # Assign question, options and answer to variables
        question = data_question['question_content']
        choices = [data_question['option_one'], data_question['option_two'], data_question['option_three'], data_question['option_four']]
        correct_answer_key = data_question['answer']

        # Initialize tokenized choices
        input_ids = []
        attention_masks = []
        answer_key_index = {
            "one": 0,
            "two": 1,
            "three": 2,
            "four": 3
        }
        # Convert the correct answer key to an index
        correct_answer_index = answer_key_index[correct_answer_key[7:]]

        # Tokenize each choice with the question as a question-choice pair
        for choice in choices:
            inputs = tokenizer.encode_plus(
                question,
                choice,
                add_special_tokens=True,
                max_length=max_length,
                padding='max_length',
                truncation=True,
                return_attention_mask=True
            )
            input_ids.append(inputs['input_ids'])
            attention_masks.append(inputs['attention_mask'])

        # Convert lists to tensors
        input_ids = torch.tensor(input_ids)
        attention_masks = torch.tensor(attention_masks)

        # Append tokenized inputs for each example
        tokenized_inputs.append({
            'input_ids': input_ids,
            'attention_mask': attention_masks
        })

        # Append correct answer index into labels
        labels.append(correct_answer_index)

    return tokenized_inputs, labels

# Split data into training, validating and testing
train_val_data, test_data = train_test_split(data, test_size=0.2)
train_data, val_data = train_test_split(train_val_data, test_size=0.25)

# Retrieve inputs and labels for data
train_inputs, train_labels = preprocess_data(train_data, tokenizer)
valid_inputs, valid_labels = preprocess_data(val_data, tokenizer)
test_inputs, test_labels = preprocess_data(test_data, tokenizer)

# The class below converts inputs and labels into items compatible for training
class MCQDataset(Dataset):
    def __init__(self, inputs, labels):
        self.inputs = inputs
        self.labels = labels
        self.length = len(self.inputs) # Length of inputs

    def __len__(self):
        return self.length

    def __getitem__(self, idx):
      # Gets a certain item based off its index
        input_ids = self.inputs[idx]['input_ids']
        attention_mask = self.inputs[idx]['attention_mask']
        label = self.labels[idx]

        item = {
            'input_ids': input_ids,         # Tensor of shape (4, 512)
            'attention_mask': attention_mask, # Same as above
            'labels': torch.tensor(label)   # Ensuring the labels are tensors
        }

        return item


batch_size = 4

# Converts inputs and labels into items
train_dataset = MCQDataset(train_inputs, train_labels)
valid_dataset = MCQDataset(valid_inputs, valid_labels)
test_dataset = MCQDataset(test_inputs, test_labels)

# Initiate data loaders
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=batch_size)
test_loader = DataLoader(test_dataset, batch_size=batch_size)

# Initiate model
model = AlbertForMultipleChoice.from_pretrained('albert-base-v2')

# Declare training arguments
training_args = TrainingArguments(
    output_dir='./results',          # output directory
    num_train_epochs=6,              # total number of training epochs
    per_device_train_batch_size=4,   # batch size per device during training
    per_device_eval_batch_size=4,    # batch size for evaluation
    warmup_steps=500,                # number of warmup steps for learning rate scheduler
    weight_decay=0.01,               # strength of weight decay
    logging_dir='./logs',            # directory for storing logs
    logging_steps=10,
    evaluation_strategy="epoch",     # evaluation is done at the end of each epoch
    fp16=True
)

# Declare trainer parameters
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=valid_dataset,
)

# Train
trainer.train()

# Evaluate the model
eval_results = trainer.evaluate(test_dataset)

# Ensure the model is in evaluation mode
model.eval()


# Disable gradient calculations
with torch.no_grad():
    for batch in test_loader:
        # Prepare input and labels
        input_ids = batch['input_ids'].to(model.device)
        attention_mask = batch['attention_mask'].to(model.device)
        labels = batch['labels'].to(model.device)

        # Model forward pass
        results = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)

        # Get the predictions
        results = results.logits
        predictions = torch.argmax(results, dim=1)

        # Update variables
        answer_match = (predictions == labels)
        num_matches = answer_match.sum().item()
        correct_answers += num_matches
        total_predictions += labels.size(0)


def evaluate_model_with_gamification(model, data_loader, tokenizer, device='cuda'):
    model.eval() # Set model to evaluation phase
    # Initiate variables
    correct_answers = 0
    total_questions = 0
    points = 0
    badges = 0

    # Iterate through batches in data loader
    for batch in data_loader:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)

        with torch.no_grad():
            results = model(input_ids=input_ids, attention_mask=attention_mask)
            predictions = torch.argmax(results.logits, dim=-1) # Retrieve predictions

        # Identify whether predictions matches the correct answers
        answer_match = (predictions == labels) # 1 for True, 0 for False
        num_matches = answer_match.sum().item() # Number of True matches
        correct_answers += num_matches # Increment total number of correct answers by number of matches
        total_questions += labels.size(0) # Increment number of questions by the number of labels as they represent the number of questions asked

        # Assign points for correct answers
        answer_match = (predictions == labels)
        num_matches = (answer_match.sum().item())*100 # Award 100 points per match
        points += num_matches # Increment number of points by number of matches
        # Grant badges for every 1000 points accumulated
        badges += points // 1000 # Increment number of points
        points %= 1000  # Reset points after granting a badge

    # Calculate accuracy
    accuracy = correct_answers / total_questions
    print(f"Accuracy: {accuracy:.2f}, Points: {points}, Badges: {badges}")
    return accuracy, points, badges

# Return results
accuracy, points, badges = evaluate_model_with_gamification(model, valid_loader, tokenizer)

In [None]:
# DistilBERT model training without gamification

import json
import os
import sys
import torch
from torch.utils.data import DataLoader, Dataset
from transformers import DistilBertTokenizer, DistilBertForMultipleChoice, TrainingArguments, Trainer
from sklearn.model_selection import train_test_split

tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')

correct_answers = 0 # Number of correct answers from evaluation phase
total_predictions = 0 # Total predictions from evaluation phase

with open(data_json, 'r', encoding='utf-8') as f:
    data = json.load(f) # Loads JSON data into variable

def preprocess_data(data, tokenizer, max_length=512):

    tokenized_inputs = []
    labels = []

    # For question, options and answer in data
    for data_question in data:
      # Assign question, options and answer to variables
        question = data_question['question_content']
        choices = [data_question['option_one'], data_question['option_two'], data_question['option_three'], data_question['option_four']]
        correct_answer_key = data_question['answer']

        # Initialize tokenized choices
        input_ids = []
        attention_masks = []
        # Map answer to number
        answer_key_index = {
            "one": 0,
            "two": 1,
            "three": 2,
            "four": 3
        }
        # Convert the correct answer key to an index
        correct_answer_index = answer_key_index[correct_answer_key[7:]]

        # Tokenize each choice with the question as a question-choice pair
        for choice in choices:
            inputs = tokenizer.encode_plus(
                question,
                choice,
                add_special_tokens=True,
                max_length=max_length,
                padding='max_length',
                truncation=True,
                return_attention_mask=True
            )
            input_ids.append(inputs['input_ids'])
            attention_masks.append(inputs['attention_mask'])

        # Convert lists to tensors
        input_ids = torch.tensor(input_ids)
        attention_masks = torch.tensor(attention_masks)

        # Append tokenized inputs for each example
        tokenized_inputs.append({
            'input_ids': input_ids,
            'attention_mask': attention_masks
        })

        # Append correct answer index into labels
        labels.append(correct_answer_index)

    return tokenized_inputs, labels

# Split data into training, validating and testing
train_val_data, test_data = train_test_split(data, test_size=0.2)
train_data, val_data = train_test_split(train_val_data, test_size=0.25)

# Retrieve inputs and labels for data
train_inputs, train_labels = preprocess_data(train_data, tokenizer)
valid_inputs, valid_labels = preprocess_data(val_data, tokenizer)
test_inputs, test_labels = preprocess_data(test_data, tokenizer)


# The class below converts inputs and labels into items compatible for training
class MCQDataset(Dataset):
    def __init__(self, inputs, labels):
        self.inputs = inputs
        self.labels = labels
        self.length = len(self.inputs) # Length of inputs

    def __len__(self):
        return self.length

    def __getitem__(self, idx):
      # Gets a certain item based off its index
        input_ids = self.inputs[idx]['input_ids']
        attention_mask = self.inputs[idx]['attention_mask']
        label = self.labels[idx]

        item = {
            'input_ids': input_ids,         # Tensor of shape (4, 512)
            'attention_mask': attention_mask, # Same as above
            'labels': torch.tensor(label)   # Ensuring the labels are tensors
        }

        return item


batch_size = 2

# Converts inputs and labels into items
train_dataset = MCQDataset(train_inputs, train_labels)
valid_dataset = MCQDataset(valid_inputs, valid_labels)
test_dataset = MCQDataset(test_inputs, test_labels)

# Initiate data loaders
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=batch_size)
test_loader = DataLoader(test_dataset, batch_size=batch_size)

# Initiate model
model = DistilBertForMultipleChoice.from_pretrained('distilbert-base-uncased')

# Declare training arguments
training_args = TrainingArguments(
    output_dir='./results',          # output directory
    num_train_epochs=5,              # total number of training epochs
    per_device_train_batch_size=2,   # batch size per device during training
    per_device_eval_batch_size=2,    # batch size for evaluation
    warmup_steps=500,                # number of warmup steps for learning rate scheduler
    weight_decay=0.01,               # strength of weight decay
    logging_dir='./logs',            # directory for storing logs
    logging_steps=10,
    evaluation_strategy="epoch",     # evaluation is done at the end of each epoch
    fp16=True
)

# Declare trainer parameters
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=valid_dataset,
)

# Train
trainer.train()

# Evaluate the model
eval_results = trainer.evaluate(test_dataset)

# Ensure the model is in evaluation mode
model.eval()


# Disable gradient calculations
with torch.no_grad():
    for batch in test_loader:
        # Prepare input and labels
        input_ids = batch['input_ids'].to(model.device)
        attention_mask = batch['attention_mask'].to(model.device)
        labels = batch['labels'].to(model.device)

        # Model forward pass
        results = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)

        # Get the predictions
        results = results.logits
        predictions = torch.argmax(results, dim=1)

        # Identify whether predictions matches the correct answers
        answer_match = (predictions == labels) # 1 for True, 0 for False
        num_matches = answer_match.sum().item() # Number of True matches
        correct_answers += num_matches # Increment total number of correct answers by number of matches
        total_predictions += labels.size(0) # Increment number of questions by the number of labels as they represent the number of questions asked

# Calculate accuracy
accuracy = correct_answers / total_predictions
print(f'Accuracy: {accuracy * 100:.2f}%')

In [None]:
# DistilBERT model training for gamification

import json
import os
import sys
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import DistilBertTokenizer, DistilBertForMultipleChoice, TrainingArguments, Trainer
from sklearn.model_selection import train_test_split

tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')

data_json = list(uploaded.keys())[0]

correct_answers = 0 # Number of correct answers from evaluation phase
total_predictions = 0 # Total predictions from evaluation phase

with open(data_json, 'r', encoding='utf-8') as f:
    data = json.load(f) # Loads JSON data into variable

def preprocess_data(data, tokenizer, max_length=512):

    tokenized_inputs = []
    labels = []

    # For question, options and answer in data
    for data_question in data:
      # Assign question, options and answer to variables
        question = data_question['question_content']
        choices = [data_question['option_one'], data_question['option_two'], data_question['option_three'], data_question['option_four']]
        correct_answer_key = data_question['answer']

        # Initialize tokenized choices
        input_ids = []
        attention_masks = []
        # Map answer to number
        answer_key_index = {
            "one": 0,
            "two": 1,
            "three": 2,
            "four": 3
        }
        # Convert the correct answer key to an index
        correct_answer_index = answer_key_index[correct_answer_key[7:]]

        # Tokenize each choice with the question as a question-choice pair
        for choice in choices:
            inputs = tokenizer.encode_plus(
                question,
                choice,
                add_special_tokens=True,
                max_length=max_length,
                padding='max_length',
                truncation=True,
                return_attention_mask=True
            )
            input_ids.append(inputs['input_ids'])
            attention_masks.append(inputs['attention_mask'])

        # Convert lists to tensors
        input_ids = torch.tensor(input_ids)
        attention_masks = torch.tensor(attention_masks)

        # Append tokenized inputs for each example
        tokenized_inputs.append({
            'input_ids': input_ids,
            'attention_mask': attention_masks
        })

        # Append correct answer index into labels
        labels.append(correct_answer_index)

    return tokenized_inputs, labels

train_val_data, test_data = train_test_split(data, test_size=0.2)
train_data, val_data = train_test_split(train_val_data, test_size=0.25)

train_inputs, train_labels = preprocess_data(train_data, tokenizer)
valid_inputs, valid_labels = preprocess_data(val_data, tokenizer)
test_inputs, test_labels = preprocess_data(test_data, tokenizer)




class MCQDataset(Dataset):
    def __init__(self, inputs, labels):
        self.inputs = inputs
        self.labels = labels
        self.length = len(self.inputs)

    def __len__(self):
        return self.length

    def __getitem__(self, idx):

        input_ids = self.inputs[idx]['input_ids']
        attention_mask = self.inputs[idx]['attention_mask']
        label = self.labels[idx]

        item = {
            'input_ids': input_ids,         # Tensor of shape (4, 512)
            'attention_mask': attention_mask, # Same as above
            'labels': torch.tensor(label)   # Ensuring the labels are tensors
        }

        return item


train_dataset = MCQDataset(train_inputs, train_labels)
valid_dataset = MCQDataset(valid_inputs, valid_labels)
test_dataset = MCQDataset(test_inputs, test_labels)

train_loader = DataLoader(train_dataset, batch_size=2, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=2)
test_loader = DataLoader(test_dataset, batch_size=2)

model = DistilBertForMultipleChoice.from_pretrained('distilbert-base-uncased')

training_args = TrainingArguments(
    output_dir='./results',          # output directory
    num_train_epochs=5,              # total number of training epochs
    per_device_train_batch_size=2,   # batch size per device during training
    per_device_eval_batch_size=2,    # batch size for evaluation
    warmup_steps=500,                # number of warmup steps for learning rate scheduler
    weight_decay=0.01,               # strength of weight decay
    logging_dir='./logs',            # directory for storing logs
    logging_steps=10,
    evaluation_strategy="epoch",     # evaluation is done at the end of each epoch
    fp16=True
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=valid_dataset,
)

trainer.train()

eval_results = trainer.evaluate(test_dataset)


# Ensure the model is in evaluation mode
model.eval()

# Disable gradient calculations
with torch.no_grad():
    for batch in test_loader:
        # Prepare input and labels
        input_ids = batch['input_ids'].to(model.device)
        attention_mask = batch['attention_mask'].to(model.device)
        labels = batch['labels'].to(model.device)

        # Model forward pass
        results = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)

        # Get the predictions
        results = results.logits
        predictions = torch.argmax(results, dim=1)

        # Update variables
        answer_match = (predictions == labels)
        num_matches = answer_match.sum().item()
        correct_answers += num_matches
        total_predictions += labels.size(0)


def evaluate_model_with_gamification(model, data_loader, tokenizer, device='cuda'):
    model.eval()
    correct_answers = 0
    total_questions = 0
    points = 0
    badges = 0

    for batch in data_loader:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)

        with torch.no_grad():
            results = model(input_ids=input_ids, attention_mask=attention_mask)
            predictions = torch.argmax(results.logits, dim=-1) # Retrieve predictions

        # Identify whether predictions matches the correct answers
        answer_match = (predictions == labels) # 1 for True, 0 for False
        num_matches = answer_match.sum().item() # Number of True matches
        correct_answers += num_matches # increment total number of correct answers by number of matches
        total_questions += labels.size(0) # Increment number of questions by the number of labels as they represent the number of questions asked

        # Assign points for correct answers
        answer_match = (predictions == labels)
        num_matches = (answer_match.sum().item())*100 # Award 100 points per match
        points += num_matches # Increment number of points by number of matches
        # Grant badges for every 1000 points accumulated
        badges += points // 1000 # Increment number of points
        points %= 1000  # Reset points after granting a badge

    # Calculate accuracy
    accuracy = correct_answers / total_questions
    print(f"Accuracy: {accuracy:.2f}, Points: {points}, Badges: {badges}")
    return accuracy, points, badges

# Return results
accuracy, points, badges = evaluate_model_with_gamification(model, valid_loader, tokenizer)

In [6]:
# T5 model training without gamification

import json
import os
import sys
import torch
from torch.utils.data import DataLoader, Dataset
from torch.optim import AdamW
from transformers import T5Tokenizer, T5ForConditionalGeneration, DataCollatorForSeq2Seq, Seq2SeqTrainingArguments, Seq2SeqTrainer, Trainer, TrainingArguments
from sklearn.model_selection import train_test_split
import numpy as np
import random



def modify_attention_with_lora(attention_layer, lora_rank, lora_alpha):
    """
    Modifies T5 attention layer to include LoRA rank and LoRA alpha.

    Arguments:
    - attention_layer (nn.Module): The attention layer needing modification
    - lora_rank (int): The rank for the low-rank matrices in LoRA.
    - lora_alpha (float): LoRA scaling factor

    Returns:
    - T5 attention layer with LoRA parameters.
    """
    # Add LoRA parameters to attention layer
    attention_layer.lora_rank = lora_rank
    attention_layer.lora_alpha = lora_alpha

    return attention_layer

def get_lora_model(model_id, lora_rank, lora_alpha):
    """
    Modifies attention layers of T5 model to include LoRA.

    Arguments:
    - model_id (str): Path of pre-trained T5 model.
    - lora_rank (int): LoRA rank for the low-rank matrices in LoRA.
    - lora_alpha (float): LoRA scaling factor.

    Returns:
    - Modified T5 model with LoRA incorporated in its attention layers
    """
    # Load pre-trained T5 model
    model = T5ForConditionalGeneration.from_pretrained(model_id)

    # Modify the encoder's attention layer
    for block in model.encoder.block:
        block.layer[0].SelfAttention = modify_attention_with_lora(
            block.layer[0].SelfAttention, lora_rank, lora_alpha
        )

    # Modify the decoder's attention layer
    for block in model.decoder.block:
        block.layer[0].SelfAttention = modify_attention_with_lora(
            block.layer[0].SelfAttention, lora_rank, lora_alpha
        )

    return model



# The class below converts inputs and labels into items compatible for training while incorporating few shot learning
class T5DataPreprocessingFewShot(Dataset):
    def __init__(self, data, tokenizer, num_examples=3, percentile=85):
        self.data = data
        self.tokenizer = tokenizer
        self.num_examples = num_examples  # Number of examples for few-shot prompt engineering
        self.max_length = self._compute_max_length(percentile) # The maximum length based on given percentile

    def _compute_max_length(self, percentile):
        # Compute length of tokenized inputs
        length = [len(self.tokenizer.encode(self._format(question))) for question in self.data]
        return int(np.percentile(length, percentile)) # Maximum length based on given percentile

    def _format(self, item):
        # Assign item's question to variable
        question = item["question_content"]
        # Initialise options as empty string
        options = ""

        # List of choices
        choices = [item["option_one"], item["option_two"], item["option_three"], item["option_four"]]

        # Loop through choices and append each options string with formatting
        for i, choice in enumerate(choices):
            if i > 0:
                # Add space before each choice except first one
                options += " "
            # Append the formatted choice to the options string
            options += f"({chr(65 + i)}) {choice}" # Generate option label and choice
        return f"{question} {options}" # Return question and choices

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):

        wrong_options = [item for i, item in enumerate(self.data) if i != idx] # Filter out the correct answer
        few_shot_questions = random.sample(wrong_options, self.num_examples) # Provide a random sample of questions
        few_shot_prompt = ""
        # Iterate through questions for few shot prompt engineering
        for data_question in few_shot_questions:
            formatted_data_question= self._format(data_question)
            answer_label = data_question["answer"]
            answer_index = ["option_one", "option_two", "option_three", "option_four"].index(answer_label) # Retrieve answer from label
            few_shot_prompt += f"Q: {formatted_data_question} A: {chr(65 + answer_index)}\n" # Create few shot prompt

        cur_item = self.data[idx]
        formatted_current = self._format(cur_item) # Format current item
        input_text = f"{few_shot_prompt}Q: {formatted_current} A:"

        correct_answer_label = cur_item["answer"] # Retrieve answer of current item
        answer_index = ["option_one", "option_two", "option_three", "option_four"].index(correct_answer_label) # Retrieve index of answer
        target_text = chr(65 + answer_index) # Assign label to target text

        # Initialise input and target encodings
        input_encoding = self.tokenizer(input_text, max_length=self.max_length, padding="max_length", truncation=True, return_tensors="pt")
        target_encoding = self.tokenizer(target_text, max_length=4, padding="max_length", truncation=True, return_tensors="pt")

        # Squeeze the encoded input IDs and attention masks to ensure compatibility with model
        item = {
            'input_ids': input_encoding['input_ids'].squeeze(0),
            'attention_mask': input_encoding['attention_mask'].squeeze(0),
            'labels': target_encoding['input_ids'].squeeze(0).to(torch.long)
        }
        return item


def evaluate_model(model, tokenizer, data_loader, device='cuda'):
    model.eval() # Set model to evaluate phase
    correct_answers = 0

    # train_data, val_data = train_test_split(data, test_size=0.2)
    # Iterate through batches from data loader
    for batch in data_loader:
        # Set input IDs, attention masks and labels to device
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)

        with torch.no_grad():
            outputs = model.generate(input_ids=input_ids, attention_mask=attention_mask, max_length=4)

        predictions = [tokenizer.decode(generated_id, skip_special_tokens=True).strip() for generated_id in outputs] # Predictions are decoded
        labels = [tokenizer.decode(label_id, skip_special_tokens=True).strip() for label_id in labels] # Labels are decoded

        for prediction, label in zip(predictions, labels):
            if prediction == label:
                correct_answers += 1 # Increment correct answers by 1 if prediction matches label

    accuracy = correct_answers / len(data_loader.dataset) # Compute accuracy
    return accuracy


def main():
    # Retrieve data
    data_json = list(uploaded.keys())[0]

    with open(data_json, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # Initialise the tokenizer and preprocess the dataset
    tokenizer = T5Tokenizer.from_pretrained('t5-small')
    # dataset = T5DataPreprocessing(data=data, tokenizer=tokenizer)
    dataset = T5DataPreprocessingFewShot(data=data, tokenizer=tokenizer, num_examples=3)
    model = T5ForConditionalGeneration.from_pretrained('t5-small')

    batch_size = 2  # from Hyperopt hyperparameter optimization
    learning_rate = 5e-5  # from Hyperopt hyperparameter optimization
    num_train_epochs = 3  # from Hyperopt hyperparameter optimization


    # Split the dataset into training and validation sets as well as training and validation data loaders
    train_data, val_data = train_test_split(dataset, test_size=0.2)
    train_loader = DataLoader(train_data, batch_size=2, shuffle=True)
    val_loader = DataLoader(val_data, batch_size=2)


    model = T5ForConditionalGeneration.from_pretrained('t5-small') # Load T5 model
    model = get_lora_model(model_id='t5-small', lora_rank=32, lora_alpha=4.0) # Apply LoRA modifications
    model = model.to("cuda") # Prepare for training

    # Data collator so that each batch has the same length through padding
    data_collator = DataCollatorForSeq2Seq(tokenizer, model=model, label_pad_token_id=-100, pad_to_multiple_of=8)

    # Declare training arguments
    training_arguments = Seq2SeqTrainingArguments(
        output_dir="./results_t5_lora",
        evaluation_strategy="steps",
        eval_steps=400,
        logging_steps=100,
        save_steps=800,
        warmup_steps=500,
        per_device_train_batch_size=batch_size,
        per_device_eval_batch_size=batch_size,
        learning_rate=learning_rate,
        num_train_epochs=num_train_epochs,
        weight_decay=0.01,
    )

    # Declare trainer arguments
    trainer = Seq2SeqTrainer(
        model=model,
        args=training_arguments,
        train_dataset=train_data,
        eval_dataset=val_data,
        data_collator=data_collator,
        tokenizer=tokenizer,
    )

    # Train
    trainer.train()

    # Evaluate with game mechanics

    accuracy = evaluate_model(model, tokenizer, val_loader)

    print(f"Accuracy: {accuracy:.2%}")

if __name__ == '__main__':
    main()

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss,Validation Loss
400,0.8481,0.407234
800,0.3485,0.320199
1200,0.2982,0.35449


Checkpoint destination directory ./results_t5_lora/checkpoint-800 already exists and is non-empty. Saving will proceed but saved results may be invalid.


Accuracy: 32.20%


In [7]:
# T5 model training with gamification

import json
import os
import sys
import torch
from torch.utils.data import DataLoader, Dataset
from transformers import T5Tokenizer, T5ForConditionalGeneration, DataCollatorForSeq2Seq, Seq2SeqTrainingArguments, Seq2SeqTrainer, Trainer, TrainingArguments
from sklearn.model_selection import train_test_split
import numpy as np
import random


def modify_attention_with_lora(attention_layer, lora_rank, lora_alpha):
    """
    Modifies T5 attention layer to include LoRA rank and LoRA alpha.

    Args:
    - attention_layer (nn.Module): The attention layer needing modification
    - lora_rank (int): The rank for the low-rank matrices in LoRA.
    - lora_alpha (float): LoRA scaling factor

    Returns:
    - T5 attention layer with LoRA parameters.
    """
    # Add LoRA parameters to attention layer
    attention_layer.lora_rank = lora_rank
    attention_layer.lora_alpha = lora_alpha

    return attention_layer

def get_lora_model(model_id, lora_rank, lora_alpha):
    """
    Modifies attention layers of T5 model to include LoRA.

    Args:
    - model_id (str): Path of pre-trained T5 model.
    - lora_rank (int): LoRA rank for the low-rank matrices in LoRA.
    - lora_alpha (float): LoRA scaling factor.

    Returns:
    - Modified T5 model with LoRA incorporated in its attention layers
    """
    # Load pre-trained T5 model
    model = T5ForConditionalGeneration.from_pretrained(model_id)

    # Modify encoder attention layers
    for block in model.encoder.block:
        block.layer[0].SelfAttention = modify_attention_with_lora(
            block.layer[0].SelfAttention, lora_rank, lora_alpha
        )

    # Modify decoder attention layers
    for block in model.decoder.block:
        block.layer[0].SelfAttention = modify_attention_with_lora(
            block.layer[0].SelfAttention, lora_rank, lora_alpha
        )

    return model



# The class below converts inputs and labels into items compatible for training while incorporating few shot learning
class T5DataPreprocessingFewShot(Dataset):
    def __init__(self, data, tokenizer, num_examples=3, percentile=85):
        self.data = data
        self.tokenizer = tokenizer
        self.num_examples = num_examples  # Number of examples for few-shot prompt engineering
        self.max_length = self._compute_max_length(percentile) # The maximum length based on given percentile

    def _compute_max_length(self, percentile):
        # Compute length of tokenized inputs
        length = [len(self.tokenizer.encode(self._format(question))) for question in self.data]
        return int(np.percentile(length, percentile)) # Maximum length based on given percentile

    def _format(self, item):
        # Assign item's question to variable
        question = item["question_content"]
        # Initialise options as empty string
        options = ""

        # List of choices
        choices = [item["option_one"], item["option_two"], item["option_three"], item["option_four"]]

        # Loop through choices and append each options string with formatting
        for i, choice in enumerate(choices):
            if i > 0:
                # Add space before each choice except first one
                options += " "
            # Append the formatted choice to the options string
            options += f"({chr(65 + i)}) {choice}" # Generate option label and choice
        return f"{question} {options}" # Return question and choices

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):

        wrong_options = [item for i, item in enumerate(self.data) if i != idx] # Filter out the correct answer
        few_shot_questions = random.sample(wrong_options, self.num_examples) # Provide a random sample of questions
        few_shot_prompt = ""
        # Iterate through questions for few shot prompt engineering
        for data_question in few_shot_questions:
            formatted_data_question= self._format(data_question)
            answer_label = data_question["answer"]
            answer_index = ["option_one", "option_two", "option_three", "option_four"].index(answer_label) # Retrieve answer from label
            few_shot_prompt += f"Q: {formatted_data_question} A: {chr(65 + answer_index)}\n" # Create few shot prompt

        cur_item = self.data[idx]
        formatted_current = self._format(cur_item) # Format current item
        input_text = f"{few_shot_prompt}Q: {formatted_current} A:"

        correct_answer_label = cur_item["answer"] # Retrieve answer of current item
        answer_index = ["option_one", "option_two", "option_three", "option_four"].index(correct_answer_label) # Retrieve index of answer
        target_text = chr(65 + answer_index) # Assign label to target text

        # Initialise input and target encodings
        input_encoding = self.tokenizer(input_text, max_length=self.max_length, padding="max_length", truncation=True, return_tensors="pt")
        target_encoding = self.tokenizer(target_text, max_length=4, padding="max_length", truncation=True, return_tensors="pt")

        # Squeeze the encoded input IDs and attention masks to ensure compatibility with model
        item = {
            'input_ids': input_encoding['input_ids'].squeeze(0),
            'attention_mask': input_encoding['attention_mask'].squeeze(0),
            'labels': target_encoding['input_ids'].squeeze(0).to(torch.long)
        }
        return item




def evaluate_model(model, tokenizer, data_loader, device='cuda'):
    model.eval() # Set model to evaluate phase
    correct_answers = 0
    total_points = 0
    badges = 0

    # train_data, val_data = train_test_split(data, test_size=0.2)
    # Iterate through batches from data loader
    for batch in data_loader:
        # Set input IDs, attention masks and labels to device
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)

        with torch.no_grad():
            outputs = model.generate(input_ids=input_ids, attention_mask=attention_mask, max_length=4)

        predictions = [tokenizer.decode(generated_id, skip_special_tokens=True).strip() for generated_id in outputs] # Predictions are decoded
        labels = [tokenizer.decode(label_id, skip_special_tokens=True).strip() for label_id in labels] # Labels are decoded

        for prediction, label in zip(predictions, labels):
            if prediction == label:
                correct_answers += 1 # Increment correct answers by 1 if prediction matches label
                total_points += 100
                if total_points >= 1000:
                    badges += 1
                    total_points %= 1000

    accuracy = correct_answers / len(data_loader.dataset) # Compute accuracy
    return accuracy, total_points, badges

def main():
    # Retrieve data
    data_json = list(uploaded.keys())[0]

    with open(data_json, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # Initialise the tokenizer and preprocess the dataset
    tokenizer = T5Tokenizer.from_pretrained('t5-small')

    # Initialise the dataset for training
    dataset = T5DataPreprocessingFewShot(data=data, tokenizer=tokenizer, num_examples=3)

    # Initialise the model
    model = T5ForConditionalGeneration.from_pretrained('t5-small')

    batch_size = 2  # from Hyperopt hyperparameter optimization
    learning_rate = 5e-5  # from Hyperopt hyperparameter optimization
    num_train_epochs = 3  # from Hyperopt hyperparameter optimization


    # Split the dataset into training and validation sets
    train_data, val_data = train_test_split(dataset, test_size=0.2, random_state=42)
    train_loader = DataLoader(train_data, batch_size=8, shuffle=True)
    val_loader = DataLoader(val_data, batch_size=8)

    model = T5ForConditionalGeneration.from_pretrained('t5-small') # Load T5 model
    model = get_lora_model(model_id='t5-small', lora_rank=32, lora_alpha=4.0) # Apply LoRA modifications
    model = model.to("cuda") # Prepare for training

    # Data collator so that each batch has the same length through padding
    data_collator = DataCollatorForSeq2Seq(tokenizer, model=model, label_pad_token_id=-100, pad_to_multiple_of=8)

    # Declare training arguments
    training_arguments = Seq2SeqTrainingArguments(
        output_dir="./results_t5_lora",
        evaluation_strategy="steps",
        eval_steps=400,
        logging_steps=100,
        save_steps=800,
        warmup_steps=500,
        per_device_train_batch_size=batch_size,
        per_device_eval_batch_size=batch_size,
        learning_rate=learning_rate,
        num_train_epochs=num_train_epochs,
        weight_decay=0.01,
    )

    # Declare trainer arguments
    trainer = Seq2SeqTrainer(
        model=model,
        args=training_arguments,
        train_dataset=train_data,
        eval_dataset=val_data,
        data_collator=data_collator,
        tokenizer=tokenizer,
    )

    # Train
    trainer.train()

    # Evaluate with rewards
    accuracy, total_points,  badges = evaluate_model(model, tokenizer, val_loader)

    print(f"Accuracy: {accuracy:.2%}")
    print(f"Total Points: {total_points}, Total Badges Earned: { badges}")

if __name__ == '__main__':
    main()


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss,Validation Loss
400,0.8481,0.407234
800,0.3485,0.320199
1200,0.2982,0.35449


Checkpoint destination directory ./results_t5_lora/checkpoint-800 already exists and is non-empty. Saving will proceed but saved results may be invalid.


Accuracy: 32.20%
Total Points: 600, Total Badges Earned: 6
