## Import

In [2]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
from transformers import AutoTokenizer, AutoModel
import pandas as pd
import numpy as np

# Define Classes

In [3]:
class LogicalReasoningModel(nn.Module):
    def __init__(self, model_name, num_labels):
        super(LogicalReasoningModel, self).__init__()
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModel.from_pretrained(model_name)
        self.classifier = nn.Sequential(
            nn.Linear(self.model.config.hidden_size, 256),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, num_labels)
        )

    def forward(self, context, question, options):
        # input for the transformer model
        inputs = [
            self.tokenizer(
                f"{context} [SEP] {question} [SEP] {option}",
                truncation=True,
                padding="max_length",
                max_length=512,
                return_tensors="pt"
            ) for option in options
        ]

        # Process each option and aggregate
        outputs = []
        for input_data in inputs:
            input_data = {k: v.to(self.model.device) for k, v in input_data.items()}
            output = self.model(**input_data).last_hidden_state[:, 0, :]  # CLS token output
            outputs.append(output)

        # Stack outputs and classify
        logits = torch.stack([self.classifier(output) for output in outputs], dim=1)
        return logits

class LogicalReasoningDataset(Dataset):
    def __init__(self, dataframe, tokenizer, max_length, is_train=True):
        self.data = dataframe
        self.tokenizer = tokenizer
        self.max_length = max_length
        self.is_train = is_train

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        row = self.data.iloc[index]
        context = row['context']
        question = row['question']
        options = eval(row['answers'])
        label = row['label'] if self.is_train else -1

        return context, question, options, label


## Training Configuration

In [5]:
# config
MODEL_NAME = "bert-base-uncased"
NUM_LABELS = 4
BATCH_SIZE = 16
EPOCHS = 10
LEARNING_RATE = 2e-5

def load_data(file_path):
    return pd.read_csv(file_path)

train_file_path = "train.csv"
test_file_path = "test.csv"
submission_file_path = "sample_submission.csv"

train_data = load_data(train_file_path)
test_data = load_data(test_file_path)
submission_template = load_data(submission_file_path)

train_dataset = LogicalReasoningDataset(train_data, AutoTokenizer.from_pretrained(MODEL_NAME), max_length=512, is_train=True)
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)

# Initialise
model = LogicalReasoningModel(model_name=MODEL_NAME, num_labels=NUM_LABELS)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Optimiser and loss
optimizer = torch.optim.AdamW(model.parameters(), lr=LEARNING_RATE)
criterion = nn.CrossEntropyLoss()


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

## Training loop

In [6]:

# Training loop
for epoch in range(EPOCHS):
    model.train()
    total_loss = 0
    for batch in train_loader:
        context, question, options, labels = batch
        labels = labels.to(device)

        logits = model(context[0], question[0], options[0])

        optimizer.zero_grad()
        loss = criterion(logits.view(-1, NUM_LABELS), labels)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    print(f"Epoch {epoch + 1}/{EPOCHS}, Loss: {total_loss / len(train_loader)}")

# Save model
torch.save(model.state_dict(), "logical_reasoning_model.pth")


Epoch 1/9, Loss: 1.3919760584831238
Epoch 2/9, Loss: 1.3881862188207692
Epoch 3/9, Loss: 1.388550208765885
Epoch 4/9, Loss: 1.384347879064494
Epoch 5/9, Loss: 1.3705515018824874
Epoch 6/9, Loss: 1.2897828985904825
Epoch 7/9, Loss: 1.0378175092154536
Epoch 8/9, Loss: 0.6379564823261623
Epoch 9/9, Loss: 0.35939797795024414


## Prediction

In [7]:

# Prediction on test set
def predict_test_data(model, test_data, tokenizer, device):
    model.eval()
    test_dataset = LogicalReasoningDataset(test_data, tokenizer, max_length=512, is_train=False)
    test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False)

    predictions = []
    with torch.no_grad():
        for context, question, options, _ in test_loader:
            logits = model(context[0], question[0], options[0])
            probabilities = torch.softmax(logits, dim=-1)
            best_option = torch.argmax(probabilities, dim=-1).item()
            predictions.append(best_option)

    return predictions

# Generate predictions
predictions = predict_test_data(model, test_data, AutoTokenizer.from_pretrained(MODEL_NAME), device)

# Create submission file
submission_template['label'] = predictions
submission_template.to_csv("submission.csv", index=False)
