# Imports

In [1]:
import json
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import T5Tokenizer, T5ForConditionalGeneration, AdamW

from tqdm import tqdm

In [2]:
# Locations of the aNLI splits: each .jsonl file holds the examples and the
# matching .lst file holds their labels (both are consumed by load_data).
train_jsonl = "alphanli-train-dev/train.jsonl"
train_labels = "alphanli-train-dev/train-labels.lst"
dev_jsonl = "alphanli-train-dev/dev.jsonl"
dev_labels = "alphanli-train-dev/dev-labels.lst"

# Tokenizer belonging to the pretrained "t5-base" checkpoint.
tokenizer = T5Tokenizer.from_pretrained("t5-base")

You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565


# Helpers

In [3]:
def load_data(jsonl_file, labels_file):
    """Load aNLI examples and pair each one with its gold label.

    Blank lines in either file are ignored, so a trailing newline does not
    break parsing. Both files are read as UTF-8.

    Args:
        jsonl_file: Path to a JSON-lines file. Every non-blank line must be a
            JSON object containing the keys "obs1", "obs2", "hyp1" and "hyp2".
        labels_file: Path to a text file with one integer label per non-blank
            line, in the same order as the examples in ``jsonl_file``.

    Returns:
        A list with one dict per example, in file order, holding the keys
        "obs1", "obs2", "hyp1", "hyp2" and "label" (an int).

    Raises:
        ValueError: If a label is not an integer, a line is not valid JSON, or
            the two files do not contain the same number of records.
        KeyError: If an example lacks one of the required keys.
    """
    fields = ("obs1", "obs2", "hyp1", "hyp2")
    data = []
    with open(jsonl_file, "r", encoding="utf-8") as f_json, \
            open(labels_file, "r", encoding="utf-8") as f_labels:
        # int() tolerates surrounding whitespace, including the newline.
        labels = [int(line) for line in f_labels if line.strip()]
        for line in f_json:
            if not line.strip():
                continue
            if len(data) == len(labels):
                # Fail loudly instead of surfacing a bare IndexError.
                raise ValueError(
                    f"{jsonl_file} has more examples than {labels_file} has labels "
                    f"({len(labels)})"
                )
            entry = json.loads(line)
            record = {field: entry[field] for field in fields}
            record["label"] = labels[len(data)]
            data.append(record)

    # Left-over labels mean the files are misaligned; pairing them by position
    # would silently attach wrong labels, so refuse to continue.
    if len(data) != len(labels):
        raise ValueError(
            f"{jsonl_file} has {len(data)} examples but {labels_file} has "
            f"{len(labels)} labels"
        )
    return data

# Read both splits into memory as lists of example dicts.
train_data = load_data(jsonl_file=train_jsonl, labels_file=train_labels)
dev_data = load_data(jsonl_file=dev_jsonl, labels_file=dev_labels)

def format_input(entry):
    """Render one example as the text prompt given to the model.

    Args:
        entry: Mapping with the keys "obs1", "obs2", "hyp1" and "hyp2".

    Returns:
        A single-line string listing each field as ``name: value``, separated
        by spaces and followed by the question asking which hypothesis is
        more plausible.
    """
    segments = [f"{field}: {entry[field]}" for field in ("obs1", "obs2", "hyp1", "hyp2")]
    segments.append("Which hypothesis is more plausible?")
    return " ".join(segments)

class aNLIDataset(Dataset):
    """Dataset that turns aNLI entries into tokenized model inputs on access.

    Each item is tokenized when it is requested, not ahead of time.
    """

    def __init__(self, data, tokenizer, max_len=128):
        """Store the examples and tokenization settings.

        Args:
            data: Sequence of example dicts; each needs the fields read by
                ``format_input`` plus a "label" entry.
            tokenizer: Callable tokenizer accepting ``max_length``,
                ``padding``, ``truncation`` and ``return_tensors`` and
                returning a mapping with "input_ids" and "attention_mask".
            max_len: Length the input prompt is padded or truncated to.
        """
        self.data = data
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        """Return the number of examples."""
        return len(self.data)

    def __getitem__(self, idx):
        """Tokenize the example at ``idx``.

        Returns:
            A dict with "input_ids" and "attention_mask" for the prompt and
            "labels" holding the token ids of the label text.
        """
        entry = self.data[idx]
        input_text = format_input(entry)
        label_text = str(entry["label"])  # "1" or "2"

        inputs = self.tokenizer(input_text, max_length=self.max_len, padding="max_length", truncation=True, return_tensors="pt")
        labels = self.tokenizer(label_text, max_length=2, padding="max_length", truncation=True, return_tensors="pt")

        # Drop only the leading batch dimension the tokenizer is expected to add
        # for a single text. A bare squeeze() would also collapse a length-1
        # sequence dimension (e.g. max_len=1) into a 0-d tensor.
        return {
            "input_ids": inputs["input_ids"].squeeze(0),
            "attention_mask": inputs["attention_mask"].squeeze(0),
            "labels": labels["input_ids"].squeeze(0)
        }

# Wrap each split so examples are tokenized one item at a time on access.
train_dataset = aNLIDataset(data=train_data, tokenizer=tokenizer)
dev_dataset = aNLIDataset(data=dev_data, tokenizer=tokenizer)

# Only the training loader shuffles; the dev loader keeps the default order.
train_loader = DataLoader(dataset=train_dataset, shuffle=True, batch_size=8)
dev_loader = DataLoader(dataset=dev_dataset, batch_size=8)

# Train

In [6]:
# Seed torch's RNG so the shuffled batch order and any stochastic layers are
# repeatable from one run to the next.
SEED = 42
torch.manual_seed(SEED)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = T5ForConditionalGeneration.from_pretrained("t5-base").to(device)
# torch.optim.AdamW replaces the deprecated transformers.AdamW. eps and
# weight_decay are pinned to the old class's defaults (1e-6 and 0.0) because
# torch's own defaults differ and would otherwise change the update rule.
optimizer = torch.optim.AdamW(model.parameters(), lr=3e-5, eps=1e-6, weight_decay=0.0)

epochs = 5
for epoch in range(epochs):
    model.train()
    total_loss = 0.0
    for batch in tqdm(train_loader):
        optimizer.zero_grad()

        input_ids = batch["input_ids"].to(device)
        attention_mask = batch["attention_mask"].to(device)
        labels = batch["labels"].to(device)

        # Passing labels makes the model return the training loss directly.
        outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
        loss = outputs.loss
        total_loss += loss.item()

        loss.backward()
        optimizer.step()

    # Mean of the per-batch losses over the epoch.
    avg_loss = total_loss / len(train_loader)
    print(f"Epoch {epoch+1}: Loss = {avg_loss:.4f}")

    # Checkpoint after every epoch so any epoch can be evaluated later.
    output_dir = f"t5-aNLI_epoch_{epoch+1}"
    model.save_pretrained(output_dir)
    tokenizer.save_pretrained(output_dir)
    print(f"Saved model checkpoint to {output_dir}")

# Also store the weights from the end of training under a fixed name.
final_output_dir = "t5-aNLI_final"
model.save_pretrained(final_output_dir)
tokenizer.save_pretrained(final_output_dir)
print(f"Saved final model to {final_output_dir}")

100%|██████████| 21207/21207 [33:39<00:00, 10.50it/s]


Epoch 1: Loss = 0.1561
Saved model checkpoint to t5-aNLI_epoch_1


100%|██████████| 21207/21207 [33:36<00:00, 10.52it/s]


Epoch 2: Loss = 0.0893
Saved model checkpoint to t5-aNLI_epoch_2


100%|██████████| 21207/21207 [33:48<00:00, 10.46it/s]


Epoch 3: Loss = 0.0575
Saved model checkpoint to t5-aNLI_epoch_3


100%|██████████| 21207/21207 [33:46<00:00, 10.46it/s]


Epoch 4: Loss = 0.0393
Saved model checkpoint to t5-aNLI_epoch_4


100%|██████████| 21207/21207 [33:45<00:00, 10.47it/s]


Epoch 5: Loss = 0.0279
Saved model checkpoint to t5-aNLI_epoch_5
Saved final model to t5-aNLI_final


# Eval

In [17]:
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

model_dir = "t5-aNLI_epoch_5"
# Define the device in this cell so evaluation does not depend on a `device`
# variable left behind by the training cell, and so the model and the input
# batches are guaranteed to be placed on the same device.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
tokenizer = T5Tokenizer.from_pretrained(model_dir)
model = T5ForConditionalGeneration.from_pretrained(model_dir).to(device)

model.eval()
all_preds = []
all_labels = []

with torch.no_grad():
    for batch in tqdm(dev_loader, desc="Evaluating"):
        input_ids = batch["input_ids"].to(device)
        attention_mask = batch["attention_mask"].to(device)

        # The target is a single-digit label, so generation is kept very short.
        generated_ids = model.generate(input_ids=input_ids, attention_mask=attention_mask, max_length=2)

        # Decode both predictions and gold label ids back to text.
        preds = [tokenizer.decode(g, skip_special_tokens=True).strip() for g in generated_ids]
        true_labels = [tokenizer.decode(lbl, skip_special_tokens=True).strip() for lbl in batch["labels"]]

        # NOTE: int() raises ValueError if the model emits a non-integer string.
        preds = [int(p) for p in preds]
        true_labels = [int(t) for t in true_labels]

        all_preds.extend(preds)
        all_labels.extend(true_labels)

accuracy = accuracy_score(all_labels, all_preds)
# Precision/recall/F1 are reported with label 1 as the positive class.
precision, recall, f1, _ = precision_recall_fscore_support(all_labels, all_preds, average="binary", pos_label=1)

print(f"Dev Accuracy: {accuracy:.4f}")
print(f"Dev Precision: {precision:.4f}")
print(f"Dev Recall: {recall:.4f}")
print(f"Dev F1-score: {f1:.4f}")

Evaluating: 100%|██████████| 192/192 [00:06<00:00, 27.45it/s]

Dev Accuracy: 0.6749
Dev Precision: 0.6845
Dev Recall: 0.6722
Dev F1-score: 0.6783



