# Proof-of-Concept NLI Model

## 1. Load Dependencies

In [None]:
import torch
from transformers import DistilBertTokenizerFast, DistilBertForSequenceClassification
from torch.utils.data import DataLoader, TensorDataset
import numpy as np

## 2. Load Tokenizer and Model

In [None]:
# Class-index -> label-name mapping used for training labels and for decoding
# predictions in this notebook.
label_mapping = {0: 'entailment', 1: 'contradiction', 2: 'neutral'}

# Tokenizer matching the pretrained DistilBERT checkpoint.
tokenizer = DistilBertTokenizerFast.from_pretrained('distilbert-base-uncased')

# Load DistilBERT with a 3-way classification head. The label names are
# registered on the model config so that they are stored with the checkpoint
# when it is saved; otherwise a reloaded model only knows the generic
# LABEL_0 / LABEL_1 / LABEL_2 names and the mapping above would have to be
# duplicated by every consumer.
model = DistilBertForSequenceClassification.from_pretrained(
    'distilbert-base-uncased',
    num_labels=len(label_mapping),
    id2label=dict(label_mapping),
    label2id={name: idx for idx, name in label_mapping.items()},
)

## 3. Prepare Minimal NLI Dataset

In [None]:
# Four hand-written NLI examples, one row per (premise, hypothesis, label).
# Label ids: 0 = entailment, 1 = contradiction, 2 = neutral.
nli_examples = [
    ("A man inspects the uniform of a figure.", "The man is sleeping.", 1),
    ("Two women are embracing.", "The women are arguing.", 1),
    ("A soccer game with multiple players playing.", "Some men are playing a sport.", 0),
    ("A black dog is running through the snow.", "A pet is enjoying the weather.", 2),
]

# Split the rows into the three parallel lists used by the tokenization step.
premises = [premise for premise, _, _ in nli_examples]
hypotheses = [hypothesis for _, hypothesis, _ in nli_examples]
labels = [label for _, _, label in nli_examples]

## 4. Tokenize Data

In [None]:
# Tokenize each (premise, hypothesis) pair as a single two-segment input.
# padding=True / truncation=True yield equal-length rows so the result can be
# returned as PyTorch tensors.
encodings = tokenizer(premises, hypotheses, truncation=True, padding=True, return_tensors="pt")

# Each dataset item is an (input_ids, attention_mask, label) tuple; the
# training loop unpacks batches in this same order.
dataset = TensorDataset(encodings['input_ids'], encodings['attention_mask'], torch.tensor(labels))

# Small batch size for the PoC. shuffle=True re-orders the examples every
# epoch so the model does not see identical batches in an identical order
# on each pass over the data.
dataloader = DataLoader(dataset, batch_size=2, shuffle=True)

## 5. Fine-tuning

In [None]:
# Train on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model.train()

optimizer = torch.optim.AdamW(model.parameters(), lr=5e-5)
num_epochs = 3
# Print the loss every `log_every` batches. 1 means every batch, which is
# appropriate for this tiny dataset; raise it when training on more data.
log_every = 1
num_batches = len(dataloader)

for epoch in range(num_epochs):
    for i, batch in enumerate(dataloader):
        # Batches arrive as (input_ids, attention_mask, labels); move each
        # tensor to the same device as the model.
        input_ids, attention_mask, batch_labels = (t.to(device) for t in batch)

        # Gradients accumulate across backward() calls, so reset them first.
        optimizer.zero_grad()

        # Passing labels makes the model compute the loss itself.
        outputs = model(input_ids, attention_mask=attention_mask, labels=batch_labels)
        loss = outputs.loss

        # Backpropagate and apply one optimizer update.
        loss.backward()
        optimizer.step()

        if (i + 1) % log_every == 0:
            print(f"Epoch [{epoch+1}/{num_epochs}], Batch [{i+1}/{num_batches}], Loss: {loss.item():.4f}")

## 6. Inference Example

In [None]:
# Put the model in evaluation mode before running a prediction.
model.eval()

test_premise = "A cat is on a mat."
test_hypothesis = "A feline is resting."

# Encode the single premise/hypothesis pair and move every tensor to the
# device the model lives on.
inputs = tokenizer(test_premise, test_hypothesis, return_tensors="pt", truncation=True, padding=True)
inputs = {name: tensor.to(device) for name, tensor in inputs.items()}

# No gradients are needed for inference.
with torch.no_grad():
    outputs = model(**inputs)

# Pick the highest-scoring class and translate its index to a label name.
logits = outputs.logits
predicted_class_idx = logits.argmax(dim=1).item()
predicted_label = label_mapping[predicted_class_idx]

print(
    f"Premise: {test_premise}",
    f"Hypothesis: {test_hypothesis}",
    f"Predicted Label: {predicted_label} (Class index: {predicted_class_idx})",
    sep="\n",
)

## 7. Save Model

In [None]:
import os

# Output directory for the fine-tuned checkpoint. This is a relative path, so
# it is resolved against the current working directory (presumably the
# notebook's own folder when run from Jupyter - confirm for other launchers).
model_save_path = '../src/nli_model/'
os.makedirs(model_save_path, exist_ok=True)

# Write the model first, then the tokenizer, into the same directory so the
# pair can be reloaded together from one path.
for artifact in (model, tokenizer):
    artifact.save_pretrained(model_save_path)

print(f"Model and tokenizer saved to {os.path.abspath(model_save_path)}")