# spaCy - Natural Language Inference Task (SNLI)

In [None]:
import spacy
import numpy as np
import spacy
from tqdm.notebook import tqdm
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from torch.amp import autocast, GradScaler
from models.esim import ESIM

from utils.utils import (
    load_nli_data,
    get_embeddings_spacy,
    tokenize_to_ids,
    compute_lengths,
    evaluate,
)

# Load the large English spaCy pipeline with the listed components excluded.
# In this notebook `nlp` is only handed to tokenize_to_ids and
# get_embeddings_spacy, and its vector table is inspected below.
nlp = spacy.load(
    "en_core_web_lg",
    exclude=[
        "parser",
        "tagger",
        "ner",
        "textcat",
        "lemmatizer",
        "attribute_ruler",
        "tok2vec",
    ],
)
# Report the length of the pipeline's vector table.
print("unique vector size", len(nlp.vocab.vectors))

# Hyper-parameters shared by the cells below.
MAX_LEN = 64  # default max_length forwarded to tokenize_to_ids
BATCH_SIZE = 128  # batch size of the train/dev/test DataLoaders
EPOCHS = 3  # number of iterations of the training loop
HIDDEN = 512  # hidden_size passed to ESIM
NUM_CLASSES = 3  # num_classes passed to ESIM; label_map has three entries
LR = 1e-3  # learning rate of the Adam optimizer
NR_UNK = 100  # nr_unk forwarded to tokenize_to_ids and get_embeddings_spacy (presumably the number of unknown-token slots; confirm in utils)

# Prefer the GPU when CUDA is usable; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

print("Using device:", device)

# Class name -> integer id for the three NLI labels.
label_map = dict(entailment=0, contradiction=1, neutral=2)

# Inverse lookup: integer id -> class name.
id2label = {idx: name for name, idx in label_map.items()}

In [None]:
# Read the three SNLI splits (train / dev / test) from their JSON-lines files,
# in that order.
snli_train, snli_dev, snli_test = map(
    load_nli_data,
    (
        "data/snli_1.0_train.jsonl",
        "data/snli_1.0_dev.jsonl",
        "data/snli_1.0_test.jsonl",
    ),
)

# 1-Tokenization and Preprocessing NLI Pairs
* The following function uses `tokenize_to_ids` to convert the sentences into token IDs. The process is repeated for every NLI split (train, dev, and test).

In [None]:
def process_and_save_nli_data(df, name, nlp, max_length=MAX_LEN, nr_unk=NR_UNK):
    """Tokenize both sentence columns of ``df`` and store them with the labels.

    Writes a compressed archive ``data/{name}.npz`` holding three entries:
    ``sentence1_tokens``, ``sentence2_tokens`` and ``label``, then prints a
    confirmation line.

    Args:
        df: Object indexable by the keys ``"sentence1"``, ``"sentence2"`` and
            ``"label"`` (the value returned by ``load_nli_data``).
        name: Split name; used as the archive's file stem and in the log line.
        nlp: spaCy pipeline forwarded to ``tokenize_to_ids``.
        max_length: Forwarded to ``tokenize_to_ids`` (presumably the
            pad/truncate length; confirm in ``utils``).
        nr_unk: Forwarded to ``tokenize_to_ids``.
    """
    # Both sentence columns are tokenized with exactly the same settings.
    tokenizer_kwargs = {"nlp": nlp, "max_length": max_length, "nr_unk": nr_unk}
    first_ids = tokenize_to_ids(df["sentence1"], **tokenizer_kwargs)
    second_ids = tokenize_to_ids(df["sentence2"], **tokenizer_kwargs)

    arrays = {
        "sentence1_tokens": first_ids,
        "sentence2_tokens": second_ids,
        "label": df["label"],
    }
    np.savez_compressed(f"data/{name}.npz", **arrays)

    print(f"Saved {name}")


# Tokenize and persist every split under its short name.
for split_name, split_df in (
    ("train", snli_train),
    ("dev", snli_dev),
    ("test", snli_test),
):
    process_and_save_nli_data(split_df, split_name, nlp)

# 2-Extract the Embedding Matrix from the spaCy Pipeline

In [None]:
# Build the embedding matrix from the spaCy pipeline and persist it so the
# training cells can reload it from disk.
embedding_matrix = get_embeddings_spacy(nlp, NR_UNK)
np.save("data/embedding_matrix.npy", embedding_matrix)
# Log the file name that np.save actually wrote above.
print("Saved embedding_matrix.npy with shape", embedding_matrix.shape)

# 3- Load the Processed NLI Dataset and Embedding Matrix

In [None]:
# Load the archives written by process_and_save_nli_data.
# Each one exposes "sentence1_tokens", "sentence2_tokens" and "label".
train_data = np.load("data/train.npz")
dev_data = np.load("data/dev.npz")
test = np.load("data/test.npz")

# Convert every array to an int64 tensor: token ids for both sentences plus
# the class label of each pair.
x1_train = torch.tensor(train_data["sentence1_tokens"], dtype=torch.long)
x2_train = torch.tensor(train_data["sentence2_tokens"], dtype=torch.long)
y_train = torch.tensor(train_data["label"], dtype=torch.long)

x1_dev = torch.tensor(dev_data["sentence1_tokens"], dtype=torch.long)
x2_dev = torch.tensor(dev_data["sentence2_tokens"], dtype=torch.long)
y_dev = torch.tensor(dev_data["label"], dtype=torch.long)

x1_test = torch.tensor(test["sentence1_tokens"], dtype=torch.long)
x2_test = torch.tensor(test["sentence2_tokens"], dtype=torch.long)
y_test = torch.tensor(test["label"], dtype=torch.long)

# Datasets & loaders: each item is a (sentence1 ids, sentence2 ids, label) triple.
train_ds = TensorDataset(x1_train, x2_train, y_train)
dev_ds = TensorDataset(x1_dev, x2_dev, y_dev)

# Only the training loader shuffles; dev and test keep the stored order.
train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True)
dev_loader = DataLoader(dev_ds, batch_size=BATCH_SIZE)
test_loader = DataLoader(TensorDataset(x1_test, x2_test, y_test), batch_size=BATCH_SIZE)

# Load the embedding matrix saved earlier and convert it to a float32 tensor.
emb_mat = torch.tensor(np.load("data/embedding_matrix.npy"), dtype=torch.float32)

# 4-Train the ESIM Model
* Initialize the ESIM model with the embedding matrix and compile it.

In [None]:
# Model, optimizer, scaler, loss
# Build ESIM on top of the loaded embedding matrix and move it to the
# selected device.
model = ESIM(
    embedding_matrix=emb_mat,
    hidden_size=HIDDEN,
    num_classes=NUM_CLASSES,
    dropout=0.5,
    padding_idx=0,
).to(device)

# Wrap the model with torch.compile using the inductor backend.
model = torch.compile(model, backend="inductor")

opt = optim.Adam(model.parameters(), lr=LR)
# Tie the gradient scaler to the selected device instead of the "cuda"
# default. Scaling is enabled only on CUDA; on the CPU fallback the scaler
# is an explicit no-op, so no "CUDA is not available" warning is raised.
scaler = GradScaler(device.type, enabled=device.type == "cuda")
crit = nn.CrossEntropyLoss()

* Train the ESIM model

In [None]:
for epoch in range(1, EPOCHS + 1):
    # Put the model in training mode and reset the per-epoch running totals.
    model.train()
    running_loss = running_correct = samples = 0
    pbar = tqdm(train_loader, desc=f"Epoch {epoch}/{EPOCHS}", leave=False)

    for x1, x2, y in pbar:
        # Move the batch to the training device.
        x1 = x1.to(device)
        x2 = x2.to(device)
        y = y.to(device)
        # Per-example lengths expected by the model's forward pass.
        l1 = compute_lengths(x1)
        l2 = compute_lengths(x2)

        opt.zero_grad()
        # The forward pass and the loss run under autocast; backward and the
        # optimizer step go through the gradient scaler.
        with autocast(device_type=device.type):
            logits = model(x1, l1, x2, l2)
            loss = crit(logits, y)
        scaler.scale(loss).backward()
        scaler.step(opt)
        scaler.update()

        # Weight the batch loss by batch size so the running mean is per-sample.
        bs = y.size(0)
        samples += bs
        running_loss += bs * loss.item()
        running_correct += logits.argmax(1).eq(y).sum().item()

        pbar.set_postfix(
            loss=f"{running_loss / samples:.4f}",
            acc=f"{running_correct / samples:.4f}",
        )

    # End-of-epoch check on the dev split; with return_loss=True the result
    # is unpacked as (loss, accuracy).
    dev_loss, dev_acc = evaluate(model, dev_loader, crit, device, return_loss=True)
    print(f"→ Dev  loss: {dev_loss:.4f}, acc: {dev_acc:.4f}")

* Save the model, then reload it later to measure its accuracy on the held-out (out-of-sample) test set.

In [None]:
# Serialize the whole model object (the torch.compile wrapper), not just its
# state_dict.
# NOTE(review): saving model.state_dict() is the generally recommended format;
# confirm that the compiled wrapper round-trips on the PyTorch version in use.
torch.save(model, "data/esim_nli_model.pt")

In [None]:
# Reload the pickled model, mapping its tensors onto the active device.
# weights_only=False performs a full unpickle, which can execute arbitrary
# code, so only load checkpoint files you created or otherwise trust.
model = torch.load("data/esim_nli_model.pt", map_location=device, weights_only=False)
# Switch to evaluation mode before testing.
model.eval()

In [None]:
# Score the reloaded model on the held-out (out-of-sample) test split.

# With return_loss=False the result is used as a single accuracy value, and
# no loss criterion is passed.
test_acc = evaluate(
    model=model, loader=test_loader, crit=None, device=device, return_loss=False
)
print(f"Test acc: {test_acc:.4f}")