In [2]:
# ## 1. Install Required Packages (uncomment if needed)
# !pip install transformers torch pandas tqdm scikit-learn

# ## 2. Import Libraries
import pandas as pd
import torch
from transformers import BartTokenizer, BartForSequenceClassification
from sklearn.metrics import accuracy_score
from tqdm import tqdm

# ## 3. Load the Dataset
# Read the CSV, then rename its "sentence"/"binary" columns to the
# "text"/"label" names that the later cells index by.
data_path = r"C:\Users\yozev\PycharmProjects\Probing-Slang-Ambiguity-in-LLM\using_claude\manual_slang_dataset.csv"
df = pd.read_csv(data_path)
df = df.rename(columns={"sentence": "text", "binary": "label"})
print("Dataset size:", len(df))
display(df.head())  # `display` is the notebook-provided rich renderer

# ## 4. Initialize Tokenizer and Model (Pure PyTorch NLI)
model_name = "facebook/bart-large-mnli"
tokenizer = BartTokenizer.from_pretrained(model_name)
# `eval()` returns the module itself, so it can be chained onto the load;
# it switches off dropout so inference is deterministic.
model = BartForSequenceClassification.from_pretrained(model_name).eval()

# Use GPU if available, otherwise stay on CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# ## 5. Zero-Shot Prediction Function
def zero_shot_predict(sentences, batch_size=16):
    """
    Classify each sentence as "slang" or "literal" with a zero-shot NLI model.

    Every sentence is used as the premise against two fixed hypotheses,
    "This example is slang." and "This example is literal.". The hypothesis
    with the higher entailment probability becomes the prediction; a tie
    resolves to "literal".

    Relies on the module-level `tokenizer`, `model` and `device`.

    Args:
        sentences: Iterable of strings (list, tuple, pandas Series, generator).
        batch_size: Number of sentences scored per forward pass; must be >= 1.

    Returns:
        A list of "slang" / "literal" strings, one per input sentence, in
        input order. An empty input yields an empty list.

    Raises:
        ValueError: If `batch_size` is smaller than 1.
    """
    if batch_size < 1:
        # A negative step would make `range` empty and silently return [].
        raise ValueError(f"batch_size must be a positive integer, got {batch_size}")

    # Materialise once so slicing always yields a plain list of strings,
    # which is what the tokenizer accepts (a pandas Series slice is not).
    sentences = list(sentences)

    # Look the entailment class up in the model config instead of trusting a
    # fixed position. Falls back to 2, the index used by checkpoints whose
    # label order is [contradiction, neutral, entailment].
    label2id = getattr(model.config, "label2id", None) or {}
    entail_idx = next(
        (idx for name, idx in label2id.items() if str(name).lower().startswith("entail")),
        2,
    )

    # Order matters: index 0 is the "slang" hypothesis, index 1 the "literal" one.
    hypotheses = ("This example is slang.", "This example is literal.")

    predictions = []
    for start in tqdm(range(0, len(sentences), batch_size), desc="Zero-shot batches"):
        batch_texts = sentences[start : start + batch_size]

        entail_probs = []
        with torch.no_grad():
            for hypothesis in hypotheses:
                # Tokenize premise–hypothesis pairs and move them to the model's device
                enc = tokenizer(
                    batch_texts,
                    [hypothesis] * len(batch_texts),
                    padding=True,
                    truncation=True,
                    return_tensors="pt",
                )
                enc = {k: v.to(device) for k, v in enc.items()}

                # Softmax over the class dimension, keep only P(entailment)
                logits = model(**enc).logits
                entail_probs.append(torch.softmax(logits, dim=1)[:, entail_idx])

        # One vectorised comparison and a single device→host transfer per batch
        is_slang = (entail_probs[0] > entail_probs[1]).tolist()
        predictions.extend("slang" if flag else "literal" for flag in is_slang)

    return predictions

# ## 6. Run Zero-Shot Classification on Entire Dataset
sentences = df["text"].tolist()
# Gold labels: a value equal to 1 means slang, anything else is literal.
true_labels = df["label"].map(lambda lab: "slang" if lab == 1 else "literal").tolist()

predicted_labels = zero_shot_predict(sentences, batch_size=16)

# ## 7. Compute Accuracy
acc = accuracy_score(true_labels, predicted_labels)
print(f"\nZero-Shot NLI Accuracy: {acc:.4f}")


Dataset size: 736


Unnamed: 0,text,label
0,"That new song is absolutely fire, I can't stop...",1
1,The fire department responded quickly to the h...,0
2,"Your outfit is so bad, everyone's going to be ...",1
3,I got a bad grade on my chemistry test yesterday,0
4,"She killed that performance, the crowd went wild",1


Zero-shot batches: 100%|██████████| 46/46 [01:14<00:00,  1.63s/it]


Zero-Shot NLI Accuracy: 0.5136



