In [None]:
!pip install transformers pandas scikit-learn

In [None]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import pandas as pd
from tqdm.notebook import tqdm
from sklearn.metrics import accuracy_score, f1_score

# Run on the GPU when torch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Checkpoint identifier shared by the tokenizer and the classifier.
model_name = "MoritzLaurer/DeBERTa-v3-base-mnli-fever-anli"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)
model = model.to(device)

# Load the evaluation set and encode the string labels as integers
# (entailment -> 1, non-entailment -> 0); any other label maps to NaN.
df = pd.read_csv('dataset_core_v2.csv')
df['numeric_label'] = df['label'].map({"entailment": 1, "non-entailment": 0})

def predict_with_probabilities(premise, hypothesis):
    """Classify one premise/hypothesis pair as entailment or non-entailment.

    Uses the module-level ``tokenizer``, ``model`` and ``device``.

    Args:
        premise: Premise text.
        hypothesis: Hypothesis text.

    Returns:
        A tuple ``(predicted_label, entailment_prob, non_entailment_prob)``
        where ``predicted_label`` is 1 (entailment) or 0 (non-entailment) and
        the two probabilities are Python floats. Ties resolve to 0.
    """
    # truncation=True alone is a no-op here: the tokenizer reports no
    # predefined maximum length, so nothing is truncated unless max_length is
    # given explicitly. Use the model's position limit (512 as a fallback).
    max_length = getattr(model.config, "max_position_embeddings", None) or 512
    inputs = tokenizer(
        premise,
        hypothesis,
        truncation=True,
        max_length=max_length,
        return_tensors="pt",
    ).to(device)

    # Inference only: skip autograd bookkeeping to save memory and time.
    with torch.no_grad():
        outputs = model(**inputs)

    probs = torch.softmax(outputs.logits, dim=-1)
    # NOTE(review): assumes class index 0 is "entailment" and indices 1 and 2
    # are the two non-entailment classes -- confirm against
    # model.config.id2label if the checkpoint is ever changed.
    entailment_prob = probs[0, 0].item()
    non_entailment_prob = (probs[0, 1] + probs[0, 2]).item()
    predicted_label = 1 if entailment_prob > non_entailment_prob else 0
    return predicted_label, entailment_prob, non_entailment_prob

# Score every premise/hypothesis pair in the dataset, keeping the inputs,
# the gold label, the model's decision and both probabilities per row.
output_columns = [
    'Premise',
    'Hypothesis',
    'True Label',
    'Predicted Label',
    'Entailment Prob',
    'Non-Entailment Prob',
    'Alternation',
]

predictions = []
for _, sample in tqdm(df.iterrows(), total=len(df)):
    label, p_entail, p_non_entail = predict_with_probabilities(
        sample['premise'], sample['hypothesis']
    )
    predictions.append([
        sample['premise'],
        sample['hypothesis'],
        sample['numeric_label'],
        label,
        p_entail,
        p_non_entail,
        sample['alternation'],
    ])

# Persist the per-example results for later inspection.
detailed_df = pd.DataFrame(predictions, columns=output_columns)
detailed_df.to_csv('predictions_DeBERTa_base.csv', index=False)

# Per-alternation evaluation: accuracy plus one F1 score per class.
#
# F1 for a class is computed over ALL examples of the alternation, with that
# class as the positive label. Restricting the data to rows whose true label
# is the class itself removes every possible false positive, which pins
# precision at 1 and inflates the score. When a class never occurs among the
# true labels of an alternation its F1 is reported as None.
metrics = []
for alt in df['alternation'].unique():
    alt_df = detailed_df[detailed_df['Alternation'] == alt]
    y_true = alt_df['True Label']
    y_pred = alt_df['Predicted Label']

    accuracy = accuracy_score(y_true, y_pred)
    # zero_division=0 keeps the score at 0 (without a warning) when the class
    # is never predicted.
    f1_ent = (
        f1_score(y_true, y_pred, pos_label=1, zero_division=0)
        if (y_true == 1).any()
        else None
    )
    f1_non_ent = (
        f1_score(y_true, y_pred, pos_label=0, zero_division=0)
        if (y_true == 0).any()
        else None
    )
    metrics.append([alt, accuracy, f1_ent, f1_non_ent])

metrics_df = pd.DataFrame(metrics, columns=['Alternation', 'Accuracy', 'F1 Entailment', 'F1 Non-Entailment'])
metrics_df.to_csv('metrics_by_alternation_DeBERTa_base.csv', index=False)

# Accuracy over the whole dataset, irrespective of alternation.
overall_accuracy = accuracy_score(detailed_df['True Label'], detailed_df['Predicted Label'])
print(f"Overall Accuracy: {overall_accuracy:.4f}")


  0%|          | 0/500 [00:00<?, ?it/s]

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Overall Accuracy: 0.6340
