In [1]:
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
from collections import Counter, defaultdict
import string
import pandas as pd
import numpy as np
import torch
import matplotlib.pyplot as plt
from scipy.stats import norm
from sklearn.metrics import accuracy_score
import lime
from lime.lime_text import LimeTextExplainer

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
dataset = load_dataset("snli")

# Initialize model + tokenizer
model_name = "google/electra-small-discriminator"
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=3)
if hasattr(model, 'electra'):
        for param in model.electra.parameters():
            if not param.is_contiguous():
                param.data = param.data.contiguous()
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)

def preprocess(example):
    return tokenizer(example['premise'], example['hypothesis'], truncation=True, padding='max_length', max_length=tokenizer.model_max_length)

dataset = dataset.filter(lambda ex: ex['label'] != -1)
encoded_dataset = dataset.map(preprocess, batched=True)
encoded_dataset = encoded_dataset.rename_column("label", "labels")  # Ensure labels are named correctly
encoded_dataset.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])

Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-small-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="epoch",
    learning_rate=5e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=3,
    weight_decay=0.01,
    save_total_limit=2,
    logging_dir="./logs",
    logging_steps=500,
)


trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=encoded_dataset['train'],
    eval_dataset=encoded_dataset['validation'],
)
trainer.train()

# Save the trained model and tokenizer
output_dir = "./electra-snli-model"
trainer.save_model(output_dir)
tokenizer.save_pretrained(output_dir)

In [3]:
output_dir = "./model"

model = AutoModelForSequenceClassification.from_pretrained(output_dir)
tokenizer = AutoTokenizer.from_pretrained(output_dir)

In [None]:
#Evaluate model accuracy on SNLI validation

# Define the compute_metrics function
def compute_metrics(pred):
    predictions = pred.predictions.argmax(axis=-1)
    labels = pred.label_ids
    acc = accuracy_score(labels, predictions)
    return {"accuracy": acc}

training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="epoch",
    learning_rate=5e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=3,
    weight_decay=0.01,
    save_total_limit=2,
    logging_dir="./logs",
    logging_steps=500,
)

# Reinitialize the Trainer with compute_metrics
trainer = Trainer(
    model=model,
    args=training_args,
    eval_dataset=encoded_dataset['validation'],
    compute_metrics=compute_metrics,
)

# Evaluate the model
validation_results = trainer.evaluate()
print(f"Validation accuracy: {validation_results['eval_accuracy']:.4f}")


100%|██████████| 616/616 [13:18<00:00,  1.30s/it]

Validation accuracy: 0.8953





In [None]:
# Evaluate model accuracy on MNLI validation
mnli = load_dataset("multi_nli")

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Generating train split: 100%|██████████| 392702/392702 [00:00<00:00, 529445.07 examples/s]
Generating validation_matched split: 100%|██████████| 9815/9815 [00:00<00:00, 446033.35 examples/s]
Generating validation_mismatched split: 100%|██████████| 9832/9832 [00:00<00:00, 409586.49 examples/s]


In [12]:
mnli = mnli.filter(lambda ex: ex['label'] != -1)
def preprocess_mnli(examples):
    return tokenizer(
        examples["premise"],  # MNLI uses "premise" and "hypothesis"
        examples["hypothesis"],
        truncation=True,
        padding="max_length",
        max_length=128,
    )

# Apply preprocessing
tokenized_mnli = mnli.map(preprocess_mnli, batched=True)

# Ensure column naming consistency
tokenized_mnli = tokenized_mnli.rename_column("label", "labels")  # Ensure the label column is named 'labels'
tokenized_mnli.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])

Filter: 100%|██████████| 392702/392702 [00:13<00:00, 29151.66 examples/s]
Filter: 100%|██████████| 9815/9815 [00:00<00:00, 29204.66 examples/s]
Filter: 100%|██████████| 9832/9832 [00:00<00:00, 29697.22 examples/s]
Map: 100%|██████████| 392702/392702 [00:59<00:00, 6612.24 examples/s]
Map: 100%|██████████| 9815/9815 [00:01<00:00, 6363.68 examples/s]
Map: 100%|██████████| 9832/9832 [00:01<00:00, 7061.63 examples/s]


In [13]:
mnli_results = trainer.evaluate(eval_dataset=tokenized_mnli["validation_matched"])
print(f"MNLI Matched accuracy: {mnli_results['eval_accuracy']:.4f}")

mnli_results_mismatched = trainer.evaluate(eval_dataset=tokenized_mnli["validation_mismatched"])
print(f"MNLI Mismatched accuracy: {mnli_results_mismatched['eval_accuracy']:.4f}")


100%|██████████| 614/614 [02:03<00:00,  4.97it/s]


MNLI Matched accuracy: 0.7018


100%|██████████| 615/615 [02:09<00:00,  4.76it/s]

MNLI Mismatched accuracy: 0.7122



