<a href="https://colab.research.google.com/github/eriksali/DNN_2023_NLP/blob/main/NLP_Pre_trained_Transformer_based_Models_reload_zero_shot.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install datasets !pip install datasets 
!pip install apache_beam
!pip install transformers

In [None]:
from datasets import load_dataset, DatasetDict
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer

# Load the dataset
dataset = load_dataset("emotion")

# Define the emotion labels
emotions = dataset["train"].features["label"].names

# Split the dataset
dataset_dict = DatasetDict({
    "train": dataset["train"].shuffle().select(range(5000)),
    "test": dataset["test"].shuffle().select(range(1000))
})

# Load the tokenizer and the model
tokenizer_bert = AutoTokenizer.from_pretrained("bert-base-cased")
model_bert = AutoModelForSequenceClassification.from_pretrained("bert-base-cased", num_labels=len(emotions))

tokenizer_distilbert = AutoTokenizer.from_pretrained("distilbert-base-uncased")
model_distilbert = AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased", num_labels=len(emotions))

# Tokenize the dataset
'''def tokenize_dataset(dataset):
    return tokenizer_bert(dataset["text"], padding=True, truncation=True)

tokenized_dataset = dataset_dict.map(tokenize_dataset, batched=True)'''

def tokenize_dataset(dataset):
    return tokenizer_bert(dataset["text"], padding=True, truncation=True, max_length=64)

tokenized_dataset = dataset_dict.map(tokenize_dataset, batched=True)


# Define the training arguments
training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=3,
    weight_decay=0.01,
)

# Define the trainer
trainer_bert = Trainer(
    model=model_bert,
    args=training_args,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["test"],
)

trainer_distilbert = Trainer(
    model=model_distilbert,
    args=training_args,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["test"],
)

# Fine-tune the model
trainer_bert.train()
trainer_distilbert.train()

# Evaluate the model
eval_results_bert = trainer_bert.evaluate()
eval_results_distilbert = trainer_distilbert.evaluate()

print(f"BERT evaluation results: {eval_results_bert}")
print(f"DistilBERT evaluation results: {eval_results_distilbert}")




  0%|          | 0/3 [00:00<?, ?it/s]

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at b

Map:   0%|          | 0/5000 [00:00<?, ? examples/s]

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

Epoch,Training Loss,Validation Loss
1,No log,0.358048
2,0.666000,0.250329
3,0.666000,0.263554




Epoch,Training Loss,Validation Loss
1,No log,1.55841
2,1.573200,1.406392
3,1.573200,1.292105


BERT evaluation results: {'eval_loss': 0.26355400681495667, 'eval_runtime': 218.3504, 'eval_samples_per_second': 4.58, 'eval_steps_per_second': 0.289, 'epoch': 3.0}
DistilBERT evaluation results: {'eval_loss': 1.292104721069336, 'eval_runtime': 120.0263, 'eval_samples_per_second': 8.332, 'eval_steps_per_second': 0.525, 'epoch': 3.0}


In [None]:


# Save the fine-tuned BERT model
bert_model_path = "./bert_emotion_classification"
model_bert.save_pretrained(bert_model_path)

# Save the fine-tuned DistilBERT model
distilbert_model_path = "./distilbert_emotion_classification"
model_distilbert.save_pretrained(distilbert_model_path)


In [None]:
# Define output directory
output_dir = './emotion_classification_bert/'

import os
# Create output directory if it doesn't exist
if not os.path.exists(output_dir):
    os.makedirs(output_dir)

# Save model to output directory
model_bert.save_pretrained(output_dir)

# Save tokenizer to output directory
tokenizer_bert.save_pretrained(output_dir)

!zip -r emotion_classification_bert.zip emotion_classification_bert

# Define output directory
output_dir = './emotion_classification_distilbert/'

import os
# Create output directory if it doesn't exist
if not os.path.exists(output_dir):
    os.makedirs(output_dir)

# Save model to output directory
model_distilbert.save_pretrained(output_dir)

# Save tokenizer to output directory
tokenizer_distilbert.save_pretrained(output_dir)

!zip -r emotion_classification_distilbert.zip emotion_classification_distilbert



updating: emotion_classification_bert/ (stored 0%)
updating: emotion_classification_bert/pytorch_model.bin (deflated 7%)
updating: emotion_classification_bert/vocab.txt (deflated 49%)
updating: emotion_classification_bert/config.json (deflated 54%)
updating: emotion_classification_bert/tokenizer.json (deflated 70%)
updating: emotion_classification_bert/special_tokens_map.json (deflated 42%)
updating: emotion_classification_bert/tokenizer_config.json (deflated 46%)
updating: emotion_classification_distilbert/ (stored 0%)
updating: emotion_classification_distilbert/pytorch_model.bin (deflated 8%)
updating: emotion_classification_distilbert/config.json (deflated 52%)
  adding: emotion_classification_distilbert/vocab.txt (deflated 53%)
  adding: emotion_classification_distilbert/tokenizer.json (deflated 71%)
  adding: emotion_classification_distilbert/special_tokens_map.json (deflated 42%)
  adding: emotion_classification_distilbert/tokenizer_config.json (deflated 42%)


In [2]:
!unzip -uq "/content/emotion_classification_bert.zip" -d "/content/" 

from transformers import BertForSequenceClassification, BertTokenizer
# Load saved model
model_bert = BertForSequenceClassification.from_pretrained('emotion_classification_bert')

# Load saved tokenizer
tokenizer_bert = BertTokenizer.from_pretrained('emotion_classification_bert')

sentence = "Hello, how are you?"
inputs = tokenizer_bert(sentence, return_tensors='pt')
outputs = model_bert(**inputs)
print(outputs)


!unzip -uq "/content/emotion_classification_distilbert.zip" -d "/content/" 

from transformers import DistilBertTokenizer, DistilBertModel
# Load saved model
model_distilbert = DistilBertModel.from_pretrained('emotion_classification_distilbert')

# Load saved tokenizer
tokenizer_distilbert = DistilBertTokenizer.from_pretrained('emotion_classification_distilbert')

sentence = "Hello, how are you?"
inputs = tokenizer_distilbert(sentence, return_tensors='pt')
outputs = model_distilbert(**inputs)
print(outputs)

SequenceClassifierOutput(loss=None, logits=tensor([[-0.4119,  2.9381, -0.9873,  0.1655, -0.1946, -0.9263]],
       grad_fn=<AddmmBackward0>), hidden_states=None, attentions=None)


Some weights of the model checkpoint at emotion_classification_distilbert were not used when initializing DistilBertModel: ['classifier.weight', 'classifier.bias', 'pre_classifier.bias', 'pre_classifier.weight']
- This IS expected if you are initializing DistilBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


BaseModelOutput(last_hidden_state=tensor([[[-0.4762, -0.8302,  0.0108,  ..., -0.0368,  1.4159,  0.0619],
         [ 0.0206, -0.4679,  0.2187,  ...,  0.2304,  1.4955, -0.3915],
         [-0.4497,  0.0608,  0.5119,  ..., -0.3182,  0.9448, -0.1339],
         ...,
         [-0.0048, -1.0545,  0.6014,  ...,  0.2451,  1.2031, -0.5100],
         [-0.2260, -0.8109, -0.1103,  ..., -0.2533,  1.4551, -0.1857],
         [ 0.7890,  0.1797, -0.5541,  ...,  0.3301, -0.0665, -0.5065]]],
       grad_fn=<NativeLayerNormBackward0>), hidden_states=None, attentions=None)


In [1]:
# zero-shot-classification

from transformers import pipeline
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Load the zero-shot classification pipeline
classifier = pipeline("zero-shot-classification")

# Define the test data
test_data = [
    {"text": "That's so gay."},
    {"text": "I love this song!"},
    {"text": "This pizza is terrible."},
    {"text": "I can't believe she said that."},
]

# Define the candidate labels
candidate_labels = ["offensive", "non-offensive"]

# Evaluate the model on the test data
true_labels = ["offensive", "non-offensive", "offensive", "non-offensive"]
predicted_labels = [classifier(example["text"], candidate_labels)["labels"][0] for example in test_data]

accuracy = accuracy_score(true_labels, predicted_labels)
precision = precision_score(true_labels, predicted_labels, pos_label="offensive")
recall = recall_score(true_labels, predicted_labels, pos_label="offensive")
f1 = f1_score(true_labels, predicted_labels, pos_label="offensive")

print(f"Accuracy: {accuracy}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1 score: {f1}")


No model was supplied, defaulted to facebook/bart-large-mnli and revision c626438 (https://huggingface.co/facebook/bart-large-mnli).
Using a pipeline without specifying a model name and revision in production is not recommended.


Downloading (…)lve/main/config.json:   0%|          | 0.00/1.15k [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Accuracy: 0.75
Precision: 0.6666666666666666
Recall: 1.0
F1 score: 0.8


In [6]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer

model_name = "textattack/roberta-base-MNLI"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

test_data = [
    {"text": "I am feeling happy today", "label": "Positive"},
    {"text": "I am not feeling well", "label": "Negative"},
    {"text": "The movie was awesome", "label": "Positive"},
    {"text": "I hate it when it rains", "label": "Negative"},
    {"text": "The food was delicious", "label": "Positive"},
]

inputs = tokenizer(
    [sample["text"] for sample in test_data],
    return_tensors="pt",
    padding=True,
    truncation=True,
    max_length=128,
)

outputs = model(**inputs)
predictions = outputs.logits.argmax(dim=-1)

from sklearn.metrics import classification_report


labels = [sample["label"] for sample in test_data]
label_map = {"Positive": 0, "Negative": 1}
label_indices = [label_map[label] for label in labels]
print(classification_report(label_indices, predictions, digits=4))




##################################################################

from transformers import AutoModelForSequenceClassification, AutoTokenizer

from transformers import BertForSequenceClassification, BertTokenizer
# Load saved model
model = BertForSequenceClassification.from_pretrained('emotion_classification_bert')

# Load saved tokenizer
tokenizer = BertTokenizer.from_pretrained('emotion_classification_bert')

'''##model_name = "mrm8488/emotion_classification_bert"
model_name = "emotion_classification_bert"
tokenizer = AutoTokenizer.from_prepared(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)'''

## Prepare the input for the model: We need to tokenize the input text and convert it to a format that can be input to the fine-tuned model.

inputs = tokenizer(
    [sample["text"] for sample in test_data],
    return_tensors="pt",
    padding=True,
    truncation=True,
    max_length=128,
)

outputs = model(**inputs)
predictions = outputs.logits.argmax(dim=-1)

from sklearn.metrics import classification_report

labels = [sample["label"] for sample in test_data]
label_map = {"Positive": 0, "Negative": 1}
label_indices = [label_map[label] for label in labels]
print(classification_report(label_indices, predictions, digits=4))



Some weights of the model checkpoint at textattack/roberta-base-MNLI were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0     0.0000    0.0000    0.0000         3
           1     0.4000    1.0000    0.5714         2

    accuracy                         0.4000         5
   macro avg     0.2000    0.5000    0.2857         5
weighted avg     0.1600    0.4000    0.2286         5

              precision    recall  f1-score   support

           0     0.0000    0.0000    0.0000         3
           1     0.2500    0.5000    0.3333         2

    accuracy                         0.2000         5
   macro avg     0.1250    0.2500    0.1667         5
weighted avg     0.1000    0.2000    0.1333         5



In [None]:
from transformers import BertTokenizer, BertForSequenceClassification
from torch.utils.data import DataLoader
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import torch

# Load the labeled dataset
dataset = [
    {"text": "I'm so happy!", "label": "happy"},
    {"text": "I'm feeling sad today.", "label": "sad"},
    {"text": "I'm so angry right now.", "label": "angry"},
    # more examples...
]

# Define the label mapping
label_map = {"happy": 0, "sad": 1, "angry": 2, ...}

# Define the batch size and number of epochs
batch_size = 8
num_epochs = 3

# Load the emotion_classification_bert model and tokenizer
model_name = "mrm8488/emotion_classification_bert"
tokenizer = BertTokenizer.from_pretrained(model_name)
model = BertForSequenceClassification.from_pretrained(model_name, num_labels=len(label_map))

# Tokenize the dataset
tokenized_dataset = tokenizer(
    [example["text"] for example in dataset],
    padding=True,
    truncation=True,
    max_length=128 # for example, set the maximum sequence length to 128
)

dataloader = DataLoader(tokenized_dataset, batch_size=batch_size, shuffle=True)
optimizer = torch.optim.Adam(model.parameters())
loss_fn = torch.nn.CrossEntropyLoss()

for epoch in range(num_epochs):
    print(f"Epoch {epoch + 1}/{num_epochs}")
    for batch in dataloader:
        input_ids = batch["input_ids"].to(device)
        attention_mask = batch["attention_mask"].to(device)
        labels = batch["labels"].to(device)
            optimizer.zero_grad()
            outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
            loss = loss_fn(outputs.logits, labels)
            
            loss.backward()
            optimizer.step()

# Evaluate the model after each epoch
predictions = []
true_labels = []
for batch in dataloader:
    input_ids = batch["input_ids"].to(device)
    attention_mask = batch["attention_mask"].to(device)
    labels = batch["labels"].to(device)
    
    with torch.no_grad():
        outputs = model(input_ids, attention_mask=attention_mask)
    
    _, batch_predictions = torch.max(outputs.logits, dim=1)
    predictions.extend(batch_predictions.cpu().numpy())
    true_labels.extend(labels.cpu().numpy())

acc = accuracy_score(true_labels, predictions)
precision = precision_score(true_labels, predictions, average="weighted")
recall = recall_score(true_labels, predictions, average="weighted")
f1 = f1_score(true_labels, predictions, average="weighted")

print(f"Accuracy: {acc:.2f}, Precision: {precision:.2f}, Recall: {recall:.2f}, F




In [None]:
inputs = tokenizer_bert('Hello, how are you?', return_tensors='pt')
outputs = model_bert(**inputs)
outputs

SequenceClassifierOutput(loss=None, logits=tensor([[-0.4119,  2.9381, -0.9873,  0.1655, -0.1946, -0.9263]],
       grad_fn=<AddmmBackward0>), hidden_states=None, attentions=None)

In [None]:
import datasets
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments

# Load the dataset
dataset = datasets.load_dataset('go_emotions', split='train[:80%]')

# Load the tokenizer and encode the dataset
tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
encoded_dataset = dataset.map(lambda examples: tokenizer(examples['text'], padding=True, truncation=True), batched=True)

# Load the pre-trained BERT model
model = AutoModelForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=28)

# Define the training arguments
training_args = TrainingArguments(
    output_dir='./results',          # output directory
    evaluation_strategy='epoch',     # evaluate every epoch
    learning_rate=2e-5,              # learning rate
    per_device_train_batch_size=32,  # batch size for training
    per_device_eval_batch_size=64,   # batch size for evaluation
    num_train_epochs=3,              # total number of training epochs
    weight_decay=0.01,               # weight decay
    push_to_hub=False,               # whether to push the fine-tuned model to the Hugging Face model hub
    logging_dir='./logs',            # directory for storing logs
    logging_steps=10,
    load_best_model_at_end=True,     # load the best model at the end of training
    metric_for_best_model="accuracy",
)

# Define the Trainer object and fine-tune the BERT model
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=encoded_dataset,
    compute_metrics=compute_metrics,
)
trainer.train()

# Save the fine-tuned BERT model
trainer.save_model('./models/bert_emotion_classification')

# Fine-tune the DistilBERT model
distilbert_model = AutoModelForSequenceClassification.from_pretrained('distilbert-base-uncased', num_labels=28)
distilbert_trainer = Trainer(
    model=distilbert_model,
    args=training_args,
    train_dataset=encoded_dataset,
    compute_metrics=compute_metrics,
)
distilbert_trainer.train()

# Save the fine-tuned DistilBERT model
distilbert_trainer.save_model('./models/distilbert_emotion_classification')

# Load the test dataset and encode it
test_dataset = datasets.load_dataset('go_emotions', split='train[80%:]')
encoded_test_dataset = test_dataset.map(lambda examples: tokenizer(examples['text'], padding=True, truncation=True), batched=True)

# Define the function to compute the metrics
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = logits.argmax(axis=-1)
    accuracy = (predictions == labels).mean()
    precision = precision_score(labels, predictions, average='macro')
    recall = recall_score(labels, predictions, average='macro')
    f1 = f1_score(labels, predictions, average='macro')
    return {
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1': f1,
    }

# Evaluate the fine-tuned BERT model on the test dataset
bert_eval_result = trainer.evaluate(encoded_test_dataset)
print("BERT Evaluation Result:")
for key, value in bert_eval_result.items():
    print(f"{key}: {value:.4f}")

# Evaluate the fine-tuned DistilBERT model on the test dataset
distilbert_eval_result = distilbert_trainer.evaluate(encoded_test_dataset)
print("DistilBERT Evaluation Result:")
for key, value in distilbert_eval_result.items():
  print(f"{key}: {value:.4f}")


In [None]:
# Define output directory
output_dir = './emotion_classification_bert/'

# Create output directory if it doesn't exist
if not os.path.exists(output_dir):
    os.makedirs(output_dir)

# Save model to output directory
model_bert.save_pretrained(output_dir)

# Load saved model
model_bert = BertForSequenceClassification.from_pretrained(output_dir)

# Save tokenizer to output directory
tokenizer.save_pretrained(output_dir)
# Load saved tokenizer
tokenizer = BertTokenizer.from_pretrained(output_dir)


In [None]:
import torch
from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer
from datasets import load_dataset

# Load the emotion dataset
dataset = load_dataset("emotion")

# Instantiate the zero-shot classification pipeline
classifier = pipeline(
    "zero-shot-classification",
    model="joeddav/bart-large-mnli-yahoo-answers",
    tokenizer="joeddav/bart-large-mnli-yahoo-answers",
)

# Example prompt for offensive classification
prompt = "Is this text offensive? Answer yes or no.\n"

# Use a few examples from the dataset to create classification prompts
text = dataset["train"]["text"][:5]
labels = dataset["train"]["label"][:5]
prompts = [prompt + t for t in text]

# Use the zero-shot classifier on the test set
test_text = dataset["test"]["text"]
test_labels = dataset["test"]["label"]
zero_shot_preds = classifier(prompts, test_text)

# Evaluate zero-shot classification
correct = 0
total = len(test_labels)
for i, pred in enumerate(zero_shot_preds):
    label_pred = pred["labels"][0]
    if label_pred == labels[i]:
        correct += 1

accuracy = correct / total
print(f"Zero-shot classification accuracy: {accuracy:.4f}")


In [None]:
from transformers import pipeline

zero_shot_classifier = pipeline("zero-shot-classification", model="EleutherAI/gpt-neo-2.7B")

prompts = ["Is this text about anger, fear, joy, love, sadness, or surprise?",
           "What is the emotion expressed in this text?",
           "Can you classify the emotion in this text?",
           "Which of these emotions best describes the sentiment in this text?"]

zero_shot_results = []
for prompt in prompts:
    zero_shot_results.append(zero_shot_classifier(test_data["text"], candidate_labels=["anger", "fear", "joy", "love", "sadness", "surprise"], prompt=prompt))

print("Zero-shot classification results:", zero_shot_results)


Downloading (…)lve/main/config.json:   0%|          | 0.00/1.46k [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/10.7G [00:00<?, ?B/s]