<a href="https://colab.research.google.com/github/eriksali/DNN_2023_NLP/blob/main/NLP_Pre_trained_Transformer_based_Models_zero-shot-classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install datasets 
!pip install apache_beam
!pip install transformers

In [None]:
import torch
import numpy as np
from datasets import load_dataset
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, f1_score

# Load the "emotion" dataset
dataset = load_dataset("emotion")

# Split the dataset into training, testing, and validation sets
train_data = dataset["train"]
val_data = dataset["validation"]
test_data = dataset["test"]

# Check the dataset size for each split
print("Total number of examples:", len(dataset))
print("Training set size:", len(train_data))
print("Validation set size:", len(val_data))
print("Testing set size:", len(test_data))

# Get the number of items from each class in each split
train_counts = np.unique(train_data["label"], return_counts=True)
val_counts = np.unique(val_data["label"], return_counts=True)
test_counts = np.unique(test_data["label"], return_counts=True)
class_names = dataset["train"].features["label"].names

for i, label_count in enumerate(train_counts[1]):
    print(f"Number of items from {class_names[i]} class in training set:", label_count)
    print(f"Number of items from {class_names[i]} class in testing set:", test_counts[1][i])
    print(f"Number of items from {class_names[i]} class in validation set:", val_counts[1][i])

# Calculate the average length of text in each split
train_lengths = [len(text.split()) for text in train_data["text"]]
val_lengths = [len(text.split()) for text in val_data["text"]]
test_lengths = [len(text.split()) for text in test_data["text"]]
print("Average length of text in the training set:", np.mean(train_lengths))
print("Average length of text in the validation set:", np.mean(val_lengths))
print("Average length of text in the testing set:", np.mean(test_lengths))

# Extract the input features (word embeddings) using CountVectorizer
vectorizer = CountVectorizer()
X_train = vectorizer.fit_transform(train_data["text"])
X_val = vectorizer.transform(val_data["text"])
X_test = vectorizer.transform(test_data["text"])
y_train = train_data["label"]
y_val = val_data["label"]
y_test = test_data["label"]

# Check the vocabulary size and the number of examples for each split
print("Vocabulary size:", X_train.shape[1])
print("Number of examples in the training set:", X_train.shape[0])
print("Number of examples in the validation set:", X_val.shape[0])
print("Number of examples in the testing set:", X_test.shape[0])




  0%|          | 0/3 [00:00<?, ?it/s]

Total number of examples: 3
Training set size: 16000
Validation set size: 2000
Testing set size: 2000
Number of items from sadness class in training set: 4666
Number of items from sadness class in testing set: 581
Number of items from sadness class in validation set: 550
Number of items from joy class in training set: 5362
Number of items from joy class in testing set: 695
Number of items from joy class in validation set: 704
Number of items from love class in training set: 1304
Number of items from love class in testing set: 159
Number of items from love class in validation set: 178
Number of items from anger class in training set: 2159
Number of items from anger class in testing set: 275
Number of items from anger class in validation set: 275
Number of items from fear class in training set: 1937
Number of items from fear class in testing set: 224
Number of items from fear class in validation set: 212
Number of items from surprise class in training set: 572
Number of items from surpri

In [None]:
from datasets import load_dataset, DatasetDict
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer

# Load the dataset
dataset = load_dataset("emotion")

# Define the emotion labels
emotions = dataset["train"].features["label"].names

# Split the dataset
dataset_dict = DatasetDict({
    "train": dataset["train"].shuffle().select(range(5000)),
    "test": dataset["test"].shuffle().select(range(1000))
})

# Load the tokenizer and the model
tokenizer_bert = AutoTokenizer.from_pretrained("bert-base-cased")
model_bert = AutoModelForSequenceClassification.from_pretrained("bert-base-cased", num_labels=len(emotions))

tokenizer_distilbert = AutoTokenizer.from_pretrained("distilbert-base-uncased")
model_distilbert = AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased", num_labels=len(emotions))

# Tokenize the dataset
'''def tokenize_dataset(dataset):
    return tokenizer_bert(dataset["text"], padding=True, truncation=True)

tokenized_dataset = dataset_dict.map(tokenize_dataset, batched=True)'''

def tokenize_dataset(dataset):
    return tokenizer_bert(dataset["text"], padding=True, truncation=True, max_length=64)

tokenized_dataset = dataset_dict.map(tokenize_dataset, batched=True)


# Define the training arguments
training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=3,
    weight_decay=0.01,
)

# Define the trainer
trainer_bert = Trainer(
    model=model_bert,
    args=training_args,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["test"],
)

trainer_distilbert = Trainer(
    model=model_distilbert,
    args=training_args,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["test"],
)

# Fine-tune the model
trainer_bert.train()
trainer_distilbert.train()

# Evaluate the model
eval_results_bert = trainer_bert.evaluate()
eval_results_distilbert = trainer_distilbert.evaluate()

print(f"BERT evaluation results: {eval_results_bert}")
print(f"DistilBERT evaluation results: {eval_results_distilbert}")




  0%|          | 0/3 [00:00<?, ?it/s]

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at b

Map:   0%|          | 0/5000 [00:00<?, ? examples/s]

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

Epoch,Training Loss,Validation Loss
1,No log,0.358048
2,0.666000,0.250329
3,0.666000,0.263554




Epoch,Training Loss,Validation Loss
1,No log,1.55841
2,1.573200,1.406392
3,1.573200,1.292105


BERT evaluation results: {'eval_loss': 0.26355400681495667, 'eval_runtime': 218.3504, 'eval_samples_per_second': 4.58, 'eval_steps_per_second': 0.289, 'epoch': 3.0}
DistilBERT evaluation results: {'eval_loss': 1.292104721069336, 'eval_runtime': 120.0263, 'eval_samples_per_second': 8.332, 'eval_steps_per_second': 0.525, 'epoch': 3.0}


In [None]:
# Define output directory
output_dir = './emotion_classification_bert/'

import os
# Create output directory if it doesn't exist
if not os.path.exists(output_dir):
    os.makedirs(output_dir)

# Save model to output directory
model_bert.save_pretrained(output_dir)

# Save tokenizer to output directory
tokenizer_bert.save_pretrained(output_dir)

!zip -r emotion_classification_bert.zip emotion_classification_bert

# Define output directory
output_dir = './emotion_classification_distilbert/'

import os
# Create output directory if it doesn't exist
if not os.path.exists(output_dir):
    os.makedirs(output_dir)

# Save model to output directory
model_distilbert.save_pretrained(output_dir)

# Save tokenizer to output directory
tokenizer_distilbert.save_pretrained(output_dir)

!zip -r emotion_classification_distilbert.zip emotion_classification_distilbert



updating: emotion_classification_bert/ (stored 0%)
updating: emotion_classification_bert/pytorch_model.bin (deflated 7%)
updating: emotion_classification_bert/vocab.txt (deflated 49%)
updating: emotion_classification_bert/config.json (deflated 54%)
updating: emotion_classification_bert/tokenizer.json (deflated 70%)
updating: emotion_classification_bert/special_tokens_map.json (deflated 42%)
updating: emotion_classification_bert/tokenizer_config.json (deflated 46%)
updating: emotion_classification_distilbert/ (stored 0%)
updating: emotion_classification_distilbert/pytorch_model.bin (deflated 8%)
updating: emotion_classification_distilbert/config.json (deflated 52%)
  adding: emotion_classification_distilbert/vocab.txt (deflated 53%)
  adding: emotion_classification_distilbert/tokenizer.json (deflated 71%)
  adding: emotion_classification_distilbert/special_tokens_map.json (deflated 42%)
  adding: emotion_classification_distilbert/tokenizer_config.json (deflated 42%)


In [None]:
!unzip -uq "/content/emotion_classification_bert.zip" -d "/content/" 

from transformers import BertForSequenceClassification, BertTokenizer
# Load saved model
model_bert = BertForSequenceClassification.from_pretrained('emotion_classification_bert')

# Load saved tokenizer
tokenizer_bert = BertTokenizer.from_pretrained('emotion_classification_bert')

sentence = "Hello, how are you?"
inputs = tokenizer_bert(sentence, return_tensors='pt')
outputs = model_bert(**inputs)
print(outputs)


!unzip -uq "/content/emotion_classification_distilbert.zip" -d "/content/" 

from transformers import DistilBertTokenizer, DistilBertModel
# Load saved model
model_distilbert = DistilBertModel.from_pretrained('emotion_classification_distilbert')

# Load saved tokenizer
tokenizer_distilbert = DistilBertTokenizer.from_pretrained('emotion_classification_distilbert')

sentence = "Hello, how are you?"
inputs = tokenizer_distilbert(sentence, return_tensors='pt')
outputs = model_distilbert(**inputs)
print(outputs)

SequenceClassifierOutput(loss=None, logits=tensor([[-0.4119,  2.9381, -0.9873,  0.1655, -0.1946, -0.9263]],
       grad_fn=<AddmmBackward0>), hidden_states=None, attentions=None)


Some weights of the model checkpoint at emotion_classification_distilbert were not used when initializing DistilBertModel: ['classifier.weight', 'classifier.bias', 'pre_classifier.bias', 'pre_classifier.weight']
- This IS expected if you are initializing DistilBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


BaseModelOutput(last_hidden_state=tensor([[[-0.4762, -0.8302,  0.0108,  ..., -0.0368,  1.4159,  0.0619],
         [ 0.0206, -0.4679,  0.2187,  ...,  0.2304,  1.4955, -0.3915],
         [-0.4497,  0.0608,  0.5119,  ..., -0.3182,  0.9448, -0.1339],
         ...,
         [-0.0048, -1.0545,  0.6014,  ...,  0.2451,  1.2031, -0.5100],
         [-0.2260, -0.8109, -0.1103,  ..., -0.2533,  1.4551, -0.1857],
         [ 0.7890,  0.1797, -0.5541,  ...,  0.3301, -0.0665, -0.5065]]],
       grad_fn=<NativeLayerNormBackward0>), hidden_states=None, attentions=None)


In [None]:
# zero-shot-classification

from transformers import pipeline
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Load the zero-shot classification pipeline
classifier = pipeline("zero-shot-classification")

# Define the test data
test_data = [
    {"text": "That's so gay."},
    {"text": "I love this song!"},
    {"text": "This pizza is terrible."},
    {"text": "I can't believe she said that."},
]

# Define the candidate labels
candidate_labels = ["offensive", "non-offensive"]

# Evaluate the model on the test data
true_labels = ["offensive", "non-offensive", "offensive", "non-offensive"]
predicted_labels = [classifier(example["text"], candidate_labels)["labels"][0] for example in test_data]

accuracy = accuracy_score(true_labels, predicted_labels)
precision = precision_score(true_labels, predicted_labels, pos_label="offensive")
recall = recall_score(true_labels, predicted_labels, pos_label="offensive")
f1 = f1_score(true_labels, predicted_labels, pos_label="offensive")

print(f"Accuracy: {accuracy}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1 score: {f1}")


No model was supplied, defaulted to facebook/bart-large-mnli and revision c626438 (https://huggingface.co/facebook/bart-large-mnli).
Using a pipeline without specifying a model name and revision in production is not recommended.


Downloading (…)lve/main/config.json:   0%|          | 0.00/1.15k [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Accuracy: 0.75
Precision: 0.6666666666666666
Recall: 1.0
F1 score: 0.8


In [None]:
# Load the required libraries
from transformers import pipeline, AutoTokenizer
from datasets import load_dataset

# Load the dataset
dataset = load_dataset("emotion")

# Define the emotion labels
emotions = dataset["train"].features["label"].names

# Split the dataset
dataset_dict = DatasetDict({
    "train": dataset["train"].shuffle().select(range(5000)),
    "test": dataset["test"].shuffle().select(range(1000))
})

# Load the pretrained tokenizer and model
tokenizer = AutoTokenizer.from_pretrained("textattack/roberta-base-MNLI")
classifier = pipeline("text-classification", model="textattack/roberta-base-MNLI", tokenizer=tokenizer)

# Perform zero-shot classification on the test set
predictions = []
for i in range(len(dataset_dict["test"])):
    text = dataset_dict["test"][i]["text"]
    result = classifier(text, hypothesis_template="This text is about {}.", multi_label=True, labels=emotions)
    predicted_labels = [emotions[index] for index, label in enumerate(result[0]["scores"]) if label > 0.5]
    predictions.append(predicted_labels)

# Report the classification result
true_labels = [dataset_dict["test"][i]["label"] for i in range(len(dataset_dict["test"]))]
accuracy = sum([predicted == true for predicted, true in zip(predictions, true_labels)]) / len(true_labels)
print("Accuracy:", accuracy)


In [None]:
!pip install sentencepiece
!pip install sacremoses

import torch
from transformers import pipeline, AutoTokenizer

# Load the pre-trained NLI model and tokenizer
model_name = "textattack/roberta-base-MNLI"
model = torch.hub.load('huggingface/transformers', model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Define a prompt for zero-shot classification using NLI
def classify_emotion(text):
    prompt = "Is this text expressing {emotion}? Answer yes or no.\n{text}"
    emotions = ["anger", "joy", "love", "sadness", "surprise"]
    encoded_inputs = tokenizer(prompt=prompt.format(emotion=emotions[0], text=text), padding=True, truncation=True, return_tensors="pt")
    logits = model(encoded_inputs['input_ids'], attention_mask=encoded_inputs['attention_mask'])[0]
    probabilities = logits.softmax(dim=1)
    prediction = "yes" if probabilities[0][1] > probabilities[0][0] else "no"
    return prediction


In [None]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer

model_name = "textattack/roberta-base-MNLI"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

test_data = [
    {"text": "I am feeling happy today", "label": "Positive"},
    {"text": "I am not feeling well", "label": "Negative"},
    {"text": "The movie was awesome", "label": "Positive"},
    {"text": "I hate it when it rains", "label": "Negative"},
    {"text": "The food was delicious", "label": "Positive"},
]

inputs = tokenizer(
    [sample["text"] for sample in test_data],
    return_tensors="pt",
    padding=True,
    truncation=True,
    max_length=128,
)

outputs = model(**inputs)
predictions = outputs.logits.argmax(dim=-1)

from sklearn.metrics import classification_report


labels = [sample["label"] for sample in test_data]
label_map = {"Positive": 0, "Negative": 1}
label_indices = [label_map[label] for label in labels]
print(classification_report(label_indices, predictions, digits=4))




##################################################################

from transformers import BertForSequenceClassification, BertTokenizer
# Load saved model
model = BertForSequenceClassification.from_pretrained('emotion_classification_bert')

# Load saved tokenizer
tokenizer = BertTokenizer.from_pretrained('emotion_classification_bert')

'''##model_name = "mrm8488/emotion_classification_bert"
model_name = "emotion_classification_bert"
tokenizer = AutoTokenizer.from_prepared(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)'''

## Prepare the input for the model: We need to tokenize the input text and convert it to a format that can be input to the fine-tuned model.

inputs = tokenizer(
    [sample["text"] for sample in test_data],
    return_tensors="pt",
    padding=True,
    truncation=True,
    max_length=128,
)

outputs = model(**inputs)
predictions = outputs.logits.argmax(dim=-1)

from sklearn.metrics import classification_report

labels = [sample["label"] for sample in test_data]
label_map = {"Positive": 0, "Negative": 1}
label_indices = [label_map[label] for label in labels]
print(classification_report(label_indices, predictions, digits=4))



Some weights of the model checkpoint at textattack/roberta-base-MNLI were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0     0.0000    0.0000    0.0000         3
           1     0.4000    1.0000    0.5714         2

    accuracy                         0.4000         5
   macro avg     0.2000    0.5000    0.2857         5
weighted avg     0.1600    0.4000    0.2286         5

              precision    recall  f1-score   support

           0     0.0000    0.0000    0.0000         3
           1     0.2500    0.5000    0.3333         2

    accuracy                         0.2000         5
   macro avg     0.1250    0.2500    0.1667         5
weighted avg     0.1000    0.2000    0.1333         5



In [None]:
!pip install datasets
from datasets import load_dataset
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, f1_score

# Load the "emotion" dataset
dataset = load_dataset("emotion")

# Split the dataset into training and test sets
train_data = dataset["train"]["text"]
train_labels = dataset["train"]["label"]
test_data = dataset["test"]["text"]
test_labels = dataset["test"]["label"]

# Vectorize the text using TfidfVectorizer
vectorizer = TfidfVectorizer()
train_vectors = vectorizer.fit_transform(train_data)
test_vectors = vectorizer.transform(test_data)

# Train a logistic regression classifier
classifier = LogisticRegression(max_iter=1000)
classifier.fit(train_vectors, train_labels)

# Make predictions on the test set
test_preds = classifier.predict(test_vectors)

# Compute accuracy and F1-score
accuracy = accuracy_score(test_labels, test_preds)
f1 = f1_score(test_labels, test_preds, average="weighted")

'''print(f"Accuracy: {accuracy:.4f}")
print(f"F1-score: {f1:.4f}")
'''
from sklearn.metrics import classification_report
print(classification_report(test_labels, test_preds, digits=4))


Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting datasets
  Downloading datasets-2.10.1-py3-none-any.whl (469 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m469.0/469.0 KB[0m [31m8.8 MB/s[0m eta [36m0:00:00[0m
Collecting xxhash
  Downloading xxhash-3.2.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (212 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m212.2/212.2 KB[0m [31m19.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting multiprocess
  Downloading multiprocess-0.70.14-py39-none-any.whl (132 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m132.9/132.9 KB[0m [31m14.6 MB/s[0m eta [36m0:00:00[0m
Collecting responses<0.19
  Downloading responses-0.18.0-py3-none-any.whl (38 kB)
Collecting aiohttp
  Downloading aiohttp-3.8.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m

Downloading builder script:   0%|          | 0.00/3.97k [00:00<?, ?B/s]

Downloading metadata:   0%|          | 0.00/3.28k [00:00<?, ?B/s]

Downloading readme:   0%|          | 0.00/8.78k [00:00<?, ?B/s]



Downloading and preparing dataset emotion/split to /root/.cache/huggingface/datasets/emotion/split/1.0.0/cca5efe2dfeb58c1d098e0f9eeb200e9927d889b5a03c67097275dfb5fe463bd...


Downloading data files:   0%|          | 0/3 [00:00<?, ?it/s]

Downloading data:   0%|          | 0.00/592k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/74.0k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/74.9k [00:00<?, ?B/s]

Extracting data files:   0%|          | 0/3 [00:00<?, ?it/s]

Generating train split:   0%|          | 0/16000 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/2000 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/2000 [00:00<?, ? examples/s]

Dataset emotion downloaded and prepared to /root/.cache/huggingface/datasets/emotion/split/1.0.0/cca5efe2dfeb58c1d098e0f9eeb200e9927d889b5a03c67097275dfb5fe463bd. Subsequent calls will reuse this data.


  0%|          | 0/3 [00:00<?, ?it/s]

              precision    recall  f1-score   support

           0     0.8893    0.9260    0.9073       581
           1     0.8310    0.9554    0.8889       695
           2     0.8230    0.5849    0.6838       159
           3     0.9024    0.8073    0.8522       275
           4     0.8700    0.7768    0.8208       224
           5     0.8378    0.4697    0.6019        66

    accuracy                         0.8610      2000
   macro avg     0.8589    0.7533    0.7925      2000
weighted avg     0.8617    0.8610    0.8558      2000



In [None]:
'''
The code performs text classification on the "emotion" dataset using different machine learning models and evaluates their performance using accuracy, F1 score, and classification report.

The code begins by installing the "datasets" package and importing necessary modules such as numpy, sklearn, and datasets.

It then loads the "emotion" dataset using the load_dataset function from the datasets module and splits it into training and testing sets.

Afterwards, it extracts the input features from the text using the CountVectorizer function from the sklearn.feature_extraction.text module and trains a Multinomial Naive Bayes classifier using the training set.

The trained model is then used to predict the labels for the test set, and the accuracy and F1 score are calculated using the accuracy_score and f1_score functions from the sklearn.metrics module.

The code then prints a classification report using the classification_report function from the sklearn.metrics module to display precision, recall, and F1 score for each class, as well as the overall accuracy.

Next, the code defines a random baseline model using the DummyClassifier function from the sklearn.dummy module and evaluates its performance using the same metrics as before.

Finally, the code defines a majority class baseline model that predicts the most frequent class in the training set for all test samples and evaluates its performance using the same metrics.

In summary, the code demonstrates how to load a dataset, extract features from text, train and evaluate different machine learning models for text classification, and compare their performance using different evaluation metrics.

'''


!pip install datasets

import numpy as np
from datasets import load_dataset
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, f1_score

# Load the "emotion" dataset
dataset = load_dataset("emotion")

# Split the dataset into training and testing sets
train_data = dataset["train"]
test_data = dataset["test"]

# Extract the input features (word embeddings) using CountVectorizer
vectorizer = CountVectorizer()
X_train = vectorizer.fit_transform(train_data["text"])
X_test = vectorizer.transform(test_data["text"])
y_train = train_data["label"]
y_test = test_data["label"]

# Train a bag-of-words classifier using Multinomial Naive Bayes
clf = MultinomialNB()
clf.fit(X_train, y_train)

# Make predictions on the test data
y_pred = clf.predict(X_test)

# Evaluate the model using accuracy and F1 score
accuracy = accuracy_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred, average="macro")

from sklearn.metrics import classification_report
print(classification_report(y_test, y_pred, digits=4))

################################################################
from sklearn.dummy import DummyClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Random baseline
random_model = DummyClassifier(strategy="uniform", random_state=42)
random_model.fit(X_train, y_train)
random_pred = random_model.predict(X_test)

# Evaluate performance of random model
random_accuracy = accuracy_score(y_test, random_pred)
random_precision = precision_score(y_test, random_pred, average='macro')
random_recall = recall_score(y_test, random_pred, average='macro')
random_f1 = f1_score(y_test, random_pred, average='macro')

print(classification_report(y_test, random_pred, digits=4))

#################################################################
import numpy as np

# get the majority class in the training set
majority_class = np.argmax(np.bincount(y_train))

# predict the majority class for all test samples
y_pred = np.full((len(y_test),), fill_value=majority_class)

# evaluate the performance of the baseline model
accuracy = np.mean(y_pred == y_test)
precision = precision_score(y_test, y_pred, average='macro')
recall = recall_score(y_test, y_pred, average='macro')
f1_score = f1_score(y_test, y_pred, average='macro')

print(classification_report(y_test, y_pred, digits=4))






Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/




  0%|          | 0/3 [00:00<?, ?it/s]

              precision    recall  f1-score   support

           0     0.7429    0.9398    0.8298       581
           1     0.7398    0.9698    0.8394       695
           2     0.9474    0.2264    0.3655       159
           3     0.9176    0.5673    0.7011       275
           4     0.8151    0.5312    0.6432       224
           5     0.0000    0.0000    0.0000        66

    accuracy                         0.7655      2000
   macro avg     0.6938    0.5391    0.5632      2000
weighted avg     0.7657    0.7655    0.7302      2000

              precision    recall  f1-score   support

           0     0.2812    0.1704    0.2122       581
           1     0.3343    0.1583    0.2148       695
           2     0.0706    0.1447    0.0948       159
           3     0.1503    0.1782    0.1631       275
           4     0.0712    0.1071    0.0856       224
           5     0.0515    0.2576    0.0859        66

    accuracy                         0.1610      2000
   macro avg     0.1599

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
# On a smaller model 

import torch
from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer
from datasets import load_dataset

# Load the emotion dataset
dataset = load_dataset("emotion")

# Load the smaller model and tokenizer
model_name = "distilbert-base-uncased"
model = AutoModelForSequenceClassification.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Instantiate the zero-shot classification pipeline with the smaller model
classifier = pipeline(
    "zero-shot-classification",
    model=model,
    tokenizer=tokenizer,
)

# Example prompt for offensive classification
prompt = "Is this text offensive? Answer yes or no.\n"

# Use a few examples from the dataset to create classification prompts
text = dataset["train"]["text"][:5]
labels = dataset["train"]["label"][:5]
prompts = [prompt + t for t in text]

# Use the zero-shot classifier on the test set
test_text = dataset["test"]["text"]
test_labels = dataset["test"]["label"]
zero_shot_preds = classifier(prompts, test_text)

# Evaluate zero-shot classification
correct = 0
total = len(test_labels)
for i, pred in enumerate(zero_shot_preds):
    label_pred = pred["labels"][0]
    if label_pred == labels[i]:
        correct += 1

from sklearn.metrics import classification_report
print(classification_report(test_labels, label_pred, digits=4))

print(prompts)


In [63]:
from transformers import pipeline
from datasets import load_dataset, DatasetDict

# Load the dataset
dataset = load_dataset("emotion")

# Define the emotion labels
emotions = dataset["train"].features["label"].names

# Split the dataset
dataset_dict = DatasetDict({
    "train": dataset["train"].shuffle().select(range(5000)),
    "test": dataset["test"].shuffle().select(range(1000))
})

# Get examples for each label from the training data
label_examples = {}
for label in emotions:
    examples = dataset_dict["train"].filter(lambda example: example["label"] == label)["text"]
    label_examples[label] = list(examples)[:5]

# Craft a prompt using the label examples
prompt = "Classify the emotion for the following sentence:\n\n"
for label in emotions:
    prompt += f"{label}:\n\n"
    for i, example in enumerate(label_examples[label]):
        prompt += f"{i+1}. {example}\n"
    prompt += "\n"



  0%|          | 0/3 [00:00<?, ?it/s]

Filter:   0%|          | 0/5000 [00:00<?, ? examples/s]

Filter:   0%|          | 0/5000 [00:00<?, ? examples/s]

Filter:   0%|          | 0/5000 [00:00<?, ? examples/s]

Filter:   0%|          | 0/5000 [00:00<?, ? examples/s]

Filter:   0%|          | 0/5000 [00:00<?, ? examples/s]

Filter:   0%|          | 0/5000 [00:00<?, ? examples/s]

In [64]:
prompt

'Classify the emotion for the following sentence:\n\nsadness:\n\n\njoy:\n\n\nlove:\n\n\nanger:\n\n\nfear:\n\n\nsurprise:\n\n\n'

In [117]:
from transformers import pipeline
from datasets import DatasetDict, load_dataset

# Load the dataset
dataset = load_dataset("emotion")

# Define the emotion labels
emotions = dataset["train"].features["label"].names

# Split the dataset
dataset_dict = DatasetDict({
    "train": dataset["train"].shuffle().select(range(5000)),
    "test": dataset["test"].shuffle().select(range(1000))
})

# Define the label to index mapping
label2idx = {"sadness": 0, "joy": 1, "love": 2, "anger": 3, "fear": 4, "surprise": 5}

'''# Convert the string labels to numbers
def convert_labels_to_numbers(labels):
    return [label2idx[label] for label in labels]

# Define the index to label mapping
idx2label = {idx: label for label, idx in label2idx.items()}'''

'''# Convert the numbers to string labels
def convert_numbers_to_labels(numbers):
    return [idx2label[number] for number in numbers]'''
    
'''# Craft a prompt using examples from the training data
prompt = f"Classify the emotion for the following sentence:\n\n"

for i, example in enumerate(dataset_dict["train"]):
    if i >= 500:
        break
    prompt += f"{example['text']}\nLabel: {emotions[example['label']]}.\n\n"'''

# Craft a prompt using examples from the training data
prompt = f"Classify the emotion for the following sentence:\n\n"

# Get examples for each label from the training data
label_examples = {}
for label in emotions:
    i = 0
    for example in dataset_dict["train"]:
    #for i, example in enumerate(dataset_dict["train"]):
      
      if i >= 5:
        break
      if example.get("label") == label2idx[label]:
        ##print(label2idx[label])
        ##print(example.get("label"))
        prompt += f"{example}\n"
        i += 1


      '''examples = [example["text"] for example in dataset_dict["train"] if example["label"] == label]
      label_examples[label] = examples[:5]'''

'''# Craft a prompt using the label examples
prompt = "Classify the emotion for the following sentence:\n\n"
for label in emotions:
    prompt += f"{label}:\n\n"
    for example in label_examples[label]:
        prompt += f"{example}\n"
    prompt += "\n"
'''
# Use the zero-shot-classification pipeline with the prompt and the test dataset
classifier = pipeline("zero-shot-classification", model="sentence-transformers/quora-distilbert-base")
#results = classifier(list(dataset_dict["test"]["text"]), prompt)

# Classify the emotions in the test set
true_labels = dataset_dict["test"]["label"]
sentences = dataset_dict["test"]["text"]
predicted_labels = []
for sentence in sentences:
    result = classifier(sentence, emotions, prompt=prompt)
    predicted_label = result["labels"][0]
    predicted_labels.append(label2idx[predicted_label])



# Print the predicted labels for the test dataset
##print(results["labels"])
from sklearn.metrics import classification_report
print(classification_report(true_labels, predicted_labels, digits=4))



  0%|          | 0/3 [00:00<?, ?it/s]

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at sentence-transformers/quora-distilbert-base and are newly initialized: ['classifier.weight', 'classifier.bias', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Failed to determine 'entailment' label id from the label2id mapping in the model config. Setting to -1. Define a descriptive label2id mapping in the model config to ensure correct outputs.


              precision    recall  f1-score   support

           0     0.0000    0.0000    0.0000       302
           1     0.0000    0.0000    0.0000       351
           2     0.0692    0.7468    0.1266        79
           3     0.0488    0.0462    0.0474       130
           4     0.1000    0.0097    0.0177       103
           5     0.1538    0.0571    0.0833        35

    accuracy                         0.0680      1000
   macro avg     0.0620    0.1433    0.0458      1000
weighted avg     0.0275    0.0680    0.0209      1000



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [118]:
from sklearn.metrics import classification_report
print(classification_report(true_labels, predicted_labels, digits=4))

              precision    recall  f1-score   support

           0     0.0000    0.0000    0.0000       302
           1     0.0000    0.0000    0.0000       351
           2     0.0692    0.7468    0.1266        79
           3     0.0488    0.0462    0.0474       130
           4     0.1000    0.0097    0.0177       103
           5     0.1538    0.0571    0.0833        35

    accuracy                         0.0680      1000
   macro avg     0.0620    0.1433    0.0458      1000
weighted avg     0.0275    0.0680    0.0209      1000



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
true_labels

In [None]:
predicted_labels

In [112]:
from sklearn.metrics import classification_report
print(classification_report(true_labels, predicted_labels, digits=4))

ValueError: ignored

In [76]:
label_examples

{'sadness': [], 'joy': [], 'love': [], 'anger': [], 'fear': [], 'surprise': []}

In [110]:
prompt

"Classify the emotion for the following sentence:\n\n{'text': 'i am so fucking sick its not funny my head feels like its going to explode my sinuses are aching my stomach is feeling sloshy im not sure if thats good', 'label': 0}\n{'text': 'i feel lonely and lost', 'label': 0}\n{'text': 'i feel my repressed emotions surfacing im glad for the solace i can seek in my writing', 'label': 0}\n{'text': 'i would feel boring rejected or just downright unlikeable', 'label': 0}\n{'text': 'ive been feeling depressed anxious and unhappy', 'label': 0}\n{'text': 'i feel these paints will be perfect for my plein air work', 'label': 1}\n{'text': 'i feel im like a bird flying in the air in a very carefree manner', 'label': 1}\n{'text': 'i feel so un smart yo', 'label': 1}\n{'text': 'i wished i could feel more energetic and deal with less pain but it might be my best option', 'label': 1}\n{'text': 'i know but i m also upset because i increasingly get the feeling that i m a pleasant accessory', 'label': 1

In [69]:
from transformers import pipeline
from datasets import DatasetDict, load_dataset

# Load the dataset
dataset = load_dataset("emotion")

# Define the emotion labels
emotions = dataset["train"].features["label"].names

# Split the dataset
dataset_dict = DatasetDict({
    "train": dataset["train"].shuffle().select(range(5000)),
    "test": dataset["test"].shuffle().select(range(1000))
})

# Define the label to index mapping
label2idx = {"sadness": 0, "joy": 1, "love": 2, "anger": 3, "fear": 4, "surprise": 5}

# Convert the string labels to numbers
def convert_labels_to_numbers(labels):
    return [label2idx[label] for label in labels]

'''# Get examples for each label from the training data
label_examples = {}
for label in emotions:
    examples = dataset_dict["train"].filter(lambda example: example["label"] == label)["text"]
    label_examples[label] = list(examples)[:5]

# Craft a prompt using the label examples
prompt = "Classify the emotion for the following sentence:\n\n"
for label in emotions:
    prompt += f"{label}:\n\n"
    for example in label_examples[label]:
        prompt += f"{example}\n"
    prompt += "\n"'''

# Get examples for each label from the training data
label_examples = {}
for label in emotions:
    examples = [example["text"] for example in dataset_dict["train"] if example["label"] == label]
    label_examples[label] = examples[:5]

# Craft a prompt using the label examples
prompt = "Classify the emotion for the following sentence:\n\n"
for label in emotions:
    prompt += f"{label}:\n\n"
    for example in label_examples[label]:
        prompt += f"{example}\n"
    prompt += "\n"





  0%|          | 0/3 [00:00<?, ?it/s]

In [72]:
prompt

'Classify the emotion for the following sentence:\n\ni feel so helpless i have no one to talk to\nLabel: sadness.\n\ni feel an aching tiredness that goes down to my core\nLabel: sadness.\n\ni feel curious to know more i think the procedure worked well\nLabel: surprise.\n\nive been feeling weirdly superior about my knowledge of this book roundabouts now\nLabel: joy.\n\ni feel like im being punished because of it\nLabel: sadness.\n\ni think browsers are more comfortable in my booth if all my attention is not focused on them and they don t feel pressured to make a purchase\nLabel: fear.\n\ni remember feeling so helpless i had been a mother for no less than hours and i had already failed my daughter\nLabel: sadness.\n\ni feel a recipe is only a theme which an intelligent cook can play each time with a variation\nLabel: joy.\n\ni don t know how sasha fierce feels i m definitely curious about the future of beyonc s sound\nLabel: surprise.\n\nive never made anything from this book as they all

In [60]:
from transformers import pipeline
from datasets import DatasetDict, load_dataset

# Load the dataset
dataset = load_dataset("emotion")

# Define the emotion labels
emotions = dataset["train"].features["label"].names

# Split the dataset
dataset_dict = DatasetDict({
    "train": dataset["train"].shuffle().select(range(5000)),
    "test": dataset["test"].shuffle().select(range(1000))
})

# Define the desired number of examples per label
num_examples_per_label = 5

# Create an empty list to store the filtered examples
filtered_examples = []

# Loop through each label
for label in emotions:
    # Filter the examples with the current label
    examples_with_label = [ex for ex in dataset_dict["train"] if ex["label"] == label]
    # Randomly select the desired number of examples with the current label
    selected_examples = np.random.choice(examples_with_label, size=num_examples_per_label, replace=False)
    # Add the selected examples to the filtered examples list
    filtered_examples.extend(selected_examples)

# Create a new dataset with the filtered examples
filtered_dataset_dict = DatasetDict({
    "train": filtered_examples,
    "test": dataset_dict["test"]
})

# Create a prompt that includes the filtered examples and their true labels
for ex in filtered_dataset_dict["train"]:
    print(f"{ex['text']} : {ex['label']}")




  0%|          | 0/3 [00:00<?, ?it/s]

NameError: ignored

In [56]:
from transformers import pipeline
from datasets import DatasetDict, load_dataset

# Load the dataset
dataset = load_dataset("emotion")

# Define the emotion labels
emotions = dataset["train"].features["label"].names

# Split the dataset
dataset_dict = DatasetDict({
    "train": dataset["train"].shuffle().select(range(5000)),
    "test": dataset["test"].shuffle().select(range(1000))
})

from torch.utils.data import DataLoader, Subset
from datasets import DatasetDict, load_dataset

# Load the dataset
dataset = load_dataset("emotion")

# Define the emotion labels
emotions = dataset["train"].features["label"].names

# Split the dataset
dataset_dict = DatasetDict({
    "train": dataset["train"].shuffle().select(range(5000)),
    "test": dataset["test"].shuffle().select(range(1000))
})

'''# Filter the train split to only include examples with a specific emotion label
emotion_label = "happy"
subset_indices = [i for i, example in enumerate(dataset_dict["train"]) if example["label"] == emotion_label]
subset = Subset(dataset_dict["train"], subset_indices)

# Use the subset in a PyTorch DataLoader
dataloader = DataLoader(subset, batch_size=32, shuffle=True)'''


# Choose representative examples from the training data for each emotion label
examples_per_emotion = 5
'''examples = {}
for emotion in emotions:
    examples[emotion] = dataset_dict["train"].filter(lambda example: example["label"] == emotion)["text"][:examples_per_emotion]'''

# Define a list of emotion labels
emotion_labels = dataset["train"].unique("label")
num_examples_per_label = 5

# Filter the train split to only include examples with 5 for each of all the emotion labels
filtered_indices = [idx for idx, example in enumerate(dataset_dict["train"]) if all([dataset_dict["train"][i]["label"] == label for i in range(len(dataset_dict["train"])) if dataset_dict["train"][i]["label"] == example["label"]]) and sum([1 for i in range(len(dataset_dict["train"])) if dataset_dict["train"][i]["label"] == example["label"]]) == num_examples_per_label for label in emotion_labels]

filtered_dataset_dict = DatasetDict({
    "train": dataset_dict["train"].filter(filtered_indices),
    "test": dataset_dict["test"]
})


# Create a prompt that includes some examples from the training data and their true labels
prompt = "Classify the emotion of the following text: \n\n"
for emotion, example_list in examples.items():
    prompt += f"{emotion}: "
    prompt += " ".join(example_list)
    prompt += f" ({emotion})\n"

'''# Use the zero-shot-classification pipeline to classify the test dataset based on this prompt
classifier = pipeline("zero-shot-classification", model="sentence-transformers/quora-distilbert-base")
test_dataset = dataset_dict["test"]["text"]
results = classifier(test_dataset, prompt, multi_label=False)

# Print the predicted labels for the test dataset
for i, result in enumerate(results):
    print(f"Text {i}: {result['sequence']} --> predicted label: {result['labels'][0]}")'''




  0%|          | 0/3 [00:00<?, ?it/s]



  0%|          | 0/3 [00:00<?, ?it/s]

Filter:   0%|          | 0/5000 [00:00<?, ? examples/s]

TypeError: ignored

In [59]:
from datasets import DatasetDict, load_dataset

# Load the dataset
dataset = load_dataset("emotion")

# Define the emotion labels
emotions = dataset["train"].features["label"].names

# Split the dataset
dataset_dict = DatasetDict({
    "train": dataset["train"].shuffle().select(range(5000)),
    "test": dataset["test"].shuffle().select(range(1000))
})

'''# Filter the train split to only include examples with 5 for each of all the emotion labels
num_examples_per_emotion = 5
filtered_train_indices = []
for emotion in emotions:
    indices = [i for i, example in enumerate(dataset_dict["train"]) if example["label"] == emotion]
    if len(indices) >= num_examples_per_emotion:
        filtered_train_indices.extend(indices[:num_examples_per_emotion])
        
filtered_train_dataset = dataset_dict["train"].select(filtered_train_indices)

# Create a prompt that includes some examples from the filtered training data and their true labels
prompt_examples = filtered_train_dataset[:10]
prompt_text = "Here are some examples from the training data with 5 examples for each emotion label:\n\n"
for example in prompt_examples:
    prompt_text += f"Text: {example['text']}\nLabel: {example['label']}\n\n"

print(prompt_text)'''

from collections import defaultdict

# Count the number of examples for each emotion label
emotion_counts = defaultdict(int)
for example in dataset_dict["train"]:
    for label in example["label"]:
        emotion_counts[label] += 1

# Filter the train split to only include examples with 5 for each emotion label
filtered_train = []
for example in dataset_dict["train"]:
    if all(emotion_counts[label] >= 5 for label in example["label"]):
        filtered_train.append(example)

# Create a prompt with the filtered examples and their true labels
prompt = ""
for example in filtered_train:
    prompt += f"Example: {example['text']}\nLabels: {example['label']}\n\n"





  0%|          | 0/3 [00:00<?, ?it/s]

TypeError: ignored

In [58]:
prompt_examples

{'text': [], 'label': []}

In [55]:
prompt

'Classify the emotion of the following text: \n\nsadness:  (sadness)\njoy:  (joy)\nlove:  (love)\nanger:  (anger)\nfear:  (fear)\nsurprise:  (surprise)\n'

In [53]:
from transformers import pipeline
from datasets import DatasetDict, load_dataset

# Load the dataset
dataset = load_dataset("emotion")

# Define the emotion labels
emotions = dataset["train"].features["label"].names

# Split the dataset
dataset_dict = DatasetDict({
    "train": dataset["train"].shuffle().select(range(5000)),
    "test": dataset["test"].shuffle().select(range(1000))
})

# Prepare the examples and labels for the prompt
prompt_examples = {}
for emotion in emotions:
    examples = dataset_dict["train"].filter(lambda example: example["label"] == emotion)["text"][:5000]
    prompt_examples[emotion] = examples
    #vprint(emotion)
    print(f"{emotion}: {len(examples)}")

# Create the prompt
prompt = "Classify the emotion of the following text: \n\n"
for emotion, examples in prompt_examples.items():
    prompt += f"{emotion}: " + " ".join(examples) + " \n"
    print(len(examples))

'''# Define the zero-shot classification pipeline
classifier = pipeline("zero-shot-classification",
                      model="sentence-transformers/quora-distilbert-base")

# Classify the test dataset based on the prompt
test_text = list(dataset_dict["test"]["text"])
results = classifier(test_text, prompt)

# Print the predicted labels
predicted_labels = results["labels"]
for i, label in enumerate(predicted_labels):
    print(f"Text {i+1}: {label}")'''

prompt




  0%|          | 0/3 [00:00<?, ?it/s]

Filter:   0%|          | 0/5000 [00:00<?, ? examples/s]

sadness: 0


Filter:   0%|          | 0/5000 [00:00<?, ? examples/s]

joy: 0


Filter:   0%|          | 0/5000 [00:00<?, ? examples/s]

love: 0


Filter:   0%|          | 0/5000 [00:00<?, ? examples/s]

anger: 0


Filter:   0%|          | 0/5000 [00:00<?, ? examples/s]

fear: 0


Filter:   0%|          | 0/5000 [00:00<?, ? examples/s]

surprise: 0
0
0
0
0
0
0


'Classify the emotion of the following text: \n\nsadness:  \njoy:  \nlove:  \nanger:  \nfear:  \nsurprise:  \n'

In [49]:
prompt_examples

{}

In [32]:
import torch
from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer
from datasets import load_dataset

# Load the emotion dataset
dataset = load_dataset("emotion")


# Define the emotion labels
emotions = dataset["train"].features["label"].names

# Split the dataset
dataset_dict = DatasetDict({
    "train": dataset["train"].shuffle().select(range(5000)),
    "test": dataset["test"].shuffle().select(range(1000))
})

# Prepare the examples and labels for the prompt
prompt_examples = {}
for emotion in emotions:
    examples = dataset_dict["train"].filter(lambda example: example["label"] == emotion)["text"][:5]
    prompt_examples[emotion] = examples

# Create the prompt
prompt = "Classify the emotion of the following text: \n\n"
for emotion, examples in prompt_examples.items():
    prompt += f"{emotion}: " + " ".join(examples) + " \n"

# Print the prompt
print(prompt)

# Define the zero-shot classification pipeline
classifier = pipeline("zero-shot-classification",
                      model="sentence-transformers/quora-distilbert-base"
                      )

# Classify a small sample of the test dataset based on the prompt
test_text = list(dataset_dict["test"]["text"][:5])
results = classifier(test_text, prompt)

# Print the predicted labels
predicted_labels = results["labels"]
for i, label in enumerate(predicted_labels):
    print(f"Text {i+1}: {label}")




  0%|          | 0/3 [00:00<?, ?it/s]

Filter:   0%|          | 0/5000 [00:00<?, ? examples/s]

Filter:   0%|          | 0/5000 [00:00<?, ? examples/s]

Filter:   0%|          | 0/5000 [00:00<?, ? examples/s]

Filter:   0%|          | 0/5000 [00:00<?, ? examples/s]

Filter:   0%|          | 0/5000 [00:00<?, ? examples/s]

Filter:   0%|          | 0/5000 [00:00<?, ? examples/s]

Classify the emotion of the following text: 

sadness:  
joy:  
love:  
anger:  
fear:  
surprise:  



Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at sentence-transformers/quora-distilbert-base and are newly initialized: ['classifier.weight', 'classifier.bias', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Failed to determine 'entailment' label id from the label2id mapping in the model config. Setting to -1. Define a descriptive label2id mapping in the model config to ensure correct outputs.


TypeError: ignored

In [39]:
dataset_dict["train"].filter(lambda example: example["label"] == emotion)["text"][:4]



[]

In [40]:
dataset["train"][2]

{'text': 'im grabbing a minute to post i feel greedy wrong', 'label': 3}

In [34]:
["text"][:5]

['text']

In [33]:
dataset_dict

DatasetDict({
    train: Dataset({
        features: ['text', 'label'],
        num_rows: 5000
    })
    test: Dataset({
        features: ['text', 'label'],
        num_rows: 1000
    })
})

In [29]:
# Load the dataset
dataset = load_dataset("emotion")

# Define the emotion labels
emotions = dataset["train"].features["label"].names

# Split the dataset
dataset_dict = DatasetDict({
    "train": dataset["train"].shuffle().select(range(5000)),
    "test": dataset["test"].shuffle().select(range(1000))
})

# Prepare the examples and labels for the prompt
prompt_examples = {}
'''for emotion in emotions:
    examples = dataset_dict["train"].filter(lambda example: example["label"] == emotion)["text"][:5]
    prompt_examples[emotion] = examples'''
for emotion in emotions:
    examples = dataset_dict["train"].filter(lambda example: example["label"] == emotion)["text"][:10]
    if examples:
        prompt_examples[emotion] = examples

# Create the prompt
prompt = "Classify the emotion of the following text: \n\n"
for emotion, examples in prompt_examples.items():
    prompt += f"{emotion}: " + " ".join(examples) + " \n"


# Define the zero-shot classification pipeline
classifier = pipeline("zero-shot-classification",
                      model="sentence-transformers/quora-distilbert-base",
                      ##device=0
                      )

# Classify the test dataset based on the prompt
test_text = list(dataset_dict["test"]["text"])
results = classifier(test_text, prompt)

# Print the predicted labels
predicted_labels = results["labels"]
for i, label in enumerate(predicted_labels):
    print(f"Text {i+1}: {label}")




  0%|          | 0/3 [00:00<?, ?it/s]

Filter:   0%|          | 0/5000 [00:00<?, ? examples/s]

Filter:   0%|          | 0/5000 [00:00<?, ? examples/s]

Filter:   0%|          | 0/5000 [00:00<?, ? examples/s]

Filter:   0%|          | 0/5000 [00:00<?, ? examples/s]

Filter:   0%|          | 0/5000 [00:00<?, ? examples/s]

Filter:   0%|          | 0/5000 [00:00<?, ? examples/s]

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at sentence-transformers/quora-distilbert-base and are newly initialized: ['classifier.weight', 'classifier.bias', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Failed to determine 'entailment' label id from the label2id mapping in the model config. Setting to -1. Define a descriptive label2id mapping in the model config to ensure correct outputs.


TypeError: ignored

In [30]:
prompt_examples

{}

In [26]:
examples

[]

In [31]:
prompt

'Classify the emotion of the following text: \n\n'

In [28]:
results

[{'sequence': 'im a big guy and ive gotten into some of the rigs that weve worked with to try them out and see what they feel like and let me tell you it was less than pleasant',
  'labels': ['Classify the emotion of the following text: \n\nsadness:  \njoy:  \nlove:  \nanger:  \nfear:  \nsurprise:'],
  'scores': [0.5256239175796509]},
 {'sequence': 'im feeling extremely fabulous with my jacket and shoes aint no bitches gonna bring me down hahah',
  'labels': ['Classify the emotion of the following text: \n\nsadness:  \njoy:  \nlove:  \nanger:  \nfear:  \nsurprise:'],
  'scores': [0.4966326653957367]},
 {'sequence': 'i feel somewhat fake in the group',
  'labels': ['Classify the emotion of the following text: \n\nsadness:  \njoy:  \nlove:  \nanger:  \nfear:  \nsurprise:'],
  'scores': [0.5225380063056946]},
 {'sequence': 'i feel like i am doomed to a life of sleep obsession',
  'labels': ['Classify the emotion of the following text: \n\nsadness:  \njoy:  \nlove:  \nanger:  \nfear:  \nsu

In [16]:
from transformers import pipeline, AutoTokenizer

# Load the model and tokenizer
model_name = "sentence-transformers/quora-distilbert-base"
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=len(emotions))
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Define the prompt using examples from the training data
prompt = "Classify the following text based on the emotions: \n"

for emotion in emotions:
    # Get some examples from the training data for this emotion
    examples = [ex["text"] for ex in dataset_dict["train"] if ex["label"] == emotion][:3]
    prompt += f"\n{emotion.capitalize()} examples: {' / '.join(examples)}\n"

# Define the zero-shot classification pipeline
classifier = pipeline("zero-shot-classification", model=model, tokenizer=tokenizer)

# Classify the test dataset using the prompt
test_texts = [ex["text"] for ex in dataset_dict["test"]]
results = classifier(test_texts, prompt)

# Print the results
for i, ex in enumerate(dataset_dict["test"]):
    print(f"Text: {ex['text']}")
    print(f"True label: {emotions[ex['label']]}")
    print(f"Predicted label: {results['labels'][i]}")
    print(f"Scores: {results['scores'][i]}")
    print("="*50)


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at sentence-transformers/quora-distilbert-base and are newly initialized: ['classifier.weight', 'classifier.bias', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Failed to determine 'entailment' label id from the label2id mapping in the model config. Setting to -1. Define a descriptive label2id mapping in the model config to ensure correct outputs.


Text: i didnt get to prank anyone throughout the whole day cos i was either too busy or not feeling creative
True label: joy


TypeError: ignored

In [17]:
prompt

'Classify the following text based on the emotions: \n\nAnger examples: \n\nJoy examples: \n\nLove examples: \n\nSadness examples: \n\nSurprise examples: \n'

In [15]:
import torch
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification

# Load the model and tokenizer
model_name = "sentence-transformers/quora-distilbert-base"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

# Define the emotion labels
emotions = ["anger", "joy", "love", "sadness", "surprise"]

# Define the prompts
prompts = {
    "anger": ["I am so angry that my boss gave me a bad performance review", 
              "It makes me angry when people don't clean up after themselves"],
    "joy": ["I am so happy that I won the lottery",
            "I feel joy when I spend time with my family"],
    "love": ["I am in love with my partner", 
             "Love is the most important thing in the world to me"],
    "sadness": ["I am so sad that my pet died", 
                "I feel sadness when I think about my childhood"],
    "surprise": ["I was surprised when I saw my friend after many years",
                 "It was a surprise party for my birthday"]
}

# Define the zero-shot classification pipeline
classifier = pipeline(
    "zero-shot-classification",
    model=model,
    tokenizer=tokenizer,
    ##device=0 # GPU index, use -1 for CPU
)

# Test the classifier on a sample sentence
sentence = "I am so happy that I got accepted into my dream school!"
results = classifier(sentence, emotions, prompts["joy"])
predicted_label = results["labels"][0]
score = results["scores"][0]
print(f"Predicted label: {predicted_label}, score: {score}")


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at sentence-transformers/quora-distilbert-base and are newly initialized: ['classifier.weight', 'classifier.bias', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Failed to determine 'entailment' label id from the label2id mapping in the model config. Setting to -1. Define a descriptive label2id mapping in the model config to ensure correct outputs.


ValueError: ignored

In [12]:
from transformers import pipeline
from datasets import DatasetDict, load_dataset

# Load the dataset
dataset = load_dataset("emotion")

# Define the emotion labels
emotions = dataset["train"].features["label"].names

# Split the dataset
dataset_dict = DatasetDict({
    "train": dataset["train"].shuffle().select(range(5000)),
    "test": dataset["test"].shuffle().select(range(1000))
})

# Load the model and the zero-shot classification pipeline
model = "sentence-transformers/quora-distilbert-base"
classifier = pipeline("zero-shot-classification", model=model)

# Define some examples from the training data to craft a prompt
examples = [
    "I feel really happy today",
    "I had a terrible day at work",
    "I am so excited for my vacation",
    "I am feeling so stressed and overwhelmed",
    "I just got a promotion and I am thrilled",
    "I am so disappointed in myself",
    "I am feeling really anxious about my upcoming exam",
    "I just had a great workout at the gym",
    "I am feeling really sad and lonely today",
    "I just got into a huge fight with my partner"
]

# Define the candidate labels
candidate_labels = emotions

# Use the zero-shot classification pipeline to classify the examples
for example in examples:
    result = classifier(example, candidate_labels)
    print(f"Example: {example}")
    print(f"Predicted label: {result['labels'][0]}")
    print(f"Scores: {result['scores'][0]}")
    print()

'''from sklearn.metrics import classification_report
print(classification_report(true_labels, predicted_labels, digits=4))'''



  0%|          | 0/3 [00:00<?, ?it/s]

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at sentence-transformers/quora-distilbert-base and are newly initialized: ['classifier.weight', 'classifier.bias', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Failed to determine 'entailment' label id from the label2id mapping in the model config. Setting to -1. Define a descriptive label2id mapping in the model config to ensure correct outputs.


Example: I feel really happy today
Predicted label: love
Scores: 0.17054027318954468

Example: I had a terrible day at work
Predicted label: anger
Scores: 0.1688258796930313

Example: I am so excited for my vacation
Predicted label: joy
Scores: 0.1690274327993393

Example: I am feeling so stressed and overwhelmed
Predicted label: sadness
Scores: 0.1687946319580078

Example: I just got a promotion and I am thrilled
Predicted label: joy
Scores: 0.16823124885559082

Example: I am so disappointed in myself
Predicted label: fear
Scores: 0.1684321165084839

Example: I am feeling really anxious about my upcoming exam
Predicted label: fear
Scores: 0.16840049624443054

Example: I just had a great workout at the gym
Predicted label: fear
Scores: 0.1677825152873993

Example: I am feeling really sad and lonely today
Predicted label: anger
Scores: 0.16795828938484192

Example: I just got into a huge fight with my partner
Predicted label: fear
Scores: 0.1676524430513382



'from sklearn.metrics import classification_report\nprint(classification_report(true_labels, predicted_labels, digits=4))'

In [13]:
from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer
from datasets import DatasetDict, load_dataset
from sklearn.metrics import classification_report

# Load the dataset
dataset = load_dataset("emotion")

# Define the emotion labels
emotions = dataset["train"].features["label"].names

# Split the dataset
dataset_dict = DatasetDict({
    "train": dataset["train"].shuffle().select(range(5000)),
    "test": dataset["test"].shuffle().select(range(1000))
})

# Load the zero-shot classification pipeline
model_name = "sentence-transformers/quora-distilbert-base"
classifier = pipeline("zero-shot-classification", model=model_name)

# Define the prompt
prompt = "How would you describe the emotion of the following sentence? \n\nI am feeling {} because {}"
##prompt = "I am feeling so stressed and overwhelmed"

# Classify the emotions in the test set
true_labels = dataset_dict["test"]["label"]
sentences = dataset_dict["test"]["text"]
predicted_labels = []
for sentence in sentences:
    result = classifier(sentence, emotions, prompt=prompt)
    predicted_label = result["labels"][0]
    predicted_labels.append(predicted_label)

# Print the classification report
print(classification_report(true_labels, predicted_labels, digits=4))




  0%|          | 0/3 [00:00<?, ?it/s]

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at sentence-transformers/quora-distilbert-base and are newly initialized: ['classifier.weight', 'classifier.bias', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Failed to determine 'entailment' label id from the label2id mapping in the model config. Setting to -1. Define a descriptive label2id mapping in the model config to ensure correct outputs.


ValueError: ignored

In [14]:
predicted_label

'love'

In [None]:
import torch
from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer
from datasets import load_dataset

# Load the emotion dataset
dataset = load_dataset("emotion")

# Instantiate the zero-shot classification pipeline
classifier = pipeline(
    "zero-shot-classification",
    model="sentence-transformers/quora-distilbert-base",
    tokenizer="sentence-transformers/quora-distilbert-base",
)

# Example prompt for offensive classification
prompt = "Is this text offensive? Answer yes or no.\n"

# Use a few examples from the dataset to create classification prompts
text = dataset["train"]["text"][:5]
labels = dataset["train"]["label"][:5]
prompts = [prompt + t for t in text]

# Use the zero-shot classifier on the test set
test_text = dataset["test"]["text"]
test_labels = dataset["test"]["label"]
zero_shot_preds = classifier(prompts, test_text)

# Evaluate zero-shot classification
correct = 0
total = len(test_labels)
for i, pred in enumerate(zero_shot_preds):
    label_pred = pred["labels"][0]
    if label_pred == labels[i]:
        correct += 1

accuracy = correct / total
print(f"Zero-shot classification accuracy: {accuracy:.4f}")


In [6]:
text

['i didnt feel humiliated',
 'i can go from feeling so hopeless to so damned hopeful just from being around someone who cares and is awake',
 'im grabbing a minute to post i feel greedy wrong',
 'i am ever feeling nostalgic about the fireplace i will know that it is still on the property',
 'i am feeling grouchy']

In [5]:
labels

[0, 0, 3, 2, 3]

In [None]:
test_text

In [None]:
test_labels

In [8]:
label_pred

'i want to feel less stressed'

In [3]:
prompts

['Is this text offensive? Answer yes or no.\ni didnt feel humiliated',
 'Is this text offensive? Answer yes or no.\ni can go from feeling so hopeless to so damned hopeful just from being around someone who cares and is awake',
 'Is this text offensive? Answer yes or no.\nim grabbing a minute to post i feel greedy wrong',
 'Is this text offensive? Answer yes or no.\ni am ever feeling nostalgic about the fireplace i will know that it is still on the property',
 'Is this text offensive? Answer yes or no.\ni am feeling grouchy']

In [None]:
import torch
from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer
from datasets import load_dataset

# Load the emotion dataset
dataset = load_dataset("emotion")

# Instantiate the zero-shot classification pipeline
classifier = pipeline(
    "zero-shot-classification",
    model="joeddav/bart-large-mnli-yahoo-answers",
    tokenizer="joeddav/bart-large-mnli-yahoo-answers",
)

# Example prompt for offensive classification
prompt = "Is this text offensive? Answer yes or no.\n"

# Use a few examples from the dataset to create classification prompts
text = dataset["train"]["text"][:5]
labels = dataset["train"]["label"][:5]
prompts = [prompt + t for t in text]

# Use the zero-shot classifier on the test set
test_text = dataset["test"]["text"]
test_labels = dataset["test"]["label"]
zero_shot_preds = classifier(prompts, test_text)

# Evaluate zero-shot classification
correct = 0
total = len(test_labels)
for i, pred in enumerate(zero_shot_preds):
    label_pred = pred["labels"][0]
    if label_pred == labels[i]:
        correct += 1

accuracy = correct / total
print(f"Zero-shot classification accuracy: {accuracy:.4f}")


In [39]:
from transformers import pipeline
from datasets import DatasetDict, load_dataset

# Load the dataset
dataset = load_dataset("emotion")

# Define the emotion labels
emotions = dataset["train"].features["label"].names

# Define the label to index mapping
label2idx = {"sadness": 0, "joy": 1, "love": 2, "anger": 3, "fear": 4, "surprise": 5}

# Define the index to label mapping
idx2label = {idx: label for label, idx in label2idx.items()}

# Split the dataset
dataset_dict = DatasetDict({
    "train": dataset["train"].shuffle().select(range(5000)),
    "test": dataset["test"].shuffle().select(range(1000))
})

# Define the zero-shot classification pipeline
classifier = pipeline("zero-shot-classification", 
                      model="sentence-transformers/quora-distilbert-base"
                      )

# Perform zero-shot classification on the test set
test_results = classifier(list(dataset_dict["test"]["text"]), emotions)

# Convert the predicted labels to numbers
predicted_labels = [label2idx[label] for label in test_results["labels"]]
predicted_labels = test_results

# Report the classification result
print("Predicted labels:", predicted_labels)


[1;30;43mStreaming output truncated to the last 5000 lines.[0m


TypeError: ignored

In [41]:
predicted_labels = test_results

# Report the classification result
print("Predicted labels:", predicted_labels)

Predicted labels: [{'sequence': 'im feeling very peaceful about our wedding again now after having', 'labels': ['anger', 'fear', 'sadness', 'joy', 'surprise', 'love'], 'scores': [0.1693752557039261, 0.16738007962703705, 0.16710832715034485, 0.16582679748535156, 0.1655600219964981, 0.16474954783916473]}, {'sequence': 'i aint happy im feeling glad i got sunshine in a bag im useless but not for long the future is coming on', 'labels': ['fear', 'anger', 'sadness', 'surprise', 'joy', 'love'], 'scores': [0.17008842527866364, 0.16944488883018494, 0.16869468986988068, 0.16695572435855865, 0.16467902064323425, 0.16013729572296143]}, {'sequence': 'i told him that i was willing to do whatever it took for me to not have to feel this horrible every day', 'labels': ['anger', 'fear', 'sadness', 'joy', 'surprise', 'love'], 'scores': [0.1699993759393692, 0.16739018261432648, 0.16688929498195648, 0.16576644778251648, 0.16540482640266418, 0.16454993188381195]}, {'sequence': 'im feeling rotten and pretend

In [40]:
test_results

[{'sequence': 'im feeling very peaceful about our wedding again now after having',
  'labels': ['anger', 'fear', 'sadness', 'joy', 'surprise', 'love'],
  'scores': [0.1693752557039261,
   0.16738007962703705,
   0.16710832715034485,
   0.16582679748535156,
   0.1655600219964981,
   0.16474954783916473]},
 {'sequence': 'i aint happy im feeling glad i got sunshine in a bag im useless but not for long the future is coming on',
  'labels': ['fear', 'anger', 'sadness', 'surprise', 'joy', 'love'],
  'scores': [0.17008842527866364,
   0.16944488883018494,
   0.16869468986988068,
   0.16695572435855865,
   0.16467902064323425,
   0.16013729572296143]},
 {'sequence': 'i told him that i was willing to do whatever it took for me to not have to feel this horrible every day',
  'labels': ['anger', 'fear', 'sadness', 'joy', 'surprise', 'love'],
  'scores': [0.1699993759393692,
   0.16739018261432648,
   0.16688929498195648,
   0.16576644778251648,
   0.16540482640266418,
   0.16454993188381195]},
 {

In [None]:
from transformers import pipeline
from datasets import DatasetDict, load_dataset

# Load the dataset
dataset = load_dataset("emotion")

# Define the emotion labels
emotions = dataset["train"].features["label"].names

# Define the label to index mapping
label2idx = {"sadness": 0, "joy": 1, "love": 2, "anger": 3, "fear": 4, "surprise": 5}

# Convert the string labels to numbers
def convert_labels_to_numbers(labels):
    return [label2idx[label] for label in labels]

# Define the index to label mapping
idx2label = {idx: label for label, idx in label2idx.items()}

# Convert the numbers to string labels
def convert_numbers_to_labels(numbers):
    return [idx2label[number] for number in numbers]


# Split the dataset
dataset_dict = DatasetDict({
    "train": dataset["train"].shuffle().select(range(5000)),
    "test": dataset["test"].shuffle().select(range(1000))
})

# Define the zero-shot classifier pipeline
classifier = pipeline(
    "zero-shot-classification",
    model="sentence-transformers/quora-distilbert-base",
    ##device=0 # Set to -1 if you want to use the CPU instead of GPU
)

# Classify the test dataset
inputs = list(dataset_dict["test"]["text"])
labels = ["sadness", "joy", "love", "anger", "fear", "surprise"]
formatted_labels = [f"{label}?" for label in labels]
predictions = classifier(inputs, formatted_labels, multi_class=True)

# Get the predicted labels
predicted_labels = [labels[predictions[i]["scores"].index(max(predictions[i]["scores"]))] for i in range(len(inputs))]
# Convert the predicted string labels to numbers
##predicted_labels["labels"] = convert_labels_to_numbers(predicted_labels["labels"])


# Get the true labels
##true_labels = convert_numbers_to_labels(dataset_dict["test"]["label"])
true_labels = [str(l) for l in dataset_dict["test"]["label"]]

# Compute the accuracy
correct_predictions = sum([1 if predicted_labels[i] == emotions[true_labels[i]] else 0 for i in range(len(inputs))])
accuracy = correct_predictions / len(inputs)

# Report the classification result
print(f"Accuracy: {accuracy:.4f}")


In [35]:
idx2label

{0: 'sadness', 1: 'joy', 2: 'love', 3: 'anger', 4: 'fear', 5: 'surprise'}

In [33]:
predicted_labels = [labels[predictions[i]["scores"].index(max(predictions[i]["scores"]))] for i in range(len(inputs))]

In [None]:
predicted_labels

In [None]:
true_labels

In [30]:
# Get the predicted labels
predicted_labels = [labels[predictions[i]["scores"].index(max(predictions[i]["scores"]))] for i in range(len(inputs))]
# Convert the predicted string labels to numbers
predicted_labels["labels"] = convert_labels_to_numbers(predicted_labels["labels"])


# Get the true labels
##true_labels = convert_numbers_to_labels(dataset_dict["test"]["label"])
true_labels = [str(l) for l in dataset_dict["test"]["label"]]

# Compute the accuracy
correct_predictions = sum([1 if predicted_labels[i] == emotions[true_labels[i]] else 0 for i in range(len(inputs))])
accuracy = correct_predictions / len(inputs)

# Report the classification result
print(f"Accuracy: {accuracy:.4f}")

TypeError: ignored

In [None]:
    # Define the index to label mapping
idx2label = {idx: label for label, idx in label2idx.items()}

# Convert the numbers to string labels
def convert_numbers_to_labels(numbers):
  return [idx2label[number] for number in numbers]
    
# Get the true labels
true_labels = convert_numbers_to_labels(dataset_dict["test"]["label"])
##true_labels = [str(l) for l in dataset_dict["test"]["label"]]

# Compute the accuracy
correct_predictions = sum([1 if predicted_labels[i] == emotions[true_labels[i]] else 0 for i in range(len(inputs))])
accuracy = correct_predictions / len(inputs)

# Report the classification result
print(f"Accuracy: {accuracy:.4f}")

In [None]:
predicted_labels

In [None]:
true_labels

In [None]:
predicted_labels

In [None]:
true_labels = [str(l) for l in dataset_dict["test"]["label"]]

# Compute the accuracy
correct_predictions = sum([1 if predicted_labels[i] == emotions[true_labels[i]] else 0 for i in range(len(inputs))])
accuracy = correct_predictions / len(inputs)

# Report the classification result
print(f"Accuracy: {accuracy:.4f}")

In [None]:
true_labels

In [None]:
from transformers import pipeline
from datasets import DatasetDict, load_dataset

# Load the dataset
dataset = load_dataset("emotion")

# Define the emotion labels
emotions = dataset["train"].features["label"].names

# Define the label to index mapping
label2idx = {label: idx for idx, label in enumerate(emotions)}

# Convert the string labels to numbers
def convert_labels_to_numbers(labels):
    return [label2idx[label] for label in labels]

# Split the dataset
dataset_dict = DatasetDict({
    "train": dataset["train"].shuffle().select(range(5000)),
    "test": dataset["test"].shuffle().select(range(1000))
})

# Load the sentence-transformers/quora-distilbert-base model
model = pipeline("zero-shot-classification", model="sentence-transformers/quora-distilbert-base")

# Classify the test dataset
test_texts = list(dataset_dict["test"]["text"])
test_labels = convert_labels_to_numbers(list(dataset_dict["test"]["label"]))
result = model(test_texts, emotions)

# Convert the predicted labels to numbers
predicted_labels = convert_labels_to_numbers(result["labels"])

# Calculate accuracy
correct_predictions = sum([predicted == true for predicted, true in zip(predicted_labels, test_labels)])
accuracy = correct_predictions / len(test_labels)

# Report the classification result
print(f"Accuracy: {accuracy:.4f}")


In [None]:
from transformers import pipeline
from datasets import DatasetDict, load_dataset

# Load the dataset
dataset = load_dataset("emotion")

# Define the emotion labels
emotions = dataset["train"].features["label"].names

# Define the label to index mapping
label2idx = {label: idx for idx, label in enumerate(emotions)}

# Convert the string labels to numbers
def convert_labels_to_numbers(labels):
  return [label2idx[label] for label in labels]

# Split the dataset
dataset_dict = DatasetDict({
    "train": dataset["train"].shuffle().select(range(5000)),
    "test": dataset["test"].shuffle().select(range(1000))
})

# Load the model for zero-shot classification
model = pipeline("zero-shot-classification", model="sentence-transformers/quora-distilbert-base")

'''# Convert the emotion labels to strings
emotions = [str(e) for e in emotions]

label2idx = {label: idx for idx, label in enumerate(emotions)}
'''
# Classify the test dataset
predictions = model(list(dataset_dict["test"]["text"]), emotions)

# Report the classification result
true_labels = [str(l) for l in dataset_dict["test"]["label"]]
# predicted_labels = [str(emotions[i]) for i in predictions["labels"]]

predicted_labels = [emotions[i % len(emotions)] for i in outputs.logits.argmax(dim=-1)]
predicted_labels = convert_labels_to_numbers(predicted_labels)

'''# Convert the true labels to numbers
true_labels = convert_labels_to_numbers(dataset_dict["test"]["label"])'''

from sklearn.metrics import classification_report
print(classification_report(true_labels, predicted_labels, digits=4))


In [17]:
from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer
from datasets import DatasetDict, load_dataset

# Load the dataset
dataset = load_dataset("emotion")

# Define the emotion labels
emotions = dataset["train"].features["label"].names

# Split the dataset
dataset_dict = DatasetDict({
    "train": dataset["train"].shuffle().select(range(5000)),
    "test": dataset["test"].shuffle().select(range(1000))
})

# Load the small pretrained language NLI model and tokenizer
##model_name = "textattack/roberta-base-MRPC"
model_name = "sentence-transformers/quora-distilbert-base"
model = AutoModelForSequenceClassification.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Define a function to format the inputs for the zero-shot classifier
def format_inputs(inputs):
    return [f"{inp} <{emotion}>" for inp in inputs]

# Classify the test dataset
inputs = list(dataset_dict["test"]["text"])
formatted_inputs = format_inputs(inputs)
encoded_inputs = tokenizer(formatted_inputs, padding=True, truncation=True, return_tensors="pt")
outputs = model(**encoded_inputs)

# Convert the logits to predicted labels
predicted_labels = [emotions[i] for i in outputs.logits.argmax(dim=-1)]

# Report the classification result
true_labels = dataset_dict["test"]["label"]
accuracy = sum([predicted == true for predicted, true in zip(predicted_labels, true_labels)]) / len(true_labels)

print(f"Accuracy: {accuracy:.4f}")




  0%|          | 0/3 [00:00<?, ?it/s]

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at sentence-transformers/quora-distilbert-base and are newly initialized: ['classifier.weight', 'classifier.bias', 'pre_classifier.weight', 'pre_classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


NameError: ignored

In [None]:
from transformers import pipeline
from datasets import DatasetDict, load_dataset

# Load the dataset
dataset = load_dataset("emotion")

# Define the emotion labels
emotions = dataset["train"].features["label"].names

# Split the dataset
dataset_dict = DatasetDict({
    "train": dataset["train"].shuffle().select(range(5000)),
    "test": dataset["test"].shuffle().select(range(1000))
})

# Load the model for zero-shot classification
model = pipeline("zero-shot-classification", model="sentence-transformers/quora-distilbert-base")

'''# Classify the test dataset
predictions = model(list(dataset_dict["test"]["text"]), emotions)

# Report the classification result
true_labels = dataset_dict["test"]["label"]
##accuracy = sum([predicted == true for predicted, true in zip(predictions["labels"], true_labels)]) / len(true_labels)
##the predictions object is a dictionary and the code is trying to access its "labels" key using square brackets instead of using the predictions object's get method. To fix the error, you can replace predictions["labels"] with predictions.get("labels").
accuracy = sum([predicted == true for predicted, true in zip(predictions.get("labels"), true_labels)]) / len(true_labels)


from sklearn.metrics import classification_report
print(classification_report(true_labels, predictions["labels"], digits=4))
print(f"Accuracy: {accuracy:.4f}")'''

# Classify the test dataset
prediction_list = model(list(dataset_dict["test"]["text"]), emotions, multi_label=True)

# Extract the labels from the list of dictionaries
predictions = [p["labels"] for p in prediction_list]

# Flatten the predictions and true labels
flat_predictions = [label for sublist in predictions for label in sublist]
flat_true_labels = [label for example in dataset_dict["test"] for label in example["labels"]]


# Report the classification result
true_labels = dataset_dict["test"]["label"]
accuracy = sum([predicted == true for predicted, true in zip(predictions, true_labels)]) / len(true_labels)

from sklearn.metrics import classification_report
print(classification_report(true_labels, predictions, digits=4))
print(f"Accuracy: {accuracy:.4f}")



In [None]:
from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer
from datasets import DatasetDict, load_dataset

# Load the dataset
dataset = load_dataset("emotion")

# Define the emotion labels
emotions = dataset["train"].features["label"].names

# Split the dataset
dataset_dict = DatasetDict({
    "train": dataset["train"].shuffle().select(range(5000)),
    "test": dataset["test"].shuffle().select(range(1000))
})

# Load the model for zero-shot classification
model_name = "textattack/roberta-base-MNLI"
model = AutoModelForSequenceClassification.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)
classifier = pipeline('text-classification', model=model, tokenizer=tokenizer)

# Classify the test dataset
predictions = classifier(list(dataset_dict["test"]["text"]), labels=emotions, multi_label=False)

# Filter out the unknown labels from predictions
known_labels_mask = predictions[0]['scores'].max(axis=1) > 0.5
known_labels = [emotions[label] for label, mask in zip(predictions[0]['label'], known_labels_mask) if mask]
known_scores = [score.tolist() for score, mask in zip(predictions[0]['scores'], known_labels_mask) if mask]

# Report the classification result
true_labels = dataset_dict["test"]["label"]
accuracy = sum([predicted == true for predicted, true in zip(known_labels, true_labels)]) / len(true_labels)

from sklearn.metrics import classification_report
print(classification_report(true_labels, known_labels, digits=4))
print(f"Accuracy: {accuracy:.4f}")


In [16]:
# Filter out the unknown labels from predictions
known_labels_mask = predictions[0]['scores'].max(axis=1) > 0.5
known_labels = [emotions[label] for label, mask in zip(predictions[0]['label'], known_labels_mask) if mask]
known_scores = [score.tolist() for score, mask in zip(predictions[0]['scores'], known_labels_mask) if mask]

# Report the classification result
true_labels = dataset_dict["test"]["label"]
accuracy = sum([predicted == true for predicted, true in zip(known_labels, true_labels)]) / len(true_labels)

from sklearn.metrics import classification_report
print(classification_report(true_labels, known_labels, digits=4))
print(f"Accuracy: {accuracy:.4f}")

TypeError: ignored

In [15]:
# Flatten the predictions and true labels
flat_predictions = [label for sublist in predictions for label in sublist]
flat_true_labels = [label for example in dataset_dict["test"] for label in example["label"]]


# Report the classification result
true_labels = dataset_dict["test"]["label"]
accuracy = sum([predicted == true for predicted, true in zip(predictions, true_labels)]) / len(true_labels)

from sklearn.metrics import classification_report
print(classification_report(true_labels, predictions, digits=4))
print(f"Accuracy: {accuracy:.4f}")

TypeError: ignored

In [9]:
# Report the classification result
true_labels = dataset_dict["test"]["label"]
print("True labels shape:", true_labels.shape)
print("True labels example:", true_labels[0])

print("Predictions shape:", len(predictions))
print("Predictions example:", predictions[0])

accuracy = sum([predicted == true for predicted, true in zip(predictions, true_labels)]) / len(true_labels)


AttributeError: ignored

In [13]:
predictions["labels"]

TypeError: ignored

In [10]:
true_labels


[1,
 1,
 1,
 3,
 3,
 3,
 4,
 1,
 0,
 4,
 1,
 1,
 1,
 3,
 3,
 1,
 0,
 1,
 0,
 0,
 1,
 2,
 5,
 5,
 4,
 4,
 1,
 0,
 4,
 3,
 4,
 3,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 1,
 1,
 0,
 1,
 4,
 4,
 0,
 1,
 0,
 1,
 1,
 0,
 1,
 1,
 0,
 0,
 1,
 4,
 1,
 2,
 5,
 1,
 1,
 4,
 5,
 0,
 1,
 0,
 0,
 3,
 3,
 1,
 0,
 2,
 3,
 0,
 3,
 4,
 1,
 3,
 1,
 1,
 0,
 1,
 1,
 1,
 4,
 5,
 3,
 0,
 2,
 4,
 0,
 3,
 4,
 2,
 3,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 0,
 4,
 0,
 1,
 0,
 3,
 1,
 1,
 3,
 0,
 0,
 1,
 2,
 1,
 1,
 0,
 0,
 5,
 3,
 1,
 3,
 0,
 1,
 3,
 1,
 1,
 3,
 3,
 0,
 0,
 3,
 5,
 1,
 1,
 1,
 1,
 5,
 4,
 1,
 1,
 1,
 0,
 1,
 0,
 1,
 3,
 1,
 0,
 1,
 1,
 1,
 1,
 3,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 4,
 1,
 2,
 1,
 1,
 0,
 0,
 3,
 0,
 4,
 1,
 3,
 1,
 0,
 2,
 0,
 1,
 2,
 4,
 0,
 4,
 1,
 4,
 0,
 0,
 3,
 1,
 0,
 4,
 4,
 1,
 0,
 1,
 0,
 1,
 1,
 1,
 4,
 0,
 0,
 1,
 1,
 0,
 1,
 4,
 0,
 0,
 3,
 2,
 0,
 1,
 0,
 5,
 0,
 1,
 4,
 0,
 5,
 0,
 1,
 0,
 1,
 0,
 1,
 1,
 3,
 0,
 1,
 0,
 2,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 3,
 1,
 4,
 0,
 1,
 0,


In [None]:
# Classify the test dataset
prediction_list = model(list(dataset_dict["test"]["text"]), emotions, multi_label=True)

# Extract the labels from the list of dictionaries
predictions = [p["labels"] for p in prediction_list]

# Report the classification result
true_labels = dataset_dict["test"]["label"]
accuracy = sum([predicted == true for predicted, true in zip(predictions, true_labels)]) / len(true_labels)

from sklearn.metrics import classification_report
print(classification_report(true_labels, predictions, digits=4))
print(f"Accuracy: {accuracy:.4f}")


In [8]:
'''from sklearn.metrics import classification_report
print(classification_report(true_labels, predictions["labels"], digits=4))
print(f"Accuracy: {accuracy:.4f}")'''

from sklearn.metrics import classification_report
print(classification_report(true_labels, predictions, digits=4))
print(f"Accuracy: {accuracy:.4f}")

ValueError: ignored

In [2]:
from datasets import DatasetDict, load_dataset
# Report the classification result
true_labels = dataset_dict["test"]["label"]
accuracy = sum([predicted == true for predicted, true in zip(predictions["labels"], true_labels)]) / len(true_labels)
##accuracy = sum([predicted == true for predicted, true in zip(predictions["labels"], true_labels)]) / len(true_labels)

accuracy = sum([predicted == true for predicted, true in zip(predictions.get("labels"), true_labels)]) / len(true_labels)


from sklearn.metrics import classification_report
print(classification_report(true_labels, predictions["labels"], digits=4))
print(f"Accuracy: {accuracy:.4f}")

NameError: ignored

In [7]:
# Report the classification result
true_labels = dataset_dict["test"]["label"]
accuracy = sum([predicted == true for predicted, true in zip(predictions, true_labels)]) / len(true_labels)

from sklearn.metrics import classification_report
print(classification_report(true_labels, predictions, digits=4))
print(f"Accuracy: {accuracy:.4f}")

ValueError: ignored

In [6]:
print(classification_report(true_labels, predicted, digits=4))

NameError: ignored

In [8]:
from transformers import pipeline
from datasets import DatasetDict, load_dataset
from sklearn.metrics import classification_report

# Load the dataset
dataset = load_dataset("emotion")

# Define the emotion labels
emotions = dataset["train"].features["label"].names

# Split the dataset
dataset_dict = DatasetDict({
    "train": dataset["train"].shuffle().select(range(5000)),
    "test": dataset["test"].shuffle().select(range(1000))
})

# Load the model for zero-shot classification
model = pipeline("zero-shot-classification", model="sentence-transformers/quora-distilbert-base")

# Classify the test dataset
predictions = model(list(dataset_dict["test"]["text"]), emotions)

# Filter out the unknown labels from predictions
known_labels_mask = predictions["scores"].max(axis=1) > 0.5
known_labels = [label for label, mask in zip(predictions["labels"], known_labels_mask) if mask]
known_scores = [score.tolist() for score, mask in zip(predictions["scores"], known_labels_mask) if mask]

# Report the classification result
true_labels = dataset_dict["test"]["label"]
accuracy = sum([predicted == true for predicted, true in zip(known_labels, true_labels)]) / len(true_labels)
print(f"Accuracy: {accuracy:.4f}")

report = classification_report(true_labels, known_labels, digits=4)
print(report)




  0%|          | 0/3 [00:00<?, ?it/s]

[1;30;43mStreaming output truncated to the last 5000 lines.[0m


TypeError: ignored

In [9]:
# Filter out the unknown labels from predictions
known_labels_mask = predictions["scores"].max(axis=1) > 0.5
known_labels = [label for label, mask in zip(predictions["labels"], known_labels_mask) if mask]
known_scores = [score.tolist() for score, mask in zip(predictions["scores"], known_labels_mask) if mask]

# Report the classification result
true_labels = dataset_dict["test"]["label"]
known_true_labels = [label for label, mask in zip(true_labels, known_labels_mask) if mask]
accuracy = sum([predicted == true for predicted, true in zip(known_labels, known_true_labels)]) / len(known_true_labels)
print(f"Accuracy: {accuracy:.4f}")

report = classification_report(known_true_labels, known_labels, digits=4)
print(report)


TypeError: ignored

In [None]:
# Load the required libraries
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from datasets import load_dataset, DatasetDict

# Load the dataset
dataset = load_dataset("emotion")

# Define the emotion labels
emotions = dataset["train"].features["label"].names

# Split the dataset
dataset_dict = DatasetDict({
    "train": dataset["train"].shuffle().select(range(5000)),
    "test": dataset["test"].shuffle().select(range(1000))
})

'''# Load the pretrained tokenizer and model
tokenizer = AutoTokenizer.from_pretrained("textattack/xlm-roberta-base-xnli")
model = AutoModelForSequenceClassification.from_pretrained("textattack/xlm-roberta-base-xnli")'''

# Load the pretrained tokenizer and model
tokenizer = AutoTokenizer.from_pretrained("sentence-transformers/quora-distilbert-base")
model = AutoModelForSequenceClassification.from_pretrained("sentence-transformers/quora-distilbert-base")

# Perform zero-shot classification on the test set
test_texts = [example["text"] for example in dataset_dict["test"]]
prompts = [f"This text is about {emotion}." for emotion in emotions]
##batch_encoding = tokenizer.batch_encode_plus(test_texts, prompts, padding=True, truncation=True, return_tensors="pt")
batch_encoding = tokenizer.batch_encode_plus(
    test_texts, 
    add_special_tokens=True, 
    padding=True, 
    truncation=True,
    max_length=512, 
    return_tensors='pt'
)
inputs = {
    "input_ids": batch_encoding["input_ids"].to(model.device),
    "attention_mask": batch_encoding["attention_mask"].to(model.device)
}
logits = model(**inputs)[0]
predicted_labels = [emotions[index] for index in logits.argmax(dim=1)]
predictions = [predicted_labels[i:i+len(emotions)] for i in range(0, len(predicted_labels), len(emotions))]

# Report the classification result
true_labels = [dataset_dict["test"][i]["label"] for i in range(len(dataset_dict["test"]))]
accuracy = sum([predicted == true for predicted, true in zip(predictions, true_labels)]) / len(true_labels)
print("Accuracy:", accuracy)




  0%|          | 0/3 [00:00<?, ?it/s]

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at sentence-transformers/quora-distilbert-base and are newly initialized: ['pre_classifier.bias', 'classifier.weight', 'classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [4]:
# Load the required libraries
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from datasets import load_dataset, DatasetDict

# Load the dataset
dataset = load_dataset("emotion")

# Define the emotion labels
emotions = dataset["train"].features["label"].names

# Split the dataset
dataset_dict = DatasetDict({
    "train": dataset["train"].shuffle().select(range(5000)),
    "test": dataset["test"].shuffle().select(range(1000))
})

# Load the pretrained tokenizer and model
tokenizer = AutoTokenizer.from_pretrained("facebook/m2m100_418M")
model = AutoModelForSequenceClassification.from_pretrained("facebook/m2m100_418M")

# Perform zero-shot classification on the test set
test_texts = [example["text"] for example in dataset_dict["test"]]
prompts = [f"This text is about {emotion}." for emotion in emotions]
batch_encoding = tokenizer.batch_encode_plus(test_texts, prompts, return_tensors="pt")
inputs = {
    "input_ids": batch_encoding["input_ids"].repeat(len(emotions), 1),
    "attention_mask": batch_encoding["attention_mask"].repeat(len(emotions), 1)
}
logits = model(**inputs)[0].reshape(-1, len(emotions))
predicted_labels = [emotions[index] for index, label in enumerate(logits) if label > 0.5]
predictions = [predicted_labels[i:i+len(emotions)] for i in range(0, len(predicted_labels), len(emotions))]

# Report the classification result
true_labels = [dataset_dict["test"][i]["label"] for i in range(len(dataset_dict["test"]))]
accuracy = sum([predicted == true for predicted, true in zip(predictions, true_labels)]) / len(true_labels)
print("Accuracy:", accuracy)




  0%|          | 0/3 [00:00<?, ?it/s]

OSError: ignored

In [2]:
# Load the required libraries
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from datasets import load_dataset, DatasetDict

# Load the dataset
dataset = load_dataset("emotion")

# Define the emotion labels
emotions = dataset["train"].features["label"].names

# Split the dataset
dataset_dict = DatasetDict({
    "train": dataset["train"].shuffle().select(range(5000)),
    "test": dataset["test"].shuffle().select(range(1000))
})

# Load the pretrained tokenizer and model
tokenizer = AutoTokenizer.from_pretrained("xlm-roberta-large-xnli")
model = AutoModelForSequenceClassification.from_pretrained("xlm-roberta-large-xnli")

'''# Load the pretrained tokenizer and model
tokenizer = AutoTokenizer.from_pretrained("roberta-large-mnli")
model = AutoModelForSequenceClassification.from_pretrained("roberta-large-mnli")'''

# Perform zero-shot classification on the test set
test_texts = [example["text"] for example in dataset_dict["test"]]
prompts = [f"This text is about {emotion}." for emotion in emotions]
batch_encoding = tokenizer.batch_encode_plus(test_texts, prompts, padding=True, truncation=True, return_tensors="pt")
inputs = {
    "input_ids": batch_encoding["input_ids"].repeat(len(emotions), 1),
    "attention_mask": batch_encoding["attention_mask"].repeat(len(emotions), 1)
}
logits = model(**inputs)[0].reshape(-1, len(emotions))
predicted_labels = [emotions[index] for index, label in enumerate(logits) if label > 0.5]
predictions = [predicted_labels[i:i+len(emotions)] for i in range(0, len(predicted_labels), len(emotions))]

# Report the classification result
true_labels = [dataset_dict["test"][i]["label"] for i in range(len(dataset_dict["test"]))]
accuracy = sum([predicted == true for predicted, true in zip(predictions, true_labels)]) / len(true_labels)
print("Accuracy:", accuracy)

from sklearn.metrics import classification_report
print(classification_report(true_labels, predicted_labels, digits=4))




  0%|          | 0/3 [00:00<?, ?it/s]

OSError: ignored

In [2]:
# Load the required libraries
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from datasets import load_dataset, DatasetDict

# Load the dataset
dataset = load_dataset("emotion")

# Define the emotion labels
emotions = dataset["train"].features["label"].names

# Split the dataset
dataset_dict = DatasetDict({
    "train": dataset["train"].shuffle().select(range(5000)),
    "test": dataset["test"].shuffle().select(range(1000))
})

# Load the pretrained tokenizer and model
tokenizer = AutoTokenizer.from_pretrained("roberta-large-mnli")
model = AutoModelForSequenceClassification.from_pretrained("roberta-large-mnli")

# Perform zero-shot classification on the test set
test_texts = [example["text"] for example in dataset_dict["test"]]
prompts = [f"This text is about {emotion}." for emotion in emotions]
##batch_encoding = tokenizer.batch_encode_plus(test_texts, prompts, return_tensors="pt")
batch_encoding = tokenizer.batch_encode_plus(
    test_texts, 
    add_special_tokens=True, 
    padding=True, 
    truncation=True,
    max_length=512, 
    return_tensors='pt'
)
inputs = {
    "input_ids": batch_encoding["input_ids"].repeat(len(emotions), 1),
    "attention_mask": batch_encoding["attention_mask"].repeat(len(emotions), 1)
    ##"token_type_ids": batch_encoding["token_type_ids"].repeat(len(emotions), 1)
}
'''logits = model(**inputs)[0].reshape(-1, len(emotions))
predicted_labels = [emotions[index] for index, label in enumerate(logits) if label > 0.5]
predictions = [predicted_labels[i:i+len(emotions)] for i in range(0, len(predicted_labels), len(emotions))]

# Report the classification result
true_labels = [dataset_dict["test"][i]["label"] for i in range(len(dataset_dict["test"]))]
accuracy = sum([predicted == true for predicted, true in zip(predictions, true_labels)]) / len(true_labels)

from sklearn.metrics import classification_report
print(classification_report(true_labels, predicted_labels, digits=4))'''




  0%|          | 0/3 [00:00<?, ?it/s]

Some weights of the model checkpoint at roberta-large-mnli were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


'logits = model(**inputs)[0].reshape(-1, len(emotions))\npredicted_labels = [emotions[index] for index, label in enumerate(logits) if label > 0.5]\npredictions = [predicted_labels[i:i+len(emotions)] for i in range(0, len(predicted_labels), len(emotions))]\n\n# Report the classification result\ntrue_labels = [dataset_dict["test"][i]["label"] for i in range(len(dataset_dict["test"]))]\naccuracy = sum([predicted == true for predicted, true in zip(predictions, true_labels)]) / len(true_labels)\n\nfrom sklearn.metrics import classification_report\nprint(classification_report(true_labels, predicted_labels, digits=4))'

In [None]:
logits = model(**inputs)[0].reshape(-1, len(emotions))
predicted_labels = [emotions[index] for index, label in enumerate(logits) if label > 0.5]
predictions = [predicted_labels[i:i+len(emotions)] for i in range(0, len(predicted_labels), len(emotions))]

# Report the classification result
true_labels = [dataset_dict["test"][i]["label"] for i in range(len(dataset_dict["test"]))]
accuracy = sum([predicted == true for predicted, true in zip(predictions, true_labels)]) / len(true_labels)

'''from sklearn.metrics import classification_report
print(classification_report(true_labels, predicted_labels, digits=4))'''

from sklearn.metrics import classification_report

# Flatten the predictions and true labels
flat_predictions = [label for sublist in predictions for label in sublist]
flat_true_labels = [label for example in dataset_dict["test"] for label in example["label"]]

# Generate the classification report
print(classification_report(flat_true_labels, flat_predictions, digits=4))


In [11]:
batch_encoding = tokenizer.batch_encode_plus(
    test_texts, 
    add_special_tokens=True, 
    padding=True, 
    truncation=True,
    max_length=512, 
    return_tensors='pt'
)


In [None]:
# Load the required libraries
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from datasets import load_dataset, DatasetDict

# Load the dataset
dataset = load_dataset("emotion")

# Define the emotion labels
emotions = dataset["train"].features["label"].names

# Split the dataset
dataset_dict = DatasetDict({
    "train": dataset["train"].shuffle().select(range(5000)),
    "test": dataset["test"].shuffle().select(range(1000))
})

# Load the pretrained tokenizer and model
tokenizer = AutoTokenizer.from_pretrained("textattack/roberta-base-MNLI")
model = AutoModelForSequenceClassification.from_pretrained("textattack/roberta-base-MNLI")

# Perform zero-shot classification on the test set
test_texts = [example["text"] for example in dataset_dict["test"]]
prompts = [f"This text is about {emotion}." for emotion in emotions]
## batch_encoding = tokenizer.batch_encode_plus(test_texts, prompts, return_tensors="pt")
## batch_encoding = tokenizer.batch_encode_plus(test_texts, prompts, add_special_tokens=True, return_tensors="pt")
## batch_encoding = tokenizer.batch_encode_plus(test_texts, prompts, return_tensors="pt", add_special_tokens=True)

batch_encoding = tokenizer.batch_encode_plus(
    test_texts, 
    add_special_tokens=True, 
    padding=True, 
    truncation=True,
    max_length=512, 
    return_tensors='pt'
)
inputs = {
    "input_ids": batch_encoding["input_ids"].repeat(len(emotions), 1),
    "attention_mask": batch_encoding["attention_mask"].repeat(len(emotions), 1)
}
logits = model(**inputs)[0].reshape(-1, len(emotions))
predicted_labels = [emotions[index] for index, label in enumerate(logits) if label > 0.5]
predictions = [predicted_labels[i:i+len(emotions)] for i in range(0, len(predicted_labels), len(emotions))]

# Report the classification result
true_labels = [dataset_dict["test"][i]["label"] for i in range(len(dataset_dict["test"]))]
accuracy = sum([predicted == true for predicted, true in zip(predictions, true_labels)]) / len(true_labels)

from sklearn.metrics import classification_report
print(classification_report(true_labels, predicted_labels, digits=4))




  0%|          | 0/3 [00:00<?, ?it/s]

Some weights of the model checkpoint at textattack/roberta-base-MNLI were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [5]:
# Load the required libraries
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from datasets import load_dataset, DatasetDict

# Load the dataset
dataset = load_dataset("emotion")

# Define the emotion labels
emotions = dataset["train"].features["label"].names

# Split the dataset
dataset_dict = DatasetDict({
    "train": dataset["train"].shuffle().select(range(5000)),
    "test": dataset["test"].shuffle().select(range(1000))
})

# Load the pretrained tokenizer and model
tokenizer = AutoTokenizer.from_pretrained("textattack/roberta-base-MNLI")
model = AutoModelForSequenceClassification.from_pretrained("textattack/roberta-base-MNLI")

# Perform zero-shot classification on the test set
test_texts = [example["text"] for example in dataset_dict["test"]]
prompts = [f"This text is about {emotion}." for emotion in emotions]
##batch_encoding = tokenizer.batch_encode_plus(test_texts, prompts, add_special_tokens=True, return_tensors="pt")
batch_encoding = tokenizer.batch_encode_plus(
    test_texts, 
    add_special_tokens=True, 
    padding=True, 
    truncation=True,
    max_length=512, 
    return_tensors='pt'
)
inputs = {
    "input_ids": batch_encoding["input_ids"].repeat(len(emotions), 1),
    "attention_mask": batch_encoding["attention_mask"].repeat(len(emotions), 1)
}
logits = model(**inputs)[0].reshape(-1, len(emotions))
predicted_labels = [emotions[index] for index, label in enumerate(logits) if label > 0.5]
predictions = [predicted_labels[i:i+len(emotions)] for i in range(0, len(predicted_labels), len(emotions))]

# Report the classification result
true_labels = [dataset_dict["test"][i]["label"] for i in range(len(dataset_dict["test"]))]
accuracy = sum([predicted == true for predicted, true in zip(predictions, true_labels)]) / len(true_labels)

from sklearn.metrics import classification_report
print(classification_report(true_labels, predicted_labels, digits=4))




  0%|          | 0/3 [00:00<?, ?it/s]

Some weights of the model checkpoint at textattack/roberta-base-MNLI were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


TypeError: ignored

In [4]:
'''
In this code, we first load the required libraries and then load the emotion dataset. We then split the dataset into a training set and a test set.

Next, we load the textattack/roberta-base-MNLI tokenizer and model. We then iterate over the test set and for each text, we generate an NLI prompt that specifies one of the emotions in the emotions list. We then use the encode_plus method of the tokenizer to encode the text and prompt into a format that can be input to the model.

We then pass the encoded input to the model and obtain the logits, which represent the probabilities of the input belonging to each emotion. We then select the emotions that have a probability greater than 0.5 and add them to the list of predicted labels.

Finally, we compare the predicted labels to the true labels for each text in the test set and calculate the accuracy of the classification.
'''

# Load the required libraries
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from datasets import load_dataset, DatasetDict

# Load the dataset
dataset = load_dataset("emotion")

# Define the emotion labels
emotions = dataset["train"].features["label"].names

# Split the dataset
dataset_dict = DatasetDict({
    "train": dataset["train"].shuffle().select(range(5000)),
    "test": dataset["test"].shuffle().select(range(1000))
})

# Load the pretrained tokenizer and model
tokenizer = AutoTokenizer.from_pretrained("textattack/roberta-base-MNLI")
model = AutoModelForSequenceClassification.from_pretrained("textattack/roberta-base-MNLI")

# Perform zero-shot classification on the test set
predictions = []
for i in range(len(dataset_dict["test"])):
    text = dataset_dict["test"][i]["text"]
    prompt = f"This text is about {emotions[0]}."
    inputs = tokenizer.encode_plus(text, prompt, add_special_tokens=True, return_tensors="pt")
    logits = model(inputs["input_ids"], attention_mask=inputs["attention_mask"])[0]
    predicted_labels = [emotions[index] for index, label in enumerate(logits[0]) if label > 0.5]
    predictions.append(predicted_labels)

# Report the classification result
true_labels = [dataset_dict["test"][i]["label"] for i in range(len(dataset_dict["test"]))]
accuracy = sum([predicted == true for predicted, true in zip(predictions, true_labels)]) / len(true_labels)
# print("Accuracy:", accuracy)

from sklearn.metrics import classification_report
print(classification_report(true_labels, predicted_labels, digits=4))




  0%|          | 0/3 [00:00<?, ?it/s]

Some weights of the model checkpoint at textattack/roberta-base-MNLI were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


ValueError: ignored

In [3]:
##  to perform zero-shot classification on the emotion dataset using the textattack/roberta-base-MNLI model
'''
In this code, we first load the required libraries and then load the emotion dataset. We then split the dataset into a training set and a test set.

Next, we load the textattack/roberta-base-MNLI tokenizer and model. We then use the pipeline function from the transformers library to create a text classification pipeline that uses the RoBERTa model to classify text into one or more emotions. We set multi_label=True to allow for the possibility of multiple emotions being associated with each text.

We then iterate over the test set and use the text classification pipeline to make predictions for each text. We set hypothesis_template="This text is about {}." to generate NLI prompts that will enable the RoBERTa model to perform zero-shot classification. We also pass in the emotion labels as labels=emotions.

Finally, we compare the predicted labels to the true labels for each text in the test set and calculate the accuracy of the classification.

Note that the accuracy of the classification will depend on the specific RoBERTa model used and the quality of the NLI prompts generated. You may want to experiment with different models and prompts to see if you can improve the accuracy.
'''

# Load the required libraries
from transformers import pipeline, AutoTokenizer
from datasets import load_dataset, DatasetDict


# Load the dataset
dataset = load_dataset("emotion")

# Define the emotion labels
emotions = dataset["train"].features["label"].names

# Split the dataset
dataset_dict = DatasetDict({
    "train": dataset["train"].shuffle().select(range(5000)),
    "test": dataset["test"].shuffle().select(range(1000))
})

# Load the pretrained tokenizer and model
tokenizer = AutoTokenizer.from_pretrained("textattack/roberta-base-MNLI")
classifier = pipeline("text-classification", model="textattack/roberta-base-MNLI", tokenizer=tokenizer)

# Perform zero-shot classification on the test set
predictions = []
for i in range(len(dataset_dict["test"])):
    text = dataset_dict["test"][i]["text"]
    result = classifier(text, hypothesis_template="This text is about {}.", multi_label=True, labels=emotions)
    predicted_labels = [emotions[index] for index, label in enumerate(result[0]["scores"]) if label > 0.5]
    predictions.append(predicted_labels)

# Report the classification result
true_labels = [dataset_dict["test"][i]["label"] for i in range(len(dataset_dict["test"]))]
accuracy = sum([predicted == true for predicted, true in zip(predictions, true_labels)]) / len(true_labels)
print("Accuracy:", accuracy)


from sklearn.metrics import classification_report

'''labels = [sample["label"] for sample in test_data]
label_map = {"Positive": 0, "Negative": 1}
label_indices = [label_map[label] for label in labels]'''
print(classification_report(true_labels, predicted_labels, digits=4))



  0%|          | 0/3 [00:00<?, ?it/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/678 [00:00<?, ?B/s]

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/150 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/501M [00:00<?, ?B/s]

Some weights of the model checkpoint at textattack/roberta-base-MNLI were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


TypeError: ignored

In [None]:
'''
To use word embeddings as input features for a bag-of-words classifier for the "emotion" dataset, we first need to load the dataset using the load_dataset() function from the Hugging Face datasets library:
'''

from datasets import load_dataset

dataset = load_dataset('emotion')
X = dataset['train']['text']
y = dataset['train']['label']

'''
Next, we need to preprocess the text data and convert it into numerical form using word embeddings. We can use the TransformersBaseTokenizer and TransformersWordEmbeddings classes from the nlp library to tokenize the text and convert it into word embeddings:
'''

'''!pip install nlp
from nlp import TransformersBaseTokenizer, TransformersWordEmbeddings

# Load the tokenizer and embeddings model
tokenizer = TransformersBaseTokenizer('bert-base-cased')
embeddings_model = TransformersWordEmbeddings('bert-base-cased')'''

from transformers import BertForSequenceClassification, BertTokenizer
# Load saved model
embeddings_model = BertForSequenceClassification.from_pretrained('emotion_classification_bert')

# Load saved tokenizer
tokenizer = BertTokenizer.from_pretrained('emotion_classification_bert')

# Tokenize and embed the text data
X_embeddings = embeddings_model(tokenizer(X))

from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X_embeddings, y, test_size=0.2, random_state=42)

from sklearn.linear_model import LogisticRegression

clf = LogisticRegression().fit(X_train, y_train)

from sklearn.metrics import accuracy_score

y_pred = clf.predict(X_test)

print('Accuracy:', accuracy_score(y_test, y_pred))

'''
To create a baseline model, we can use the DummyClassifier class from scikit-learn. We can create two types of baseline models: random and majority/target-class.

To create a random baseline model, we can set the strategy parameter of the DummyClassifier to 'uniform':
'''

from sklearn.dummy import DummyClassifier

# Random baseline
dummy_random = DummyClassifier(strategy='uniform')
dummy_random.fit(X_train, y_train)
y_pred_random = dummy_random.predict(X_test)

print('Random baseline accuracy:', accuracy_score(y_test, y_pred_random))

'''
To create a majority/target-class baseline model, we can set the strategy parameter of the DummyClassifier to 'most_frequent':
'''
# Majority/target-class baseline
dummy_majority = DummyClassifier(strategy='most_frequent')
dummy_majority.fit(X_train, y_train)
y_pred_majority = dummy_majority.predict(X_test)

print('Majority/target-class baseline accuracy:', accuracy_score(y_test, y_pred_majority))




  0%|          | 0/3 [00:00<?, ?it/s]

AttributeError: ignored

In [None]:
inputs = tokenizer_bert('Hello, how are you?', return_tensors='pt')
outputs = model_bert(**inputs)
outputs

SequenceClassifierOutput(loss=None, logits=tensor([[-0.4119,  2.9381, -0.9873,  0.1655, -0.1946, -0.9263]],
       grad_fn=<AddmmBackward0>), hidden_states=None, attentions=None)

In [None]:
import datasets
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments

# Load the dataset
dataset = datasets.load_dataset('go_emotions', split='train[:80%]')

# Load the tokenizer and encode the dataset
tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
encoded_dataset = dataset.map(lambda examples: tokenizer(examples['text'], padding=True, truncation=True), batched=True)

# Load the pre-trained BERT model
model = AutoModelForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=28)

# Define the training arguments
training_args = TrainingArguments(
    output_dir='./results',          # output directory
    evaluation_strategy='epoch',     # evaluate every epoch
    learning_rate=2e-5,              # learning rate
    per_device_train_batch_size=32,  # batch size for training
    per_device_eval_batch_size=64,   # batch size for evaluation
    num_train_epochs=3,              # total number of training epochs
    weight_decay=0.01,               # weight decay
    push_to_hub=False,               # whether to push the fine-tuned model to the Hugging Face model hub
    logging_dir='./logs',            # directory for storing logs
    logging_steps=10,
    load_best_model_at_end=True,     # load the best model at the end of training
    metric_for_best_model="accuracy",
)

# Define the Trainer object and fine-tune the BERT model
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=encoded_dataset,
    compute_metrics=compute_metrics,
)
trainer.train()

# Save the fine-tuned BERT model
trainer.save_model('./models/bert_emotion_classification')

# Fine-tune the DistilBERT model
distilbert_model = AutoModelForSequenceClassification.from_pretrained('distilbert-base-uncased', num_labels=28)
distilbert_trainer = Trainer(
    model=distilbert_model,
    args=training_args,
    train_dataset=encoded_dataset,
    compute_metrics=compute_metrics,
)
distilbert_trainer.train()

# Save the fine-tuned DistilBERT model
distilbert_trainer.save_model('./models/distilbert_emotion_classification')

# Load the test dataset and encode it
test_dataset = datasets.load_dataset('go_emotions', split='train[80%:]')
encoded_test_dataset = test_dataset.map(lambda examples: tokenizer(examples['text'], padding=True, truncation=True), batched=True)

# Define the function to compute the metrics
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = logits.argmax(axis=-1)
    accuracy = (predictions == labels).mean()
    precision = precision_score(labels, predictions, average='macro')
    recall = recall_score(labels, predictions, average='macro')
    f1 = f1_score(labels, predictions, average='macro')
    return {
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1': f1,
    }

# Evaluate the fine-tuned BERT model on the test dataset
bert_eval_result = trainer.evaluate(encoded_test_dataset)
print("BERT Evaluation Result:")
for key, value in bert_eval_result.items():
    print(f"{key}: {value:.4f}")

# Evaluate the fine-tuned DistilBERT model on the test dataset
distilbert_eval_result = distilbert_trainer.evaluate(encoded_test_dataset)
print("DistilBERT Evaluation Result:")
for key, value in distilbert_eval_result.items():
  print(f"{key}: {value:.4f}")


In [None]:
# Define output directory
output_dir = './emotion_classification_bert/'

# Create output directory if it doesn't exist
if not os.path.exists(output_dir):
    os.makedirs(output_dir)

# Save model to output directory
model_bert.save_pretrained(output_dir)

# Load saved model
model_bert = BertForSequenceClassification.from_pretrained(output_dir)

# Save tokenizer to output directory
tokenizer.save_pretrained(output_dir)
# Load saved tokenizer
tokenizer = BertTokenizer.from_pretrained(output_dir)


In [None]:
import torch
from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer
from datasets import load_dataset

# Load the emotion dataset
dataset = load_dataset("emotion")

# Instantiate the zero-shot classification pipeline with a smaller model
# model_name = "distilbert-base-uncased"
model_name = "textattack/roberta-base-MNLI"
classifier = pipeline(
    "zero-shot-classification",
    model=model_name,
    tokenizer=model_name,
)

# Example prompt for offensive classification
prompt = "Is this text offensive? Answer yes or no.\n"

# Use a few examples from the dataset to create classification prompts
text = dataset["train"]["text"][:5]
labels = dataset["train"]["label"][:5]
prompts = [prompt + t for t in text]

# Use the zero-shot classifier on the test set
test_text = dataset["test"]["text"]
test_labels = dataset["test"]["label"]
zero_shot_preds = classifier(prompts, test_text)

# Evaluate zero-shot classification
correct = 0
total = len(test_labels)
for i, pred in enumerate(zero_shot_preds):
    label_pred = pred["labels"][0]
    if label_pred == labels[i]:
        correct += 1

'''accuracy = correct / total
print(f"Zero-shot classification accuracy: {accuracy:.4f}")'''

from sklearn.metrics import classification_report
print(classification_report(test_labels, label_pred, digits=4))

print(prompts)


In [None]:
import torch
from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer
from datasets import load_dataset

# Load the emotion dataset
dataset = load_dataset("emotion")

# Instantiate the zero-shot classification pipeline
classifier = pipeline(
    "zero-shot-classification",
    model="joeddav/bart-large-mnli-yahoo-answers",
    tokenizer="joeddav/bart-large-mnli-yahoo-answers",
)

# Example prompt for offensive classification
prompt = "Is this text offensive? Answer yes or no.\n"

# Use a few examples from the dataset to create classification prompts
text = dataset["train"]["text"][:5]
labels = dataset["train"]["label"][:5]
prompts = [prompt + t for t in text]

# Use the zero-shot classifier on the test set
test_text = dataset["test"]["text"]
test_labels = dataset["test"]["label"]
zero_shot_preds = classifier(prompts, test_text)

# Evaluate zero-shot classification
correct = 0
total = len(test_labels)
for i, pred in enumerate(zero_shot_preds):
    label_pred = pred["labels"][0]
    if label_pred == labels[i]:
        correct += 1

'''accuracy = correct / total
print(f"Zero-shot classification accuracy: {accuracy:.4f}")'''

from sklearn.metrics import classification_report
print(classification_report(test_labels, label_pred, digits=4))

print(prompts)


In [None]:
from sklearn.metrics import classification_report
print(classification_report(test_labels, label_pred, digits=4))

print(prompts)

In [None]:
from transformers import pipeline

zero_shot_classifier = pipeline("zero-shot-classification", model="EleutherAI/gpt-neo-2.7B")

prompts = ["Is this text about anger, fear, joy, love, sadness, or surprise?",
           "What is the emotion expressed in this text?",
           "Can you classify the emotion in this text?",
           "Which of these emotions best describes the sentiment in this text?"]

zero_shot_results = []
for prompt in prompts:
    zero_shot_results.append(zero_shot_classifier(test_data["text"], candidate_labels=["anger", "fear", "joy", "love", "sadness", "surprise"], prompt=prompt))

print("Zero-shot classification results:", zero_shot_results)


Downloading (…)lve/main/config.json:   0%|          | 0.00/1.46k [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/10.7G [00:00<?, ?B/s]