In [2]:
import os
from string import punctuation
os.environ["CUDA_VISIBLE_DEVICES"] = "3"
os.environ["TOKENIZERS_PARALLELISM"] = "false"

import numpy as np
import pandas as pd
import os
from tqdm import tqdm
import bitsandbytes as bnb
import torch
import torch.nn as nn
import transformers
from datasets import Dataset
from peft import LoraConfig, PeftConfig, prepare_model_for_kbit_training
from trl import SFTTrainer
from trl import setup_chat_format
from transformers import (AutoModelForCausalLM, 
                          AutoTokenizer, 
                          BitsAndBytesConfig, 
                          TrainingArguments, 
                          pipeline, 
                          logging)
from sklearn.metrics import (accuracy_score, 
                             classification_report, 
                             confusion_matrix)
from sklearn.model_selection import train_test_split
from datetime import datetime
import csv
from dotenv import load_dotenv

load_dotenv()
access_token = os.getenv("HF_TOKEN1")
if access_token is None:
    raise ValueError(f"HF access_token is None. Please set up token in system environment.")

  from .autonotebook import tqdm as notebook_tqdm


In [2]:

################################ Start of Utility Functions ################################################

# def query(payload):
# 	response = requests.post(API_URL, headers=headers, json=payload)
# 	return response.json()

def generate_sentiment_prompt(text):
    return f"""
            Analyze the sentiment of the text enclosed in angle brackets, 
            determine if it is positive, neutral, or negative, and 
            return the answer as the corresponding sentiment label "positive" or "neutral" or "negative".
            For example: 
            <You’ve had over a month to get this finalized ! Why are things delayed ?> = negative
            <WOW! Drone Delivery Startup, @zipline Raises $25m To Expand Its Operations In Africa> = positive
            <The environment can and has survived much hotter conditions.> = neutral

            <{text}> = """.strip()


def generate_emotion_prompt(text):
    return f"""
            Analyze the emotion of the text enclosed in angle brackets, 
            determine if it is happiness, anger, disgust, fear, sadness, surprise or other emotion, and 
            return the answer as the corresponding emotion label "happiness" or "anger" or "disgust" or "fear" or "sadness" or "surprise" or "other".
            For example:
            <You’ve had over a month to get this finalized ! Why are things delayed ?> = anger
            <WOW! Drone Delivery Startup, @zipline Raises $25m To Expand Its Operations In Africa> = surprise
            <The environment can and has survived much hotter conditions.> = other

            <{text}> = """.strip()

def generate_emotion_and_sentiment_prompt(text):
    return f"""
            Analyze the emotion and sentiment of the text enclosed in angle brackets. 
            For emotion, determine if it is happiness, anger, disgust, fear, sadness, surprise or other emotion.
            For sentiment, determine if it is happiness, anger, disgust, fear, sadness, surprise or other emotion.
            Return the answer as "emotion" "sentiment" where emotion is from the corresponding emotion label "happiness" or "anger" or "disgust" 
            or "fear" or "sadness" or "surprise" or "other"; and sentiment is from the corresponding sentiment label "positive" or "neutral" or "negative"; 
            emotion followed by sentiment, separated by a space.
            
            For example:
            <You’ve had over a month to get this finalized ! Why are things delayed ?> = anger negative
            <WOW! Drone Delivery Startup, @zipline Raises $25m To Expand Its Operations In Africa> = surprise positive
            <The environment can and has survived much hotter conditions.> = other neutral

            <{text}> = """.strip()

def extract_label(generated_text, target_labels):
    """Extract from the generated text the first label that is defined in the set of target labels"""
    tokens = generated_text.split()
    for token in tokens:
        token = token.strip(punctuation)
        if token.lower() in target_labels:
            return token.lower()
    
    return None

def predict(model, tokenizer, datafile, outfile1, outfile2):

    pipe = pipeline(
        task="text-generation", 
        model=model, 
        tokenizer=tokenizer,
        max_new_tokens=64,
        # device=2,
        device_map="auto",
        # padding=True,
    )

    with open(datafile, 'r', newline='') as infile, open(outfile1, 'w', newline='') as out_file1, open(outfile2, 'w', newline='') as out_file2:
        csv_reader = csv.DictReader(infile)
        
        fieldnames1 = csv_reader.fieldnames + ["llama3_sentiment", "llama3_emotion"]
        csv_writer1 = csv.DictWriter(out_file1, fieldnames=fieldnames1)
        csv_writer1.writeheader()

        fieldnames2 = csv_reader.fieldnames + ["llama3_raw"]  # changed from 2 separate columns for emotion and sentiment
        csv_writer2 = csv.DictWriter(out_file2, fieldnames=fieldnames2)
        csv_writer2.writeheader()
        
        start_time = datetime.now()
        last_time = start_time
        counter = 1
        MAX_ROW = 2
        
        for row in csv_reader:
            if counter > MAX_ROW: break
            prompt = generate_emotion_and_sentiment_prompt(row['text'])

            print("Inferencing row", counter)
            # print("tokenized input:", tokenizer(prompt, padding="max_length", max_length=512))
            output = pipe(prompt)
            # print(f"inference time: {(datetime.now()-last_time).total_seconds()}s")
            llama3_sentiment = None
            llama3_emotion = None

            raw = output[0]["generated_text"].split(prompt)[-1]
            llama3_sentiment = extract_label(raw, target_labels=["positive", "negative", "neutral"])
            llama3_emotion = extract_label(raw, target_labels=["happiness", "anger", "disgust", "fear", "sadness", "surprise", "other"])

            # print(output[0]["generated_text"])
            # print(llama3_sentiment)
            # print(llama3_emotion)

            row["llama3_sentiment"] = llama3_sentiment
            row["llama3_emotion"] = llama3_emotion
            # print(row)
            csv_writer1.writerow(row)
            
            row.pop("llama3_sentiment", None)
            row.pop("llama3_emotion", None)
            row["llama3_raw"] = raw
            csv_writer2.writerow(row)

            t_delta = (datetime.now()-last_time).total_seconds()*1000
            print("Time elapsed (ms): ", t_delta)
            last_time = datetime.now()
            
            counter += 1

    print(f"Total time elapsed (s): {(last_time-start_time).total_seconds()}")

def predict(model, tokenizer, df):
    pipe = pipeline(
        task="text-generation", 
        model=model, 
        tokenizer=tokenizer,
        max_new_tokens=64,
        # device=2,
        device_map="auto",
        # padding=True,
    )
    output = pd.DataFrame(columns=["emotion", "sentiment"])
    sentiments = []
    emotions = []
    for _, row in df.iterrows():
        prompt = row['text']
        output = pipe(prompt)
        raw = output[0]["generated_text"].split(prompt)[-1]
        llama3_sentiment = extract_label(raw, target_labels=["positive", "negative", "neutral"])
        llama3_emotion = extract_label(raw, target_labels=["happiness", "anger", "disgust", "fear", "sadness", "surprise", "other"])
        sentiments.append(llama3_sentiment)
        emotions.append(llama3_emotion)

    output = pd.DataFrame(
        {
            "emotion": emotions,
            "sentiment": sentiments
        }
    )

    # print(output.describe())
    return output

########################################## End of Utility Functions ##############################################################


In [3]:

# datafile = "data/drone/masked_all_tweets.csv"
# outfile1 = "output/drone/local_llama3_8B/test/masked_all_tweets_llama3.csv"
# outfile2 = "output/drone/local_llama3_8B/test/masked_all_tweets_llama3_raw.csv"
datafile = "data/drone/drone_tweets_qc_annotated.csv"
df = pd.read_csv(datafile, encoding="utf-8", encoding_errors="replace")

emotions = ["happiness", "anger", "disgust", "fear", "sadness", "surprise", "other"]
sentiments = ["positive", "negative", "neutral"]
sent_mapping = {'positive': 2, 'neutral': 1, 'negative': 0}
emotion_mapping = {"happiness":0, "anger":1, "disgust":2, "fear":3, "sadness":4, "surprise":5, "other":6}

df["emotion"] = df["golden emotion"]
df["sentiment"] = df["golden sentiment"]
df = df[["text","emotion","sentiment"]]
df["text"] = df["text"].map(generate_emotion_and_sentiment_prompt)
df_train, df_test = train_test_split(df, test_size=0.15, random_state=88)

emotion_df = df_train.rename(columns={"emotion": "label"})
sentiment_df = df_train.rename(columns={"sentiment": "label"})

emotion_dataset = Dataset.from_pandas(emotion_df).remove_columns("__index_level_0__")
sentiment_dataset = Dataset.from_pandas(sentiment_df).remove_columns("__index_level_0__")


# pred_path = "output/drone/local_llama3_8B/few_shots/masked_all_tweets_llama3.csv"
# preds_df = pd.read_csv(pred_path)
# preds_df["emotion"] = preds_df["llama3_emotion"]
# preds_df["sentiment"] = preds_df["llama3_sentiment"]



In [4]:
emotion_dataset = emotion_dataset.train_test_split(test_size=0.2, seed=88)
sentiment_dataset = sentiment_dataset.train_test_split(test_size=0.2, seed=88)

In [5]:
# device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
device = "cuda"

cache_dir = "cache/llama3_8B"
model_name = "meta-llama/Meta-Llama-3-8B-Instruct"

compute_dtype = getattr(torch, "float16")

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True, 
    bnb_4bit_quant_type="nf4", 
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=True,
)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    torch_dtype=compute_dtype,
    token=access_token,
    # quantization_config=bnb_config, 
    cache_dir=cache_dir,
)

prepare_model_for_kbit_training(model=model)

# model.to(device)
model.config.use_cache = False
model.config.pretraining_tp = 1
model.train()

tokenizer = AutoTokenizer.from_pretrained(
    model_name, 
    trust_remote_code=True,
    token=access_token,
    cache_dir=cache_dir,
    padding=True
)

tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"  # 　left for inference

output_dir="output/drone/local_llama3_8B/trained_weigths"

peft_config = LoraConfig(
        lora_alpha=16, 
        lora_dropout=0.1,
        r=64,
        bias="none",
        target_modules="all-linear",
        task_type="CAUSAL_LM",
)

training_arguments = TrainingArguments(
    output_dir=output_dir,                    # directory to save and repository id
    num_train_epochs=3,                       # number of training epochs
    per_device_train_batch_size=1,            # batch size per device during training
    gradient_accumulation_steps=8,            # number of steps before performing a backward/update pass
    gradient_checkpointing=True,              # use gradient checkpointing to save memory
    optim="paged_adamw_32bit",
    save_steps=0,
    logging_steps=10,                         # log every 10 steps
    learning_rate=2e-4,                       # learning rate, based on QLoRA paper
    weight_decay=0.001,
    fp16=True,
    bf16=False,
    max_grad_norm=0.3,                        # max gradient norm based on QLoRA paper
    max_steps=-1,
    warmup_ratio=0.03,                        # warmup ratio based on QLoRA paper
    group_by_length=True,
    lr_scheduler_type="cosine",               # use cosine learning rate scheduler
    # report_to="tensorboard",                  # report metrics to tensorboard
    evaluation_strategy="epoch"               # save checkpoint every epoch
)

trainer = SFTTrainer(
    model=model,
    args=training_arguments,
    train_dataset=emotion_dataset["train"],
    eval_dataset=emotion_dataset["test"],
    peft_config=peft_config,
    dataset_text_field="text",
    tokenizer=tokenizer,
    max_seq_length=1024,
    packing=False,
    dataset_kwargs={
        "add_special_tokens": False,
        "append_concat_token": False,
    }
)


# model, tokenizer = setup_chat_format(model, tokenizer)

# predict(model, tokenizer, datafile, outfile1, outfile2)

Loading checkpoint shards: 100%|██████████| 4/4 [00:06<00:00,  1.71s/it]
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.

Deprecated positional argument(s) used in SFTTrainer, please use the SFTConfig to set these arguments instead.
Map: 100%|██████████| 204/204 [00:00<00:00, 2469.26 examples/s]
Map: 100%|██████████| 51/51 [00:00<00:00, 2168.54 examples/s]


In [6]:
trainer.train()



Epoch,Training Loss,Validation Loss
0,0.5301,0.473508
2,0.3579,0.460372


TrainOutput(global_step=75, training_loss=0.60627303759257, metrics={'train_runtime': 283.528, 'train_samples_per_second': 2.159, 'train_steps_per_second': 0.265, 'total_flos': 7110656443662336.0, 'train_loss': 0.60627303759257, 'epoch': 2.9411764705882355})

In [10]:
trainer.save_model(output_dir)
tokenizer.save_pretrained(output_dir)



In [15]:
import gc

for _ in range(100):
    torch.cuda.empty_cache()
    gc.collect()

In [16]:
from peft import AutoPeftModelForCausalLM

finetuned_model = output_dir
compute_dtype = getattr(torch, "float16")
tokenizer = AutoTokenizer.from_pretrained(output_dir)

model = AutoPeftModelForCausalLM.from_pretrained(
     finetuned_model,
     torch_dtype=compute_dtype,
     return_dict=False,
     low_cpu_mem_usage=True,
     device_map=device,
)

merged_model = model.merge_and_unload()
merged_model.save_pretrained("./finetuned_llama3_8B",safe_serialization=True, max_shard_size="2GB")
tokenizer.save_pretrained("./finetuned_llama3_8B")

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Loading checkpoint shards: 100%|██████████| 4/4 [00:07<00:00,  1.80s/it]
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


('./finetuned_llama3_8B/tokenizer_config.json',
 './finetuned_llama3_8B/special_tokens_map.json',
 './finetuned_llama3_8B/tokenizer.json')

In [28]:
preds = predict(merged_model, tokenizer, df_test)

        emotions sentiments
count         45         45
unique         5          3
top     surprise   positive
freq          25         24


In [3]:
import pandas as pd
import numpy as np
from sklearn.metrics import (accuracy_score, 
                             classification_report, 
                             confusion_matrix)

# def map_func(x):
#     return mapping.get(x, 1)

def evaluate(y_true, y_pred, labels):

    y_true = y_true.tolist()
    y_pred = y_pred.tolist()
    
    # Calculate accuracy
    accuracy = accuracy_score(y_true=y_true, y_pred=y_pred)
    print(f'Accuracy: {accuracy:.5f}')
    
    
    for label in labels:
        label_indices = [i for i in range(len(y_true)) 
                         if y_true[i] == label]
        label_y_true = [y_true[i] for i in label_indices]
        label_y_pred = [y_pred[i] for i in label_indices]
        accuracy = accuracy_score(label_y_true, label_y_pred)
        print(f'Accuracy for label {label}: {accuracy:.5f}')
        
    # Generate classification report
    class_report = classification_report(y_true=y_true, y_pred=y_pred, digits=5)
    print('\nClassification Report:')
    print(class_report)
    
    # Generate confusion matrix
    conf_matrix = confusion_matrix(y_true=y_true, y_pred=y_pred, labels=labels)
    print('\nConfusion Matrix:')
    print(conf_matrix)



In [39]:
evaluate(df_test['emotion'], preds['emotions'], emotions)
evaluate(df_test['sentiment'], preds['sentiments'], sentiments)

Accuracy: 0.11111
Accuracy for label happiness: 0.20000
Accuracy for label anger: nan
Accuracy for label disgust: 0.00000
Accuracy for label fear: nan
Accuracy for label sadness: nan
Accuracy for label surprise: nan
Accuracy for label other: 0.10526

Classification Report:
              precision    recall  f1-score   support

       anger    0.00000   0.00000   0.00000         0
     disgust    0.00000   0.00000   0.00000         2
   happiness    1.00000   0.20000   0.33333         5
       other    1.00000   0.10526   0.19048        38
    surprise    0.00000   0.00000   0.00000         0

    accuracy                        0.11111        45
   macro avg    0.40000   0.06105   0.10476        45
weighted avg    0.95556   0.11111   0.19788        45


Confusion Matrix:
[[ 1  1  0  0  0  3  0]
 [ 0  0  0  0  0  0  0]
 [ 0  2  0  0  0  0  0]
 [ 0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0]
 [ 0 10  2  0  0 22  4]]
Accuracy: 0.48889
Accuracy for label positive: 0.

  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)
  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)
  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)
  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [1]:
import os
from string import punctuation
os.environ["CUDA_VISIBLE_DEVICES"] = "3"
os.environ["TOKENIZERS_PARALLELISM"] = "false"

import numpy as np
import pandas as pd
import os
from tqdm import tqdm
import bitsandbytes as bnb
import torch
import torch.nn as nn
import transformers
from datasets import Dataset
from peft import LoraConfig, PeftConfig, prepare_model_for_kbit_training
from trl import SFTTrainer
from trl import setup_chat_format
from transformers import (AutoModelForCausalLM, 
                          AutoTokenizer, 
                          BitsAndBytesConfig, 
                          TrainingArguments, 
                          pipeline, 
                          logging)
from sklearn.metrics import (accuracy_score, 
                             classification_report, 
                             confusion_matrix)
from sklearn.model_selection import train_test_split
from datetime import datetime
import csv
from dotenv import load_dotenv
from peft import AutoPeftModelForCausalLM

# tokenizer = AutoTokenizer.from_pretrained("./finetuned_llama3_8B/")

# model = AutoModelForCausalLM.from_pretrained("./finetuned_llama3_8B/", device_map="auto")

  from .autonotebook import tqdm as notebook_tqdm


In [10]:

def predict(model, tokenizer, df):
    pipe = pipeline(
        task="text-generation", 
        model=model, 
        tokenizer=tokenizer,
        max_new_tokens=64,
        # device=2,
        device_map="auto",
        # padding=True,
    )
    output = pd.DataFrame(columns=["emotion", "sentiment"])
    sentiments = []
    emotions = []
    for _, row in df.iterrows():
        prompt = row['text']
        output = pipe(prompt)
        raw = output[0]["generated_text"].split(prompt)[-1]
        llama3_sentiment = extract_label(raw, target_labels=["positive", "negative", "neutral"])
        llama3_emotion = extract_label(raw, target_labels=["happiness", "anger", "disgust", "fear", "sadness", "surprise", "other"])
        sentiments.append(llama3_sentiment)
        emotions.append(llama3_emotion)

    output = pd.DataFrame(
        {
            "emotion": emotions,
            "sentiment": sentiments
        }
    )

    # print(output.describe())
    return output

# def map_func(x):
#     return mapping.get(x, 1)

def evaluate(y_true, y_pred, labels):

    y_true = y_true.tolist()
    y_pred = y_pred.tolist()
    
    # Calculate accuracy
    accuracy = accuracy_score(y_true=y_true, y_pred=y_pred)
    print(f'Accuracy: {accuracy:.5f}')
    
    
    for label in labels:
        label_indices = [i for i in range(len(y_true)) 
                         if y_true[i] == label]
        label_y_true = [y_true[i] for i in label_indices]
        label_y_pred = [y_pred[i] for i in label_indices]
        accuracy = accuracy_score(label_y_true, label_y_pred)
        print(f'Accuracy for label {label}: {accuracy:.5f}')
        
    # Generate classification report
    class_report = classification_report(y_true=y_true, y_pred=y_pred, digits=5)
    print('\nClassification Report:')
    print(class_report)
    
    # Generate confusion matrix
    conf_matrix = confusion_matrix(y_true=y_true, y_pred=y_pred, labels=labels)
    print('\nConfusion Matrix:')
    print(conf_matrix)

In [2]:
def generate_emotion_and_sentiment_prompt(text):
    return f"""
            Analyze the emotion and sentiment of the text enclosed in angle brackets. 
            For emotion, determine if it is happiness, anger, disgust, fear, sadness, surprise or other emotion.
            For sentiment, determine if it is happiness, anger, disgust, fear, sadness, surprise or other emotion.
            Return the answer as "emotion" "sentiment" where emotion is from the corresponding emotion label "happiness" or "anger" or "disgust" 
            or "fear" or "sadness" or "surprise" or "other"; and sentiment is from the corresponding sentiment label "positive" or "neutral" or "negative"; 
            emotion followed by sentiment, separated by a space.
            
            For example:
            <You’ve had over a month to get this finalized ! Why are things delayed ?> = anger negative
            <WOW! Drone Delivery Startup, @zipline Raises $25m To Expand Its Operations In Africa> = surprise positive
            <The environment can and has survived much hotter conditions.> = other neutral

            <{text}> = """.strip()

datafile = "data/drone/responses/all_tweets_full_responses.csv"
df = pd.read_csv(datafile, encoding="utf-8", encoding_errors="replace")

emotions = ["happiness", "anger", "disgust", "fear", "sadness", "surprise", "other"]
sentiments = ["positive", "negative", "neutral"]
sent_mapping = {'positive': 2, 'neutral': 1, 'negative': 0}
emotion_mapping = {"happiness":0, "anger":1, "disgust":2, "fear":3, "sadness":4, "surprise":5, "other":6}

df["emotion"] = df["voted_emotion"]
df["sentiment"] = df["voted_sentiment"]
df = df[["text","emotion","sentiment"]]
df["text"] = df["text"].map(generate_emotion_and_sentiment_prompt)
df_train, df_test = train_test_split(df, test_size=0.7, random_state=88, stratify=df['emotion'])

emotion_df = df_train.rename(columns={"emotion": "label"})
sentiment_df = df_train.rename(columns={"sentiment": "label"})

emotion_dataset = Dataset.from_pandas(emotion_df).remove_columns("__index_level_0__")
sentiment_dataset = Dataset.from_pandas(sentiment_df).remove_columns("__index_level_0__")

In [7]:
emotion_df.to_csv("data/drone/tweets_emotion_train.csv")
sentiment_df.to_csv("data/drone/tweets_sentiment_train.csv")

In [9]:
import pandas as pd
from datasets import Dataset, DatasetDict
from sklearn.model_selection import train_test_split

# Load a dataset from the Hugging Face Hub
datafile = "data/drone/responses/all_tweets_full_responses.csv"
df = pd.read_csv(datafile, encoding="utf-8", encoding_errors="replace")

df["emotion"] = df["voted_emotion"]
df["sentiment"] = df["voted_sentiment"]
df = df[["text","emotion","sentiment"]]
emotion_df = df.rename(columns={"emotion": "label"})
sentiment_df = df.rename(columns={"sentiment": "label"})

emotion_df_train, emotion_df_test = train_test_split(emotion_df, test_size=0.7, random_state=88, stratify=df['emotion'])
sentiment_df_train, sentiment_df_test = train_test_split(sentiment_df, test_size=0.7, random_state=88, stratify=df['sentiment'])

emotion_ds_train = Dataset.from_pandas(emotion_df_train).remove_columns("__index_level_0__")
emotion_ds_test = Dataset.from_pandas(emotion_df_test).remove_columns("__index_level_0__")
sentiment_ds_train = Dataset.from_pandas(sentiment_df_train).remove_columns("__index_level_0__")
sentiment_ds_test = Dataset.from_pandas(sentiment_df_test).remove_columns("__index_level_0__")

emotion_ds = DatasetDict({
    'train': emotion_ds_train,
    'test': emotion_ds_test
})

sentiment_ds = DatasetDict({
    'train': sentiment_ds_train,
    'test': sentiment_ds_test
})

In [11]:
sentiment_ds

DatasetDict({
    train: Dataset({
        features: ['text', 'emotion', 'label'],
        num_rows: 750
    })
    test: Dataset({
        features: ['text', 'emotion', 'label'],
        num_rows: 1751
    })
})

In [13]:
df['emotion'].value_counts()

emotion
other        2245
disgust        68
surprise       59
happiness      50
anger          47
fear           26
sadness         6
Name: count, dtype: int64

In [3]:
df_test['emotion'].value_counts()

emotion
other        1572
disgust        48
surprise       41
happiness      35
anger          33
fear           18
sadness         4
Name: count, dtype: int64

In [12]:
preds = predict(model, tokenizer, df_test)

evaluate(df_test['emotion'], preds['emotion'], emotions)
evaluate(df_test['sentiment'], preds['sentiment'], sentiments)

You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset


Accuracy: 0.35556
Accuracy for label happiness: 0.00000
Accuracy for label anger: nan
Accuracy for label disgust: 0.00000
Accuracy for label fear: nan
Accuracy for label sadness: nan
Accuracy for label surprise: nan
Accuracy for label other: 0.42105

Classification Report:
              precision    recall  f1-score   support

       anger    0.00000   0.00000   0.00000         0
     disgust    0.00000   0.00000   0.00000         2
   happiness    0.00000   0.00000   0.00000         5
       other    0.84211   0.42105   0.56140        38
    surprise    0.00000   0.00000   0.00000         0

    accuracy                        0.35556        45
   macro avg    0.16842   0.08421   0.11228        45
weighted avg    0.71111   0.35556   0.47407        45


Confusion Matrix:
[[ 0  1  0  0  0  1  3]
 [ 0  0  0  0  0  0  0]
 [ 0  2  0  0  0  0  0]
 [ 0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0]
 [ 0 12  0  0  0 10 16]]
Accuracy: 0.62222
Accuracy for label positive: 0.

  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)
  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)
  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)
  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [21]:
from datasets import Dataset, load_dataset

ds = load_dataset("csv", data_files="./data/drone/masked_all_tweets.csv", split="train")
def transform_text(example):
    example['text'] = generate_emotion_and_sentiment_prompt(example['text'])
    return example
# ds = ds.map(transform_text)
# ds.map(generate_emotion_and_sentiment_prompt)