GoEmotions

Combined Datasets

In [None]:
using_colab = True

if using_colab:
  !pip install -q condacolab
  import condacolab
  condacolab.install()

# For Localhost
if not using_colab:
  !conda create -n st python pandas tqdm
  !conda activate st
# End For Localhost

!pip install transformers==4.30.2
!pip install simpletransformers
#!pip install --upgrade simpletransformers
#!conda install pytorch pytorch-cuda=11.7 -c pytorch -c nvidia

⏬ Downloading https://github.com/jaimergp/miniforge/releases/download/24.11.2-1_colab/Miniforge3-colab-24.11.2-1_colab-Linux-x86_64.sh...
📦 Installing...
📌 Adjusting configuration...
🩹 Patching environment...
⏲ Done in 0:00:18
🔁 Restarting kernel...
Collecting transformers==4.30.2
  Downloading transformers-4.30.2-py3-none-any.whl.metadata (113 kB)
Collecting filelock (from transformers==4.30.2)
  Downloading filelock-3.18.0-py3-none-any.whl.metadata (2.9 kB)
Collecting huggingface-hub<1.0,>=0.14.1 (from transformers==4.30.2)
  Downloading huggingface_hub-0.30.2-py3-none-any.whl.metadata (13 kB)
Collecting numpy>=1.17 (from transformers==4.30.2)
  Downloading numpy-2.2.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (62 kB)
Collecting pyyaml>=5.1 (from transformers==4.30.2)
  Downloading PyYAML-6.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (2.1 kB)
Collecting regex!=2019.12.17 (from transformers==4.30.2)
  Downloading regex-2024.11.6-cp

Project Objective:

**Run emotion prediction on movie synopsis. Train a new model to associate the predicted emotions to the genre provided.
We then attempt to predict genres given the predicted emotions for the test data.**

Movies Database: https://www.kaggle.com/datasets/thedevastator/rotten-tomatoes-top-movies-ratings-and-technical/data


In [None]:
import condacolab
condacolab.check()

✨🍰✨ Everything looks OK!


In [None]:
from simpletransformers.classification import ClassificationModel, ClassificationArgs
import torch

In [None]:
import pandas as pd
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

In [None]:
# Map label to emotion.
emotions_list = [
    "admiration",
    "amusement",
    "anger",
    "annoyance",
    "approval",
    "caring",
    "confusion",
    "curiosity",
    "desire",
    "disappointment",
    "disapproval",
    "disgust",
    "embarrassment",
    "excitement",
    "fear",
    "gratitude",
    "grief",
    "joy",
    "love",
    "nervousness",
    "optimism",
    "pride",
    "realization",
    "relief",
    "remorse",
    "sadness",
    "surprise",
    "neutral"
]

emotions_dict = {}
emotion_to_label = {}
for index, emotion in enumerate(emotions_list):
    emotions_dict[index] = emotion
    emotion_to_label[emotion] = index

print(emotions_dict)
print(emotion_to_label)

{0: 'admiration', 1: 'amusement', 2: 'anger', 3: 'annoyance', 4: 'approval', 5: 'caring', 6: 'confusion', 7: 'curiosity', 8: 'desire', 9: 'disappointment', 10: 'disapproval', 11: 'disgust', 12: 'embarrassment', 13: 'excitement', 14: 'fear', 15: 'gratitude', 16: 'grief', 17: 'joy', 18: 'love', 19: 'nervousness', 20: 'optimism', 21: 'pride', 22: 'realization', 23: 'relief', 24: 'remorse', 25: 'sadness', 26: 'surprise', 27: 'neutral'}
{'admiration': 0, 'amusement': 1, 'anger': 2, 'annoyance': 3, 'approval': 4, 'caring': 5, 'confusion': 6, 'curiosity': 7, 'desire': 8, 'disappointment': 9, 'disapproval': 10, 'disgust': 11, 'embarrassment': 12, 'excitement': 13, 'fear': 14, 'gratitude': 15, 'grief': 16, 'joy': 17, 'love': 18, 'nervousness': 19, 'optimism': 20, 'pride': 21, 'realization': 22, 'relief': 23, 'remorse': 24, 'sadness': 25, 'surprise': 26, 'neutral': 27}


In [None]:
# Map label to ekman.
ekman_emotions_map = {
    "anger": ["anger", "annoyance", "disapproval"],
    "disgust": ["disgust"],
    "fear": ["fear", "nervousness"],
    "joy": ["joy", "amusement", "approval", "excitement", "gratitude",  "love", "optimism", "relief", "pride", "admiration", "desire", "caring", "neutral"],
    "sadness": ["sadness", "disappointment", "embarrassment", "grief",  "remorse"],
    "surprise": ["surprise", "realization", "confusion", "curiosity"]
}

label_to_ekman = {}

for index, emotion in emotions_dict.items():
    for simplified_emotion, emotion_list in ekman_emotions_map.items():
        if emotion in emotion_list:
            label_to_ekman[index] = simplified_emotion
            break
    else:
        # Deal with 'neutral'.
        label_to_ekman[index] = "neutral"

print(label_to_ekman)

{0: 'joy', 1: 'joy', 2: 'anger', 3: 'anger', 4: 'joy', 5: 'joy', 6: 'surprise', 7: 'surprise', 8: 'joy', 9: 'sadness', 10: 'anger', 11: 'disgust', 12: 'sadness', 13: 'joy', 14: 'fear', 15: 'joy', 16: 'sadness', 17: 'joy', 18: 'joy', 19: 'fear', 20: 'joy', 21: 'joy', 22: 'surprise', 23: 'joy', 24: 'sadness', 25: 'sadness', 26: 'surprise', 27: 'joy'}


In [None]:
# Map ekman to new label.
ekman_emotions_to_label = {
    "anger": 0,
    "disgust": 1,
    "fear": 2,
    "joy": 3,
    "sadness": 4,
    "surprise": 5
}

ekman_label_to_emotion = {
    0: "anger",
    1: "disgust",
    2: "fear",
    3: "joy",
    4: "sadness",
    5: "surprise"
}

In [None]:
train = pd.read_csv('train.tsv', sep='\t')
test = pd.read_csv('test.tsv', sep='\t')

label_name = 'labels'
data_name = 'text'
labels = [data_name, label_name, 'code']
train = train.set_axis(labels, axis=1)
train = train.drop('code', axis=1)
test = test.set_axis(labels, axis=1)
test = test.drop('code', axis=1)
print(len(train))

# Filter out rows with multiple emotion tags.
# Taken from https://stackoverflow.com/questions/13851535/how-to-delete-rows-from-a-pandas-dataframe-based-on-a-conditional-expression
train = train[train[label_name].map(lambda x: len(x.split(sep=',')) == 1)]
test = test[test[label_name].map(lambda x: len(x.split(sep=',')) == 1)]

print(len(train))

43409
36307


In [None]:
train[label_name] = train[label_name].astype(int) #pd.to_numeric(train[label_name], downcast='integer', errors='coerce')
train = train.dropna()
test[label_name] = train[label_name].astype(int) #pd.to_numeric(test[label_name], downcast='integer', errors='coerce')
test = test.dropna()
print(train.head)
print(test.head)

<bound method NDFrame.head of                                                     text  labels
0      Now if he does off himself, everyone will thin...      27
1                         WHY THE FUCK IS BAYLESS ISOING       2
2                            To make her feel threatened      14
3                                 Dirty Southern Wankers       3
4      OmG pEyToN iSn'T gOoD eNoUgH tO hElP uS iN tHe...      26
...                                                  ...     ...
43404  Added you mate well I’ve just got the bow and ...      18
43405  Always thought that was funny but is it a refe...       6
43406  What are you talking about? Anything bad that ...       3
43407            More like a baptism, with sexy results!      13
43408                                    Enjoy the ride!      17

[36307 rows x 2 columns]>
<bound method NDFrame.head of                                                    text  labels
0       It's wonderful because it's awful. At not with.    27.0
1    

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test[label_name] = train[label_name].astype(int) #pd.to_numeric(test[label_name], downcast='integer', errors='coerce')


In [None]:
args = ClassificationArgs(
    save_best_model=True,
    overwrite_output_dir=True,
    output_dir="./output"
)

model = ClassificationModel(
    "bert", #"roberta" | "bert"
    "google-bert/bert-base-uncased", #"SamLowe/roberta-base-go_emotions", #"google-bert/bert-base-uncased",
    num_labels=28,
    args=args,
    use_cuda = torch.cuda.is_available()
)
model.train_model(train)

model_untrained = ClassificationModel(
    "bert", #"roberta" | "bert"
    "google-bert/bert-base-uncased", #"SamLowe/roberta-base-go_emotions", #"google-bert/bert-base-uncased",
    num_labels=28,
    args=args,
    use_cuda = torch.cuda.is_available()
)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google-bert/bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

  0%|          | 0/72 [00:00<?, ?it/s]

Epoch:   0%|          | 0/1 [00:00<?, ?it/s]

  scaler = amp.GradScaler()


Running Epoch 1 of 1:   0%|          | 0/4539 [00:00<?, ?it/s]

  with amp.autocast():
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google-bert/bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
# Untrained Model Results
untrained_predictions_bert, _ = model_untrained.predict(test['text'].tolist())

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/39 [00:00<?, ?it/s]

  with amp.autocast():


In [None]:
def print_results(predictions, test_answers):
  accuracy = accuracy_score(test_answers, predictions)
  precision, recall, f1, _ = precision_recall_fscore_support(test_answers, predictions)
  print(f"Accuracy: {accuracy:.4f}")
  print(f"Precision: {precision.mean():.4f}, Recall: {recall.mean():.4f}, F1-Score: {f1.mean():.4f}")

In [None]:
# Predict
predictions, _ = model.predict(test['text'].tolist())

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/39 [00:00<?, ?it/s]

In [None]:
# Untrained Bert Results
print("Bert Untrained Results")
print_results(untrained_predictions_bert, test['labels'])

Bert Untrained Results
Accuracy: 0.0382
Precision: 0.0118, Recall: 0.0382, F1-Score: 0.0082


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [None]:
# The reason accuracy is low is because there are 28 classes, most of which are very similar to each other.
# This may not be a big issue for our purposes because we will be using the ratios of emotions to
# predict the genre.
print("Bert Results")
print_results(predictions, test['labels'])

Bert Results
Accuracy: 0.1845
Precision: 0.0364, Recall: 0.0354, F1-Score: 0.0349


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [None]:
from transformers import pipeline

In [None]:
roberta_pipe = pipeline('text-classification', "SamLowe/roberta-base-go_emotions")

config.json:   0%|          | 0.00/1.92k [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/499M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/380 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.11M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/280 [00:00<?, ?B/s]

Device set to use cuda:0


In [None]:
pipe_predictions = roberta_pipe(test['text'].tolist())

In [None]:
print("Roberta Results")
#print(pipe_predictions)
pipe_df_predictions = pd.DataFrame(pipe_predictions)
pipe_df_predictions_as_labels = pipe_df_predictions['label'].map(lambda x: emotion_to_label[x])
print(pipe_df_predictions.head())
print(pipe_df_predictions_as_labels.head())

pipe_results = pd.DataFrame(pipe_df_predictions)
print_results(pipe_df_predictions_as_labels, test['labels'])

Roberta Results
        label     score
0  admiration  0.660624
1    optimism  0.549406
2   gratitude  0.982980
3     neutral  0.868578
4   gratitude  0.989375
0     0
1    20
2    15
3    27
4    15
Name: label, dtype: int64
Accuracy: 0.1562
Precision: 0.0377, Recall: 0.0362, F1-Score: 0.0365


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


**Simplify Emotions**

In [None]:
def convert_to_ekman(x):
  return ekman_emotions_to_label[label_to_ekman[int(x)]]

In [None]:
# Bert Simplified Results
test_simplified_answers = test['labels'].map(lambda x: convert_to_ekman(x))
untrained_bert_simplified_predictions = pd.Series(untrained_predictions_bert).map(lambda x: convert_to_ekman(x))
bert_simplified_predictions = pd.Series(predictions).map(lambda x: convert_to_ekman(x))
roberta_simplified_predictions = pipe_df_predictions_as_labels.map(lambda x: convert_to_ekman(x))

print(test_simplified_answers)
print(untrained_bert_simplified_predictions)
print(bert_simplified_predictions)
print(roberta_simplified_predictions)

0       3
1       0
2       2
3       0
4       5
       ..
5420    0
5421    3
5422    3
5423    3
5425    3
Name: labels, Length: 3821, dtype: int64
0       3
1       5
2       3
3       3
4       5
       ..
3816    3
3817    3
3818    3
3819    5
3820    5
Length: 3821, dtype: int64
0       3
1       3
2       3
3       3
4       3
       ..
3816    3
3817    3
3818    3
3819    3
3820    3
Length: 3821, dtype: int64
0       3
1       3
2       3
3       3
4       3
       ..
3816    3
3817    3
3818    3
3819    3
3820    3
Name: label, Length: 3821, dtype: int64


In [None]:
# Test Simplified Results
print("Untrained Bert Results")
print_results(untrained_bert_simplified_predictions, test_simplified_answers)
print("\nBert Results")
print_results(bert_simplified_predictions, test_simplified_answers)
print("\nRoberta Results")
print_results(roberta_simplified_predictions, test_simplified_answers)

Untrained Bert Results
Accuracy: 0.5543
Precision: 0.1783, Recall: 0.1724, F1-Score: 0.1522

Bert Results
Accuracy: 0.5677
Precision: 0.1666, Recall: 0.1671, F1-Score: 0.1660

Roberta Results
Accuracy: 0.5391
Precision: 0.1630, Recall: 0.1626, F1-Score: 0.1628


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Now to predict movie genres based on reviews.

In [None]:
# Load Data
movie_data = pd.read_csv('rotten_tomatoes_top_movies.csv', header=0)
print(movie_data.head)

<bound method NDFrame.head of       Unnamed: 0                              title  year  \
0              0                      Black Panther  2018   
1              1                  Avengers: Endgame  2019   
2              2     Mission: Impossible -- Fallout  2018   
3              3                 Mad Max: Fury Road  2015   
4              4  Spider-Man: Into the Spider-Verse  2018   
...          ...                                ...   ...   
1605        1605                             Priest  2011   
1606        1606                     September Dawn  2006   
1607        1607                   American Outlaws  2001   
1608        1608                          Jonah Hex  2010   
1609        1609                      Texas Rangers  2001   

                                               synopsis  critic_score  \
0     After the death of his father, T'Challa return...            96   
1     Adrift in space with no food or water, Tony St...            94   
2     Ethan Hunt a

In [None]:
# Drop columns.
movie_data = movie_data.drop(["crew", "view_the_collection", "aspect_ratio", "sound_mix", "title", "year",
                              "critic_score", "people_score", "total_reviews", "total_ratings", "original_language",
                              "director", "producer", "writer", "release_date_(theaters)", "release_date_(streaming)",
                              "box_office_(gross_usa)", "runtime", "consensus", "rating", "production_co", "link", "genre", "Unnamed: 0"], axis=1)
print(movie_data.head)

<bound method NDFrame.head of                                                synopsis                type
0     After the death of his father, T'Challa return...  Action & Adventure
1     Adrift in space with no food or water, Tony St...  Action & Adventure
2     Ethan Hunt and the IMF team join forces with C...  Action & Adventure
3     Years after the collapse of civilization, the ...  Action & Adventure
4     Bitten by a radioactive spider in the subway, ...  Action & Adventure
...                                                 ...                 ...
1605  In a society ravaged by centuries of war betwe...             Western
1606  In 1857 Capt. Alexander Fancher leads a wagon ...             Western
1607  After the Civil War ends, Confederate soldiers...             Western
1608  Having cheated death, gunslinger and bounty hu...             Western
1609  Texas, 1875. In a land without justice, where ...             Western

[1610 rows x 2 columns]>


In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Perform train test split.
x_train, x_test, y_train, y_test = train_test_split(movie_data['synopsis'], movie_data['type'], test_size=0.4, stratify=movie_data['type'], random_state=42)
print(x_train)
print(x_test)
print(y_train)
print(y_test)

179     The days of canine superstar Bolt (John Travol...
634     The joys and pitfalls of growing up are seen t...
1487    Gertrude Berg rose to prominence in the 1930s ...
1023    Toby is a divorced father who's trying to make...
1127    Comedian Alvy Singer (Woody Allen) examines th...
                              ...                        
234     Mr. Watanabe suddenly finds that he has termin...
1164    Restless femme fatale Florence Carala (Jeanne ...
551     Director Havana Marking dives into the booming...
949     Legendary jazz musician Clark Terry, who taugh...
651     Paratrooper commander Colonel Mathieu (Jean Ma...
Name: synopsis, Length: 966, dtype: object
818     An adventurous teenager sails out on a daring ...
71      This engaging stop-motion, claymation adventur...
422     Woody (Tom Hanks) is stolen from his home by t...
1130    In this Christmas classic, an old man going by...
216     Geneviève (Catherine Deneuve), a beautiful you...
                             

In [None]:
from sklearn.neural_network import MLPClassifier

In [None]:
def get_emotions(x, model):
  emotion_dict = {}
  sentences = x.split('. ')
  predictions = model.predict(sentences)
  return predictions

#print(get_emotions(x_train[634], model)[0])

In [None]:
def call_model(strings, model_type: str, use_ekman_labels: bool):
  if model_type == 'bert' or model_type != 'roberta':
    if model_type == 'bert':
      emotions, _ = model.predict(strings)
    else:
      emotions, _ = model_untrained.predict(strings)

    emotions = pd.Series(emotions)

  elif model_type == 'roberta':
    emotions = pd.DataFrame(roberta_pipe(strings))['label'].map(lambda x: emotion_to_label[x])

  if use_ekman_labels:
    return emotions.map(lambda x: label_to_ekman[x]).map(lambda x: ekman_emotions_to_label[x]).tolist()
  else:
    return emotions.tolist()

def get_emotion_vector(x, model_type: str):
  emotions = []

  if model_type == 'bert':
    emotions, _ = get_emotions(x, model)
    return emotions.tolist()

  elif model_type == 'roberta':
    return pd.DataFrame(get_emotions(x, roberta_pipe))['label'].map(lambda x: emotion_to_label[x]).tolist()

  return emotions

def get_emotion_map(data, model_type: str, use_ekman_labels: bool):
  emotion_count = [len(x.split('. ')) for x in data]
  print("Total Sentence Count:", pd.Series(emotion_count).sum())
  print(emotion_count)

  split_paragraphs = []
  for paragraph in data:
    for sentence in paragraph.split('. '):
      split_paragraphs.append(sentence)

  emotions = call_model(split_paragraphs, model_type=model_type, use_ekman_labels=use_ekman_labels)
  print(len(emotions))
  print(emotions)

  vectors = []

  i = 0
  emotions_counted = 0
  while i < len(emotion_count):
    emotion_counts = {}

    if use_ekman_labels:
      emotion_counts = {
          0: 0,
          1: 0,
          2: 0,
          3: 0,
          4: 0,
          5: 0,
      }
    else:
      for k in range(28):
        emotion_counts[k] = 0

    emotions_total = emotion_count[i]
    #print("Paragraph Sentence Count:", emotions_total)
    for j in range(emotions_total):
      #print("Total Emotions Counted:", emotions_counted)
      label = emotions[emotions_counted]

      # For emotion vector normalization.
      emotion_counts[label] += 1 / emotions_total
      emotions_counted += 1

    vectors.append(emotion_counts)
    #print(emotion_counts)
    i += 1

  return vectors




In [None]:
# Genre Map
unique_genres = movie_data['type'].unique()
print(unique_genres)

genre_to_label = {}
label_to_genre = {}

for label, genre in enumerate(unique_genres):
  genre_to_label[genre] = label
  label_to_genre[label] = genre

print(genre_to_label)
print(label_to_genre)

['Action & Adventure' 'Animation' 'Art House & International' 'Classics'
 'Comedy' 'Documentary' 'Drama' 'Horror' 'Kids & Family'
 'Musical & Performing Arts' 'Mystery & Suspense' 'Romance'
 'Science Fiction & Fantasy' 'Special Interest' 'Sports & Fitness'
 'Television' 'Western']
{'Action & Adventure': 0, 'Animation': 1, 'Art House & International': 2, 'Classics': 3, 'Comedy': 4, 'Documentary': 5, 'Drama': 6, 'Horror': 7, 'Kids & Family': 8, 'Musical & Performing Arts': 9, 'Mystery & Suspense': 10, 'Romance': 11, 'Science Fiction & Fantasy': 12, 'Special Interest': 13, 'Sports & Fitness': 14, 'Television': 15, 'Western': 16}
{0: 'Action & Adventure', 1: 'Animation', 2: 'Art House & International', 3: 'Classics', 4: 'Comedy', 5: 'Documentary', 6: 'Drama', 7: 'Horror', 8: 'Kids & Family', 9: 'Musical & Performing Arts', 10: 'Mystery & Suspense', 11: 'Romance', 12: 'Science Fiction & Fantasy', 13: 'Special Interest', 14: 'Sports & Fitness', 15: 'Television', 16: 'Western'}


In [None]:
x_train_strings = x_train.astype(str).tolist()
y_train_labels = y_train.astype(str).map(lambda x: genre_to_label[x]).tolist()
x_test_strings = x_test.astype(str).tolist()
y_test_labels = y_test.astype(str).map(lambda x: genre_to_label[x]).tolist()

print(x_train_strings)
print(y_train_labels)
print(x_test_strings)
print(y_test_labels)

[1, 6, 15, 10, 11, 16, 12, 13, 14, 11, 9, 7, 11, 16, 16, 3, 2, 2, 7, 0, 8, 11, 7, 3, 4, 7, 0, 9, 9, 10, 1, 11, 8, 3, 12, 11, 13, 1, 0, 11, 15, 5, 4, 7, 2, 16, 13, 13, 3, 13, 15, 12, 16, 16, 0, 10, 5, 15, 0, 1, 0, 7, 10, 5, 8, 10, 10, 3, 9, 9, 3, 10, 12, 7, 15, 11, 3, 0, 9, 16, 13, 0, 11, 9, 13, 2, 9, 6, 5, 8, 14, 1, 6, 9, 0, 3, 6, 4, 6, 2, 16, 13, 5, 2, 1, 4, 13, 0, 8, 4, 13, 7, 7, 7, 7, 0, 10, 3, 15, 8, 11, 10, 11, 11, 11, 7, 16, 3, 2, 10, 10, 3, 2, 1, 9, 1, 2, 2, 7, 13, 4, 4, 15, 0, 16, 9, 0, 1, 15, 3, 14, 16, 10, 10, 15, 2, 1, 0, 9, 10, 12, 9, 10, 3, 13, 13, 2, 4, 16, 9, 16, 11, 7, 3, 4, 5, 6, 7, 16, 9, 12, 1, 2, 0, 8, 13, 11, 14, 3, 15, 15, 8, 6, 4, 11, 2, 2, 4, 5, 4, 7, 8, 8, 12, 1, 5, 14, 0, 3, 12, 0, 6, 15, 10, 12, 4, 10, 7, 1, 10, 16, 6, 4, 5, 12, 4, 11, 1, 9, 2, 9, 11, 11, 7, 12, 12, 16, 12, 5, 13, 7, 10, 3, 1, 12, 10, 5, 2, 14, 10, 0, 16, 15, 10, 6, 10, 3, 2, 4, 4, 10, 15, 12, 9, 2, 16, 0, 12, 10, 6, 13, 11, 4, 12, 10, 1, 1, 10, 0, 7, 8, 5, 9, 7, 0, 13, 8, 8, 14, 15, 5, 11, 8

In [None]:
bert_train_emotions, _ = model.predict(x_train_strings)
bert_train_emotions_complex = bert_train_emotions

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  with amp.autocast():


In [None]:
bert_untrained_emotions, _ = model_untrained.predict(x_train_strings)
bert_untrained_emotions_complex = bert_untrained_emotions

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

In [None]:
roberta_train_emotions = pd.DataFrame(roberta_pipe.predict(x_train_strings))
roberta_train_emotions_complex = roberta_train_emotions['label'].map(lambda x: emotion_to_label[x]).tolist()

In [None]:
print(bert_train_emotions_complex)
print(bert_untrained_emotions_complex)
print(roberta_train_emotions_complex)

[27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 20, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 25, 27, 0, 27, 27, 27, 27, 27, 27, 27, 7, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 20, 14, 27, 27, 27, 27, 27, 27, 27, 14, 27, 27, 27, 27, 27, 0, 27, 27, 27, 27, 14, 25, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 0, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 0, 27, 27, 27, 27, 27, 27, 0, 27, 14, 27, 27, 27, 27, 27, 27, 0, 27, 27, 27, 27, 27, 27, 27, 25, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 0, 27, 27, 27, 27, 25, 27, 27, 27, 27, 27, 27, 27, 25, 26, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 0, 25, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 14, 27, 27, 27, 25, 27, 27, 27, 27, 25, 27, 27, 27, 27, 27, 27, 

In [None]:
# Get Emotions
bert_simplified_train_emotions = pd.Series(bert_train_emotions).map(lambda x: convert_to_ekman(x))
print(bert_simplified_train_emotions.head)
bert_untrained_simplified_train_emotions = pd.Series(bert_untrained_emotions).map(lambda x: convert_to_ekman(x))
print(bert_untrained_simplified_train_emotions.head)
roberta_simplified_train_emotions = roberta_train_emotions['label'].map(lambda x: convert_to_ekman(emotion_to_label[x]))
print(roberta_simplified_train_emotions.head)

<bound method NDFrame.head of 0      3
1      3
2      3
3      3
4      3
      ..
961    3
962    3
963    3
964    3
965    3
Length: 966, dtype: int64>
<bound method NDFrame.head of 0      5
1      5
2      5
3      5
4      5
      ..
961    5
962    5
963    5
964    5
965    5
Length: 966, dtype: int64>
<bound method NDFrame.head of 0      3
1      3
2      3
3      3
4      3
      ..
961    4
962    3
963    5
964    3
965    3
Name: label, Length: 966, dtype: int64>


In [None]:
# Turn into 2D arrays.
train_emotions_data_bert_trained = [list(x.values()) for x in get_emotion_map(x_train_strings, 'bert', True)]
train_emotions_data_bert_untrained = [list(x.values()) for x in get_emotion_map(x_train_strings, 'bert untrained', True)]
train_emotions_data_roberta = [list(x.values()) for x in get_emotion_map(x_train_strings, 'roberta', True)]

print(train_emotions_data_bert_trained)
print(train_emotions_data_bert_untrained)
print(train_emotions_data_roberta)

Total Sentence Count: 2737
[4, 4, 3, 5, 2, 3, 4, 1, 1, 1, 3, 3, 2, 4, 3, 2, 1, 3, 4, 2, 4, 3, 2, 2, 4, 4, 3, 3, 1, 1, 4, 4, 3, 3, 5, 3, 1, 3, 3, 3, 1, 3, 3, 1, 2, 4, 1, 3, 4, 1, 2, 3, 3, 3, 3, 3, 2, 3, 2, 2, 4, 6, 3, 1, 2, 1, 4, 2, 1, 2, 3, 3, 1, 3, 4, 3, 2, 3, 1, 5, 1, 3, 4, 3, 3, 3, 1, 3, 4, 3, 2, 3, 4, 3, 3, 4, 1, 4, 4, 2, 3, 1, 1, 3, 4, 3, 2, 2, 3, 3, 1, 1, 3, 2, 4, 2, 3, 5, 1, 4, 3, 3, 3, 1, 4, 1, 4, 3, 3, 3, 4, 4, 3, 1, 3, 3, 2, 5, 1, 4, 4, 4, 2, 4, 5, 5, 1, 1, 3, 3, 3, 3, 3, 4, 4, 3, 4, 4, 4, 4, 4, 1, 4, 2, 1, 3, 2, 4, 3, 2, 3, 1, 4, 3, 4, 1, 3, 5, 2, 4, 3, 3, 4, 3, 3, 3, 3, 4, 2, 1, 3, 5, 3, 3, 4, 2, 1, 2, 1, 3, 1, 2, 5, 4, 1, 2, 1, 4, 3, 3, 4, 3, 4, 3, 1, 3, 3, 2, 4, 2, 3, 2, 3, 1, 2, 4, 3, 5, 4, 2, 5, 1, 1, 1, 4, 4, 4, 3, 1, 2, 4, 2, 4, 4, 4, 3, 1, 3, 4, 4, 4, 1, 2, 7, 2, 6, 5, 1, 6, 3, 3, 4, 3, 2, 4, 3, 4, 2, 3, 3, 3, 2, 2, 3, 3, 3, 2, 4, 3, 1, 3, 2, 1, 1, 2, 1, 3, 2, 4, 1, 1, 4, 3, 2, 3, 1, 6, 4, 1, 3, 2, 3, 1, 2, 1, 3, 3, 3, 3, 4, 3, 1, 4, 2, 4, 4, 3, 1, 2, 3, 1, 1, 4, 4, 

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/28 [00:00<?, ?it/s]

  with amp.autocast():


2737
[3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 5, 3, 3, 3, 3, 3, 3, 3, 3, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 3, 3, 3, 3, 4, 3, 3, 3, 3, 3, 3, 4, 4, 5, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 5, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 3, 3, 3, 3, 3, 5, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/28 [00:00<?, ?it/s]

2737
[5, 4, 5, 5, 5, 3, 3, 5, 5, 3, 5, 3, 5, 3, 3, 5, 5, 5, 5, 5, 5, 4, 5, 0, 5, 5, 5, 5, 3, 5, 5, 3, 3, 5, 5, 3, 5, 3, 5, 5, 5, 5, 3, 0, 5, 5, 5, 5, 3, 5, 3, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 3, 3, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 0, 3, 5, 3, 5, 3, 0, 5, 3, 3, 5, 3, 5, 4, 5, 5, 5, 3, 3, 5, 5, 5, 5, 5, 5, 0, 0, 5, 0, 5, 5, 5, 5, 3, 5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 5, 5, 3, 5, 5, 3, 3, 5, 5, 3, 5, 5, 3, 5, 5, 5, 5, 5, 5, 5, 5, 3, 5, 5, 5, 5, 5, 5, 5, 3, 5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 3, 3, 3, 3, 5, 5, 5, 5, 5, 0, 3, 5, 5, 3, 5, 5, 5, 3, 5, 5, 5, 3, 5, 3, 5, 3, 5, 5, 5, 3, 0, 5, 5, 3, 5, 3, 5, 5, 5, 5, 3, 0, 5, 5, 5, 3, 5, 3, 5, 5, 5, 5, 5, 5, 3, 5, 3, 5, 5, 3, 3, 5, 3, 5, 5, 3, 4, 5, 5, 5, 5, 3, 5, 3, 4, 5, 5, 5, 5, 0, 5, 5, 3, 3, 3, 3, 5, 5, 5, 5, 3, 5, 5, 5, 5, 5, 5, 5, 3, 5, 5, 5, 5, 3, 3, 5, 5, 5, 5, 5, 3, 4, 5, 5, 3, 5, 0, 3, 3, 3, 3, 5, 5, 5, 5, 5, 5, 5, 5, 3, 3, 5, 5, 5, 5, 5, 5, 5, 5, 0, 5, 3, 5, 5, 5, 0, 5, 5, 5, 3, 3, 3, 3, 5, 5, 4, 5, 5, 5, 5, 5, 3, 3, 3

In [None]:
train_emotions_data_bert_trained_complex = [list(x.values()) for x in get_emotion_map(x_train_strings, 'bert', False)]
train_emotions_data_bert_untrained_complex = [list(x.values()) for x in get_emotion_map(x_train_strings, 'bert untrained', False)]
train_emotions_data_roberta_complex = [list(x.values()) for x in get_emotion_map(x_train_strings, 'roberta', False)]

print(train_emotions_data_bert_trained_complex)
print(train_emotions_data_bert_untrained_complex)
print(train_emotions_data_roberta_complex)

Total Sentence Count: 2737
[4, 4, 3, 5, 2, 3, 4, 1, 1, 1, 3, 3, 2, 4, 3, 2, 1, 3, 4, 2, 4, 3, 2, 2, 4, 4, 3, 3, 1, 1, 4, 4, 3, 3, 5, 3, 1, 3, 3, 3, 1, 3, 3, 1, 2, 4, 1, 3, 4, 1, 2, 3, 3, 3, 3, 3, 2, 3, 2, 2, 4, 6, 3, 1, 2, 1, 4, 2, 1, 2, 3, 3, 1, 3, 4, 3, 2, 3, 1, 5, 1, 3, 4, 3, 3, 3, 1, 3, 4, 3, 2, 3, 4, 3, 3, 4, 1, 4, 4, 2, 3, 1, 1, 3, 4, 3, 2, 2, 3, 3, 1, 1, 3, 2, 4, 2, 3, 5, 1, 4, 3, 3, 3, 1, 4, 1, 4, 3, 3, 3, 4, 4, 3, 1, 3, 3, 2, 5, 1, 4, 4, 4, 2, 4, 5, 5, 1, 1, 3, 3, 3, 3, 3, 4, 4, 3, 4, 4, 4, 4, 4, 1, 4, 2, 1, 3, 2, 4, 3, 2, 3, 1, 4, 3, 4, 1, 3, 5, 2, 4, 3, 3, 4, 3, 3, 3, 3, 4, 2, 1, 3, 5, 3, 3, 4, 2, 1, 2, 1, 3, 1, 2, 5, 4, 1, 2, 1, 4, 3, 3, 4, 3, 4, 3, 1, 3, 3, 2, 4, 2, 3, 2, 3, 1, 2, 4, 3, 5, 4, 2, 5, 1, 1, 1, 4, 4, 4, 3, 1, 2, 4, 2, 4, 4, 4, 3, 1, 3, 4, 4, 4, 1, 2, 7, 2, 6, 5, 1, 6, 3, 3, 4, 3, 2, 4, 3, 4, 2, 3, 3, 3, 2, 2, 3, 3, 3, 2, 4, 3, 1, 3, 2, 1, 1, 2, 1, 3, 2, 4, 1, 1, 4, 3, 2, 3, 1, 6, 4, 1, 3, 2, 3, 1, 2, 1, 3, 3, 3, 3, 4, 3, 1, 4, 2, 4, 4, 3, 1, 2, 3, 1, 1, 4, 4, 

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/28 [00:00<?, ?it/s]

  with amp.autocast():


2737
[27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 20, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 7, 27, 27, 27, 27, 27, 27, 27, 27, 25, 27, 27, 27, 27, 27, 27, 0, 27, 27, 27, 27, 27, 27, 27, 27, 27, 2, 27, 27, 27, 27, 25, 27, 27, 27, 27, 27, 27, 25, 9, 7, 27, 27, 27, 27, 27, 27, 27, 0, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 26, 27, 27, 27, 27, 27, 18, 27, 27, 27, 27, 27, 27, 27, 20, 27, 27, 14, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 14, 27, 27, 27, 27, 27, 22, 27, 27, 27, 0, 27, 0, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 14, 25, 27, 27, 27, 18, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 0, 27, 27, 27, 27, 27, 27, 27, 0, 27, 0, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 2

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/28 [00:00<?, ?it/s]

2737
[26, 25, 26, 26, 26, 1, 1, 26, 7, 5, 7, 1, 26, 1, 1, 26, 7, 26, 26, 26, 26, 16, 26, 2, 26, 26, 26, 26, 1, 26, 7, 1, 1, 26, 26, 1, 26, 1, 26, 26, 26, 26, 1, 2, 26, 26, 26, 26, 1, 26, 1, 26, 26, 26, 26, 26, 26, 26, 26, 7, 26, 26, 26, 26, 26, 26, 1, 1, 1, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 1, 3, 1, 26, 1, 26, 1, 2, 26, 1, 1, 26, 1, 26, 25, 26, 26, 26, 1, 1, 26, 26, 26, 26, 26, 26, 3, 3, 26, 2, 26, 26, 26, 26, 1, 26, 26, 26, 26, 26, 26, 7, 26, 26, 1, 26, 26, 1, 26, 7, 1, 1, 26, 26, 1, 26, 26, 1, 26, 26, 26, 26, 26, 26, 26, 26, 1, 26, 7, 26, 26, 7, 26, 26, 1, 26, 26, 26, 26, 26, 26, 26, 26, 26, 1, 1, 1, 1, 1, 26, 26, 26, 26, 26, 3, 1, 7, 26, 1, 26, 26, 26, 1, 26, 26, 26, 1, 26, 1, 26, 1, 26, 26, 26, 1, 3, 7, 26, 1, 26, 1, 26, 26, 26, 26, 1, 2, 26, 26, 26, 1, 26, 1, 26, 26, 7, 26, 26, 7, 1, 26, 1, 26, 26, 1, 1, 26, 1, 26, 26, 1, 25, 26, 26, 26, 26, 1, 26, 1, 16, 26, 26, 26, 26, 3, 26, 26, 1, 1, 1, 1, 7, 26, 26, 26, 1, 26, 26, 26, 26, 26, 26, 7, 1, 26, 26, 26, 26, 1,

In [None]:
test_emotions_data_bert_trained = [list(x.values()) for x in get_emotion_map(x_test_strings, 'bert', True)]
test_emotions_data_bert_untrained = [list(x.values()) for x in get_emotion_map(x_test_strings, 'bert untrained', True)]
test_emotions_data_roberta = [list(x.values()) for x in get_emotion_map(x_test_strings, 'roberta', True)]

print(test_emotions_data_bert_trained)
print(test_emotions_data_bert_untrained)
print(test_emotions_data_roberta)

Total Sentence Count: 1851
[4, 3, 2, 3, 4, 4, 1, 4, 3, 3, 3, 5, 2, 4, 3, 3, 3, 5, 2, 1, 3, 3, 3, 4, 4, 3, 3, 3, 4, 3, 1, 3, 3, 3, 2, 5, 1, 3, 1, 1, 1, 2, 2, 2, 3, 3, 5, 3, 3, 5, 4, 3, 3, 3, 5, 3, 3, 3, 4, 4, 5, 4, 3, 4, 3, 2, 1, 3, 1, 2, 1, 4, 1, 1, 3, 4, 3, 1, 4, 4, 3, 3, 4, 3, 3, 1, 3, 5, 3, 3, 3, 3, 2, 3, 1, 3, 4, 3, 4, 1, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 3, 5, 3, 1, 4, 3, 3, 3, 4, 3, 4, 2, 3, 2, 2, 3, 3, 3, 4, 2, 1, 1, 4, 1, 4, 5, 5, 4, 3, 1, 2, 1, 2, 6, 3, 2, 1, 6, 3, 4, 5, 2, 5, 4, 3, 5, 2, 7, 3, 1, 3, 1, 3, 3, 2, 3, 4, 3, 4, 4, 3, 3, 2, 3, 5, 2, 1, 1, 2, 2, 4, 4, 2, 3, 2, 3, 2, 3, 1, 1, 4, 4, 2, 2, 3, 4, 3, 2, 1, 4, 4, 4, 2, 1, 5, 4, 1, 2, 3, 3, 1, 1, 1, 1, 3, 2, 2, 3, 1, 1, 4, 2, 1, 4, 3, 2, 4, 4, 4, 4, 2, 1, 7, 1, 1, 3, 6, 4, 2, 1, 3, 3, 6, 1, 3, 3, 2, 3, 3, 3, 4, 1, 3, 3, 3, 2, 2, 4, 2, 1, 3, 2, 3, 4, 3, 4, 1, 1, 3, 4, 2, 3, 3, 3, 5, 1, 2, 4, 5, 4, 4, 1, 1, 2, 4, 3, 5, 3, 2, 2, 3, 3, 3, 1, 3, 1, 3, 4, 4, 4, 4, 5, 1, 4, 2, 6, 2, 2, 4, 3, 1, 3, 2, 1, 1, 4, 3, 3, 3, 2, 4, 4, 4, 

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/19 [00:00<?, ?it/s]

1851
[3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 5, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 3, 3, 3, 3, 3, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 5, 3, 3, 3, 3, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/19 [00:00<?, ?it/s]

1851
[3, 5, 5, 5, 5, 3, 3, 5, 5, 5, 3, 5, 5, 3, 5, 3, 5, 5, 3, 3, 5, 3, 5, 5, 5, 5, 5, 5, 3, 5, 3, 5, 0, 5, 5, 0, 5, 5, 3, 5, 5, 5, 5, 5, 3, 5, 3, 3, 3, 3, 3, 5, 5, 5, 3, 5, 3, 0, 5, 0, 5, 5, 3, 3, 5, 5, 5, 5, 3, 3, 5, 0, 3, 0, 5, 3, 5, 5, 3, 5, 3, 0, 4, 5, 5, 3, 5, 3, 3, 5, 3, 5, 5, 3, 5, 5, 5, 5, 3, 5, 5, 3, 5, 3, 5, 5, 3, 3, 3, 5, 3, 5, 5, 5, 3, 5, 5, 5, 5, 5, 5, 0, 5, 5, 5, 3, 3, 3, 0, 3, 5, 3, 0, 5, 3, 5, 5, 5, 5, 3, 5, 3, 4, 0, 3, 5, 5, 3, 5, 3, 5, 5, 5, 5, 5, 5, 5, 3, 5, 3, 5, 3, 3, 5, 5, 5, 5, 5, 3, 3, 3, 5, 5, 5, 5, 5, 5, 3, 5, 5, 0, 5, 3, 5, 5, 0, 5, 5, 5, 0, 3, 3, 5, 3, 5, 5, 3, 5, 0, 5, 5, 5, 4, 5, 3, 3, 3, 5, 5, 5, 5, 5, 3, 3, 0, 5, 5, 3, 5, 5, 5, 5, 5, 5, 5, 3, 5, 5, 5, 3, 5, 5, 3, 5, 3, 5, 5, 3, 3, 5, 5, 5, 5, 3, 5, 5, 5, 5, 5, 3, 3, 5, 3, 5, 0, 5, 5, 3, 3, 5, 5, 5, 5, 5, 3, 5, 5, 5, 5, 5, 5, 3, 5, 5, 5, 5, 3, 5, 5, 5, 5, 5, 5, 3, 5, 5, 0, 3, 5, 0, 3, 5, 3, 5, 3, 5, 5, 3, 5, 5, 5, 5, 0, 3, 0, 5, 5, 5, 5, 5, 5, 0, 5, 5, 5, 5, 5, 5, 5, 5, 3, 3, 3, 3, 5, 0, 5, 5, 0, 5, 3, 5

In [None]:
# Complex Models
test_emotions_data_bert_trained_complex = [list(x.values()) for x in get_emotion_map(x_test_strings, 'bert', False)]
test_emotions_data_bert_untrained_complex = [list(x.values()) for x in get_emotion_map(x_test_strings, 'bert untrained', False)]
test_emotions_data_roberta_complex = [list(x.values()) for x in get_emotion_map(x_test_strings, 'roberta', False)]

print(test_emotions_data_bert_trained_complex)
print(test_emotions_data_bert_untrained_complex)
print(test_emotions_data_roberta_complex)

Total Sentence Count: 1851
[4, 3, 2, 3, 4, 4, 1, 4, 3, 3, 3, 5, 2, 4, 3, 3, 3, 5, 2, 1, 3, 3, 3, 4, 4, 3, 3, 3, 4, 3, 1, 3, 3, 3, 2, 5, 1, 3, 1, 1, 1, 2, 2, 2, 3, 3, 5, 3, 3, 5, 4, 3, 3, 3, 5, 3, 3, 3, 4, 4, 5, 4, 3, 4, 3, 2, 1, 3, 1, 2, 1, 4, 1, 1, 3, 4, 3, 1, 4, 4, 3, 3, 4, 3, 3, 1, 3, 5, 3, 3, 3, 3, 2, 3, 1, 3, 4, 3, 4, 1, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 3, 5, 3, 1, 4, 3, 3, 3, 4, 3, 4, 2, 3, 2, 2, 3, 3, 3, 4, 2, 1, 1, 4, 1, 4, 5, 5, 4, 3, 1, 2, 1, 2, 6, 3, 2, 1, 6, 3, 4, 5, 2, 5, 4, 3, 5, 2, 7, 3, 1, 3, 1, 3, 3, 2, 3, 4, 3, 4, 4, 3, 3, 2, 3, 5, 2, 1, 1, 2, 2, 4, 4, 2, 3, 2, 3, 2, 3, 1, 1, 4, 4, 2, 2, 3, 4, 3, 2, 1, 4, 4, 4, 2, 1, 5, 4, 1, 2, 3, 3, 1, 1, 1, 1, 3, 2, 2, 3, 1, 1, 4, 2, 1, 4, 3, 2, 4, 4, 4, 4, 2, 1, 7, 1, 1, 3, 6, 4, 2, 1, 3, 3, 6, 1, 3, 3, 2, 3, 3, 3, 4, 1, 3, 3, 3, 2, 2, 4, 2, 1, 3, 2, 3, 4, 3, 4, 1, 1, 3, 4, 2, 3, 3, 3, 5, 1, 2, 4, 5, 4, 4, 1, 1, 2, 4, 3, 5, 3, 2, 2, 3, 3, 3, 1, 3, 1, 3, 4, 4, 4, 4, 5, 1, 4, 2, 6, 2, 2, 4, 3, 1, 3, 2, 1, 1, 4, 3, 3, 3, 2, 4, 4, 4, 

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/19 [00:00<?, ?it/s]

1851
[27, 27, 27, 27, 0, 27, 27, 27, 27, 27, 0, 27, 0, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 14, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 0, 27, 27, 27, 27, 27, 27, 0, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 14, 20, 20, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 26, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 0, 27, 27, 27, 1, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 0, 27, 27, 27, 27, 27, 25, 27, 0, 27, 27, 27, 9, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 25, 27, 27, 27, 27, 0, 27, 27, 27, 27, 27, 14, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 18, 27, 27, 27, 0, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 0, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 18, 27, 27, 27, 27, 27, 27,

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/19 [00:00<?, ?it/s]

1851
[1, 26, 26, 26, 26, 1, 1, 26, 26, 26, 1, 26, 7, 1, 26, 1, 26, 7, 1, 1, 26, 1, 26, 26, 26, 26, 26, 26, 5, 26, 1, 26, 2, 26, 7, 2, 26, 26, 1, 26, 26, 7, 26, 26, 1, 26, 1, 1, 1, 1, 1, 26, 26, 26, 5, 26, 1, 3, 26, 2, 26, 26, 1, 1, 26, 26, 26, 26, 1, 1, 26, 3, 1, 3, 26, 1, 26, 26, 1, 26, 1, 3, 16, 26, 26, 1, 26, 1, 1, 26, 1, 26, 26, 1, 26, 26, 7, 7, 1, 26, 26, 1, 26, 1, 26, 26, 1, 1, 1, 26, 1, 26, 26, 26, 1, 26, 26, 26, 26, 26, 26, 2, 26, 26, 26, 1, 1, 1, 2, 1, 26, 1, 2, 26, 1, 26, 26, 26, 26, 1, 26, 1, 25, 2, 1, 26, 26, 1, 26, 1, 26, 7, 7, 26, 26, 26, 26, 5, 26, 1, 7, 1, 17, 26, 26, 26, 26, 26, 1, 1, 1, 26, 26, 26, 26, 26, 26, 1, 26, 7, 2, 26, 1, 26, 26, 2, 26, 26, 26, 3, 1, 1, 26, 1, 26, 7, 1, 26, 3, 26, 26, 26, 16, 26, 1, 1, 1, 26, 26, 26, 26, 26, 17, 1, 2, 26, 26, 1, 26, 26, 26, 26, 26, 26, 26, 1, 26, 26, 26, 1, 26, 26, 1, 7, 1, 26, 26, 1, 1, 26, 26, 26, 7, 1, 26, 26, 26, 26, 26, 1, 1, 26, 1, 26, 2, 26, 7, 1, 17, 26, 26, 26, 26, 26, 1, 26, 26, 26, 26, 26, 7, 1, 26, 26, 26, 26, 1, 2

In [None]:
bert_untrained_classifier = MLPClassifier(hidden_layer_sizes=(200), random_state=42, max_iter=1000)
bert_trained_classifier = MLPClassifier(hidden_layer_sizes=(200), random_state=42, max_iter=1000)
roberta_classifier = MLPClassifier(hidden_layer_sizes=(200), random_state=42, max_iter=1000)

In [None]:
bert_untrained_classifier_complex = MLPClassifier(hidden_layer_sizes=(200, 100), random_state=42, max_iter=2000)
bert_trained_classifier_complex = MLPClassifier(hidden_layer_sizes=(200, 100), random_state=42, max_iter=2000)
roberta_classifier_complex = MLPClassifier(hidden_layer_sizes=(200, 100), random_state=42, max_iter=2000)

In [None]:
bert_untrained_classifier.fit(train_emotions_data_bert_untrained, y_train_labels)
bert_trained_classifier.fit(train_emotions_data_bert_trained, y_train_labels)
roberta_classifier.fit(train_emotions_data_roberta, y_train_labels)

In [None]:
bert_untrained_classifier_complex.fit(train_emotions_data_bert_untrained_complex, y_train_labels)
bert_trained_classifier_complex.fit(train_emotions_data_bert_trained_complex, y_train_labels)
roberta_classifier_complex.fit(train_emotions_data_roberta_complex, y_train_labels)

In [None]:
bert_u_predictions = bert_untrained_classifier.predict(test_emotions_data_bert_trained)
bert_t_predictions = bert_trained_classifier.predict(test_emotions_data_bert_untrained)
roberta_predictions = roberta_classifier.predict(test_emotions_data_roberta)

In [None]:
bert_u_predictions_complex = bert_untrained_classifier_complex.predict(test_emotions_data_bert_trained_complex)
bert_t_predictions_complex = bert_trained_classifier_complex.predict(test_emotions_data_bert_untrained_complex)
roberta_predictions_complex = roberta_classifier_complex.predict(test_emotions_data_roberta_complex)

In [None]:
print("\nBert Untrained Simplified")
print_results(bert_u_predictions, y_test_labels)
print("\nBert Trained Simplified")
print_results(bert_t_predictions, y_test_labels)
print("\nRoberta Simplified")
print_results(roberta_predictions, y_test_labels)
print("\nBert Untrained Complex")
print_results(bert_u_predictions_complex, y_test_labels)
print("\nBert Trained Complex")
print_results(bert_t_predictions_complex, y_test_labels)
print("\nRoberta Complex")
print_results(roberta_predictions_complex, y_test_labels)


Bert Untrained Simplified
Accuracy: 0.0637
Precision: 0.0130, Recall: 0.0603, F1-Score: 0.0130

Bert Trained Simplified
Accuracy: 0.0839
Precision: 0.0159, Recall: 0.0794, F1-Score: 0.0238

Roberta Simplified
Accuracy: 0.0776
Precision: 0.1137, Recall: 0.0735, F1-Score: 0.0387

Bert Untrained Complex
Accuracy: 0.0606
Precision: 0.0369, Recall: 0.0574, F1-Score: 0.0117

Bert Trained Complex
Accuracy: 0.0885
Precision: 0.0240, Recall: 0.0838, F1-Score: 0.0332

Roberta Complex
Accuracy: 0.0839
Precision: 0.0954, Recall: 0.0800, F1-Score: 0.0518


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
