# GPT-4: Few-Shot Learning with Prompt Engineering
## ACL 2023 Conference
## WASSA 2023 Shared Task on Empathy, Emotion, and Personality Detection in Interactions
More details [here](https://codalab.lisn.upsaclay.fr/competitions/11167#learn_the_details)

In [1]:
import openai
import os
import re
import numpy as np
import pandas as pd
import time
import pickle
import tiktoken
import backoff
from typing import List
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import classification_report, multilabel_confusion_matrix
from openai.embeddings_utils import cosine_similarity
from tqdm.autonotebook import tqdm
tqdm.pandas()

# Raise pandas' display limits so wide / long dataframes are printed in full
pd.options.display.max_columns = 100
pd.options.display.max_rows = 400

# to see all env variables:
#for name, value in os.environ.items():
#    print("{0}: {1}".format(name, value))

  from tqdm.autonotebook import tqdm


In [2]:
def num_tokens_from_messages(messages, model="gpt-3.5-turbo-0301"):
    '''
    Return number of tokens used in a list of messages for ChatGPT.

    messages: list of dicts (e.g. {'role': ..., 'content': ...}); every value
              of every dict is tokenized and counted.
    model:    model name; the floating aliases "gpt-3.5-turbo" and "gpt-4" are
              counted as their pinned snapshots ("gpt-3.5-turbo-0301" and
              "gpt-4-0314").

    Raises NotImplementedError for any other model name.
    '''
    try:
        encoding = tiktoken.encoding_for_model(model)
    except KeyError:
        # unknown model name: fall back to the cl100k_base encoding
        encoding = tiktoken.get_encoding("cl100k_base")

    # floating model names may change over time, so count them as a fixed snapshot
    snapshots = {"gpt-3.5-turbo": "gpt-3.5-turbo-0301", "gpt-4": "gpt-4-0314"}
    if model in snapshots:
        return num_tokens_from_messages(messages, model=snapshots[model])

    # model -> (tokens_per_message, tokens_per_name)
    # every message follows <|start|>{role/name}\n{content}<|end|>\n
    # for gpt-3.5-turbo-0301: if there's a name, the role is omitted (hence -1)
    overheads = {"gpt-3.5-turbo-0301": (4, -1), "gpt-4-0314": (3, 1)}
    if model not in overheads:
        raise NotImplementedError(f"""num_tokens_from_messages() is not implemented for model {model}. See https://github.com/openai/openai-python/blob/main/chatml.md for information on how messages are converted to tokens.""")
    tokens_per_message, tokens_per_name = overheads[model]

    num_tokens = 3  # every reply is primed with <|start|>assistant<|message|>
    for message in messages:
        num_tokens += tokens_per_message
        num_tokens += sum(len(encoding.encode(value)) for value in message.values())
        if "name" in message:
            num_tokens += tokens_per_name
    return num_tokens

In [3]:
# Check how the '####' separator used between examples is tokenized:
# this is just one token (see the cell output below)
encoding = tiktoken.encoding_for_model("gpt-3.5-turbo-0301")
len(encoding.encode('####'))

1

In [24]:
def get_dummy_messages(df_, text_col):
    '''
    Wrap all texts of df_[text_col], joined with single spaces, into a
    one-element list holding a single 'user' chat message.
    Used to determine num tokens in a text column in a dataframe.
    '''
    joined_text = ' '.join(df_[text_col].tolist())
    return [dict(role='user', content=joined_text)]


# How many training essays to draw per category when building one set of
# few-shot examples (one set per data point in the dev set).
# The counts were determined experimentally based on the GPT-4 context window
# size and are treated as constants.
to_sample = {
    'Sadness':  14,
    'Neutral':  11,
    'Anger':    7,
    'Disgust':  7,
    'Fear':     7,
    'Hope':     7,
    'Surprise': 5,
    'Joy':      5,
}

In [5]:
# target variables: the emotion categories, numbered in alphabetical order
label2key = dict(
    Anger=0,
    Disgust=1,
    Fear=2,
    Hope=3,
    Joy=4,
    Neutral=5,
    Sadness=6,
    Surprise=7,
)
# reverse lookup: integer key -> category name
key2label = dict(zip(label2key.values(), label2key.keys()))
print(key2label)

{0: 'Anger', 1: 'Disgust', 2: 'Fear', 3: 'Hope', 4: 'Joy', 5: 'Neutral', 6: 'Sadness', 7: 'Surprise'}


In [6]:
# API credentials and model selection; the key is read from the environment
openai.api_key = os.getenv("OPENAI_API_KEY")
model          = 'gpt-4'
# set of valid category names, used to validate model answers
labels_set     = set(label2key.keys())
# anything that is not an ASCII letter or a space
clean = re.compile(r'[^a-zA-Z ]+')
# runs of two or more whitespace characters
# (raw string: '\s' in a plain string literal is an invalid escape sequence)
multi_spaces = re.compile(r'\s{2,}')
print(labels_set)

{'Anger', 'Fear', 'Joy', 'Surprise', 'Sadness', 'Neutral', 'Hope', 'Disgust'}


In [7]:
def get_target(emotions: List[str])->List[int]:
    '''
        Convert list of strings with categories into a list of 0s and 1s with one
        position per category in label2key (8 positions for the 8 categories);
        1 in the i-th position means that this essay belongs to the i-th category as in key2label[i]

        Raises KeyError if an emotion is not a key of label2key.
    '''
    # length follows label2key instead of a hard-coded 8, so the vector
    # always has exactly one slot per known category
    res = [0] * len(label2key)
    for emotion in emotions:
        res[label2key[emotion]] = 1
    return res

In [8]:
def verify_label(label_):
    '''
       Extract the known categories mentioned in label_.

       Non-letter characters are replaced by spaces, the text is split on
       whitespace, and only words found in the predefined set of labels
       (exact, case-sensitive match) are kept.

       Returns the distinct matches sorted alphabetically and joined with a
       forward slash, or None if label_ contains none of the categories.
    '''
    normalized = multi_spaces.sub(' ', clean.sub(' ', label_))
    matched = {word for word in normalized.split() if word in labels_set}
    if not matched:
        return None
    return '/'.join(sorted(matched))

In [31]:
def verify_num_tokens(model, messages, max_prompt_tokens=3500):
    '''
    Check that there are enough tokens available for a ChatGPT response.

    model:             model name passed on to num_tokens_from_messages()
    messages:          list of chat messages that make up the prompt
    max_prompt_tokens: largest prompt size (in tokens) that is still accepted;
                       the default keeps the previously hard-coded limit of 3500

    Returns True if the prompt fits within max_prompt_tokens; otherwise prints
    a message and returns False.
    '''
    num_tokens_tiktoken = num_tokens_from_messages(messages, model)
    if num_tokens_tiktoken > max_prompt_tokens:
        # report the limit that is actually enforced (the message used to say 4080)
        print(f'Number of tokens is {num_tokens_tiktoken} which exceeds {max_prompt_tokens}')
        return False
    return True


@backoff.on_exception(backoff.expo, openai.error.RateLimitError, max_time=10)
def get_response(model, messages, temperature=0, max_tokens=None):
    '''
    Send one chat completion request and return the text of the first choice,
    stripped of leading / trailing whitespace.

    model:       model name for the request
    messages:    list of chat messages
    temperature: range(0,2), the more the less deterministic / focused
    max_tokens:  accepted but currently NOT forwarded to the API

    The call is retried with exponential backoff on openai.error.RateLimitError
    (backoff is configured with max_time=10).
    '''
    # options that are deliberately left at the API defaults:
    #   top_p, n, max_tokens, stream, stop
    request = {
        'model': model,
        'messages': messages,
        'temperature': temperature,
    }
    completion = openai.ChatCompletion.create(**request)
    first_choice = completion['choices'][0]
    return first_choice['message']['content'].strip()

In [10]:
# Instruction prompt for the few-shot classification; it ends with the '####'
# separator that is also placed between the example essays
prompt_one = """
1 - Below you are given examples of essays with categories separated by four hashtags.
2 - Each essay has one or two relevant categories from the following list: \
Sadness, Neutral, Anger, Disgust, Fear, Hope, Surprise, Joy.
3 - Your task is to carefully learn from the examples of essays with categories in order to understand \
what features or words in the essays make them belong to a specific category and then use \
this knowledge to assign the correct relevant category from the above list to the very last essay.
4 - You may add a second category from the above list ONLY AND ONLY IF it is also relevant \
to the very last essay. Do not add the second category if it is not significant.
5 - If there is one relevant category, output just the category and nothing else. If there are two relevant \
categories: sort them alphabetically, concatenate with a forward slash, and output only them and nothing else.

####
"""

print(prompt_one)

# Using follow-up questions improves the response, but ChatGPT can sometimes change its mind too easily
followup = """Are you sure about that? If yes, output the same category; if no, change the category"""
print(followup)


1 - Below you are given examples of essays with categories separated by four hashtags.
2 - Each essay has one or two relevant categories from the following list: Sadness, Neutral, Anger, Disgust, Fear, Hope, Surprise, Joy.
3 - Your task is to carefully learn from the examples of essays with categories in order to understand what features or words in the essays make them belong to a specific category and then use this knowledge to assign the correct relevant category from the above list to the very last essay.
4 - You may add a second category from the above list ONLY AND ONLY IF it is also relevant to the very last essay. Do not add the second category if it is not significant.
5 - If there is one relevant category, output just the category and nothing else. If there are two relevant categories: sort them alphabetically, concatenate with a forward slash, and output only them and nothing else.

####

Are you sure about that? If yes, output the same category; if no, change the category


In [11]:
# Fixed seed for reproducibility (used below when shuffling the sampled examples)
random_state = 47

# Load data

In [12]:
# Load the pickled train and dev dataframes.
# NOTE: pd.read_pickle unpickles arbitrary Python objects - only load files from a trusted source.
file1    = 'data/df_train.pkl'
df_train = pd.read_pickle(file1)

file2    = 'data/df_dev.pkl'
df_dev   = pd.read_pickle(file2)

# sanity check: (rows, columns) of both sets
print(df_train.shape, df_dev.shape)

2023-05-09 17:40:47.905271: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


(792, 36) (208, 38)


In [14]:
# Preview the first 25 dev essays with their labels;
# max_colwidth=None shows the full essay text instead of a truncated cell
cols = [ 'essay_clean_spellchecked', 'emotion', ]
with pd.option_context('display.max_colwidth', None):
    display(df_dev[cols].head(25))

Unnamed: 0,essay_clean_spellchecked,emotion
0,"How sad is it that this kind of pain and suffering, and those kind of living conditions still exist today? what a gap we have in society between developed countries and those that aren't. It's crazy to drive around the US and see all the money people spend on pointless things, and then to think about how the people in Haiti are living.",[Sadness]
1,The article is kind of tragic and hits close to home as I am the son of Haitian immigrants. Haiti has a lot of problems that only become exaggerated during natural disasters. I think what the Haitian people really need from the international community is help developing infrastructure so they can address these issues themselves. Foreign aid only acts as a band aid.,[Sadness]
2,"I think that these kinds of stories, are sad, yet inspirational and leave you with kind of a good feeling. Even though his story is sad, it's cool and inspiring/motivational to see that he rose up against his circumstances. That he worked hard to make something of himself and he succeeded in what he wanted to do.",[Sadness]
3,It's crazy that random accidents like this happen everyday. I am not a baseball fan but of course enjoy a baseball game every now and again. I lived and worked in Miami too so I am vaguely familiar with that baseball player who unfortunately passed away. The effort to save him was great but unfortunately bad things seem to happen every day. He was so young too so it makes it worse.,[Neutral]
4,"This story makes me so so sad.... As someone who also grew up in the system, I can strongly relate. It's sad that America has not figured out a better and more safe system to handle kid's without parents or with parents who are unfit. A lot of the times, the system is no better, or even worse than the situation kids were in before, and I think this story is a good example of that.",[Sadness]
5,"After reading the article, my first reaction and feeling is that i feel really bad for the brothers. I feel like people their age should not have to be locked inside a jail cell. They should be out in the world improving themselves and being normal people. It's also really sad for the family members of these brothers as well because they are probably all suffering and worrying.",[Sadness]
6,"I didn't know coal mining had such adverse effects on the surrounding environment. It has basically ruined the lives of the people who live nearby these mines. And the animal populations too, imagine a heard of elephants not able to sustain themselves with the food available and needing to invade human territory...They must really be in a desperate situation.",[Neutral]
7,"This is very sad. I can't imagine having elephants come stampede my house in the middle of the night. What a terrible and sad situation, and these poor people can't even do anything about it. Someone needs to stop the deforestation and stop polluting the air these people breathe, it is not right that they are doing and all for the sake of turning a profit.",[Sadness]
8,"Guys, reading this article really hits home for me. If you or someone you know is having suicidal thoughts, please get help from the available sources. Suicide is no joke and it is a shame when someone does not get the help they need. I've struggled with this for a few years now but I got the help I needed. This woman was not as fortunate.",[Sadness]
9,Hey guys. So I just read this article about Iraqi Christians being persecuted by Muslims in Iraq. I don't understand why people of different religious backgrounds can't get along there. I'm sure it is a cultural thing but it is such unnecessary violence and conflict. It hurts both sides and I wish there was a way we could get them to set aside their differences. But not military action. We don't need another war.,[Neutral]


# Few-Shot Approach 1: concatenate random examples into one long string

## 1a. Divide training set into random chunks
This is done in a reproducible way - the same random_states are used (exact same results are confirmed in multiple runs)

Options:
1. Keep double categories when concatenating examples
2. 1 essay w/1 category (using explode()) before concatenating examples

In [40]:
# Re-apply the API key and model name before the few-shot run
# (same values as in the configuration cell earlier in the notebook)
openai.api_key = os.getenv("OPENAI_API_KEY")
model          = 'gpt-4'

In [43]:
# Build one dataframe of few-shot examples per dev data point.
# For every iteration, `to_sample[emo]` essays are drawn per category from the
# training set (preferring essays not used in earlier iterations), the rows are
# shuffled, and random rows are dropped until the essay text fits the token budget.
num_iter       = df_dev.shape[0]
max_tokens     = 7300                     # use 7100 with a prompt
text_col       = 'essay_clean'
emotion_col    = 'emotion'
essays_sampled = []                       # essay texts already used in any iteration
example_dfs    = []                       # result: one dataframe of examples per iteration

# One row per (essay, category) pair. df_train is not modified below, so this
# is computed once instead of inside the loops.
df_train_exploded = df_train.explode(emotion_col)

for i in range(num_iter):
    res = []
    for emo, num in to_sample.items():
        # candidates: essays of this category that were not sampled before
        temp = df_train_exploded[ ~df_train_exploded[text_col].isin(essays_sampled) ]
        temp = temp[ temp[emotion_col] == emo ]

        # if not enough data to sample, sample from the entire dataframe
        if temp.shape[0] < num:
            temp = df_train_exploded[ df_train_exploded[emotion_col] == emo ]

        essays     = temp.sample(n=num, random_state=i)[text_col].tolist()
        # take the rows from the original (non-exploded) frame so that each
        # essay keeps its full list of categories at this point
        df_sampled = df_train[ df_train[text_col].isin(essays) ][[text_col, emotion_col]]
        res.append( df_sampled )
        essays_sampled.extend( essays )

    df_sampled_combined = pd.concat(res).sample(frac=1, random_state=random_state)
    # explode() helps avoid double categories in examples
    df_sampled_combined = df_sampled_combined.explode( emotion_col ).sample(frac=1, random_state=random_state)
    # an essay sampled for two of its categories appears twice after explode()
    df_sampled_combined = df_sampled_combined.drop_duplicates()

    # reduce size to fit into the context window: reshuffle and drop one row
    # at a time until the essay text is within the token budget
    while num_tokens_from_messages( get_dummy_messages(df_sampled_combined, text_col),
                                    model="gpt-3.5-turbo-0301",
                                  ) > max_tokens:
        df_sampled_combined = df_sampled_combined.sample(frac=1, random_state=i)
        size = df_sampled_combined.shape[0]
        df_sampled_combined = df_sampled_combined.head(size-1)

    example_dfs.append( df_sampled_combined )

    print('Size of combined df:', df_sampled_combined.shape )
    num_tokens = num_tokens_from_messages( get_dummy_messages(df_sampled_combined, text_col),
                                           model="gpt-3.5-turbo-0301",
                                         )
    print('Number of tokens:', num_tokens)
    print('Value counts:\n', df_sampled_combined.explode(emotion_col)[emotion_col].value_counts(),sep='')
    print('Index:', sorted(df_sampled_combined.index.tolist()))
    print('\n', '='*75, '\n', sep='')

Size of combined df: (81, 2)
Number of tokens: 7277
Value counts:
Sadness     27
Neutral     10
Fear         9
Anger        9
Disgust      8
Hope         7
Surprise     6
Joy          5
Name: emotion_no_2nd_neut, dtype: int64
Index: [29, 32, 32, 35, 37, 38, 85, 90, 107, 111, 171, 177, 182, 216, 216, 238, 246, 246, 249, 253, 253, 295, 295, 302, 336, 342, 359, 371, 371, 373, 373, 383, 397, 400, 415, 426, 430, 436, 436, 478, 478, 479, 479, 494, 500, 527, 527, 557, 561, 561, 573, 574, 588, 593, 593, 596, 602, 617, 617, 620, 621, 645, 658, 681, 691, 692, 692, 694, 694, 714, 714, 725, 725, 729, 731, 740, 746, 746, 751, 751, 752]


Size of combined df: (79, 2)
Number of tokens: 7280
Value counts:
Sadness     22
Neutral     11
Disgust     10
Anger       10
Fear         9
Hope         7
Surprise     5
Joy          5
Name: emotion_no_2nd_neut, dtype: int64
Index: [21, 94, 145, 156, 157, 157, 181, 181, 187, 190, 215, 224, 227, 239, 239, 289, 299, 319, 322, 324, 346, 347, 355, 355, 366, 390, 391, 

Size of combined df: (79, 2)
Number of tokens: 7287
Value counts:
Sadness     24
Neutral     11
Anger       10
Disgust      9
Fear         8
Hope         7
Surprise     5
Joy          5
Name: emotion_no_2nd_neut, dtype: int64
Index: [14, 14, 21, 33, 83, 94, 102, 110, 119, 132, 147, 147, 150, 181, 181, 183, 205, 219, 244, 271, 272, 275, 275, 294, 295, 295, 338, 343, 343, 348, 355, 355, 357, 419, 431, 433, 438, 449, 449, 453, 453, 475, 487, 522, 530, 530, 537, 549, 555, 561, 561, 563, 595, 595, 604, 610, 615, 617, 628, 631, 631, 643, 643, 650, 650, 656, 662, 662, 679, 685, 695, 722, 724, 725, 725, 747, 774, 784, 785]


Size of combined df: (78, 2)
Number of tokens: 7263
Value counts:
Sadness     26
Anger       11
Neutral     11
Fear         7
Disgust      7
Hope         6
Joy          5
Surprise     5
Name: emotion_no_2nd_neut, dtype: int64
Index: [14, 14, 23, 29, 40, 81, 84, 100, 102, 102, 103, 103, 108, 111, 127, 147, 147, 150, 157, 157, 173, 200, 200, 294, 295, 295, 300, 320, 327, 332

Size of combined df: (83, 2)
Number of tokens: 7251
Value counts:
Sadness     25
Anger       13
Neutral     11
Disgust     10
Fear         9
Hope         7
Surprise     4
Joy          4
Name: emotion_no_2nd_neut, dtype: int64
Index: [29, 32, 32, 55, 63, 80, 80, 157, 163, 165, 197, 197, 217, 217, 225, 227, 234, 242, 248, 259, 272, 279, 279, 286, 289, 307, 325, 343, 343, 355, 355, 371, 371, 373, 381, 393, 408, 412, 436, 436, 449, 449, 453, 453, 454, 480, 501, 501, 523, 529, 533, 536, 536, 537, 538, 538, 561, 561, 567, 580, 593, 593, 624, 629, 650, 650, 662, 674, 674, 694, 694, 725, 740, 740, 745, 746, 746, 751, 764, 773, 773, 778, 778]


Size of combined df: (78, 2)
Number of tokens: 7118
Value counts:
Sadness     23
Neutral     11
Disgust     10
Anger       10
Fear         7
Hope         7
Joy          5
Surprise     5
Name: emotion_no_2nd_neut, dtype: int64
Index: [7, 19, 34, 35, 59, 74, 101, 101, 104, 112, 130, 152, 158, 192, 192, 207, 220, 235, 239, 239, 276, 276, 292, 295, 295, 331,

Size of combined df: (79, 2)
Number of tokens: 7252
Value counts:
Sadness     25
Anger       12
Neutral     10
Hope         8
Fear         8
Disgust      7
Surprise     5
Joy          4
Name: emotion_no_2nd_neut, dtype: int64
Index: [14, 14, 29, 32, 32, 95, 102, 102, 107, 107, 150, 167, 171, 184, 187, 192, 192, 200, 200, 216, 216, 221, 228, 251, 259, 261, 264, 322, 338, 338, 355, 355, 360, 371, 371, 390, 398, 406, 438, 453, 453, 472, 492, 501, 501, 502, 509, 511, 511, 530, 530, 538, 538, 544, 548, 560, 561, 561, 575, 593, 593, 613, 619, 619, 626, 648, 662, 662, 670, 674, 674, 678, 692, 694, 694, 709, 720, 728, 759]


Size of combined df: (79, 2)
Number of tokens: 7298
Value counts:
Sadness     24
Anger       11
Neutral     11
Fear         8
Surprise     7
Disgust      7
Hope         7
Joy          4
Name: emotion_no_2nd_neut, dtype: int64
Index: [5, 29, 32, 32, 37, 42, 64, 76, 88, 107, 107, 121, 121, 135, 146, 149, 157, 157, 253, 253, 288, 295, 295, 299, 313, 314, 315, 315, 319, 328, 3

Size of combined df: (83, 2)
Number of tokens: 7214
Value counts:
Sadness     25
Anger       14
Neutral     11
Hope         8
Disgust      8
Fear         7
Joy          5
Surprise     5
Name: emotion_no_2nd_neut, dtype: int64
Index: [2, 29, 32, 32, 35, 38, 77, 94, 101, 101, 107, 107, 108, 122, 144, 148, 149, 149, 157, 157, 166, 187, 196, 200, 200, 207, 239, 239, 247, 254, 277, 278, 293, 300, 314, 319, 338, 338, 347, 350, 352, 356, 358, 358, 375, 375, 383, 436, 436, 447, 447, 449, 449, 453, 453, 483, 489, 495, 523, 527, 527, 530, 530, 538, 538, 555, 561, 561, 577, 579, 621, 621, 622, 622, 639, 654, 668, 681, 704, 710, 739, 770, 785]


Size of combined df: (76, 2)
Number of tokens: 7271
Value counts:
Sadness     23
Neutral     10
Anger       10
Hope        10
Disgust      8
Fear         7
Joy          5
Surprise     3
Name: emotion_no_2nd_neut, dtype: int64
Index: [1, 14, 21, 22, 29, 33, 35, 65, 94, 102, 106, 149, 149, 181, 181, 196, 200, 200, 239, 239, 278, 279, 279, 280, 281, 295, 295,

Size of combined df: (77, 2)
Number of tokens: 7294
Value counts:
Sadness     22
Neutral     11
Anger        9
Fear         9
Disgust      9
Surprise     6
Hope         6
Joy          5
Name: emotion_no_2nd_neut, dtype: int64
Index: [0, 0, 11, 11, 15, 19, 21, 65, 88, 111, 112, 130, 139, 141, 181, 181, 187, 191, 225, 233, 253, 254, 259, 295, 295, 302, 306, 324, 326, 338, 338, 347, 383, 396, 402, 438, 443, 445, 463, 475, 486, 492, 500, 518, 527, 527, 535, 538, 538, 546, 546, 556, 556, 577, 580, 622, 622, 627, 633, 670, 672, 681, 699, 705, 705, 708, 720, 725, 725, 734, 748, 748, 751, 751, 762, 773, 773]


Size of combined df: (83, 2)
Number of tokens: 7247
Value counts:
Sadness     24
Anger       12
Disgust     11
Neutral     11
Fear         8
Hope         7
Joy          5
Surprise     5
Name: emotion_no_2nd_neut, dtype: int64
Index: [0, 0, 6, 21, 29, 32, 32, 36, 51, 66, 70, 74, 111, 135, 152, 155, 157, 157, 173, 177, 187, 220, 240, 240, 252, 252, 259, 268, 272, 294, 296, 296, 313, 313, 3

Size of combined df: (85, 2)
Number of tokens: 7259
Value counts:
Sadness     25
Anger       12
Neutral     11
Disgust     10
Hope         8
Fear         8
Surprise     6
Joy          5
Name: emotion_no_2nd_neut, dtype: int64
Index: [19, 21, 29, 38, 85, 96, 101, 101, 108, 128, 133, 139, 139, 148, 164, 191, 200, 200, 215, 224, 241, 253, 253, 280, 286, 295, 295, 313, 313, 328, 329, 338, 338, 347, 366, 383, 402, 406, 442, 455, 475, 477, 477, 478, 478, 500, 507, 511, 511, 518, 536, 536, 538, 538, 542, 542, 559, 561, 561, 564, 593, 593, 600, 619, 619, 629, 629, 631, 631, 674, 674, 694, 694, 712, 720, 731, 734, 734, 738, 743, 743, 751, 751, 760, 767]


Size of combined df: (82, 2)
Number of tokens: 7277
Value counts:
Sadness     26
Neutral     11
Anger       10
Disgust      9
Hope         9
Fear         7
Joy          5
Surprise     5
Name: emotion_no_2nd_neut, dtype: int64
Index: [29, 32, 32, 80, 80, 84, 99, 100, 102, 102, 107, 107, 111, 125, 128, 129, 137, 138, 139, 139, 157, 157, 165, 174

Size of combined df: (80, 2)
Number of tokens: 7292
Value counts:
Sadness     24
Anger       12
Neutral     10
Disgust      9
Fear         8
Surprise     6
Hope         6
Joy          5
Name: emotion_no_2nd_neut, dtype: int64
Index: [0, 0, 11, 11, 13, 21, 35, 37, 64, 146, 149, 149, 157, 157, 181, 193, 219, 225, 240, 240, 252, 252, 299, 318, 321, 321, 321, 338, 338, 343, 343, 346, 349, 355, 355, 413, 429, 441, 448, 449, 459, 466, 466, 475, 480, 499, 500, 530, 530, 536, 536, 555, 561, 561, 566, 575, 595, 609, 617, 622, 622, 626, 631, 643, 643, 648, 667, 682, 683, 692, 692, 731, 735, 738, 743, 743, 750, 773, 773, 776]


Size of combined df: (80, 2)
Number of tokens: 7165
Value counts:
Sadness     23
Neutral     11
Anger       10
Disgust     10
Hope         9
Fear         7
Surprise     5
Joy          5
Name: emotion_no_2nd_neut, dtype: int64
Index: [2, 14, 14, 20, 21, 23, 25, 29, 32, 32, 35, 37, 38, 56, 64, 68, 68, 102, 102, 107, 107, 111, 139, 139, 155, 173, 184, 208, 225, 230, 234, 236,

Size of combined df: (85, 2)
Number of tokens: 7146
Value counts:
Sadness     22
Disgust     15
Anger       12
Neutral     11
Hope         8
Fear         7
Joy          5
Surprise     5
Name: emotion_no_2nd_neut, dtype: int64
Index: [2, 10, 10, 22, 29, 32, 32, 34, 35, 37, 81, 90, 106, 107, 107, 108, 119, 131, 139, 139, 150, 151, 152, 157, 157, 164, 194, 195, 200, 200, 207, 208, 248, 255, 255, 294, 295, 295, 307, 311, 340, 343, 343, 356, 365, 383, 398, 500, 511, 511, 517, 517, 567, 567, 569, 569, 595, 595, 600, 608, 617, 617, 619, 619, 662, 662, 671, 671, 672, 681, 687, 692, 692, 694, 694, 696, 703, 703, 716, 724, 725, 725, 773, 773, 774]


Size of combined df: (78, 2)
Number of tokens: 7173
Value counts:
Sadness     22
Disgust     13
Anger       11
Neutral     10
Hope         6
Fear         6
Joy          5
Surprise     5
Name: emotion_no_2nd_neut, dtype: int64
Index: [24, 35, 39, 94, 95, 147, 147, 163, 179, 181, 197, 197, 213, 213, 225, 228, 233, 262, 268, 270, 274, 292, 295, 295, 326

Size of combined df: (77, 2)
Number of tokens: 7244
Value counts:
Sadness     23
Neutral     11
Anger       10
Disgust      9
Hope         8
Fear         6
Surprise     5
Joy          5
Name: emotion_no_2nd_neut, dtype: int64
Index: [9, 9, 12, 21, 40, 74, 93, 101, 101, 117, 150, 155, 186, 200, 200, 205, 206, 209, 213, 213, 239, 239, 252, 252, 290, 295, 295, 326, 338, 338, 345, 347, 355, 355, 357, 376, 387, 396, 406, 430, 438, 453, 453, 471, 479, 479, 487, 517, 521, 528, 546, 546, 561, 561, 566, 575, 594, 608, 626, 633, 636, 642, 656, 668, 705, 705, 714, 714, 720, 739, 751, 753, 754, 754, 763, 769, 774]


Size of combined df: (76, 2)
Number of tokens: 7264
Value counts:
Sadness     22
Neutral     11
Anger       10
Disgust     10
Hope         8
Fear         7
Surprise     4
Joy          4
Name: emotion_no_2nd_neut, dtype: int64
Index: [34, 37, 55, 94, 117, 126, 139, 145, 152, 155, 157, 157, 164, 171, 181, 181, 182, 192, 192, 194, 209, 226, 239, 239, 241, 250, 253, 253, 254, 295, 295, 296

Size of combined df: (79, 2)
Number of tokens: 7214
Value counts:
Sadness     26
Neutral     10
Disgust      9
Fear         8
Anger        8
Hope         7
Surprise     6
Joy          5
Name: emotion_no_2nd_neut, dtype: int64
Index: [9, 9, 14, 14, 19, 28, 32, 32, 34, 35, 63, 64, 92, 99, 107, 107, 126, 136, 152, 187, 212, 216, 216, 220, 223, 277, 295, 295, 338, 338, 347, 355, 371, 380, 406, 409, 427, 438, 500, 502, 521, 523, 530, 530, 534, 552, 570, 589, 589, 596, 621, 621, 631, 631, 650, 650, 658, 662, 662, 681, 685, 692, 692, 693, 696, 697, 699, 703, 703, 704, 705, 725, 725, 736, 742, 749, 749, 777, 784]


Size of combined df: (79, 2)
Number of tokens: 7191
Value counts:
Sadness     24
Neutral     11
Anger       11
Fear        10
Disgust      9
Hope         6
Joy          5
Surprise     3
Name: emotion_no_2nd_neut, dtype: int64
Index: [18, 20, 21, 66, 67, 70, 101, 101, 102, 111, 114, 114, 118, 133, 156, 157, 157, 203, 239, 253, 253, 275, 275, 278, 295, 295, 314, 320, 338, 338, 343, 34

Size of combined df: (83, 2)
Number of tokens: 7209
Value counts:
Sadness     22
Anger       12
Neutral     11
Disgust     11
Fear         9
Hope         8
Joy          5
Surprise     5
Name: emotion_no_2nd_neut, dtype: int64
Index: [3, 21, 29, 30, 35, 50, 91, 103, 103, 117, 120, 187, 203, 209, 227, 240, 240, 253, 253, 255, 255, 266, 272, 278, 300, 320, 321, 321, 321, 338, 338, 346, 372, 374, 385, 385, 388, 391, 392, 401, 405, 405, 407, 409, 416, 449, 449, 479, 479, 502, 510, 517, 517, 526, 530, 530, 538, 538, 561, 561, 586, 593, 593, 641, 645, 674, 674, 679, 681, 694, 694, 695, 698, 723, 725, 725, 739, 740, 740, 742, 760, 769, 777]


Size of combined df: (81, 2)
Number of tokens: 7175
Value counts:
Sadness     20
Anger       13
Neutral     11
Disgust     11
Hope         8
Fear         8
Surprise     5
Joy          5
Name: emotion_no_2nd_neut, dtype: int64
Index: [35, 69, 94, 95, 107, 107, 108, 115, 126, 127, 136, 143, 149, 149, 151, 154, 157, 157, 173, 181, 181, 187, 203, 207, 213, 21

Size of combined df: (83, 2)
Number of tokens: 7273
Value counts:
Sadness     25
Disgust     12
Neutral     11
Anger       11
Hope         7
Surprise     6
Fear         6
Joy          5
Name: emotion_no_2nd_neut, dtype: int64
Index: [0, 0, 11, 11, 14, 14, 30, 31, 65, 94, 102, 131, 141, 157, 157, 165, 167, 181, 188, 206, 216, 216, 235, 272, 276, 276, 279, 279, 285, 290, 295, 295, 309, 321, 321, 332, 334, 347, 349, 350, 355, 355, 371, 402, 406, 436, 436, 446, 449, 449, 453, 453, 461, 494, 505, 512, 536, 536, 542, 542, 556, 556, 567, 567, 618, 619, 619, 671, 671, 681, 702, 712, 722, 733, 740, 740, 749, 749, 764, 773, 773, 778, 778]


Size of combined df: (78, 2)
Number of tokens: 7198
Value counts:
Sadness     24
Anger       12
Disgust     10
Neutral      9
Fear         7
Surprise     6
Hope         6
Joy          4
Name: emotion_no_2nd_neut, dtype: int64
Index: [27, 29, 42, 47, 82, 142, 147, 147, 157, 165, 207, 239, 246, 246, 247, 255, 255, 299, 302, 315, 315, 321, 321, 325, 330, 343, 34

Size of combined df: (80, 2)
Number of tokens: 7288
Value counts:
Sadness     23
Neutral     11
Anger       11
Disgust      9
Fear         8
Hope         7
Joy          6
Surprise     5
Name: emotion_no_2nd_neut, dtype: int64
Index: [14, 14, 29, 32, 32, 35, 58, 75, 87, 89, 102, 102, 107, 107, 111, 132, 157, 160, 164, 180, 180, 187, 201, 207, 214, 216, 216, 226, 236, 239, 239, 240, 240, 252, 252, 253, 253, 286, 295, 295, 306, 310, 321, 321, 321, 343, 343, 371, 371, 383, 388, 416, 436, 436, 438, 462, 469, 476, 538, 538, 541, 561, 561, 582, 597, 599, 601, 610, 612, 629, 629, 643, 643, 656, 663, 700, 715, 721, 767, 787]


Size of combined df: (81, 2)
Number of tokens: 7166
Value counts:
Sadness     24
Anger       11
Neutral     11
Disgust      9
Fear         8
Hope         8
Joy          5
Surprise     5
Name: emotion_no_2nd_neut, dtype: int64
Index: [0, 0, 10, 10, 11, 11, 13, 21, 30, 32, 32, 34, 35, 55, 64, 78, 84, 107, 107, 117, 143, 152, 155, 197, 197, 209, 210, 216, 216, 230, 231, 235,

Size of combined df: (80, 2)
Number of tokens: 7111
Value counts:
Sadness     24
Neutral     11
Anger       10
Disgust      9
Hope         8
Fear         8
Joy          5
Surprise     5
Name: emotion_no_2nd_neut, dtype: int64
Index: [3, 12, 21, 35, 39, 64, 75, 87, 106, 114, 114, 125, 129, 132, 137, 174, 187, 188, 216, 216, 227, 253, 253, 286, 324, 326, 341, 356, 366, 367, 371, 371, 388, 392, 409, 428, 438, 455, 477, 477, 512, 514, 514, 536, 536, 561, 561, 568, 574, 575, 590, 593, 593, 662, 662, 681, 690, 692, 692, 694, 694, 703, 703, 705, 705, 708, 712, 720, 725, 725, 731, 740, 740, 748, 748, 750, 769, 778, 778, 785]


Size of combined df: (79, 2)
Number of tokens: 7103
Value counts:
Sadness     21
Neutral     11
Disgust     11
Anger        9
Fear         9
Hope         8
Surprise     5
Joy          5
Name: emotion_no_2nd_neut, dtype: int64
Index: [5, 21, 26, 62, 66, 74, 75, 94, 98, 116, 121, 121, 135, 149, 149, 158, 181, 181, 207, 213, 213, 239, 239, 247, 253, 253, 262, 271, 272, 295,

Number of tokens: 6808
Value counts:
Sadness     18
Neutral     11
Anger       11
Fear        10
Disgust     10
Hope         8
Surprise     6
Joy          5
Name: emotion_no_2nd_neut, dtype: int64
Index: [26, 29, 38, 47, 99, 101, 101, 104, 120, 139, 139, 145, 150, 157, 157, 181, 181, 206, 207, 222, 225, 239, 239, 242, 271, 294, 306, 311, 326, 329, 343, 343, 364, 383, 396, 398, 406, 436, 436, 438, 453, 453, 478, 478, 479, 479, 493, 507, 513, 527, 527, 536, 536, 558, 561, 561, 580, 589, 589, 591, 592, 605, 619, 619, 624, 626, 634, 645, 650, 650, 681, 694, 694, 704, 720, 731, 763, 774, 790]


Size of combined df: (78, 2)
Number of tokens: 6787
Value counts:
Sadness     22
Neutral     11
Anger       11
Disgust     10
Fear         7
Hope         7
Surprise     5
Joy          5
Name: emotion_no_2nd_neut, dtype: int64
Index: [23, 29, 30, 49, 61, 98, 117, 121, 121, 138, 157, 157, 161, 205, 209, 225, 262, 272, 277, 278, 279, 279, 299, 321, 321, 321, 327, 330, 334, 337, 353, 367, 373, 373, 392, 

In [44]:
# How many examples does each set contain? (mean and frequency of each set size)
from collections import Counter
lengths = list(map(len, example_dfs))
print('Mean length:', np.mean(lengths))
c = Counter(lengths)
c.most_common()

Mean length: 80.53846153846153


[(80, 35),
 (79, 28),
 (81, 25),
 (82, 25),
 (78, 24),
 (83, 22),
 (77, 13),
 (85, 12),
 (84, 10),
 (75, 6),
 (86, 5),
 (76, 2),
 (74, 1)]

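This means that having 34-35 essays per set of examples ensures that the total number of tokens for that set does not exceed a context window of 4096 tokens (the ChatGPT / gpt-3.5-turbo limit). Note that the sets summarised above are larger — about 80 rows and roughly 7,000–8,000 tokens each — i.e. sized for GPT-4's 8,192-token context window.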

In [48]:
# Render every example set as one few-shot string (one "Essay / Category / ####" block per row)
# and report how many tokens each rendered set takes.
examples = []
for df_ in example_dfs:
    parts = []
    # each row carries a single label here (exploded frame); for list-valued labels
    # the category would be rendered with '/'.join(emo) instead
    for text, emo in df_[[text_col, emotion_col]].values:
        parts.append(f"\nEssay: {text}\n\nCategory: {emo}.\n\n####\n")
    example = ''.join(parts)    # the instruction list (prompt_one) is not prepended here
    examples.append(example.strip())

lengths = [num_tokens_from_messages([{'role': 'user', 'content': rendered}]) for rendered in examples]
print(lengths)
print([n for n in lengths if n < 7800])
print([n for n in lengths if n > 8000])

[7981, 7972, 7964, 7908, 7718, 7462, 7678, 7736, 7911, 7683, 6781, 7003, 7978, 7941, 7891, 7790, 7958, 7953, 7913, 7649, 7948, 7917, 7924, 7980, 7746, 7981, 7807, 7916, 7927, 8042, 7959, 7937, 7942, 7840, 7905, 7336, 7681, 7468, 7825, 7938, 7986, 7612, 7976, 8041, 7986, 7960, 7480, 7952, 7998, 7799, 8016, 7895, 7942, 7934, 7939, 7998, 7871, 7945, 7852, 7698, 7896, 7748, 7560, 7940, 7804, 7234, 7582, 7966, 7976, 7960, 7832, 7956, 7861, 7955, 7613, 7998, 8003, 7150, 7977, 7955, 7972, 7403, 8003, 7998, 7444, 7920, 8033, 7959, 7986, 8033, 7960, 7570, 7913, 7975, 7943, 7718, 7992, 7866, 7899, 7989, 7911, 7930, 7691, 7730, 7887, 7155, 7423, 8000, 7825, 7900, 7863, 7950, 7981, 7541, 7985, 7989, 7753, 7934, 7960, 7983, 7952, 7970, 7977, 7919, 7935, 7903, 7951, 7960, 7939, 7937, 7387, 7923, 7809, 7986, 7980, 7773, 7737, 7904, 7883, 7915, 8021, 7826, 7684, 8038, 7965, 7760, 7757, 7897, 7932, 7935, 8002, 7754, 7933, 7884, 8004, 7525, 7979, 7943, 7775, 8009, 8038, 7216, 7923, 7944, 8008, 7880, 753

In [49]:
# eyeball the first rendered few-shot example string
print(examples[0])

Essay: I feel really bad for the rangers, who were just doing their job and were treated unmercifully by the villagers. And unfortunately they didn't get any help from the police officers, who ran away. What a horribly frightening experience that must have been. On the other hand, the article is also confusing for me. Why were the villagers so upset? Do they earn their livings from poaching? Were they confused about what was going on? Also, who called for the helicopter? And how did the rangers survive if they were really confronted by all those villagers with weapons. If the story is real then it's frightening and disturbing. But there seems to be a lot of missing information.

Category: Sadness.

####

Essay: Reading about the attack on Paris that happened years ago brought up a lot of bad feelings and thoughts. I had completely forgotton about it because of how often things like that happen. It makes me upset to think that we are becoming numb to terror attacks in a way. i can only 

In [50]:
def classify_text_few_shot(prompt_):
    '''Send prompt_ to the chat model and return the predicted label.

    The raw reply is passed through verify_label(); when that yields None
    (no label recognised in the reply), the conversation is extended with the
    model's own reply plus the `followup` question and the model is asked once more.

    Returns the verified label when one was found, otherwise the raw text of
    the last reply. Relies on the notebook globals `model` and `followup`.
    '''
    # single user turn; no system message is sent and the token count is not verified here
    messages = [{"role": "user", "content": prompt_}]

    raw_reply = get_response(model, messages)
    label_ = verify_label(raw_reply)        # reduce a long reply to just the category
    print('First iteration:', raw_reply, label_)
    if label_ is not None:
        return label_

    # no label recognised - continue the same chat with the follow-up question
    messages.extend([
        {"role": "assistant", "content": raw_reply},
        {"role": "user", "content": followup},
    ])
    raw_reply = get_response(model, messages)
    label_ = verify_label(raw_reply)        # reduce a long reply to just the category
    print('\tSecond iteration:', raw_reply, label_)

    if label_ is None:
        return raw_reply
    return label_

In [54]:
# Classify every dev-set text, pairing it with one of the pre-built example sets
# (the sets are used in rotation).
start  = time.time()
# `res` is not reset here: it must already exist in the kernel, and texts that are
# already keys of `res` are skipped below. Uncomment to start from scratch.
#res    = dict()
count1 = 0      # number of texts attempted in this run (successful or not)
count2 = 0      # position in `examples` of the next example set to use
for t in df_dev[text_col].tolist():
    if t in res:
        continue
    # wrap around once every example set has been used
    if count2 >= len(examples):
        count2 = 0
    prompt = examples[ count2 ].strip() + f'\nEssay: {t}\n\nCategory:'
    count2 += 1
    try:
        res[ t ] = classify_text_few_shot(prompt)
    # on failure the text is only reported; it gets no entry in `res`
    except openai.error.RateLimitError:
        print(f'\nText: {t}.\nRate limit error\n')
    except Exception as e:
        print(f'\nText: {t}\nError: {e}\n')
                
    count1 += 1    
    if count1 % 10 == 0:
        print(f'Processing text {count1}; example {count2-1}')
                        
        
elapsed = (time.time() - start)/60
print(f'\nTime elapsed {round(elapsed, 4)} min')

First iteration: Sadness. Sadness
First iteration: Neutral. Neutral
First iteration: Sadness. Sadness
First iteration: Sadness. Sadness
First iteration: Sadness. Sadness
First iteration: Sadness. Sadness

Time elapsed 0.3297 min


In [55]:
# compare the number of example sets with the number of stored predictions
len(examples), len(res)

(208, 208)

### If a followup question was not used

In [56]:
# if one label is the output
# Look up each dev text's prediction in `res` (keyed by the text itself); texts with
# no entry in `res` come out as NaN, which the null count printed below exposes.
df_dev['pred_all'] = df_dev[text_col].map( res )
print(df_dev.isna().sum())
print(df_dev['pred_all'].value_counts())

article_id                    0
conversation_id               0
speaker_number                0
essay_id                      0
speaker_id                    0
essay                         0
essay_clean                   0
split                         0
gender                        0
education                     0
race                          0
age                           0
income                        0
emotion                       0
emotion_count                 0
char_length                   0
word_length                   0
target_encoded                0
article                       0
article_clean                 0
essay_clean_docs              0
essay_clean_spellchecked      0
article_clean_docs            0
article_clean_spellchecked    0
compare1                      0
compare2                      0
gpt_embedding                 0
closest_texts                 0
emotion_no_2nd_neut           0
gpt35_keywords                0
gpt35_title                   0
gpt35_su

In [59]:
# show the rows whose raw prediction is exactly "Sarcasm."
print(df_dev[ df_dev['pred_all']=="Sarcasm." ])
# manual overrides by index label - hard-coded for one particular run;
# NOTE(review): re-check these indices whenever the predictions are regenerated
df_dev.at[ 26, 'pred_all' ] = 'Sadness'
df_dev.at[ 103, 'pred_all' ] = 'Anger'

     article_id  conversation_id  speaker_number  essay_id  speaker_id  \
103         103              497               1       496          67   

                                                 essay  \
103  lol, yeah, definitely an accidental death when...   

                                           essay_clean split  gender  \
103  lol, yeah, definitely an accidental death when...   dev       2   

     education  race  age  income  emotion  emotion_count  char_length  \
103          6     1   25   60000  [Anger]              1          331   

     word_length            target_encoded  \
103           61  [1, 0, 0, 0, 0, 0, 0, 0]   

                                               article  \
103  Death of former Putin aide at D.C. hotel is ru...   

                                         article_clean  \
103  Death of former Putin aide at D.C. hotel is ru...   

                                      essay_clean_docs  \
103  (lol, ,, yeah, ,, definitely, an, accidental, ... 

In [60]:
# distribution of predicted labels after the manual overrides
df_dev['pred_all'].value_counts()

Sadness     104
Disgust      31
Neutral      22
Hope         14
Anger        14
Fear         14
Surprise      5
Joy           4
Name: pred_all, dtype: int64

In [61]:
# binarize predictions
# a prediction may hold two labels joined by '/', so it is split into a list first;
# get_target is defined elsewhere (presumably returns the multi-hot vector - confirm)
df_dev['pred_encoded'] = df_dev['pred_all'].apply( lambda x: get_target(x.split('/')) )

In [62]:
# Score the predictions: stack the per-row encoded targets / predictions into arrays
# and print the per-label precision, recall and F1.
labels = list(label2key.keys())
y_dev_encoded = np.array(df_dev['target_encoded'].tolist())
y_dev_pred_encoded = np.array(df_dev['pred_encoded'].tolist())
report = classification_report(y_dev_encoded, y_dev_pred_encoded, target_names=labels, digits=4)
print(report)

              precision    recall  f1-score   support

       Anger     0.7857    0.2895    0.4231        38
     Disgust     0.4516    0.5833    0.5091        24
        Fear     0.3571    0.6250    0.4545         8
        Hope     0.4286    0.3750    0.4000        16
         Joy     0.2500    0.5000    0.3333         2
     Neutral     0.9091    0.3704    0.5263        54
     Sadness     0.8173    0.8416    0.8293       101
    Surprise     0.2000    0.3333    0.2500         3

   micro avg     0.6875    0.5813    0.6300       246
   macro avg     0.5249    0.4898    0.4657       246
weighted avg     0.7445    0.5813    0.6176       246
 samples avg     0.6875    0.6058    0.6330       246



### If a followup question was used

In [36]:
# if a followup question was used - clean response
# NOTE(review): assumes every `pred_all` entry is a sequence whose element at index 3
# is the final cleaned label (None when no label was recognised) - confirm
df_dev['pred'] = df_dev['pred_all'].apply( lambda x: x[3] )
print('Null values:\n', df_dev.isna().sum(), sep='')
df_dev['pred'].value_counts()

Null values:
article_id                    0
conversation_id               0
speaker_number                0
essay_id                      0
speaker_id                    0
essay                         0
essay_clean                   0
split                         0
gender                        0
education                     0
race                          0
age                           0
income                        0
emotion                       0
emotion_count                 0
char_length                   0
word_length                   0
target_encoded                0
article                       0
article_clean                 0
essay_clean_docs              0
essay_clean_spellchecked      0
article_clean_docs            0
article_clean_spellchecked    0
compare1                      0
compare2                      0
pred_all                      0
pred_encoded                  0
pred                          3
dtype: int64


Sadness             54
Fear/Sadness        31
Hope/Sadness        24
Anger/Sadness       23
Disgust/Sadness     19
Anger/Disgust        9
Hope                 8
Fear                 6
Disgust              5
Neutral/Sadness      4
Anger/Fear           4
Hope/Neutral         4
Disgust/Fear         3
Anger                2
Anger/Neutral        2
Neutral              2
Fear/Neutral         1
Disgust/Neutral      1
Sadness/Surprise     1
Joy                  1
Fear/Surprise        1
Name: pred, dtype: int64

In [42]:
# if a followup question was used - review why some predictions were NaNs
temp = df_dev[ df_dev['pred'].isna() ]
print(temp.index)
# show the full stored response next to the (missing) cleaned label
temp[['pred_all', 'pred']].values.tolist()

Int64Index([150, 167, 173], dtype='int64')


[[('Neutral',
   'Neutral',
   'I think the text is more related to Disappointment.',
   None),
  None],
 [('Sadness',
   'Sadness',
   "Yes, I am sure. The text expresses concern about children not being able to eat and the country's failure to provide basic necessities to its people, which is a sad situation.",
   None),
  None],
 [('Hope/Sadness',
   'Hope/Sadness',
   "Yes, I'm sure. The text expresses sympathy for the situation but also a sense of helplessness, and ends with a hopeful attitude towards the future.",
   None),
  None]]

In [43]:
# if a followup question was used - manually assign missing predictions
# (index labels and labels are hard-coded for one particular run; re-check after re-running)
df_dev.at[150, 'pred'] = 'Neutral'
df_dev.at[167, 'pred'] = 'Sadness'
df_dev.at[173, 'pred'] = 'Hope/Sadness'

In [61]:
# binarize predictions
# same as the single-label case, but reads the cleaned 'pred' column;
# any remaining non-string value (e.g. None/NaN) would fail on .split
df_dev['pred_encoded'] = df_dev['pred'].apply( lambda x: get_target(x.split('/')) )

In [None]:
# Score the follow-up predictions: stack the per-row encoded targets / predictions
# into arrays and print the per-label precision, recall and F1.
labels = list(label2key.keys())
y_dev_encoded = np.array(df_dev['target_encoded'].tolist())
y_dev_pred_encoded = np.array(df_dev['pred_encoded'].tolist())
report = classification_report(y_dev_encoded, y_dev_pred_encoded, target_names=labels, digits=4)
print(report)

# Few-Shot Approach 2: concatenate closest 30 examples into one long string

In [55]:
# the API key is read from the environment (None when OPENAI_API_KEY is not set)
openai.api_key = os.getenv("OPENAI_API_KEY")
# model name handed to get_response() by classify_text_few_shot
model          = 'gpt-4'

In [30]:
# rank the rows of a frame by embedding similarity to one query embedding
def batch_cosine(embedding_, df, top_n=100):
    '''Return the `essay_clean` texts of the top_n rows of df most similar to embedding_.

    Similarity is cosine_similarity between each row's 'gpt_embedding' and embedding_.
    Side effect: the scores are written into df['similarity'] in place, so pass a
    copy if the caller's frame must stay untouched.
    '''
    def _score(row_embedding):
        return cosine_similarity(row_embedding, embedding_)

    df['similarity'] = df['gpt_embedding'].apply(_score)
    ranked = df.sort_values(by='similarity', ascending=False)
    return ranked['essay_clean'].head(top_n).tolist()

# For every unique dev essay, collect the 30 most similar training essays.
# batch_cosine() writes a 'similarity' column into the frame it receives,
# so it is given a copy and df_train itself stays unchanged.
df_train_copy = df_train.copy()
start = time.time()
res   = dict()      # essay text -> list of its closest training essays
count = 0
for t, e in df_dev[['essay_clean', 'gpt_embedding']].values:
    if t in res:
        continue
    res[ t ] = batch_cosine( e, df_train_copy, top_n=30, )
    count += 1
    # progress report + checkpoint every 10 processed texts
    if count % 10 == 0:
        print(f'Processing text {count}. Time elapsed: {round((time.time()-start)/60, 4)} min')
        with open('data/res.pkl', 'wb') as f:
            pickle.dump(res, f, protocol=pickle.HIGHEST_PROTOCOL)

# final checkpoint: without it the file on disk misses every text processed
# after the last multiple of 10
with open('data/res.pkl', 'wb') as f:
    pickle.dump(res, f, protocol=pickle.HIGHEST_PROTOCOL)

elapsed = (time.time() - start)/60
print(f'\nTime elapsed {round(elapsed, 4)} min')

Processing text 10. Time elapsed: 0.0373 min
Processing text 20. Time elapsed: 0.074 min
Processing text 30. Time elapsed: 0.1105 min
Processing text 40. Time elapsed: 0.1474 min
Processing text 50. Time elapsed: 0.1841 min
Processing text 60. Time elapsed: 0.2211 min
Processing text 70. Time elapsed: 0.2579 min
Processing text 80. Time elapsed: 0.2952 min
Processing text 90. Time elapsed: 0.3326 min
Processing text 100. Time elapsed: 0.3696 min
Processing text 110. Time elapsed: 0.4074 min
Processing text 120. Time elapsed: 0.445 min
Processing text 130. Time elapsed: 0.4825 min
Processing text 140. Time elapsed: 0.5202 min
Processing text 150. Time elapsed: 0.5575 min
Processing text 160. Time elapsed: 0.595 min
Processing text 170. Time elapsed: 0.6326 min
Processing text 180. Time elapsed: 0.671 min
Processing text 190. Time elapsed: 0.709 min
Processing text 200. Time elapsed: 0.747 min

Time elapsed 0.7774 min


In [31]:
# attach to each dev row the list of closest training essays stored in `res`
# (keyed by 'essay_clean'); the null count shows whether any essay has no entry
df_dev['closest_texts'] = df_dev['essay_clean'].map( res )
print(df_dev.isna().sum())

article_id                    0
conversation_id               0
speaker_number                0
essay_id                      0
speaker_id                    0
essay                         0
essay_clean                   0
split                         0
gender                        0
education                     0
race                          0
age                           0
income                        0
emotion                       0
emotion_count                 0
char_length                   0
word_length                   0
target_encoded                0
article                       0
article_clean                 0
essay_clean_docs              0
essay_clean_spellchecked      0
article_clean_docs            0
article_clean_spellchecked    0
compare1                      0
compare2                      0
gpt_embedding                 0
closest_texts                 0
dtype: int64


In [32]:
# persist the dev frame (now including 'closest_texts') to disk
file = 'data/df_dev.pkl'
df_dev.to_pickle(file)

In [78]:
# list the columns available in the training frame
df_train.columns

Index(['article_id', 'conversation_id', 'speaker_number', 'essay_id',
       'speaker_id', 'essay', 'essay_clean', 'split', 'gender', 'education',
       'race', 'age', 'income', 'emotion', 'target_encoded', 'target_encoded2',
       'compare', 'emotion_count', 'char_length', 'word_length', 'article',
       'article_clean', 'essay_clean_docs', 'essay_clean_spellchecked',
       'article_clean_docs', 'article_clean_spellchecked', 'compare1',
       'compare2', 'gpt_embedding', 'emotion_no_2nd_neut'],
      dtype='object')

In [79]:
# build the few-shot prompt prefix for one dev datapoint from its closest training essays
def concatenate_few_show_examples( closest_texts ):
    '''Return prompt_one followed by one "Text / Category / ####" block per matching training row.

    closest_texts: collection of `essay_clean` strings; every df_train row whose
    'essay_clean' is in it is used (the caller passes the 30 closest essays).
    Rows appear in df_train order, not in order of similarity. Each row's labels
    ('emotion_no_2nd_neut') are sorted and joined with '/'.
    '''
    matches = df_train[ df_train['essay_clean'].isin(closest_texts) ]
    pairs = matches[['essay_clean_spellchecked', 'emotion_no_2nd_neut']].values
    blocks = [
        f"\nText: {text}\n\nCategory: {'/'.join(sorted(emo))}.\n\n####\n"
        for text, emo in pairs
    ]
    return prompt_one + ''.join(blocks)

# one concatenated few-shot prompt prefix per dev row
df_dev['closest_texts_concatenated'] = df_dev['closest_texts'].apply(concatenate_few_show_examples)


# sanity check: the number of tokens in each concatenated prompt must stay within the
# model's context window (the original note cites ChatGPT's 4096-token limit)
lengths = [
    num_tokens_from_messages([{'role': 'user', 'content': prompt_text}])
    for prompt_text in df_dev['closest_texts_concatenated'].tolist()
]
print(lengths)
print([n for n in lengths if n < 3000])
print([n for n in lengths if n > 3500])

[3313, 3442, 3131, 2930, 3297, 2964, 3101, 3057, 3215, 3209, 3411, 3226, 3303, 2836, 3143, 2919, 3094, 3051, 3092, 3041, 3076, 3145, 3198, 2972, 3106, 3383, 3174, 3158, 3301, 3304, 3248, 3578, 3088, 3496, 3239, 3236, 3138, 3503, 3398, 3093, 2961, 3032, 3099, 3198, 2906, 2922, 2897, 3201, 3300, 3176, 3368, 2982, 3358, 2995, 2955, 3116, 3228, 3032, 3006, 3178, 3383, 3193, 3050, 3086, 3406, 3327, 3067, 3011, 3051, 3020, 3368, 3264, 3320, 3393, 3292, 3640, 3348, 3241, 3337, 3383, 3050, 3074, 3570, 3543, 3015, 3259, 3436, 3232, 3119, 3230, 3241, 3207, 3207, 3130, 3416, 3257, 3523, 3125, 3135, 3144, 3122, 3409, 3171, 3130, 3227, 3001, 3258, 2760, 3319, 3220, 3113, 3264, 3271, 3459, 3272, 2979, 3626, 3242, 3096, 3179, 3207, 3096, 2917, 3062, 3294, 3102, 3153, 2955, 2957, 3237, 3366, 3103, 3190, 3295, 3151, 3173, 3144, 3211, 3610, 3163, 3150, 3338, 3077, 3403, 3105, 3223, 3135, 2878, 2907, 3278, 3154, 3326, 3386, 3143, 3435, 3410, 3358, 3086, 3043, 2948, 3133, 3537, 3248, 3098, 3147, 3119, 333

In [80]:
# eyeball the concatenated prompt built for the first dev row
print(df_dev['closest_texts_concatenated'].values[0])


1 - Below you are given examples of texts with their most relevant emotion categories. The examples are separated by four hashtags.
2 - Each text can belong to one or two most relevant emotion categories from the following list: Sadness, Neutral, Anger, Disgust, Fear, Hope, Surprise, Joy.
3 - Your task is to carefully learn from the examples of texts with their categories and then use the acquired knowledge to classify the very last text by selecting the most relevant emotion category from the above list.
4 - You may add a second emotion category from the above list ONLY AND ONLY IF it is also relevant to the very last text.
5 - Output just the category or categories for the last text and nothing else. If there are two relevant emotion categories: sort them alphabetically, concatenate with a forward slash, and output only them and nothing else.

####

Text: It breaks my heart to see people living in those conditions. I hope that all the aid that was sent to the island makes it to the 

In [81]:
# Classify every unique dev essay using its own concatenated closest-examples prompt.
start = time.time()
res   = dict()      # essay text -> predicted label (raw reply if no label was recognised)
count = 0           # number of texts attempted (successful or not)
for text, example in df_dev[['essay_clean_spellchecked', 'closest_texts_concatenated']].values:
    if text in res:
        continue
    prompt = example + f'\nText: {text}\n\nCategory:'
    try:
        res[ text ] = classify_text_few_shot(prompt)
    except Exception as e:
        # best effort: report the failure and move on; the text gets no entry in `res`
        print(f'\nText: {text}\nError: {e}\n')

    count += 1
    # progress report + checkpoint every 10 attempted texts
    # NOTE: this is the same path the nearest-neighbour lookup checkpoints to
    if count % 10 == 0:
        print(f'Processing text {count}')
        with open('data/res.pkl', 'wb') as f:
            pickle.dump(res, f, protocol=pickle.HIGHEST_PROTOCOL)

# final checkpoint: without it the file on disk misses every text processed
# after the last multiple of 10
with open('data/res.pkl', 'wb') as f:
    pickle.dump(res, f, protocol=pickle.HIGHEST_PROTOCOL)

elapsed = (time.time() - start)/60
print(f'\nTime elapsed {round(elapsed, 4)} min')

Frist iteration: Sadness. Sadness
Frist iteration: Hope. Hope
Frist iteration: Hope/Joy. Hope/Joy
Frist iteration: Sadness. Sadness
Frist iteration: Sadness. Sadness
Frist iteration: Sadness. Sadness
Frist iteration: Sadness. Sadness
Frist iteration: Sadness. Sadness
Frist iteration: Hope/Sadness. Hope/Sadness
Frist iteration: Anger/Sadness. Anger/Sadness
Processing text 10
Frist iteration: Sadness. Sadness
Frist iteration: Sadness. Sadness
Frist iteration: Sadness. Sadness
Frist iteration: Hope/Sadness. Hope/Sadness
Frist iteration: Sadness. Sadness
Frist iteration: Sadness. Sadness
Frist iteration: Sadness. Sadness
Frist iteration: Disgust/Sadness. Disgust/Sadness
Frist iteration: Sadness/Neutral. Neutral/Sadness
Frist iteration: Hope. Hope
Processing text 20
Frist iteration: Sadness. Sadness
Frist iteration: Sadness. Sadness
Frist iteration: Sadness. Sadness
Frist iteration: Sadness. Sadness
Frist iteration: Hope/Sadness. Hope/Sadness
Frist iteration: Sadness. Sadness
Frist iteratio

### If a followup question was not used

In [82]:
# if one label is the output
# Look up each dev essay's prediction in `res` (keyed by the spell-checked essay text);
# essays with no entry in `res` come out as NaN, which the null count below exposes.
df_dev['pred_all'] = df_dev['essay_clean_spellchecked'].map( res )
print(df_dev.isna().sum())
print(df_dev['pred_all'].value_counts())

article_id                    0
conversation_id               0
speaker_number                0
essay_id                      0
speaker_id                    0
essay                         0
essay_clean                   0
split                         0
gender                        0
education                     0
race                          0
age                           0
income                        0
emotion                       0
emotion_count                 0
char_length                   0
word_length                   0
target_encoded                0
article                       0
article_clean                 0
essay_clean_docs              0
essay_clean_spellchecked      0
article_clean_docs            0
article_clean_spellchecked    0
compare1                      0
compare2                      0
gpt_embedding                 0
closest_texts                 0
emotion_no_2nd_neut           0
closest_texts_concatenated    0
pred_all                      0
dtype: i

In [45]:
# show the rows whose stored prediction is this free-text reply instead of a label
print(df_dev[ df_dev['pred_all']=="Yes, I'm sure. The category is Disappointment."])
# manual overrides by index label - hard-coded for one particular run;
# NOTE(review): re-check these indices whenever the predictions are regenerated
df_dev.at[ 54, 'pred_all' ] = 'Fear'
df_dev.at[ 74, 'pred_all' ] = 'Neutral'
df_dev.at[ 26, 'pred_all' ] = 'Sadness'

    article_id  conversation_id  speaker_number  essay_id  speaker_id  \
26         233               93               1        92          71   

                                                essay  \
26  Unfortunately in countries like these the offi...   

                                          essay_clean split  gender  \
26  Unfortunately in countries like these the offi...   dev       1   

    education  race  age  income    emotion  emotion_count  char_length  \
26          6     2   32   35000  [Disgust]              1          369   

    word_length            target_encoded  \
26           72  [0, 1, 0, 0, 0, 0, 0, 0]   

                                              article  \
26  Nigeria investigates reports that officials ra...   

                                        article_clean  \
26  Nigeria investigates reports that officials ra...   

                                     essay_clean_docs  \
26  (Unfortunately, in, countries, like, these, th...   

        

In [83]:
# distribution of predicted label combinations
df_dev['pred_all'].value_counts()

Sadness             95
Neutral             19
Hope/Sadness        15
Disgust             14
Anger/Sadness       12
Disgust/Sadness     12
Anger               10
Hope                 7
Fear                 5
Anger/Disgust        3
Neutral/Sadness      3
Anger/Hope           2
Disgust/Hope         2
Disgust/Fear         2
Fear/Sadness         2
Hope/Joy             1
Disgust/Surprise     1
Fear/Hope            1
Sadness/Surprise     1
Fear/Neutral         1
Name: pred_all, dtype: int64

In [84]:
# binarize predictions
# a prediction may hold two labels joined by '/', so it is split into a list first;
# get_target is defined elsewhere (presumably returns the multi-hot vector - confirm)
df_dev['pred_encoded'] = df_dev['pred_all'].apply( lambda x: get_target(x.split('/')) )

In [85]:
# Score the closest-examples predictions: stack the per-row encoded targets / predictions
# into arrays and print the per-label precision, recall and F1.
labels = list(label2key.keys())
y_dev_encoded = np.array(df_dev['target_encoded'].tolist())
y_dev_pred_encoded = np.array(df_dev['pred_encoded'].tolist())
report = classification_report(y_dev_encoded, y_dev_pred_encoded, target_names=labels, digits=4)
print(report)

              precision    recall  f1-score   support

       Anger     0.5926    0.4211    0.4923        38
     Disgust     0.4412    0.6250    0.5172        24
        Fear     0.3636    0.5000    0.4211         8
        Hope     0.3214    0.5625    0.4091        16
         Joy     0.0000    0.0000    0.0000         2
     Neutral     0.6957    0.2963    0.4156        54
     Sadness     0.6714    0.9307    0.7801       101
    Surprise     0.0000    0.0000    0.0000         3

   micro avg     0.5789    0.6260    0.6016       246
   macro avg     0.3857    0.4169    0.3794       246
weighted avg     0.5957    0.6260    0.5783       246
 samples avg     0.6178    0.6394    0.6106       246



## Results

### Concatenate random examples
Notes:
* When GPT-4 is not given a prompt with a request to add a second emotion if relevant, it tends to output just one emotion (did ChatGPT do the same?)

__Experiment 1__:  
text_col = 'spellchecked', temperature = 0  
No system role, __second Neutral removed__    
_Prompt_:
```
1 - Below you are given examples of essays with categories separated by four hashtags.
2 - Each essay has one or two relevant categories from the following list: Sadness, Neutral, Anger, Disgust, Fear, Hope, Surprise, Joy.
3 - Your task is to carefully learn from the examples of essays with categories in order to understand what features or words in the essays make them belong to a specific category and then use this knowledge to assign the correct relevant category from the above list to the very last essay.
4 - You may add a second category from the above list ONLY AND ONLY IF it is also relevant to the very last essay. Do not add the second category if it is not significant.
5 - If there is one relevant category, output just the category and nothing else. If there are two relevant categories: sort them alphabetically, concatenate with a forward slash, and output only them and nothing else.

####

Are you sure about that? If yes, output the same category; if no, change the category

    + concatenated examples (no list of instructions)
```

```
              precision    recall  f1-score   support

       Anger     0.8182    0.2368    0.3673        38
     Disgust     0.5333    0.6667    0.5926        24
        Fear     0.4167    0.6250    0.5000         8
        Hope     0.3333    0.2500    0.2857        16
         Joy     0.0000    0.0000    0.0000         2
     Neutral     0.7222    0.2407    0.3611        54
     Sadness     0.7323    0.9208    0.8158       101
    Surprise     0.5000    0.3333    0.4000         3

   micro avg     0.6620    0.5732    0.6144       246
   macro avg     0.5070    0.4092    0.4153       246
weighted avg     0.6789    0.5732    0.5685       246
 samples avg     0.6683    0.5962    0.6170       246
```

__Experiment 2__:  
text_col = 'spellchecked', temperature = 0  
No system role, __second Neutral removed__    
_Prompt_:  
concatenated examples (no list of instructions)
```
              precision    recall  f1-score   support

       Anger     0.7857    0.2895    0.4231        38
     Disgust     0.4516    0.5833    0.5091        24
        Fear     0.3571    0.6250    0.4545         8
        Hope     0.4286    0.3750    0.4000        16
         Joy     0.2500    0.5000    0.3333         2
     Neutral     0.9091    0.3704    0.5263        54
     Sadness     0.8173    0.8416    0.8293       101
    Surprise     0.2000    0.3333    0.2500         3

   micro avg     0.6875    0.5813    0.6300       246
   macro avg     0.5249    0.4898    0.4657       246
weighted avg     0.7445    0.5813    0.6176       246
 samples avg     0.6875    0.6058    0.6330       246
```

## Appendix

In [30]:
# another way to randomly split the data:
# stratified folds on the '/'-joined emotion labels; each fold's test part is one chunk
X = df_train['essay_clean'].values
y = ['/'.join(i) for i in df_train['emotion'].values]

skf        = StratifiedKFold(n_splits=18, shuffle=True, random_state=random_state)
chunks_idx = [test_index for _, test_index in skf.split(X, y)]
print('Length X:', len(X))
print('Total chunks length:', sum([len(i) for i in chunks_idx]), '\n')

for ch in chunks_idx:
    # skf.split() yields positional indices, so rows are selected with .iloc;
    # .loc would only be correct while df_train has a default 0..n-1 index
    chunk = df_train.iloc[ch]
    print(chunk.explode('emotion')['emotion'].value_counts())
    # token count of all essays of the chunk joined into a single user message
    messages = [ {'role': 'user', 'content': ' '.join(chunk['essay_clean'].tolist())} ]
    num_tokens = num_tokens_from_messages(messages, model="gpt-3.5-turbo-0301")
    print('Num tokens:', num_tokens)
    print('\n', '='*75, '\n', sep='')

Length X: 792
Total chunks length: 792 

Sadness     22
Neutral     12
Anger        6
Disgust      5
Hope         3
Fear         2
Surprise     1
Name: emotion, dtype: int64
Num tokens: 3480


Sadness     22
Neutral     14
Anger        6
Disgust      4
Surprise     2
Hope         2
Fear         1
Joy          1
Name: emotion, dtype: int64
Num tokens: 3819


Sadness     21
Neutral     14
Anger        5
Disgust      4
Hope         2
Surprise     2
Fear         2
Joy          1
Name: emotion, dtype: int64
Num tokens: 3744


Sadness     21
Neutral     14
Anger        6
Disgust      4
Hope         2
Fear         2
Joy          1
Surprise     1
Name: emotion, dtype: int64
Num tokens: 3946


Sadness     21
Neutral     14
Anger        7
Disgust      5
Hope         2
Fear         2
Surprise     1
Name: emotion, dtype: int64
Num tokens: 3801


Sadness     20
Neutral     13
Anger        8
Disgust      5
Hope         2
Surprise     2
Fear         2
Name: emotion, dtype: int64
Num tokens: 3777


Sa

