In [1]:
import pandas as pd
import numpy as np

from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from transformers import pipeline

  from .autonotebook import tqdm as notebook_tqdm


# Computing and formating "sentiment_features"

## Valder

In [19]:
# Load VADER sentiment analyzer
analyzer = SentimentIntensityAnalyzer()

# Load the JSON file into a DataFrame
file_path = 'data/sarcasm_data.json'
df = pd.read_json(file_path).transpose()

# Reset the index to turn the first element into a new column
df = df.reset_index()

# Define a function to apply sentiment analysis to a text
def get_sentiment(text):
    return analyzer.polarity_scores(text)

# Apply sentiment analysis to the 'utterance' column
df['sentiment_utterance'] = df['utterance'].apply(get_sentiment)

# Apply sentiment analysis to the 'context' column
df['sentiment_context_all'] = df['context'].apply(get_sentiment)

# Apply sentiment analysis to each sentence in the 'context' column
df['sentiment_context_per_sentence'] = df['context'].apply(lambda context: [get_sentiment(sentence) for sentence in context])

df = df.transpose()
df.to_json('sarcasm_data_vader.json')

In [6]:
def get_data_Valder():
    df = pd.read_json('../preprocessed_data/sarcasm_data_vader.json').transpose()
    return df

In [21]:
# U : Uterance sentiment
# oC : Overall context sentiment
# pC : Per sentence context sentiment

# Add 1 with compound // Add 2 without it

### Uterance sentiments with pos, neu, neg and compound

In [7]:
df = get_data_Valder()

# Transform the dictionaries into arrays of scores
df['sentiment_features'] = df['sentiment_utterance'].apply(lambda x: list(x.values()))

# Get the keys from the dictionaries and add a prefix
utterance_keys = ['utterance_' + key for key in df['sentiment_utterance'].apply(lambda x: list(x.keys())).iloc[0]]

# Transpose the dataframe to get the orignal json format
df = df.transpose()

df.to_json('sarcasm_data_vader1_U.json')

df.iloc[-1], utterance_keys

(0                            [0.0, 0.783, 0.217, 0.3612]
 1                [0.18, 0.8200000000000001, 0.0, -0.296]
 2                                   [0.0, 1.0, 0.0, 0.0]
 3             [0.058, 0.851, 0.091, 0.42150000000000004]
 4                                   [0.0, 1.0, 0.0, 0.0]
                              ...                        
 685           [0.10200000000000001, 0.898, 0.0, -0.5106]
 686    [0.0, 0.858, 0.14200000000000002, 0.3595000000...
 687    [0.0, 0.763, 0.23700000000000002, 0.4215000000...
 688             [0.0, 0.781, 0.219, 0.42150000000000004]
 689             [0.0, 0.527, 0.47300000000000003, 0.659]
 Name: sentiment_features, Length: 690, dtype: object,
 ['utterance_neg', 'utterance_neu', 'utterance_pos', 'utterance_compound'])

### Uterance sentiments with pos, neu, neg

In [23]:
df = get_data_Valder()

def remove_compound(dict):
    return {key: value for key, value in dict.items() if key != 'compound'}

# Apply the function to the 'sentiment_utterance' columns
df['sentiment_utterance'] = df['sentiment_utterance'].apply(remove_compound)
df['sentiment_features'] = df['sentiment_utterance'].apply(lambda x: list(x.values())) 

# Get the keys from the dictionaries and add a prefix
utterance_keys = ['utterance_' + key for key in df['sentiment_utterance'].apply(lambda x: list(x.keys())).iloc[0]]

# Transpose the dataframe to get the original json format
df = df.transpose()

df.to_json('sarcasm_data_vader2_U.json')

df.iloc[-1], utterance_keys

(0                    [0.0, 0.783, 0.217]
 1        [0.18, 0.8200000000000001, 0.0]
 2                        [0.0, 1.0, 0.0]
 3                  [0.058, 0.851, 0.091]
 4                        [0.0, 1.0, 0.0]
                      ...                
 685    [0.10200000000000001, 0.898, 0.0]
 686    [0.0, 0.858, 0.14200000000000002]
 687    [0.0, 0.763, 0.23700000000000002]
 688                  [0.0, 0.781, 0.219]
 689    [0.0, 0.527, 0.47300000000000003]
 Name: sentiment_features, Length: 690, dtype: object,
 ['utterance_neg', 'utterance_neu', 'utterance_pos'])

### Valder sentiment context overall + utterance with pos, neu, neg and compound

In [24]:
df = get_data_Valder()

# Get the keys from the dictionaries and add a prefix
utterance_keys = ['utterance_' + key for key in df['sentiment_utterance'].apply(lambda x: list(x.keys())).iloc[0]]
context_keys = ['context_' + key for key in df['sentiment_context_all'].apply(lambda x: list(x.keys())).iloc[0]]
keys_list = utterance_keys + context_keys

# Transform the dictionaries into arrays of scores
df['sentiment_features'] = df.apply(lambda row: list(row['sentiment_utterance'].values()) + list(row['sentiment_context_all'].values()), axis=1)

# Transpose the dataframe to get the original json format
df = df.transpose()

# Save the DataFrame to a new JSON file
df.to_json('sarcasm_data_vader1_UoC.json')

df.iloc[-1], keys_list

(0        [0.0, 0.783, 0.217, 0.3612, 0.0, 1.0, 0.0, 0.0]
 1      [0.18, 0.8200000000000001, 0.0, -0.296, 0.0, 0...
 2      [0.0, 1.0, 0.0, 0.0, 0.14300000000000002, 0.85...
 3      [0.058, 0.851, 0.091, 0.42150000000000004, 0.0...
 4      [0.0, 1.0, 0.0, 0.0, 0.097, 0.8150000000000001...
                              ...                        
 685    [0.10200000000000001, 0.898, 0.0, -0.5106, 0.0...
 686    [0.0, 0.858, 0.14200000000000002, 0.3595000000...
 687    [0.0, 0.763, 0.23700000000000002, 0.4215000000...
 688    [0.0, 0.781, 0.219, 0.42150000000000004, 0.0, ...
 689    [0.0, 0.527, 0.47300000000000003, 0.659, 0.159...
 Name: sentiment_features, Length: 690, dtype: object,
 ['utterance_neg',
  'utterance_neu',
  'utterance_pos',
  'utterance_compound',
  'context_neg',
  'context_neu',
  'context_pos',
  'context_compound'])

### Valder sentiment context overall + utterance with pos, neu, neg 

In [25]:
df = get_data_Valder()

def remove_compound(dict):
    return {key: value for key, value in dict.items() if key != 'compound'}

# Apply the function to the 'sentiment_utterance' and 'sentiment_context_all' columns
df['sentiment_utterance'] = df['sentiment_utterance'].apply(remove_compound)
df['sentiment_context_all'] = df['sentiment_context_all'].apply(remove_compound)

# Get the keys from the dictionaries and add a prefix
utterance_keys = ['utterance_' + key for key in df['sentiment_utterance'].apply(lambda x: list(x.keys())).iloc[0]]
context_keys = ['context_' + key for key in df['sentiment_context_all'].apply(lambda x: list(x.keys())).iloc[0]]
keys_list = utterance_keys + context_keys

# Transform the dictionaries into arrays of scores
df['sentiment_features'] = df['sentiment_utterance'].apply(lambda x: list(x.values())) + df['sentiment_context_all'].apply(lambda x: list(x.values()))

# Transpose the dataframe to get the original json format
df = df.transpose()

df.to_json('sarcasm_data_vader2_UoC.json')

df.iloc[-1] , keys_list

(0                     [0.0, 0.783, 0.217, 0.0, 1.0, 0.0]
 1      [0.18, 0.8200000000000001, 0.0, 0.0, 0.871, 0....
 2       [0.0, 1.0, 0.0, 0.14300000000000002, 0.857, 0.0]
 3               [0.058, 0.851, 0.091, 0.0, 0.906, 0.094]
 4      [0.0, 1.0, 0.0, 0.097, 0.8150000000000001, 0.088]
                              ...                        
 685     [0.10200000000000001, 0.898, 0.0, 0.0, 1.0, 0.0]
 686    [0.0, 0.858, 0.14200000000000002, 0.062, 0.751...
 687    [0.0, 0.763, 0.23700000000000002, 0.506, 0.494...
 688                   [0.0, 0.781, 0.219, 0.0, 1.0, 0.0]
 689    [0.0, 0.527, 0.47300000000000003, 0.159, 0.736...
 Name: sentiment_features, Length: 690, dtype: object,
 ['utterance_neg',
  'utterance_neu',
  'utterance_pos',
  'context_neg',
  'context_neu',
  'context_pos'])

### Valder sentiment utterance + contex_per_sentence with pos, neu, neg and compound

In [26]:
# Define a function to extract the 'neg', 'neu', 'pos', and 'compound' values from the first three dictionaries in a list
def extract_values(sentences):
    # Ensure the list has at least 3 dictionaries, padding if necessary
    sentences.extend([{'neg': 0.0, 'neu': 0.0, 'pos': 0.0, 'compound': 0.0}] * (3 - len(sentences)))
    # Extract the values from the first 3 dictionaries
    return [value for sentence in sentences[:3] for key, value in sentence.items()]

df = get_data_Valder()

# Get the keys from the dictionaries and add a prefix
utterance_keys = ['utterance_' + key for key in df['sentiment_utterance'].apply(lambda x: list(x.keys())).iloc[0]]
context_per_sentence_keys = [f"context_{i}_{key}" for i in range(1, 4) for key in ['neg', 'neu', 'pos', 'compound']]
keys_list = utterance_keys + context_per_sentence_keys

# Transform the dictionaries into arrays of scores
df['sentiment_features'] = df.apply(lambda row: list(row['sentiment_utterance'].values()) + extract_values(row['sentiment_context_per_sentence']), axis=1)

# Transpose the dataframe to get the original json format
df = df.transpose()

# Save the DataFrame to a new JSON file
df.to_json('sarcasm_data_vader1_UpC.json')

df.iloc[-1], keys_list

(0      [0.0, 0.783, 0.217, 0.3612, 0.0, 1.0, 0.0, 0.0...
 1      [0.18, 0.8200000000000001, 0.0, -0.296, 0.0, 0...
 2      [0.0, 1.0, 0.0, 0.0, 0.268, 0.732, 0.0, -0.296...
 3      [0.058, 0.851, 0.091, 0.42150000000000004, 0.0...
 4      [0.0, 1.0, 0.0, 0.0, 0.202, 0.439, 0.36, 0.421...
                              ...                        
 685    [0.10200000000000001, 0.898, 0.0, -0.5106, 0.0...
 686    [0.0, 0.858, 0.14200000000000002, 0.3595000000...
 687    [0.0, 0.763, 0.23700000000000002, 0.4215000000...
 688    [0.0, 0.781, 0.219, 0.42150000000000004, 0.0, ...
 689    [0.0, 0.527, 0.47300000000000003, 0.659, 0.159...
 Name: sentiment_features, Length: 690, dtype: object,
 ['utterance_neg',
  'utterance_neu',
  'utterance_pos',
  'utterance_compound',
  'context_1_neg',
  'context_1_neu',
  'context_1_pos',
  'context_1_compound',
  'context_2_neg',
  'context_2_neu',
  'context_2_pos',
  'context_2_compound',
  'context_3_neg',
  'context_3_neu',
  'context_3_pos',
  'cont

### Valder sentiment utterance + contex_per_sentence with pos, neu, neg

In [27]:
# Define a function to extract the 'neg', 'neu', 'pos', and 'compound' values from the first three dictionaries in a list
def extract_values(sentences):
    # Ensure the list has at least 3 dictionaries, padding if necessary
    sentences.extend([{'neg': 0.0, 'neu': 0.0, 'pos': 0.0}] * (3 - len(sentences)))
    # Extract the values from the first 3 dictionaries
    return [value for sentence in sentences[:3] for key, value in sentence.items()]

def remove_compound_1(dict):
    return {key: value for key, value in dict.items() if key != 'compound'}
def remove_compound_2(sentences):
    return [{key: value for key, value in sentence.items() if key != 'compound'} for sentence in sentences]

df = get_data_Valder()

df['sentiment_utterance'] = df['sentiment_utterance'].apply(remove_compound_1)
df['sentiment_context_per_sentence'] = df['sentiment_context_per_sentence'].apply(remove_compound_2)

# Get the keys from the dictionaries and add a prefix
utterance_keys = ['utterance_' + key for key in df['sentiment_utterance'].apply(lambda x: list(x.keys())).iloc[0]]
context_per_sentence_keys = [f"context_{i}_{key}" for i in range(1, 4) for key in ['neg', 'neu', 'pos']]
keys_list = utterance_keys + context_per_sentence_keys

# Transform the dictionaries into arrays of scores
df['sentiment_features'] = df.apply(lambda row: list(row['sentiment_utterance'].values()) + extract_values(row['sentiment_context_per_sentence']), axis=1)

# Transpose the dataframe to get the original json format
df = df.transpose()

# Save the DataFrame to a new JSON file
df.to_json('sarcasm_data_vader2_UpC.json')

df.iloc[-1], keys_list

(0      [0.0, 0.783, 0.217, 0.0, 1.0, 0.0, 0.0, 1.0, 0...
 1      [0.18, 0.8200000000000001, 0.0, 0.0, 0.705, 0....
 2      [0.0, 1.0, 0.0, 0.268, 0.732, 0.0, 0.0, 1.0, 0...
 3      [0.058, 0.851, 0.091, 0.0, 1.0, 0.0, 0.0, 0.58...
 4      [0.0, 1.0, 0.0, 0.202, 0.439, 0.36, 0.356, 0.6...
                              ...                        
 685    [0.10200000000000001, 0.898, 0.0, 0.0, 1.0, 0....
 686    [0.0, 0.858, 0.14200000000000002, 0.0, 1.0, 0....
 687    [0.0, 0.763, 0.23700000000000002, 0.0, 1.0, 0....
 688    [0.0, 0.781, 0.219, 0.0, 1.0, 0.0, 0.0, 1.0, 0...
 689    [0.0, 0.527, 0.47300000000000003, 0.159, 0.736...
 Name: sentiment_features, Length: 690, dtype: object,
 ['utterance_neg',
  'utterance_neu',
  'utterance_pos',
  'context_1_neg',
  'context_1_neu',
  'context_1_pos',
  'context_2_neg',
  'context_2_neu',
  'context_2_pos',
  'context_3_neg',
  'context_3_neu',
  'context_3_pos'])

## Hartmann

In [11]:
classifier = pipeline("text-classification", model="j-hartmann/emotion-english-distilroberta-base", return_all_scores=True)
def hartmann_sentiment(text):
    return classifier(text)[0]

# Load the JSON file into a DataFrame
file_path = 'data/sarcasm_data.json'
df = pd.read_json(file_path).transpose()

# Reset the index to turn the first element into a new column
df = df.reset_index()

# Apply sentiment analysis to the 'utterance' column
df['sentiment_utterance'] = df['utterance'].apply(hartmann_sentiment)

# Apply sentiment analysis to the 'context' column
df['sentiment_context_all'] = df['context'].apply(hartmann_sentiment)

# Apply sentiment analysis to each sentence in the 'context' column
df["sentiment_context_per_sentence"] = df['context'].apply(lambda context: [hartmann_sentiment(sentence) for sentence in context])

df = df.transpose()
df.to_json('sarcasm_data_hartmann.json')

# anger = 0, disgust = 1, fear = 2,  joy = 3, neutral = 4, sadness =  5, surprise = 6



In [8]:
def get_data_hartmann():
    df = pd.read_json('..//preprocessed_data/sarcasm_data_hartmann.json').transpose()
    return df

In [7]:
# U : Uterance sentiment
# oC : Overall context sentiment
# pC : Per sentence context sentiment

# anger = 0, disgust = 1, fear = 2,  joy = 3, neutral = 4, sadness =  5, surprise = 6 , missing_context = -1

### Uterance sentiments (MAX)

In [9]:
df = get_data_hartmann()

# Create a mapping from index number to sentiment label
index_to_label = {i: d['label'] for i, d in enumerate(df['sentiment_utterance'].iloc[0])}

# Get the sentiment features
df['sentiment_features'] = df['sentiment_utterance'].apply(lambda x: np.argmax([d['score'] for d in x]))

# Transpose the dataframe to get the original json format
df = df.transpose()

df.to_json('sarcasm_data_hartmann_max_U.json')

df.iloc[-1], ["utterance"] , index_to_label

(0      4
 1      6
 2      4
 3      6
 4      4
       ..
 685    0
 686    1
 687    4
 688    1
 689    3
 Name: sentiment_features, Length: 690, dtype: object,
 ['utterance'],
 {0: 'anger',
  1: 'disgust',
  2: 'fear',
  3: 'joy',
  4: 'neutral',
  5: 'sadness',
  6: 'surprise'})

### Uterance sentiments + context overall (MAX)

In [14]:
df = get_data_hartmann()

# Create a mapping from index number to sentiment label
index_to_label = {i: d['label'] for i, d in enumerate(df['sentiment_utterance'].iloc[0])}

# Get the argmax sentiment score
df['sentiment_utterance'] = df['sentiment_utterance'].apply(lambda x: np.argmax([d['score'] for d in x]))
df["sentiment_context_all"] = df["sentiment_context_all"].apply(lambda x: np.argmax([d['score'] for d in x]))

# Get the sentiment features
df['sentiment_features'] = [list(item) for item in zip(df["sentiment_utterance"], df["sentiment_context_all"])]

# Get names of the columns
keys_list = ['utterance'] +  ['context']

# Transpose the dataframe to get the original json format
df = df.transpose()

df.to_json('sarcasm_data_hartmann_max_UoC.json')

df.iloc[-1], keys_list , index_to_label

(0      [4, 4]
 1      [6, 4]
 2      [4, 4]
 3      [6, 6]
 4      [4, 2]
         ...  
 685    [0, 0]
 686    [1, 6]
 687    [4, 4]
 688    [1, 6]
 689    [3, 5]
 Name: sentiment_features, Length: 690, dtype: object,
 ['utterance', 'context'],
 {0: 'anger',
  1: 'disgust',
  2: 'fear',
  3: 'joy',
  4: 'neutral',
  5: 'sadness',
  6: 'surprise'})

### Uterance sentiments + context per sentence (MAX)

In [15]:
def take_first_three(contexts):
    # If there are less than three contexts, pad the list with 'new_value'
    while len(contexts) < 3:
        contexts.append('-1')
    # If there are more than three contexts, take the first three
    return contexts[:3]
def flatten_list_of_dicts(list_of_lists):
    return [item for sublist in list_of_lists for item in sublist]

df = get_data_hartmann()

# Create a mapping from index number to sentiment label
index_to_label = {i: d['label'] for i, d in enumerate(df['sentiment_utterance'].iloc[0])}
index_to_label = {**index_to_label, -1: 'missing_context'}

# Get the argmax sentiment score
df['sentiment_utterance'] = df['sentiment_utterance'].apply(lambda x: np.argmax([d['score'] for d in x]))
df["sentiment_context_per_sentence"] = df["sentiment_context_per_sentence"].apply(lambda x: [np.argmax([d['score'] for d in sublist]) for sublist in x])

df['sentiment_context_per_sentence'] = df['sentiment_context_per_sentence'].apply(take_first_three)

print(df['sentiment_utterance'])
# Get the sentiment features
df['sentiment_features'] = df.apply(lambda row: [row["sentiment_utterance"]] + row["sentiment_context_per_sentence"], axis=1)

# Get names of the columns
keys_list = ['utterance'] +  ['context_' + str(i) for i in range(1, 4)]

# Transpose the dataframe to get the original json format
df = df.transpose()

df.to_json('sarcasm_data_hartmann_max_UpC.json')

df.iloc[-1], keys_list , index_to_label

0      4
1      6
2      4
3      6
4      4
      ..
685    0
686    1
687    4
688    1
689    3
Name: sentiment_utterance, Length: 690, dtype: int64


(0       [4, 4, 5, -1]
 1        [6, 4, 4, 4]
 2        [4, 4, 4, 4]
 3        [6, 6, 0, 4]
 4        [4, 2, 1, 1]
             ...      
 685     [0, 0, 4, -1]
 686      [1, 6, 6, 1]
 687     [4, 4, 0, -1]
 688      [1, 6, 4, 3]
 689    [3, 5, -1, -1]
 Name: sentiment_features, Length: 690, dtype: object,
 ['utterance', 'context_1', 'context_2', 'context_3'],
 {0: 'anger',
  1: 'disgust',
  2: 'fear',
  3: 'joy',
  4: 'neutral',
  5: 'sadness',
  6: 'surprise',
  -1: 'missing_context'})

### Uterance sentiments 

In [16]:
df = get_data_hartmann()

# Get the sentiment features
df['sentiment_features'] = df['sentiment_utterance'].apply(lambda x: [d['score'] for d in x])

# Get the keys from the dictionaries and add a prefix
labels = ['utterance_' + d['label'] for d in df['sentiment_utterance'].iloc[0]]

# Transpose the dataframe to get the original json format
df = df.transpose()

df.to_json('sarcasm_data_hartmann_U.json')

df.iloc[-1], labels


(0      [0.010123579800000001, 0.0143101504, 0.0011973...
 1      [0.0273885094, 0.045210629700000005, 0.0265790...
 2      [0.0048839869, 0.0049315374, 0.0015044714, 0.0...
 3      [0.0979550406, 0.0185232982, 0.1187259331, 0.0...
 4      [0.0098890308, 0.0246547051, 0.0029980363, 0.0...
                              ...                        
 685    [0.759996593, 0.1669357866, 0.0182649642, 0.00...
 686    [0.340244025, 0.4619074166, 0.0146603724, 0.00...
 687    [0.0086229192, 0.0097058332, 0.0121506732, 0.0...
 688    [0.0083397916, 0.7863316536, 0.004822876800000...
 689    [0.005968420800000001, 0.0016014389, 0.0021068...
 Name: sentiment_features, Length: 690, dtype: object,
 ['utterance_anger',
  'utterance_disgust',
  'utterance_fear',
  'utterance_joy',
  'utterance_neutral',
  'utterance_sadness',
  'utterance_surprise'])

### Uterance sentiments + context overall 

In [17]:
df = get_data_hartmann()

# Get the sentiment features
df['sentiment_features'] = df['sentiment_utterance'].apply(lambda x: [d['score'] for d in x]) + df['sentiment_context_all'].apply(lambda x: [d['score'] for d in x])

# Get the keys from the dictionaries and add a prefix
labels_utter = ['utterance_' + d['label'] for d in df['sentiment_utterance'].iloc[0]]
labels_context = ['context_' + d['label'] for d in df['sentiment_context_all'].iloc[0]]
labels = labels_utter + labels_context

# Transpose the dataframe to get the original json format
df = df.transpose()

df.to_json('sarcasm_data_hartmann_UoC.json')

df.iloc[-1][0], labels


([0.010123579800000001,
  0.0143101504,
  0.0011973934,
  0.057147428400000005,
  0.9046183228,
  0.0029129912000000003,
  0.0096901413,
  0.0301044136,
  0.0456504636,
  0.061435729300000005,
  0.0034190910000000004,
  0.5549920797,
  0.0093617234,
  0.2950364351],
 ['utterance_anger',
  'utterance_disgust',
  'utterance_fear',
  'utterance_joy',
  'utterance_neutral',
  'utterance_sadness',
  'utterance_surprise',
  'context_anger',
  'context_disgust',
  'context_fear',
  'context_joy',
  'context_neutral',
  'context_sadness',
  'context_surprise'])

### Uterance sentiments + context per sentence 

In [18]:
# Padding function to ensure at least 21 (7*3) scores (context per sentence) per row
def pad_scores(scores, target_length=7*3, padding_value=0):
    return scores + [padding_value] * (target_length - len(scores))

df = get_data_hartmann()
labels_utter = ['utterance_' + d['label'] for d in df['sentiment_utterance'].iloc[0]]
labels_context = [f"context_{i}_{key}" for i in range(1, 4) for key in labels_utter]
labels = labels_utter + labels_context



# Utterance sentiment
df['sentiment_utterance'] = df['sentiment_utterance'].apply(lambda x: [d['score']for d in x]) 
# Get only the first three contexts per row
df["sentiment_context_per_sentence"] = df["sentiment_context_per_sentence"].apply(lambda x: [d['score'] for sublist in x[:3] for d in sublist])
# Apply padding if there are fewer than 21 values
df['sentiment_context_per_sentence'] = df['sentiment_context_per_sentence'].apply(lambda x: pad_scores(x))


# Get the sentiment features
df['sentiment_features'] = df.apply(lambda row: row["sentiment_utterance"] + row["sentiment_context_per_sentence"], axis=1)


# Transpose the dataframe to get the original json format
df = df.transpose()

df.to_json('sarcasm_data_hartmann_UpC.json')

print(df.iloc[-1][0],"\n",labels)
print("Length of a row: ",len(df.iloc[-1][0]),"Length of labels",len(labels))

[0.010123579800000001, 0.0143101504, 0.0011973934, 0.057147428400000005, 0.9046183228, 0.0029129912000000003, 0.0096901413, 0.0301044136, 0.0456504636, 0.061435729300000005, 0.0034190910000000004, 0.5549920797, 0.0093617234, 0.2950364351, 0.0439296663, 0.0407607555, 0.032509129500000004, 0.0059855198, 0.2351579964, 0.5660635829, 0.07559338210000001, 0, 0, 0, 0, 0, 0, 0] 
 ['utterance_anger', 'utterance_disgust', 'utterance_fear', 'utterance_joy', 'utterance_neutral', 'utterance_sadness', 'utterance_surprise', 'context_1_utterance_anger', 'context_1_utterance_disgust', 'context_1_utterance_fear', 'context_1_utterance_joy', 'context_1_utterance_neutral', 'context_1_utterance_sadness', 'context_1_utterance_surprise', 'context_2_utterance_anger', 'context_2_utterance_disgust', 'context_2_utterance_fear', 'context_2_utterance_joy', 'context_2_utterance_neutral', 'context_2_utterance_sadness', 'context_2_utterance_surprise', 'context_3_utterance_anger', 'context_3_utterance_disgust', 'contex