# DEMO for empathy classification

In this demo, we classify the empathy in text exchanges. 

We provide the most supported pattern for the classification of the exchange. 


# Set up

In [1]:
import pickle
import pandas as pd
import numpy as np
from PBC4cip import PBC4cip
import os
import sys
import random 
import re
import CEM as cem

import seaborn as sns

from scipy.stats import ttest_ind

from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score




import test_BERT as bert_tester
import database_processing_package as data_processer
import exchange_processing as exchange_processer
from classifiers.course_grained_emotion import emotion_reductor as em_red


### Load main classification model

In [2]:
# Relevant directories
current_dir = os.getcwd()  # the notebook is expected to run from the PBC4emp folder
groups_dir = current_dir + '/../'  # Groups folder

print(current_dir)

# EmpatheticExchanges database
database_dir = '/processed_databases/EmpatheticExchanges/EmpatheticExchanges.csv'
database = pd.read_csv(current_dir + database_dir)

# Show which empathy labels are present in the database
print(database['empathy'].unique())

# Select the PBC4emp model
model_directory = current_dir + '/trained_pbc4cip.sav'

# Load the model. The context manager closes the file handle once the model
# has been deserialized (the handle was previously left open).
# SECURITY: pickle.load can execute arbitrary code embedded in the file;
# only load model files that come from a trusted source.
with open(model_directory, 'rb') as model_file:
    pbc = pickle.load(model_file)

# Print the features relevant for the model (first element of each attribute entry)
att_lst = [attribute[0] for attribute in pbc.dataset.Attributes]
print('Features for the model: ')
for attribute in att_lst:
    print(attribute, end = ' ')


/home/edwin/Desktop/projects/EERobot/classifiers/PBC4emp
[4. 5. 2. 3. 1.]
Features for the model: 
s_negative s_neutral s_positive l_negative l_neutral l_positive predictions_ER valence_speaker arousal_speaker dominance_speaker valence_listener arousal_listener dominance_listener s_word_len l_word_len agreeing acknowledging encouraging consoling sympathizing suggesting questioning wishing neutral mimicry 

### Load supplementary classification models

Please run this cell only once per kernel reset.

We load the contextual cue classifiers. These are:

* Empathetic Intent
* EPITOME mechanisms: Only Emotional Reaction is used.
* NRC_VAD Lexicon
* Sentiment

In [3]:
# Load the supplementary classifiers for the model's feature list (att_lst).
# Both return values are forwarded unchanged to
# exchange_processer.predict_exchange_empathy in the inference cell below.
# NOTE(review): presumably flag_array marks which classifiers are needed for
# att_lst and model_components holds the loaded models - confirm in exchange_processing.
flag_array, model_components = exchange_processer.load_supplementary_classifiers(att_lst)

### PBC4emp inference

In [4]:
# Read the TSC exchanges and keep only the rows flagged as usable.
video_exchanges_trajectory = current_dir + '/../../TSC_exchanges_final.csv'
all_exchanges = pd.read_csv(video_exchanges_trajectory)

# Row count before filtering
print(len(all_exchanges))

usable_mask = all_exchanges['usable'] == 1
# reset_index() keeps the previous index as an 'index' column and renumbers rows from 0,
# so later cells can address rows by position with .loc.
video_exchanges_df = all_exchanges.loc[usable_mask].reset_index()

# Row count after filtering
print(len(video_exchanges_df))

83
83


### Get master dataframe using PBC4cip

In [5]:

# Run PBC4emp on every exchange, collecting the processed feature row and
# the predicted empathy class for each (speaker, listener) utterance pair.
processed_exchange_list = []
predictions = []
utterance_pairs = zip(
    video_exchanges_df['speaker_utterance'],
    video_exchanges_df['listener_utterance'],
)
for speaker_text, listener_text in utterance_pairs:
    processed_exchange, y_pred = exchange_processer.predict_exchange_empathy(
        pbc, flag_array, 1, att_lst, speaker_text, listener_text, model_components
    )
    processed_exchange_list.append(processed_exchange)
    predictions.append(y_pred)

# Stack the per-exchange feature rows into a single frame indexed 0..n-1
concatenated_df = pd.concat(processed_exchange_list, ignore_index=True)

# Attach the annotated label and the model prediction (both aligned on the row index)
concatenated_df['label'] = video_exchanges_df['final_label']
concatenated_df['prediction'] = pd.Series(predictions)

concatenated_df


                                                                          

Unnamed: 0,s_negative,s_neutral,s_positive,l_negative,l_neutral,l_positive,predictions_ER,valence_speaker,arousal_speaker,dominance_speaker,...,encouraging,consoling,sympathizing,suggesting,questioning,wishing,neutral,mimicry,label,prediction
0,0.027176,0.796710,0.176114,0.067284,0.666969,0.265747,0,0.487000,-0.066000,0.249000,...,0.000384,0.001325,0.000619,0.000071,0.004756,0.002980,0.033929,1,1,1
1,0.092510,0.861493,0.045996,0.106203,0.808225,0.085572,0,0.122000,-0.548000,-0.004000,...,0.000067,0.000549,0.000433,0.000100,0.002623,0.006147,0.023130,1,1,1
2,0.149889,0.767086,0.083025,0.295691,0.652712,0.051597,0,0.396000,-0.280000,0.200000,...,0.000019,0.000024,0.000060,0.000236,0.000146,0.000027,0.999377,1,1,1
3,0.257841,0.689445,0.052714,0.109324,0.819122,0.071554,0,0.368000,-0.140000,0.178000,...,0.000023,0.000030,0.000104,0.000165,0.999352,0.000128,0.000041,1,3,1
4,0.003301,0.023628,0.973071,0.970134,0.024670,0.005196,0,0.942000,0.552000,0.660000,...,0.000200,0.000348,0.000210,0.000237,0.000321,0.000309,0.140843,0,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
78,0.076886,0.862537,0.060577,0.007381,0.639547,0.353072,0,0.441000,-0.092000,-0.019000,...,0.000071,0.000025,0.000143,0.000069,0.998873,0.000088,0.000198,1,3,2
79,0.276325,0.599351,0.124324,0.019061,0.600548,0.380391,0,0.832000,0.550000,0.764000,...,0.000128,0.007984,0.000146,0.000192,0.000413,0.000106,0.976220,1,2,1
80,0.001255,0.011106,0.987639,0.005270,0.254785,0.739945,0,0.500154,-0.051231,0.180462,...,0.000053,0.000174,0.000175,0.000181,0.000058,0.000276,0.004559,1,2,1
81,0.001538,0.037596,0.960865,0.002229,0.049708,0.948064,0,0.545600,0.111200,-0.028800,...,0.000149,0.000055,0.000069,0.000091,0.000055,0.000401,0.002111,0,2,1


In [6]:
# Collapse the five-point annotation scale into three classes: {1, 2} -> 1, {3} -> 2, {4, 5} -> 3.
# The mapping reads from the untouched source column (video_exchanges_df['final_label'])
# instead of from concatenated_df['label'] itself, so re-running this cell is idempotent:
# mapping the already-collapsed labels a second time would turn 2 -> 1 and 3 -> 2.
concatenated_df['label'] = video_exchanges_df['final_label'].map({1: 1, 2: 1, 3: 2, 4: 3, 5: 3})
concatenated_df['label']


0     1
1     1
2     1
3     2
4     1
     ..
78    2
79    1
80    1
81    1
82    1
Name: label, Length: 83, dtype: int64

In [7]:
# Build the master results table: identifying columns first, followed by every
# column of concatenated_df (with "label" renamed to "empathy").
master_df = concatenated_df.copy()
master_df = master_df.rename(columns={"label": "empathy"})
list_o_cols = master_df.columns.to_list()

id_cols = ['id', 'speaker_utterance', 'listener_utterance']
# Copy the identifying columns from the source exchanges (aligned on the row index)
for id_col in id_cols:
    master_df[id_col] = video_exchanges_df[id_col]

new_order_list = id_cols + list_o_cols
master_df = master_df[new_order_list]

print(master_df.columns)

# Make sure the output folder exists so the save also works on a fresh checkout
results_dir = current_dir + '/results'
os.makedirs(results_dir, exist_ok=True)
master_df.to_csv(results_dir + '/PBC4emp_TSC_master_file.csv', index=False)


# Get main metrics for the PBC4emp predictions
acc = accuracy_score(master_df['empathy'], master_df['prediction'])
# NOTE(review): predictions are shifted by -1 while the labels are passed unshifted as a
# one-column DataFrame - presumably the input format cem.get_cem expects; confirm in CEM.
cem_score = cem.get_cem(master_df['prediction']-1, master_df[['empathy']])
pre = precision_score(master_df["empathy"], master_df['prediction'], average='weighted')
f1 = f1_score(master_df["empathy"], master_df['prediction'], average='weighted')
rec = recall_score(master_df["empathy"], master_df['prediction'], average='weighted')

print(f'acc: {acc}, cem: {cem_score}, f1: {f1}, precision: {pre}, rec: {rec}')




Index(['id', 'speaker_utterance', 'listener_utterance', 's_negative',
       's_neutral', 's_positive', 'l_negative', 'l_neutral', 'l_positive',
       'predictions_ER', 'valence_speaker', 'arousal_speaker',
       'dominance_speaker', 'valence_listener', 'arousal_listener',
       'dominance_listener', 's_word_len', 'l_word_len', 'agreeing',
       'acknowledging', 'encouraging', 'consoling', 'sympathizing',
       'suggesting', 'questioning', 'wishing', 'neutral', 'mimicry', 'empathy',
       'prediction'],
      dtype='object')
acc: 0.6746987951807228, cem: 0.7431982278683815, f1: 0.6831442729984265, precision: 0.7548192771084338, rec: 0.6746987951807228


### BERT Inference

In [35]:
# Checkpoint of the 3-class BERT classifier; the predictions file name is derived from it.
bert_model_file = 'bert_classifier_3_extendeddatabase.pth'

df_bert = video_exchanges_df.rename(columns={"final_label": "empathy"})

# NOTE(review): df_bert carries the original final_label values (the 3-class mapping is only
# applied to concatenated_df), and the saved report of test_bert lists five classes (0-4)
# for a 3-class model - verify that the accuracy printed by test_bert itself is meaningful.
bert_tester.test_bert(bert_model_file, 3, df_bert)

# The predictions are read back from 'BERT_predictions_<checkpoint name without extension>.txt'
bert_predictions_path = 'BERT_predictions_' + os.path.splitext(bert_model_file)[0] + '.txt'
bert_predictions_3 = pd.read_csv(bert_predictions_path, header=None)

# Translate BERT's raw class index (0, 1, 2) into the empathy label used in master_df
bert_key_3 = [2,1,3]
bert_predictions_3_fixed = [bert_key_3[raw_class] for raw_class in bert_predictions_3[0]]
master_df['pred_bert_3'] = bert_predictions_3_fixed

# Same metrics as for PBC4emp, computed on the remapped BERT predictions
acc = accuracy_score(master_df['empathy'], master_df['pred_bert_3'])
cem_score = cem.get_cem(master_df['pred_bert_3']-1, master_df[['empathy']])
pre = precision_score(master_df["empathy"], master_df['pred_bert_3'], average='weighted')
f1 = f1_score(master_df["empathy"], master_df['pred_bert_3'], average='weighted')
rec = recall_score(master_df["empathy"], master_df['pred_bert_3'], average='weighted')


print(f'acc: {acc}, cem: {cem_score}, f1: {f1}, precision: {pre}, rec: {rec}')



Testing BERT model
bert_classifier_3_extendeddatabase.pth
Num of classes: 3
cpu
Test Accuracy: 0.2530
              precision    recall  f1-score   support

           0       0.41      0.36      0.38        45
           1       0.20      0.15      0.17        26
           2       0.04      0.12      0.06         8
           3       0.00      0.00      0.00         3
           4       0.00      0.00      0.00         1

    accuracy                           0.25        83
   macro avg       0.13      0.13      0.12        83
weighted avg       0.29      0.25      0.27        83

Closeness Evaluation Measure: 0.4107
acc: 0.37349397590361444, cem: 0.5552562155872033, f1: 0.41829627708485606, precision: 0.6253012048192772, rec: 0.37349397590361444


In [None]:
# Re-save the master file so that it also contains the 'pred_bert_3' column.
master_df.to_csv(current_dir+'/results/PBC4emp_TSC_master_file.csv', index=False)



## GPT-4o metrics

In [39]:

# Load the GPT-4o ratings of the TSC exchanges
tsc_gtp_ratings_trajectory = current_dir + '/../../gpt_results/TSC_exchanges_final_gpt-4o_rated.csv'
gpt_results = pd.read_csv(tsc_gtp_ratings_trajectory)

# Collapse the five-point annotation scale into three classes and name the column like in master_df
gpt_results['final_label'] = gpt_results['final_label'].map({1: 1, 2: 1, 3: 2, 4: 3, 5: 3})
gpt_results = gpt_results.rename(columns={"final_label": "empathy"})

# The ratings are attached to master_df by row index further down. The ratings file is not
# filtered by 'usable', so a different row count would silently misalign the two tables.
# NOTE(review): equal length does not prove equal row order - confirm both files share it.
if len(gpt_results) != len(master_df):
    raise ValueError(
        f'GPT ratings have {len(gpt_results)} rows but master_df has {len(master_df)}; '
        'rows cannot be aligned by position.'
    )

# Same metrics as for the other models, computed on the GPT ratings
acc = accuracy_score(gpt_results['empathy'], gpt_results['rate'])
cem_score = cem.get_cem(gpt_results['rate']-1, gpt_results[['empathy']])
pre = precision_score(gpt_results["empathy"], gpt_results['rate'], average='weighted')
f1 = f1_score(gpt_results["empathy"], gpt_results['rate'], average='weighted')
rec = recall_score(gpt_results["empathy"], gpt_results['rate'], average='weighted')

print(f'acc: {acc}, cem: {cem_score}, f1: {f1}, precision: {pre}, rec: {rec}')

master_df['gpt4_predictions'] = gpt_results['rate']

acc: 0.6626506024096386, cem: 0.6305908576680954, f1: 0.5521302216904954, precision: 0.6318775100401606, rec: 0.6626506024096386


In [None]:
# Re-save the master file so that it also contains the 'gpt4_predictions' column.
master_df.to_csv(current_dir+'/results/PBC4emp_TSC_master_file.csv', index=False)


Unnamed: 0,id,speaker_utterance,listener_utterance,s_negative,s_neutral,s_positive,l_negative,l_neutral,l_positive,predictions_ER,...,sympathizing,suggesting,questioning,wishing,neutral,mimicry,empathy,prediction,pred_bert_3,gpt4_predictions
0,ND_1,What is your favorite subject?\n,Nature and technology.,0.027176,0.796710,0.176114,0.067284,0.666969,0.265747,0,...,0.000619,0.000071,0.004756,0.002980,0.033929,1,1,1,2,1
1,ND_1,What are your ages?,My age is 12.,0.092510,0.861493,0.045996,0.106203,0.808225,0.085572,0,...,0.000433,0.000100,0.002623,0.006147,0.023130,1,1,1,2,1
2,ND_1,What grade are you?,6th grade. But we have a different grading sch...,0.149889,0.767086,0.083025,0.295691,0.652712,0.051597,0,...,0.000060,0.000236,0.000146,0.000027,0.999377,1,1,1,2,1
3,ND_1,We also start with zero,How is your school?,0.257841,0.689445,0.052714,0.109324,0.819122,0.071554,0,...,0.000104,0.000165,0.999352,0.000128,0.000041,1,2,1,1,1
4,ND_1,It is wonderful.,"To be honest, I hate my school. I don’t like g...",0.003301,0.023628,0.973071,0.970134,0.024670,0.005196,0,...,0.000210,0.000237,0.000321,0.000309,0.140843,0,1,1,3,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
78,AJ_3,What do you listen to?,I listen to a famous Japanese artist called Yo...,0.076886,0.862537,0.060577,0.007381,0.639547,0.353072,0,...,0.000143,0.000069,0.998873,0.000088,0.000198,1,2,2,2,1
79,AJ_3,"No, is it very famous?","Yeah, at least in Japan. He makes songs for an...",0.276325,0.599351,0.124324,0.019061,0.600548,0.380391,0,...,0.000146,0.000192,0.000413,0.000106,0.976220,1,1,1,2,1
80,AJ_4,My favorite subject at school is English. I li...,I cook and I like playing the piano,0.001255,0.011106,0.987639,0.005270,0.254785,0.739945,0,...,0.000175,0.000181,0.000058,0.000276,0.004559,1,1,1,2,1
81,AJ_4,It sounds fun. What is your favorite thing to ...,I like cupcakes.,0.001538,0.037596,0.960865,0.002229,0.049708,0.948064,0,...,0.000069,0.000091,0.000055,0.000401,0.002111,0,1,1,2,1
