In [1]:
import torch
from transformers import XLMRobertaTokenizer, XLMRobertaForSequenceClassification
import numpy as np
import pandas as pd
from tqdm import tqdm
from scipy.stats import pearsonr
import random

In [2]:
# Load the dev split (tab-separated). Later cells read its 'essay', 'empathy'
# and 'distress' columns.
val_data = pd.read_csv('data/messages_dev_features_ready_for_WS_2022.tsv', sep='\t')

In [3]:
# Checkpoints for the two regressors (file names suggest empathy / distress).
# map_location='cpu' lets a checkpoint that was saved from a GPU session load on
# a CPU-only machine; inference below runs on CPU anyway (nothing is moved to a
# device).
# NOTE: torch.load unpickles the file, so only load checkpoints you trust.
model_path = 'emp_model_multi.pt'
saved_model = torch.load(model_path, map_location='cpu')

model_path2 = 'dis_model_multi.pt'
saved_model2 = torch.load(model_path2, map_location='cpu')

In [4]:
# Shared tokenizer plus two independent XLM-R base models, each built with a
# single-output head (num_labels=1). The freshly initialised heads reported in
# the warning are expected to be overwritten when the saved state dicts are loaded.
tokenizer = XLMRobertaTokenizer.from_pretrained('xlm-roberta-base')
model = XLMRobertaForSequenceClassification.from_pretrained('xlm-roberta-base', num_labels=1)
model2 = XLMRobertaForSequenceClassification.from_pretrained('xlm-roberta-base', num_labels=1)

Some weights of the model checkpoint at xlm-roberta-base were not used when initializing XLMRobertaForSequenceClassification: ['lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'roberta.pooler.dense.bias', 'lm_head.dense.weight', 'lm_head.decoder.weight', 'roberta.pooler.dense.weight', 'lm_head.bias']
- This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.dense

In [5]:
# Copy the saved weights into the two models: `model` receives the checkpoint
# from 'emp_model_multi.pt', `model2` the one from 'dis_model_multi.pt'.
model.load_state_dict(saved_model)
model2.load_state_dict(saved_model2)

<All keys matched successfully>

In [6]:
# Put both models into evaluation mode before running inference.
model.eval()
model2.eval()
# NOTE(review): the empty print presumably exists only so the cell does not echo
# the model repr returned by the last eval() call — confirm before removing.
print("")




In [7]:
def predict_score(sentences, batch_size=32):
  """Score texts with both regressors (`model` and `model2`).

  The texts are tokenized and pushed through the models in mini-batches so the
  whole dataset is never padded and forwarded as one giant batch. Because each
  mini-batch is padded to its own longest sequence, results can differ from a
  single-batch run by floating-point noise only.

  Args:
    sentences: a single string or an iterable of strings.
    batch_size: number of texts per forward pass (must be >= 1).

  Returns:
    (predictions_emp, predictions_dis): two 1-D numpy arrays with one value per
    input text, taken from `model` and `model2` respectively. Both are empty
    arrays when `sentences` is empty.

  Raises:
    ValueError: if `batch_size` is smaller than 1.
  """
  if batch_size < 1:
    raise ValueError("batch_size must be >= 1")

  # A bare string is one text, not an iterable of characters.
  if isinstance(sentences, str):
    sentences = [sentences]
  else:
    sentences = list(sentences)

  if not sentences:
    return np.empty(0, dtype=np.float32), np.empty(0, dtype=np.float32)

  emp_chunks = []
  dis_chunks = []
  for start in range(0, len(sentences), batch_size):
    batch = sentences[start:start + batch_size]
    inputs = tokenizer(batch, padding=True, truncation=True, max_length=512, return_tensors='pt')
    input_ids = inputs['input_ids']
    attention_mask = inputs['attention_mask']
    with torch.no_grad():
      outputs = model(input_ids, attention_mask=attention_mask)
      outputs2 = model2(input_ids, attention_mask=attention_mask)
    # Logits are (batch, 1) because the heads were built with num_labels=1.
    emp_chunks.append(outputs.logits.squeeze(1).detach().cpu().numpy())
    dis_chunks.append(outputs2.logits.squeeze(1).detach().cpu().numpy())

  predictions_emp = np.concatenate(emp_chunks)
  predictions_dis = np.concatenate(dis_chunks)
  return predictions_emp,predictions_dis

In [8]:
# Reference empathy / distress values from the dev file; the cells below
# correlate the model predictions against these lists.
acu_emp = val_data['empathy'].tolist()
acu_dis = val_data['distress'].tolist()

In [9]:
# Essays from the dev file, used as the baseline inputs for prediction.
sen_org = val_data['essay'].tolist()

In [10]:
# Sanity check: the essays should be a plain Python list before tokenization.
type(sen_org)

list

In [11]:
# Baseline predictions on the dev essays; every later comparison cell reuses these.
pred_emp_org, pred_dis_org = predict_score(sen_org)

In [12]:
# Peek at the first baseline prediction from `model`. The former bare
# `pred_emp_org.tolist()` call was removed: it built a list and threw it away
# without changing the array.
print(pred_emp_org[0])

3.6287787


In [13]:
# Peek at the first baseline prediction from `model2`. The former bare
# `pred_dis_org.tolist()` call was removed: it built a list and threw it away
# without changing the array.
print(pred_dis_org[0])

3.7899573


In [14]:
# Pearson correlation between baseline predictions and the reference empathy values.
# NOTE(review): the recorded output is ~0.01, i.e. essentially no correlation —
# confirm that the checkpoint, tokenizer and label column really belong together.
corr_coef, p_value = pearsonr(pred_emp_org, acu_emp)

print("Pearson correlation coefficient emp stack:", corr_coef)

Pearson correlation coefficient emp stack: 0.009979168937770495


In [15]:
# Pearson correlation between baseline predictions and the reference distress values.
# This rebinds `corr_coef` / `p_value` from the previous cell.
# NOTE(review): the recorded output is ~0.07, which is very weak — confirm the setup.
corr_coef, p_value = pearsonr(pred_dis_org, acu_dis)

print("Pearson correlation coefficient dis stack:", corr_coef)

Pearson correlation coefficient dis stack: 0.0713244105512847


In [16]:
def get_positive_score(l_aug,org,aug,threshold=0.06,n_examples=5):
  """Report how often two prediction sequences disagree and show a few examples.

  Position i counts as a disagreement when abs(org[i] - aug[i]) > threshold.
  The share of such positions is printed as "Error score:", followed by up to
  `n_examples` disagreeing essay pairs with their scores (chosen at random when
  there are more than `n_examples`).

  Args:
    l_aug: DataFrame with an 'essay' column, row-aligned with `org` / `aug`.
    org: baseline predictions (indexable, supports len()).
    aug: predictions to compare; indexed over the length of `org`.
    threshold: absolute difference above which a pair counts as an error.
    n_examples: maximum number of example pairs printed.

  Returns:
    None; results are printed.

  Note:
    The baseline essay text is read from the notebook-level `val_data` frame,
    so `org` must be aligned with `val_data`.
  """
  total = len(org)
  wrong_list = [i for i in range(total) if abs(org[i]-aug[i])>threshold]

  # An empty input has no disagreements; avoid dividing by zero.
  error_rate = len(wrong_list)/total if total else 0.0
  print("Error score:",error_rate)

  if len(wrong_list)<=n_examples:
    shown = wrong_list
  else:
    shown = random.sample(wrong_list, n_examples)

  for j in shown:
    print(val_data['essay'].values[j])
    print("score:",org[j])
    print(l_aug['essay'].values[j])
    print("score:",aug[j])

In [17]:
def get_scores(l_aug,pred_emp_org, acu_emp, pred_dis_org, acu_dis,pred_emp_aug, pred_dis_aug):
  """Print the evaluation summary for one alternative version of the essays.

  First prints the Pearson correlation of the `*_aug` predictions against the
  reference values (`acu_emp`, `acu_dis`), then, for empathy and distress in
  turn, calls `get_positive_score` to compare the `*_org` predictions with the
  `*_aug` predictions.

  Args:
    l_aug: DataFrame holding the alternative essays ('essay' column).
    pred_emp_org, pred_dis_org: baseline predictions.
    acu_emp, acu_dis: reference values.
    pred_emp_aug, pred_dis_aug: predictions on the essays in `l_aug`.

  Returns:
    None; everything is printed. The printed labels are kept verbatim.
  """
  emp_r, _ = pearsonr(pred_emp_aug, acu_emp)
  print("Pearson Fution empathy:", emp_r)

  dis_r, _ = pearsonr(pred_dis_aug, acu_dis)
  print("Pearson Stack distress:", dis_r)

  comparisons = (
    ("Empathy:", pred_emp_org, pred_emp_aug),
    ("Distress:", pred_dis_org, pred_dis_aug),
  )
  for heading, baseline, alternative in comparisons:
    print(heading)
    get_positive_score(l_aug, baseline, alternative)

In [18]:
# Bengali (`bn`) version of the essays: predict with both models, then report
# correlation with the reference values and drift from the baseline predictions.
data_bn = pd.read_csv('t_data/test_data_bn.tsv', sep='\t')
sen_bn = data_bn['essay'].tolist()
pred_emp_bn, pred_dis_bn = predict_score(sen_bn)
get_scores(
    l_aug=data_bn,
    pred_emp_org=pred_emp_org,
    acu_emp=acu_emp,
    pred_dis_org=pred_dis_org,
    acu_dis=acu_dis,
    pred_emp_aug=pred_emp_bn,
    pred_dis_aug=pred_dis_bn,
)

Pearson Fution empathy: -0.10676489678414297
Pearson Stack distress: 0.03663533180944396
Empathy:
Error score: 0.0
Distress:
Error score: 0.12962962962962962
I can't believe Russel Crowe would do something like this. Why was Russel Crowe even involved in that event if he is so racist and can't stand Azealia Banks? Anyway, since no one is able to attest to her story being true, I have to assume she is mentally ill. Maybe she was drunk or high and hallucinating. Maybe she has psychosis. I really think she needs help! There is zero proof that Russel Crowe did any of the things she is claiming.
score: 3.874701
আমি বিশ্বাস করতে পারছি না রাসেল ক্রো এরকম কিছু করবে। কেন রাসেল ক্রো এমনকি সেই ইভেন্টে জড়িত ছিলেন যদি তিনি এত বর্ণবাদী হন এবং আজেলিয়া ব্যাঙ্কসকে দাঁড়াতে না পারেন? যাইহোক, যেহেতু কেউ তার গল্পটি সত্য বলে প্রমাণ করতে সক্ষম নয়, আমাকে ধরে নিতে হবে সে মানসিকভাবে অসুস্থ। হয়তো সে মাতাল বা উচ্চ এবং হ্যালুসিনেটিং ছিল. হয়তো তার সাইকোসিস আছে। আমি সত্যিই তার সাহায্য প্রয়োজন মনে হয়! শূন্য প

In [19]:
# German (`de`) version of the essays: predict with both models, then report
# correlation with the reference values and drift from the baseline predictions.
data_de = pd.read_csv('t_data/test_data_de.tsv', sep='\t')
sen_de = data_de['essay'].tolist()
pred_emp_de, pred_dis_de = predict_score(sen_de)
get_scores(
    l_aug=data_de,
    pred_emp_org=pred_emp_org,
    acu_emp=acu_emp,
    pred_dis_org=pred_dis_org,
    acu_dis=acu_dis,
    pred_emp_aug=pred_emp_de,
    pred_dis_aug=pred_dis_de,
)

Pearson Fution empathy: -0.06622404245792637
Pearson Stack distress: 0.044064594110332034
Empathy:
Error score: 0.0
Distress:
Error score: 0.13703703703703704
I find it crazy that there are still situations these days in which people cannot receive donations and help during and emergency. We are supposed to be a civilized race and yet even during tragedies such as this hurricane the greed of man shines through. I hesitate to donate to a lot of organizations because I feel that a lot of it may be siphoned off or stolen and only a portion of my help reaches the destination. I wish there was a direct way to help.
score: 3.8011258
Ich finde es verrückt, dass es heutzutage immer noch Situationen gibt, in denen Menschen keine Spenden und Hilfe in Notfällen erhalten können. Wir sollen eine zivilisierte Rasse sein, und doch scheint selbst bei Tragödien wie diesem Hurrikan die Gier der Menschen durch. Ich zögere, an viele Organisationen zu spenden, weil ich das Gefühl habe, dass viel davon abge

In [20]:
# Greek (`el`) version of the essays: predict with both models, then report
# correlation with the reference values and drift from the baseline predictions.
data_el = pd.read_csv('t_data/test_data_el.tsv', sep='\t')
sen_el = data_el['essay'].tolist()
pred_emp_el, pred_dis_el = predict_score(sen_el)
get_scores(
    l_aug=data_el,
    pred_emp_org=pred_emp_org,
    acu_emp=acu_emp,
    pred_dis_org=pred_dis_org,
    acu_dis=acu_dis,
    pred_emp_aug=pred_emp_el,
    pred_dis_aug=pred_dis_el,
)

Pearson Fution empathy: 0.04165922811806646
Pearson Stack distress: 0.10158036677647157
Empathy:
Error score: 0.0
Distress:
Error score: 0.17777777777777778
I'm torn on the whole ivory trade situation. I get that farmers and people in their territory are often bothered by elephants (i.e. through crop destruction, etc.), but it doesn't seem fair to cause so much suffering to these animals. I feel bad that the locals are likely poor and feel that they have little choice, but I hate to think of elephants suffering.
score: 3.786398
Είμαι διχασμένη με την όλη κατάσταση του εμπορίου ελεφαντόδοντου. Καταλαβαίνω ότι οι αγρότες και οι άνθρωποι στην επικράτειά τους ενοχλούνται συχνά από ελέφαντες (δηλαδή μέσω της καταστροφής των καλλιεργειών, κ.λπ.), αλλά δεν φαίνεται δίκαιο να προκαλέσουμε τόσο μεγάλο πόνο σε αυτά τα ζώα. Αισθάνομαι άσχημα που οι ντόπιοι είναι πιθανώς φτωχοί και νιώθω ότι δεν έχουν πολλές επιλογές, αλλά μισώ να σκέφτομαι τους ελέφαντες που υποφέρουν.
score: 3.9978364
Hey! Did y

In [21]:
# Hindi (`hi`) version of the essays: predict with both models, then report
# correlation with the reference values and drift from the baseline predictions.
data_hi = pd.read_csv('t_data/test_data_hi.tsv', sep='\t')
sen_hi = data_hi['essay'].tolist()
pred_emp_hi, pred_dis_hi = predict_score(sen_hi)
get_scores(
    l_aug=data_hi,
    pred_emp_org=pred_emp_org,
    acu_emp=acu_emp,
    pred_dis_org=pred_dis_org,
    acu_dis=acu_dis,
    pred_emp_aug=pred_emp_hi,
    pred_dis_aug=pred_dis_hi,
)

Pearson Fution empathy: 0.040446413647949396
Pearson Stack distress: 0.07584135703355131
Empathy:
Error score: 0.0
Distress:
Error score: 0.15185185185185185
I do not find is unbelievable that many veterans have sleep disorders or trouble sleeping at night. My grandfather was a Vietnam War veteran and he would wake in the middle of night screaming three or four days out of the week. PTSD and anxiety that stems from being overseas at war is something that should be taken very serious when it comes to Veterans who have issues later in life. Not getting enough sleep at night can also cause a whole new host of health issues.
score: 3.8007064
मुझे नहीं लगता कि यह अविश्वसनीय है कि कई दिग्गजों को नींद की बीमारी या रात में सोने में परेशानी होती है। मेरे दादाजी वियतनाम युद्ध के अनुभवी थे और वे सप्ताह में तीन या चार दिन चिल्लाते हुए आधी रात को जाग जाते थे। पीटीएसडी और चिंता जो युद्ध में विदेशों में होने से उत्पन्न होती है, कुछ ऐसी चीज है जिसे बहुत गंभीरता से लिया जाना चाहिए जब वेटरनर्स की बात आत

In [22]:
# `nel` version of the essays (the recorded sample looks like Nepali): predict
# with both models, then report correlation with the reference values and drift
# from the baseline predictions.
data_nel = pd.read_csv('t_data/test_data_nel.tsv', sep='\t')
sen_nel = data_nel['essay'].tolist()
pred_emp_nel, pred_dis_nel = predict_score(sen_nel)
get_scores(
    l_aug=data_nel,
    pred_emp_org=pred_emp_org,
    acu_emp=acu_emp,
    pred_dis_org=pred_dis_org,
    acu_dis=acu_dis,
    pred_emp_aug=pred_emp_nel,
    pred_dis_aug=pred_dis_nel,
)

Pearson Fution empathy: -0.06304422050070489
Pearson Stack distress: 0.06418706452215772
Empathy:
Error score: 0.0
Distress:
Error score: 0.07407407407407407
I do not find is unbelievable that many veterans have sleep disorders or trouble sleeping at night. My grandfather was a Vietnam War veteran and he would wake in the middle of night screaming three or four days out of the week. PTSD and anxiety that stems from being overseas at war is something that should be taken very serious when it comes to Veterans who have issues later in life. Not getting enough sleep at night can also cause a whole new host of health issues.
score: 3.8007064
धेरै दिग्गजहरूलाई निद्रा सम्बन्धी विकारहरू वा रातमा सुत्न समस्या भएको कुरा मलाई अविश्वसनीय लाग्दैन। मेरो हजुरबुबा भियतनाम युद्धका दिग्गज हुनुहुन्थ्यो र उहाँ हप्ताको तीन वा चार दिन चिच्याउदै मध्यरातमा उठ्नुहुन्छ। PTSD र चिन्ता जुन युद्धमा विदेशमा हुनबाट उत्पन्न हुन्छ जुन धेरै गम्भीर रूपमा लिनु पर्छ जब यो जीवनमा पछि समस्याहरू भएका वेटरहरूको कुरा आउँछ। रा

In [23]:
# Swahili (`sw`) version of the essays: predict with both models, then report
# correlation with the reference values and drift from the baseline predictions.
data_sw = pd.read_csv('t_data/test_data_sw.tsv', sep='\t')
sen_sw = data_sw['essay'].tolist()
pred_emp_sw, pred_dis_sw = predict_score(sen_sw)
get_scores(
    l_aug=data_sw,
    pred_emp_org=pred_emp_org,
    acu_emp=acu_emp,
    pred_dis_org=pred_dis_org,
    acu_dis=acu_dis,
    pred_emp_aug=pred_emp_sw,
    pred_dis_aug=pred_dis_sw,
)

Pearson Fution empathy: -0.06595314087195321
Pearson Stack distress: 0.06262843281893489
Empathy:
Error score: 0.0
Distress:
Error score: 0.05925925925925926
Why can't people just live in peace?  These things happen way too often.  What does killing innocent people do to further an offender's cause?  I can't imagine the panic and terror the attendees must have felt.  It makes me nervous every time I'm in a crowded venue.  You like to think it can't happen to you, but honestly, you just don't know.  We all have such a short time here, we should be able to enjoy it without fear.
score: 3.7949452
Kwa nini watu hawawezi tu kuishi kwa amani? Mambo haya hutokea mara nyingi sana. Je, kuua watu wasio na hatia kunafanya nini ili kuendeleza sababu ya mkosaji? Siwezi kufikiria hofu na woga ambao waliohudhuria lazima walihisi. Hunifanya niwe na wasiwasi kila ninapokuwa kwenye ukumbi wenye watu wengi. Unapenda kufikiria kuwa haiwezi kutokea kwako, lakini kwa uaminifu, hujui tu. Sote tuna muda mfupi

In [24]:
# `tel` version of the essays (the recorded sample looks like Telugu): predict
# with both models, then report correlation with the reference values and drift
# from the baseline predictions.
data_tel = pd.read_csv('t_data/test_data_tel.tsv', sep='\t')
sen_tel = data_tel['essay'].tolist()
pred_emp_tel, pred_dis_tel = predict_score(sen_tel)
get_scores(
    l_aug=data_tel,
    pred_emp_org=pred_emp_org,
    acu_emp=acu_emp,
    pred_dis_org=pred_dis_org,
    acu_dis=acu_dis,
    pred_emp_aug=pred_emp_tel,
    pred_dis_aug=pred_dis_tel,
)

Pearson Fution empathy: -0.013458585127990811
Pearson Stack distress: 0.07216117392972365
Empathy:
Error score: 0.0
Distress:
Error score: 0.11851851851851852
I just read an article about how animals all over the world in zoo's are starving for various reasons. it is pretty sad to see that people take these wild animals to show off then can't afford to pay for their food, starving them sometimes to death. Poor animals. Hope that they can resolve this soon so no more animals have to suffer. Hope the people in those areas are alright as well.
score: 3.7851408
జంతుప్రదర్శనశాలలో ప్రపంచవ్యాప్తంగా ఉన్న జంతువులు వివిధ కారణాల వల్ల ఎలా ఆకలితో అలమటిస్తున్నాయనే దాని గురించి నేను ఒక కథనాన్ని చదివాను. ప్రజలు ఈ అడవి జంతువులను ప్రదర్శన కోసం తీసుకువెళ్లడం చాలా విచారకరం, అప్పుడు వాటి ఆహారం కోసం డబ్బు చెల్లించలేక, కొన్నిసార్లు వాటిని ఆకలితో చనిపోతుంది. పేద జంతువులు. వారు దీన్ని త్వరగా పరిష్కరిస్తారని ఆశిస్తున్నాము, తద్వారా ఇకపై జంతువులు బాధపడకూడదు. ఆయా ప్రాంతాల్లోని ప్రజలు కూడా బాగుంటారని ఆశిస్తున్నాం.
