In [2]:
import json
from sklearn.metrics import r2_score
from sklearn.metrics import f1_score
from sklearn.preprocessing import MultiLabelBinarizer


In [10]:
# Accuracy of the predicted negotiation priorities on the CaSiNo control set.
# NOTE: despite the .jsonl extension the file is parsed with json.load, so it
# has to hold one JSON document (iterated below as a sequence of records).
with open('controls/CaSiNo.jsonl', 'r') as fin:
    CaSiNo = json.load(fin)
test_set_size = len(CaSiNo)
agent1, agent2, both = 0, 0, 0
for item in CaSiNo:
    # The first '.'-separated segment is scored for Agent1, the second for Agent2.
    truth_parts = item['ground_truth'].split('.')
    response_parts = item['response'].split('.')
    if response_parts[0] == truth_parts[0]:
        agent1 += 1
    # A response that has no second segment is scored as a miss for Agent2
    # rather than aborting the whole evaluation with an IndexError.
    if len(response_parts) > 1 and response_parts[1] == truth_parts[1]:
        agent2 += 1
    if item['response'] == item['ground_truth']:
        both += 1
# The ratios are fractions of the test set; the '%' format type multiplies by
# 100 so the printed numbers really are percentages.
print(
    'Accuracy for predicting priorities:\n'
    f'Agent1: {agent1 / test_set_size:.2%}\n'
    f'Agent2: {agent2 / test_set_size:.2%}\n'
    f'Both Agents: {both / test_set_size:.2%}'
)

Accuracy for predicting priorities:
Agent1: 0.48 percents
Agent2: 0.54 percents
Both Agents: 0.25 percents


In [16]:
# R^2 between the prices found in the ground-truth text and in the model
# response of every CRAIGSLISTBARGAIN control record.
with open('controls/CRAIGSLISTBARGAIN.jsonl', 'r') as fin:
    CRAIGSLISTBARGAIN = json.load(fin)

buyer_price_actual, buyer_price_predicted = [], []
seller_price_actual, seller_price_predicted = [], []
for item in CRAIGSLISTBARGAIN:
    # Prices are picked by position in the space-split text: the
    # second-to-last token for the buyer and the token at index 7 for the
    # seller. Commas (thousands separators or trailing punctuation) are
    # removed before the integer conversion.
    truth_tokens = item['ground_truth'].split(' ')
    buyer_price_actual.append(int(truth_tokens[-2].replace(',', '')))
    reply_tokens = item['response'].split(' ')
    buyer_price_predicted.append(int(reply_tokens[-2].replace(',', '')))
    seller_price_actual.append(int(truth_tokens[7].replace(',', '')))
    seller_price_predicted.append(int(reply_tokens[7].replace(',', '')))

buyer_r2 = r2_score(buyer_price_actual, buyer_price_predicted)
seller_r2 = r2_score(seller_price_actual, seller_price_predicted)
print(f"R^2 Score for predicted buyer's price is {buyer_r2}\nR^2 Score for predicted seller's price is {seller_r2}")

R^2 Score for predicted buyer's price is 0.9162770767996156
R^2 Score for predicted seller's price is 0.9470176479894197


In [3]:
# Score the NegotiationToM control outputs: micro/macro F1 over the intent
# labels of both agents, plus exact-match accuracy of the desire labels.
# NOTE: the .jsonl file is parsed with json.load, so it has to hold a single
# JSON document rather than one object per line.
with open('controls/NegotiationToM.jsonl', 'r') as fin:
    NegotiationToM = json.load(fin)


def _slot_labels(text, keyword, period_ends_label=False):
    """Return the label that follows each occurrence of `keyword` in `text`.

    For every piece of `text` after an occurrence of `keyword`, the segment
    behind the first ':' is cut at its first ',' (and also at its first '.'
    when `period_ends_label` is true) and stripped of whitespace.
    Raises IndexError when a piece contains no ':'.
    """
    labels = []
    for chunk in text.split(keyword)[1:]:
        value = chunk.split(':')[1]
        if period_ends_label:
            value = value.replace('.', ',')
        labels.append(value.split(',')[0].strip())
    return labels


def _bracket_labels(text, group):
    """Return the comma-separated labels inside the `group`-th '[...]' span of `text`.

    Whitespace around each label is stripped so that '[A, B]' and '[A,B]'
    give the same labels. Raises IndexError when `text` contains fewer than
    `group` opening brackets.
    NOTE(review): an empty span '[]' still yields [''] (one empty label);
    decide whether that should count as "no labels" instead.
    """
    inner = text.split('[')[group].split(']')[0]
    return [label.strip() for label in inner.split(',')]


desire_actual, desire_predicted = [], []
belief_actual, belief_predicted = [], []
a1_intent_actual, a1_intent_predicted = [], []
a2_intent_actual, a2_intent_predicted = [], []

for item in NegotiationToM:
    truth, response = item['ground_truth'], item['response']

    # Unparseable responses are replaced by six '_' placeholders, which can
    # never equal a real label list and therefore always score as wrong.
    desire_actual.append(_slot_labels(truth, 'Desire '))
    try:
        desire_guess = _slot_labels(response, 'Desire ')
    except Exception:
        desire_guess = ['_'] * 6
    desire_predicted.append(desire_guess)

    # In the belief section a '.' may terminate a label, hence period_ends_label.
    belief_actual.append(_slot_labels(truth, 'Belief ', period_ends_label=True))
    try:
        belief_guess = _slot_labels(response, 'Belief ', period_ends_label=True)
    except Exception:
        belief_guess = ['_'] * 6
    if len(belief_guess) != 6:
        belief_guess = ['_'] * 6
    belief_predicted.append(belief_guess)

    # First bracket span -> Agent 1 intents, second bracket span -> Agent 2 intents.
    a1_intent_actual.append(_bracket_labels(truth, 1))
    a2_intent_actual.append(_bracket_labels(truth, 2))
    for group, bucket in ((1, a1_intent_predicted), (2, a2_intent_predicted)):
        try:
            bucket.append(_bracket_labels(response, group))
        except Exception:
            # A response without that bracket span predicts no intents at all.
            bucket.append([])

# Both agents' intents are pooled into one label set per record before scoring.
intent_actual = [a1 + a2 for a1, a2 in zip(a1_intent_actual, a2_intent_actual)]
intent_predicted = [a1 + a2 for a1, a2 in zip(a1_intent_predicted, a2_intent_predicted)]

# The binarizer is fitted on the ground-truth labels only, so predicted labels
# that never occur in the ground truth are not represented in y_pred_binary.
mlb = MultiLabelBinarizer()
y_true_binary = mlb.fit_transform(intent_actual)
y_pred_binary = mlb.transform(intent_predicted)

# Calculate F1 score
f1_micro = f1_score(y_true_binary, y_pred_binary, average='micro')
f1_macro = f1_score(y_true_binary, y_pred_binary, average='macro')

print(f"F1 Score (Micro): {f1_micro}")
print(f"F1 Score (Macro): {f1_macro}")

# Desire accuracy: a record counts only when its whole label list matches exactly.
test_set_size = len(NegotiationToM)
desire_accuracy = 0
for predicted, actual in zip(desire_predicted, desire_actual):
    if predicted == actual:
        desire_accuracy += 1
print(desire_accuracy/test_set_size)

F1 Score (Micro): 0.6426656453466105
F1 Score (Macro): 0.4827246681902486
0.3558368495077356


In [34]:
# Scratch cell: re-imports f1_score and calls it with a single positional
# argument, a list of six 'Not Given' labels.
# NOTE(review): no second argument (the predictions to compare against) is
# passed, so this call looks incomplete — confirm what it was meant to
# evaluate, or remove the cell.
from sklearn.metrics import f1_score
f1_score(['Not Given', 'Not Given', 'Not Given', 'Not Given', 'Not Given', 'Not Given'], )

The intent of the Agent 1 is [Build-Rapport,Describe-Need] and the intent of the Agent 2 is [Build-Rapport] Regarding the Agent 1, Desire High: Food, Desire Medium: Not Given, Desire Low: Not Given,  Belief High: Not Given, Belief Medium: Not Given, Belief Low: Not Given. Regarding the Agent 2, Desire High: Not Given, Desire Medium: Not Given, Desire Low: Not Given,  Belief High: Food, Belief Medium: Not Given, Belief Low: Not Given.
The intent of the Agent 1 is [Build-Rapport,Describe-Need] and the intent of the Agent 2 is [Describe-Need] Regarding the Agent 1, Desire High: Not Given, Desire Medium: Not Given, Desire Low: Firewood,  Belief High: Food, Belief Medium: Not Given, Belief Low: Not Given. Regarding the Agent 2, Desire High: Food, Desire Medium: Not Given, Desire Low: Not Given,


In [13]:
import json

# Count (and list) the agent_1 turns of the NegotiationToM test split whose
# intent entry mentions Show-Empathy.
count = 0
with open('data/NegotiationToM/test.json', 'r') as fin:
    data = json.load(fin)

for item in data:
    turn_intents = item['intents']
    turn_texts = item['dialogue']
    # Records whose intent list and dialogue differ in length are skipped.
    if len(turn_intents) != len(turn_texts):
        continue
    for idx, intent in enumerate(turn_intents):
        if 'Show-Empathy' in intent and turn_texts[idx].startswith('agent_1'):
            print(item['dialogue_id'], intent, idx)
            count += 1

print(count)
        

7-4 Describe-Need,Show-Empathy 5
7-4 Describe-Need,Show-Empathy 7
10-4 Describe-Need,Discover-Preference,No-Need,Promote-Coordination,Show-Empathy 5
11-4 Describe-Need,Show-Empathy 6
19-5 Promote-Coordination,Show-Empathy 7
28-6 Show-Empathy 7
40-5 Show-Empathy,Undermine-Requirements 5
40-5 Show-Empathy 9
40-5 Build-Rapport,Show-Empathy 11
56-4 Show-Empathy 4
69-5 Show-Empathy 6
83-10 Promote-Coordination,Show-Empathy,Undermine-Requirements 10
84-4 Describe-Need,Show-Empathy 1
85-4 Show-Empathy 6
85-4 Show-Empathy 8
94-5 No-Need,Show-Empathy 9
101-5 Describe-Need,Show-Empathy 4
101-5 No-Need,Show-Empathy 10
103-5 Show-Empathy 10
108-5 Build-Rapport,Show-Empathy 10
119-4 Promote-Coordination,Show-Empathy,Undermine-Requirements 5
126-5 Build-Rapport,Describe-Need,Show-Empathy 9
131-4 Show-Empathy 3
131-4 No-Need,Show-Empathy 5
142-8 Describe-Need,Show-Empathy 9
142-8 Show-Empathy,Undermine-Requirements 13
142-8 Show-Empathy,Undermine-Requirements 15
146-6 No-Need,Show-Empathy 5
159-6 Sho