In [69]:
import json
import pandas as pd
import warnings
from difflib import SequenceMatcher
from IPython.display import display
import random
from copy import deepcopy
import numpy as np

In [70]:
# Silence every warning for the rest of the kernel session: no category, message
# or module filter is given, so this also hides pandas deprecation notices.
warnings.filterwarnings("ignore")

In [71]:
def _load_triplets(path):
    """Read one model's extracted triplets from the JSON file at ``path``."""
    # JSON text is UTF-8; be explicit instead of relying on the platform default
    # encoding (which is not UTF-8 on every operating system).
    with open(path, 'r', encoding='utf-8') as file:
        return json.load(file)


# NOTE(review): the last entry of the GPT and Mixtral outputs is dropped while the
# Llama output is kept whole — presumably to align the three lists; TODO confirm.
gpt_triplets = _load_triplets('../output/gpt_triplets.json')[:-1]
llama_triplets = _load_triplets('../output/llama_triplets.json')
mixtral_triplets = _load_triplets('../output/mixtral_triplets.json')[:-1]

In [72]:
# Fixed seed so the same documents are drawn on every fresh run of the notebook.
SAMPLE_SEED = 100
# Number of distinct document positions drawn from the GPT triplet list.
SAMPLE_SIZE = 50
# Only the lowest positions of the sorted draw are kept.
# NOTE(review): the reason for keeping 23 is not visible here — TODO confirm.
N_SELECTED = 23

random.seed(SAMPLE_SEED)
sampled_indices = random.sample(range(len(gpt_triplets)), k=SAMPLE_SIZE)
random_indices = sorted(sampled_indices)[:N_SELECTED]

In [73]:
def clean_tripplet(triplet):
    """Return a new mapping with every field of ``triplet`` cut to a common length.

    The common length is the smaller of ``len(triplet['object'])`` and
    ``len(triplet['subject'])``; every value in the mapping is sliced to that
    many leading items. The input mapping itself is not modified.
    """
    keep = min(len(triplet['object']), len(triplet['subject']))
    # Slicing builds a fresh sequence per key, so the caller's containers stay intact.
    return {field: items[:keep] for field, items in triplet.items()}

In [74]:
def triplets_2models(model_a, model_b, show=False, threshold=0.5):
    """Keep the triplets of ``model_a`` that have a fuzzy match in ``model_b``.

    A triplet of ``model_a`` matches a triplet of ``model_b`` when both of the
    following ``SequenceMatcher`` ratios (computed on lower-cased text) are
    strictly greater than ``threshold``:

    * the mean of the ``'subject'`` ratio and the ``'object'`` ratio;
    * the ``'relationship'`` ratio.

    Parameters
    ----------
    model_a, model_b : anything ``pd.DataFrame`` accepts
        Column-oriented triplets; both need string columns ``'subject'``,
        ``'object'`` and ``'relationship'``.
    show : bool, default False
        When true, display the resulting table if it is not empty.
    threshold : float, default 0.5
        Similarity both scores must exceed.

    Returns
    -------
    dict
        ``{column: list}`` holding the de-duplicated matching rows of
        ``model_a`` in their original order, with all of ``model_a``'s columns.
    """
    all_triplets_a = pd.DataFrame(model_a)
    all_triplets_b = pd.DataFrame(model_b)
    entity_columns = ('subject', 'object')

    def _similarity(left, right):
        # Case-insensitive similarity in [0, 1].
        return SequenceMatcher(None, left.lower(), right.lower()).ratio()

    rows_a = all_triplets_a.to_dict(orient='records')
    rows_b = all_triplets_b.to_dict(orient='records')

    # Collect plain records and build the frame once, instead of growing a
    # DataFrame row by row through the private ``_append``.
    matched_rows = []
    for row_a in rows_a:
        for row_b in rows_b:
            entity_score = sum(
                _similarity(row_a[column], row_b[column]) for column in entity_columns
            ) / len(entity_columns)
            relationship_score = _similarity(row_a['relationship'], row_b['relationship'])
            if entity_score > threshold and relationship_score > threshold:
                matched_rows.append(row_a)
                # One match is enough; further matches would only add duplicates.
                break

    similar_triplets = (
        pd.DataFrame(matched_rows, columns=all_triplets_a.columns)
        .drop_duplicates()  # model_a itself may contain repeated triplets
        .reset_index(drop=True)
    )

    if show and len(similar_triplets) > 0:
        display(similar_triplets)

    return similar_triplets.to_dict(orient='list')

In [75]:
def consensus_all_models():
    """Compute, for each sampled document, the triplets all three models agree on.

    For every position in the module-level ``random_indices`` the GPT, Llama and
    Mixtral triplets are length-normalised with ``clean_tripplet``. The GPT
    triplets that match Llama are then matched against Mixtral; that second
    comparison is run with ``show=True``, so non-empty results are displayed.

    Returns a list with one ``triplets_2models`` result per sampled document,
    in the order of ``random_indices``.
    """

    def _consensus_for(document_index):
        gpt_doc = clean_tripplet(gpt_triplets[document_index])
        llama_doc = clean_tripplet(llama_triplets[document_index])
        mixtral_doc = clean_tripplet(mixtral_triplets[document_index])

        # GPT is the reference: surviving rows are always GPT's own wording.
        gpt_llama_agreed = triplets_2models(gpt_doc, llama_doc)
        return triplets_2models(gpt_llama_agreed, mixtral_doc, show=True)

    return [_consensus_for(document_index) for document_index in random_indices]

In [76]:
def evaluate_consensus(human_evaluation_csv, results, model_triplets, document_indices=None):
    """Look up the human evaluation rows that correspond to consensus triplets.

    Parameters
    ----------
    human_evaluation_csv : str or path-like
        CSV file read with ``pd.read_csv``. Its first two columns are used as
        the merge keys (document number, then 1-based triplet number).
    results : iterable
        One consensus result per document, each accepted by ``pd.DataFrame``.
        The i-th item belongs to the document at ``document_indices[i]``.
    model_triplets : sequence
        Per-document triplets of the model the human evaluation refers to;
        indexed by document number and normalised with ``clean_tripplet``.
    document_indices : sequence of int, optional
        Document number for each item of ``results``. Defaults to the
        module-level ``random_indices``.

    Returns
    -------
    pandas.DataFrame
        Inner merge of the human evaluation with the (document, triplet) keys
        of the consensus triplets, cast to ``int32``.
    """
    if document_indices is None:
        document_indices = random_indices

    human_evaluation = pd.read_csv(human_evaluation_csv)
    key_columns = human_evaluation.columns[0:2]
    document_column = key_columns[0]
    triplet_column = key_columns[1]

    # Collect the keys as plain records and build the frame once, instead of
    # growing a DataFrame row by row through the private ``_append``.
    consensus_keys = []
    for position, result_item in enumerate(results):
        document_n = document_indices[position]
        document_result = pd.DataFrame(result_item)
        all_triplets_in_document = pd.DataFrame(clean_tripplet(model_triplets[document_n]))

        for _, triplet in document_result.iterrows():
            # Count, per model triplet, how many fields equal the consensus
            # triplet; the row with the most equal fields is taken as its source.
            matching_fields = all_triplets_in_document.eq(triplet).sum(axis=1).to_numpy()
            triplet_n = int(np.argmax(matching_fields)) + 1  # triplet numbers are 1-based
            consensus_keys.append({document_column: document_n, triplet_column: triplet_n})

    result_data = pd.DataFrame(consensus_keys, columns=[document_column, triplet_column])

    evaluation = pd.merge(human_evaluation, result_data, on=[document_column, triplet_column])
    return evaluation.astype('int32')


In [77]:
# Build the per-document consensus triplets; the call also displays every
# non-empty three-model consensus table as a side effect.
con_results = consensus_all_models()

Unnamed: 0,subject,subject_type,relationship,object,object_type
0,BR Communications,COMP,Produce,Military-Communications Equipment,PRODUCT


Unnamed: 0,subject,subject_type,relationship,object,object_type
0,MetLife Inc,COMP,Announce,Planned Merger,EVENT


Unnamed: 0,subject,subject_type,relationship,object,object_type
0,Victory Giant Technology HuiZhou Co Ltd,COMP,Announce,Net Profit Increase,ECON_INDICATOR


Unnamed: 0,subject,subject_type,relationship,object,object_type
0,New Generation Jalisco Cartel,ORG,Impact,Violence,CONCEPT
1,New Generation Jalisco Cartel,ORG,Operate_In,Mexico,GPE


Unnamed: 0,subject,subject_type,relationship,object,object_type
0,Honeywell International Inc,COMP,Supply,United Airlines,COMP


Unnamed: 0,subject,subject_type,relationship,object,object_type
0,Donald Trump,PERSON,Announce,Immigration Reform Proposal,CONCEPT


Unnamed: 0,subject,subject_type,relationship,object,object_type
0,Asetek A/S,COMP,Receive,Order from Fujitsu,CONCEPT


Unnamed: 0,subject,subject_type,relationship,object,object_type
0,Gregg Singer,PERSON,Own,Public School Building,PRODUCT
1,Gregg Singer,PERSON,Want,Dormitory,PRODUCT
2,Gregg Singer,PERSON,Complain,Conspiracy,CONCEPT


In [78]:
# Human judgements of the GPT triplets, and where the filtered subset is written.
HUMAN_EVALUATION_CSV = '../evaluations/GPT.csv'
CONSENSUS_EVALUATION_CSV = '../evaluations/SequenceMatcher.csv'

# Keep only the human-evaluated GPT triplets that survived the consensus step.
evaluation = evaluate_consensus(
    human_evaluation_csv=HUMAN_EVALUATION_CSV,
    results=con_results,
    model_triplets=gpt_triplets,
)
display(evaluation)
evaluation.to_csv(CONSENSUS_EVALUATION_CSV, index=False)

Unnamed: 0,Document number,Triplet,correct triplet,correct entities,correct relation
0,2,2,1,2,1
1,41,1,1,2,1
2,56,1,1,2,1
3,63,1,1,2,1
4,63,2,1,2,1
5,64,1,1,2,1
6,64,1,0,2,0
7,72,1,1,2,1
8,74,1,1,2,1
9,107,1,1,2,1
