In [1]:
# import libraries
import json
import pandas as pd
from indra.statements.io import stmt_from_json
from indra.preassembler.grounding_mapper.gilda import ground_statements


INFO: [2024-07-22 14:23:40] /home/klas/repos/adeft/src/adeft/recognize.py - OneShotRecognizer not available. Extension module for AlignmentBasedScorer is missing


# Checking For Accuracy

In [2]:
# Read the tab-separated results file written by the indra_gpt run.
from indra_gpt.constants import OUTPUT_DEFAULT
df = pd.read_csv(OUTPUT_DEFAULT, sep="\t")


In [3]:
# Decode the JSON strings stored in the dataframe: the "input" column holds
# the original statement JSON, "generated_json_object" holds ChatGPT's answer.
original_statement_jsons = list(map(json.loads, df["input"]))
gpt_statement_jsons = list(map(json.loads, df["generated_json_object"]))


In [4]:
# Build two index-aligned lists of statement objects from the original and
# ChatGPT JSONs. A pair is kept only when BOTH sides can be loaded, so
# original_statements[i] always corresponds to gpt_statements[i].
original_statements = []
gpt_statements = []
for og_json, gpt_json in zip(original_statement_jsons, gpt_statement_jsons):
    try:
        # Parse both sides before appending either one. Appending og_stmt
        # first would leave an unpaired original whenever the ChatGPT JSON
        # fails to load, shifting every later comparison by one position
        # (previously patched by hand with a hard-coded pop(8)).
        og_stmt = stmt_from_json(og_json)
        gpt_stmt = stmt_from_json(gpt_json)
    except (IndexError, ValueError):
        # skip the whole pair if either JSON can't be loaded
        continue
    original_statements.append(og_stmt)
    gpt_statements.append(gpt_stmt)




In [5]:
# Score ChatGPT's statements against the originals, pair by pair.
# Every pair is first tallied by statement type; only pairs whose types
# match are then tallied by agent grounding.
incorrect_stmt_type = 0
incorrect_grounding = 0
correct_stmt_type = 0
correct_grounding = 0
for og_stmt, gpt_stmt in zip(original_statements, gpt_statements):
    # compare statement types
    if type(og_stmt) is not type(gpt_stmt):
        incorrect_stmt_type += 1  # statement types don't match
        continue
    correct_stmt_type += 1  # count each correct statement type

    # Collect the grounding of every real agent, separately for each
    # statement. Zipping the two agent lists together would silently drop
    # the extra agents when the statements have different agent counts and
    # could report a match on the shared prefix alone.
    og_groundings = [
        tuple(agent.get_grounding()) for agent in og_stmt.real_agent_list()
    ]
    gpt_groundings = [
        tuple(agent.get_grounding()) for agent in gpt_stmt.real_agent_list()
    ]

    # Grounding is correct only if the number of agents and every
    # agent's grounding match, in order.
    if og_groundings == gpt_groundings:
        correct_grounding += 1
    else:
        incorrect_grounding += 1


In [6]:
# Report the four tallies computed by the comparison loop.
print(f"Number of Correct Statement Types: {correct_stmt_type}")
print(f"Number of Incorrect Statement Types: {incorrect_stmt_type}")
print(f"Number of Correct Statements With Correct Agents: {correct_grounding}")
print(f"Number of Correct Statements With 1+ Incorrect Agents: {incorrect_grounding}")


Number of Correct Statement Types: 33
Number of Incorrect Statement Types: 14
Number of Correct Statements With Correct Agents: 0
Number of Correct Statements With 1+ Incorrect Agents: 33


# Try to Ground Statements

In [7]:
# Pass the ChatGPT statements through the Gilda-based grounding mapper so
# the comparison can be repeated on the result.
# NOTE(review): ground_statements may modify the statement objects it is
# given in place rather than returning copies; if so, gpt_statements is also
# changed here and re-running the earlier comparison cell afterwards would
# give different numbers. Confirm against the INDRA documentation.
gpt_statements_grounded = ground_statements(gpt_statements)


INFO: [2024-07-22 14:23:50] indra.ontology.bio.ontology - Loading INDRA bio ontology from cache at /home/klas/.indra/bio_ontology/1.34/bio_ontology.pkl


In [8]:
# Check again after gpt_statements has been passed through ground_statements.
# Every pair is first tallied by statement type; only pairs whose types
# match are then tallied by agent grounding.
incorrect_stmt_type_grounded = 0
incorrect_grounding_grounded = 0
correct_stmt_type_grounded = 0
correct_grounding_grounded = 0
for og_stmt, gpt_stmt in zip(original_statements, gpt_statements_grounded):
    # compare statement types
    if type(og_stmt) is not type(gpt_stmt):
        incorrect_stmt_type_grounded += 1  # statement types don't match
        continue
    correct_stmt_type_grounded += 1  # count each correct statement type

    # Collect the grounding of every real agent, separately for each
    # statement. Zipping the two agent lists together would silently drop
    # the extra agents when the statements have different agent counts and
    # could report a match on the shared prefix alone.
    og_groundings = [
        tuple(agent.get_grounding()) for agent in og_stmt.real_agent_list()
    ]
    gpt_groundings = [
        tuple(agent.get_grounding()) for agent in gpt_stmt.real_agent_list()
    ]

    # Grounding is correct only if the number of agents and every
    # agent's grounding match, in order.
    if og_groundings == gpt_groundings:
        correct_grounding_grounded += 1
    else:
        incorrect_grounding_grounded += 1


In [9]:
# Report the four tallies computed after grounding.
print(f"Number of Correct Statement Types: {correct_stmt_type_grounded}")
print(f"Number of Incorrect Statement Types: {incorrect_stmt_type_grounded}")
print(f"Number of Correct Statements With Correct Agents: {correct_grounding_grounded}")
print(f"Number of Correct Statements With 1+ Incorrect Agents: {incorrect_grounding_grounded}")


Number of Correct Statement Types: 33
Number of Incorrect Statement Types: 14
Number of Correct Statements With Correct Agents: 5
Number of Correct Statements With 1+ Incorrect Agents: 28
