In [2]:
import copy
import json
import os
from typing import List, Dict, Union, Any, Optional

import instructor
import pandas as pd
from openai import OpenAI
from pydantic import BaseModel, model_validator, field_validator, Field, ValidationInfo

from data_classes import KnowledgeGraph, ValidatedProperty
from utils import read_jsonl, save_jsonl

In [3]:
# Patch the OpenAI client with instructor; the patched client is called further down
# with `response_model=` to obtain structured replies.
# The API key is read from the environment: a missing OPENAI_API_KEY raises KeyError here.
client = instructor.patch(OpenAI(api_key=os.environ['OPENAI_API_KEY']))
# Chat model used for every property evaluation request in this notebook.
MODEL = "gpt-3.5-turbo-0125"

# 🧠 Load data

In [4]:
# Load the predicted knowledge bases (presumably one KB record per JSONL line —
# depends on utils.read_jsonl) and report how many were read.
pred_kbs = read_jsonl('../../data/prediction.jsonl')
print(f"Number of predicted KBs: {len(pred_kbs)}")

Number of predicted KBs: 4


In [5]:
# Inspect the top-level keys of the first predicted KB record.
pred_kbs[0].keys()

dict_keys(['entity_label', 'properties'])

# 🪬 Define Evaluation Model

In [46]:

class WorldKnowledgeKGValidator(KnowledgeGraph):
    """Knowledge graph whose predicted properties are each judged by the LLM.

    Constructing an instance issues one chat-completion request per entry in
    ``properties`` (through the module-level instructor-patched ``client``).
    The structured ``ValidatedProperty`` replies are collected in
    ``validated_properties``; any value the caller supplies for that field is
    discarded and recomputed.
    """

    # LLM verdicts, one per entry of `properties`, in iteration order.
    validated_properties: List[ValidatedProperty] = []

    @model_validator(mode='before')
    @classmethod
    def validate_properties_with_llm(cls, data: Any) -> Any:
        """Ask the LLM about every predicted property before field validation.

        Args:
            data: Raw constructor input. Expected to be a dict carrying
                ``entity_label`` and a ``properties`` dict.

        Returns:
            A shallow copy of ``data`` with ``validated_properties`` set to the
            list of ``ValidatedProperty`` replies (the caller's dict is not
            modified). Input that does not have the expected shape is returned
            unchanged so that pydantic's own field validation reports it.
        """
        if not isinstance(data, dict):
            return data
        properties = data.get('properties')
        if 'entity_label' not in data or not isinstance(properties, dict):
            return data

        entity_label = data['entity_label']
        # All predicted property names; passed along as instructor validation
        # context for the response model.
        existing_pred_properties = list(properties)

        verdicts = []
        for predicted_property_name, predicted_property_value in properties.items():
            # One request per property. The first two fragments need an
            # explicit separator, otherwise they run together in the prompt.
            prompt = (
                "Using your knowledge of the world, "
                "is the following property valid for the given entity? "
                f"\nEntity Label: {entity_label}"
                f"\nPredicted Property Name: {predicted_property_name}"
                f"\nPredicted Property Value: {predicted_property_value}"
            )
            verdict: ValidatedProperty = client.chat.completions.create(
                response_model=ValidatedProperty,
                messages=[{"role": "user", "content": prompt}],
                validation_context={
                    "existing_pred_properties": existing_pred_properties,
                },
                max_retries=2,
                model=MODEL,
            )
            verdicts.append(verdict)

        # Return a new mapping instead of writing into the caller's dict.
        return {**data, 'validated_properties': verdicts}

    @model_validator(mode='after')
    def assert_all_properties_validated(self, info: ValidationInfo) -> "WorldKnowledgeKGValidator":
        """Check that every predicted property received exactly one verdict.

        Raises:
            ValueError: if the number of entries in ``validated_properties``
                differs from the number of entries in ``properties``.
        """
        if len(self.validated_properties) != len(self.properties):
            raise ValueError(
                "Number of properties validated does not match number of properties in the prediction knowledge base. " +
                f"Number of properties validated: {len(self.validated_properties)}, " +
                f"Number of properties in the text: {len(self.properties)}"
                )
        return self


    

# Evaluate!

In [47]:
# Build a modified copy of the first predicted KB with two extra, made-up properties
# (the variable name marks them as wrong facts) to see how the validator judges them.
# deepcopy keeps the nested `properties` dict of pred_kbs[0] untouched.
# `copy` is imported in the notebook's import cell at the top.
pred_kb_with_wrong_fact = copy.deepcopy(pred_kbs[0])
pred_kb_with_wrong_fact['properties']['Bought Stocks in'] = ['Tesla', 'Nvidia', 'Hertz']
pred_kb_with_wrong_fact['properties']['Favourite Fast Food Chain'] = 'McDonalds'

In [48]:
# Evaluate only the modified KB. Constructing WorldKnowledgeKGValidator runs its
# validators, which issue one LLM request per property of the KB.
results = []
results.append(WorldKnowledgeKGValidator(**pred_kb_with_wrong_fact))

In [49]:
# Show the evaluated KB as a plain dict; the dump includes every model field,
# i.e. also the LLM verdicts stored in `validated_properties`.
results[0].model_dump()

{'entity_label': 'George Washington',
 'properties': {'Name': 'George Washington',
  'Birth date': 'February 22, 1732',
  'Death date': 'December 14, 1799',
  'Occupation': ['Founding Father',
   'Military Officer',
   'Politician',
   'First President of the United States'],
  'Place of Birth': 'Westmoreland County, Virginia',
  'Place of Death': 'Mount Vernon, Virginia',
  'Spouse': 'Martha Washington',
  'Children': ['none',
   'adoptive children: Martha Parke Custis, John Parke Custis'],
  'Role in American Revolution': 'Commander-in-Chief of the Continental Army during the American Revolution',
  'Deficit in Estate': 'Eleventh year running deficit in 1787 due to poor crop yields and pestilence',
  'Agricultural Innovation': 'Undertook a new landscaping plan to cultivate fast-growing trees and native shrubs',
  'Mule Breeding': 'Began breeding mules after being gifted a Spanish jack by King Charles III of Spain in 1784, believed they would revolutionize agriculture and transportati

In [50]:
# DISABLED: evaluation over every predicted KB in `pred_kbs`.
# While this stays commented out, `results` holds only the single modified KB
# evaluated above. Un-commenting it resets `results` and issues LLM requests
# for every property of every KB.
# results = []
# for idx in range(len(pred_kbs)):

#     # try:
#     result = WorldKnowledgeKGValidator(**pred_kbs[idx])
#     results.append(result)
#     # except Exception as e:
#     #     print(f"Failed to evaluate at KB {idx} with error {e}")

In [51]:
# Turn each evaluated KB model into a plain dict, then hand the list to save_jsonl.
results_json = [validated_kb.model_dump() for validated_kb in results]
save_jsonl(results_json, '../../data/world_knowledge_evaluation_results.jsonl')

Saved to f'../../data/world_knowledge_evaluation_results.jsonl


# Look at our Evaluations

In [45]:
# Reload the saved evaluations from disk and count them.
# NOTE(review): this rebinds `results` from a list of WorldKnowledgeKGValidator
# instances (built above) to whatever read_jsonl returns (presumably plain dicts),
# so re-running the save cell after this one would fail on `.model_dump()`.
# NOTE(review): the recorded execution count of this cell (45) is lower than that of
# the cells above it — confirm the notebook survives Restart Kernel -> Run All.
results = read_jsonl('../../data/world_knowledge_evaluation_results.jsonl')
len(results)

1