In [1]:
import json, operator, requests, time, pycm, wikipedia, pandas as pd
from langchain import HuggingFaceHub, OpenAI
from langchain.chat_models import ChatOpenAI
from langchain.chains import LLMChain, SequentialChain
from langchain.prompts import PromptTemplate
from conceptual_engineering_toolkit import Concept, Entity
from datetime import datetime
from string import Template
from pathlib import Path

In [34]:
DIRECTORY = "wd_experiments_v2"

SPARQL_ENDPOINT = "https://query.wikidata.org/sparql"

QUERY_HEADERS = {
    'User-Agent': 'ConceptualEngineeringAgent/0.2 (https://github.com/bradleypallen/conceptual-engineering-using-llms; b.p.allen@uva.nl)'
}

CLASS_QUERY_LIMIT = 250

E_QUERY_LIMIT = 20

# MIN_INSTANCE_COUNT = 100

# CLASS_QUERY_TEMPLATE = Template("""SELECT ?class ?label ?superClass (COUNT(?instance) AS ?instanceCount) 
# WHERE {
#    ?class rdf:type owl:Class ;
#         rdfs:label ?label ;
#         rdfs:subClassOf ?superClass .
#    ?instance rdf:type ?class .
# }
# GROUP BY ?class ?label ?superClass
# HAVING (COUNT(?instance) > $minInstanceCount)
# LIMIT $limit
# """)

POSITIVES_QUERY_TEMPLATE = Template("""SELECT DISTINCT ?instance ?label
WHERE {
  ?instance p:P31/ps:P31/wdt:P279* <$class> ;
      rdfs:label ?label .
  FILTER(LANG(?label) = "en") .
}
LIMIT $limit
""")

NEGATIVES_QUERY_TEMPLATE = Template("""SELECT DISTINCT ?instance ?label
WHERE {
  ?subclassOfSuperClass wdt:P279* <$superClass> .
  ?instance p:P31/ps:P31/wdt:P279* ?subclassOfSuperClass ;
       rdfs:label ?label .
  FILTER(LANG(?label) = "en") .
  FILTER NOT EXISTS {
    ?subclassOfClass wdt:P279* <$class> .
    ?instance p:P31/ps:P31/wdt:P279* ?subclassOfClass .
  }
}
LIMIT $limit
""")
                                  
DESCRIPTION_QUERY_TEMPLATE = Template("""SELECT DISTINCT ?s ?p ?o WHERE {
{ 
  VALUES ?s { <$id> }
  ?s ?p ?o . 
  FILTER(LANG(?o) = "en") .
}
UNION
{ 
  VALUES ?o { <$id> }
  ?s ?p ?o . 
  FILTER(LANG(?o) = "en") .
}
}
LIMIT $limit
""")
                                  
CLASS_DEFINITION_FROM_LABEL_PROMPT_TEMPLATE = Template("""Define the concept "$label". 
Work step by step and check your facts. State your definition in the manner of a dictionary.
""")

CLASS_DEFINITION_FROM_SERIALIZATION_PROMPT_TEMPLATE = Template("""Using the following set of RDF statements, 
define the concept "$label". Work set by step and check your facts. State your definition in the manner 
of a dictionary.
                                                           
$statements'
""")

INSTANCE_DESCRIPTION_PROMPT_TEMPLATE = Template("""Summarize the following set of RDF statements 
describing the entity "$label". Work set by step and check your facts. State your summarization 
in the manner of the first paragraph of an encylopedia article on the topic.
                                                   
$statements'
""")

# def classes_for_evaluation():
#     query = CLASS_QUERY_TEMPLATE.substitute({"minInstanceCount": MIN_INSTANCE_COUNT, "limit": CLASS_QUERY_LIMIT})
#     response = requests.get(SPARQL_ENDPOINT, params={'query' : query, 'format' : 'json'}, headers=QUERY_HEADERS)
#     response.raise_for_status()
#     return sorted([ 
#             { 
#                 "id": candidate["class"]["value"], 
#                 "label": candidate["label"]["value"], 
#                 "superClassId": candidate["superClass"]["value"], 
#                 "instanceCount": candidate["instanceCount"]["value"] 
#             } 
#             for candidate in response.json()["results"]["bindings"] 
#         ], 
#         key=operator.itemgetter("instanceCount"), 
#         reverse=True
#     )

def positive_examples(cls):
    query = POSITIVES_QUERY_TEMPLATE.substitute({"class": cls["id"], "limit": E_QUERY_LIMIT})
    response = requests.get(SPARQL_ENDPOINT, params={'query' : query, 'format' : 'json'}, headers=QUERY_HEADERS)
    response.raise_for_status()
    return [ 
        { 
            "id": instance["instance"]["value"], 
            "label": instance["label"]["value"] 
        } 
        for instance in response.json()["results"]["bindings"] 
    ]

def negative_examples(cls):
    query = NEGATIVES_QUERY_TEMPLATE.substitute({"class": cls["id"], "superClass": cls["superClassId"], "limit": E_QUERY_LIMIT})
    response = requests.get(SPARQL_ENDPOINT, params={'query' : query, 'format' : 'json'}, headers=QUERY_HEADERS)
    response.raise_for_status()
    return [ 
        { 
            "id": instance["instance"]["value"], 
            "label": instance["label"]["value"] 
        } 
        for instance in response.json()["results"]["bindings"] 
    ]

def serialize(id):
    headers = QUERY_HEADERS
    headers["Accept"] = 'text/tab-separated-values'
    query = DESCRIPTION_QUERY_TEMPLATE.substitute({"id": id, "limit": E_QUERY_LIMIT})
    response = requests.get(SPARQL_ENDPOINT, params={'query' : query}, headers=headers)
    response.raise_for_status()
    return response.text.removeprefix('?s	?p	?o\n')

def class_definition_from_label(label):
    return ChatOpenAI(model="gpt-4").predict(CLASS_DEFINITION_FROM_LABEL_PROMPT_TEMPLATE.substitute({"label": label}))

def class_definition_from_serialization(id, label):
    return ChatOpenAI(model="gpt-4").predict(CLASS_DEFINITION_FROM_SERIALIZATION_PROMPT_TEMPLATE.substitute({"label": label, "statements": serialize(id)}))

def instance_description(id, label):
    return ChatOpenAI(model="gpt-4").predict(INSTANCE_DESCRIPTION_PROMPT_TEMPLATE.substitute({"label": label, "statements": serialize(id)}))

In [38]:
def evaluate(cls):
    positives = positive_examples(cls)
    negatives = negative_examples(cls)
    concept = Concept(cls["id"], cls["label"], class_definition_from_serialization(cls["id"], cls["label"]), "gpt-4", 0.1)
    df_positives = pd.DataFrame.from_records(positives)
    df_positives["actual"] = "positive"
    df_negatives = pd.DataFrame.from_records(negatives)
    df_negatives["actual"] = "negative"
    df_data = pd.concat([df_positives, df_negatives], ignore_index=True, axis=0)
    df_data["description"] = df_data.apply(lambda ex: instance_description(ex["id"], ex["label"]), axis=1)
    predictions = [ concept.classify(Entity(ex["id"], ex["label"], ex["description"])) for ex in df_data.to_dict("records") ]
    df_predictions = pd.DataFrame(predictions, columns = [ 'predicted', 'rationale' ])
    df_predictions["predicted"] = df_predictions["predicted"].str.lower()
    df_results = pd.concat([df_data, df_predictions], axis=1)
    cm = pycm.ConfusionMatrix(df_results["actual"].tolist(), df_results["predicted"].tolist(), digit=2, classes=[ 'positive', 'negative' ])
    evaluation = { "created": datetime.now().isoformat(), "concept": concept.to_json(), "data": df_results.to_dict('records'), "confusion_matrix": cm.matrix, }
    experiment_filename = f'{DIRECTORY}/{cls["label"].replace(" ","_")}/{evaluation["concept"]["model_name"]}_{evaluation["concept"]["label"].replace(" ","_")}_{evaluation["created"]}.json'
    experiment_path = Path(experiment_filename)
    experiment_path.parent.mkdir(parents=True, exist_ok=True)
    json.dump(evaluation, open(experiment_filename, 'w+'))

In [4]:
# candidates = classes_for_evaluation()
candidates = json.load(open(f'{DIRECTORY}/candidates.json', 'r'))

In [20]:
for cls in candidates[50:]:
    # time.sleep(60)
    print("Evaluating", cls["label"], "...")
    evaluate(cls)

Evaluating Child ...
Evaluating Fraud ...
Evaluating Indigenous peoples of the Americas ...
Evaluating Fetus ...
Evaluating Unnatural death ...


HTTPError: 500 Server Error: Internal Server Error for url: https://query.wikidata.org/sparql?query=SELECT+%3Finstance+%3Flabel%0AWHERE+%7B%0A++%3FsubclassOfSuperClass+wdt%3AP279%2A+%3Chttp%3A%2F%2Fwww.wikidata.org%2Fentity%2FQ4%3E+.%0A++%3Finstance+p%3AP31%2Fps%3AP31%2Fwdt%3AP279%2A+%3FsubclassOfSuperClass+%3B%0A+++++++rdfs%3Alabel+%3Flabel+.%0A++FILTER%28LANG%28%3Flabel%29+%3D+%22en%22%29+.%0A++FILTER+NOT+EXISTS+%7B%0A++++%3FsubclassOfClass+wdt%3AP279%2A+%3Chttp%3A%2F%2Fwww.wikidata.org%2Fentity%2FQ855919%3E+.%0A++++%3Finstance+p%3AP31%2Fps%3AP31%2Fwdt%3AP279%2A+%3FsubclassOfClass+.%0A++%7D%0A%7D%0ALIMIT+20%0A&format=json

In [None]:
# print(json.dump(candidates, open(f'{DIRECTORY}/candidates.json', 'w'))

In [32]:
print(serialize(candidates[3]['id']).removeprefix('?s	?p	?o\n'))

<http://www.wikidata.org/entity/Q26513>	<http://www.w3.org/2000/01/rdf-schema#label>	"human fetus"@en
<http://www.wikidata.org/entity/Q26513>	<http://schema.org/description>	"prenatal human between the embryonic state and birth"@en
<http://www.wikidata.org/entity/Q26513>	<http://www.w3.org/2004/02/skos/core#altLabel>	"faetus"@en
<http://www.wikidata.org/entity/Q26513>	<http://www.w3.org/2004/02/skos/core#altLabel>	"fætus"@en
<http://www.wikidata.org/entity/Q26513>	<http://www.w3.org/2004/02/skos/core#altLabel>	"fetus"@en
<http://www.wikidata.org/entity/Q26513>	<http://www.w3.org/2004/02/skos/core#altLabel>	"foetus"@en
<http://www.wikidata.org/entity/Q26513>	<http://www.w3.org/2004/02/skos/core#altLabel>	"fœtus"@en



In [71]:
candidates[48]

{'id': 'http://www.wikidata.org/entity/Q300920',
 'label': 'Esports',
 'superClassId': 'http://www.wikidata.org/entity/Q349',
 'instanceCount': 409}

In [72]:
evaluate(candidates[48])

In [73]:
len(candidates)

533

In [76]:
from random import sample
s = sample(candidates, 5)
s

[{'id': 'http://www.wikidata.org/entity/Q968159',
  'label': 'Art movement',
  'superClassId': 'http://www.wikidata.org/entity/Q735',
  'instanceCount': 1510},
 {'id': 'http://www.wikidata.org/entity/Q557206',
  'label': 'Catholic higher education',
  'superClassId': 'http://www.wikidata.org/entity/Q3918',
  'instanceCount': 221},
 {'id': 'http://www.wikidata.org/entity/Q1363241',
  'label': 'Plant variety (law)',
  'superClassId': 'http://www.wikidata.org/entity/Q4886',
  'instanceCount': 124},
 {'id': 'http://www.wikidata.org/entity/Q494721',
  'label': 'Cities of Japan',
  'superClassId': 'http://www.wikidata.org/entity/Q515',
  'instanceCount': 873},
 {'id': 'http://www.wikidata.org/entity/Q4174776',
  'label': 'Village (Japan)',
  'superClassId': 'http://www.wikidata.org/entity/Q532',
  'instanceCount': 220}]

In [77]:
for cls in s:
    print("Evaluating", cls["label"], "...")
    evaluate(cls)

Evaluating Art movement ...
Evaluating Catholic higher education ...
Evaluating Plant variety (law) ...
Evaluating Cities of Japan ...
Evaluating Village (Japan) ...
