In [106]:
import json, operator, requests
from langchain import HuggingFaceHub, OpenAI
from langchain.chat_models import ChatOpenAI
from langchain.chains import LLMChain, SequentialChain
from langchain.prompts import PromptTemplate
from string import Template

In [164]:
# --- Output location -------------------------------------------------------
# NOTE(review): not referenced by any cell visible in this notebook section;
# presumably the folder where experiment artefacts are written -- confirm.
DIRECTORY = "caligraph-experiments"

# --- SPARQL endpoint -------------------------------------------------------
SPARQL_ENDPOINT = "http://caligraph.org/sparql"

# Sent with every request so the endpoint operator can identify this client.
QUERY_HEADERS = {
    "User-Agent": 'ConceptualEngineeringAgent/0.2 (https://github.com/bradleypallen/conceptual-engineering-using-llms; b.p.allen@uva.nl)'
}

# --- Query sizing ----------------------------------------------------------
# A class qualifies as a candidate only if it has MORE than this many instances.
MIN_INSTANCE_COUNT = 100

# Maximum number of rows requested from the candidate-class query.
CLASS_QUERY_LIMIT = 200

# Maximum number of rows requested from each example (E+ / E-) query.
E_QUERY_LIMIT = 20

# --- Query templates -------------------------------------------------------
# All templates use string.Template "$name" placeholders.
# NOTE(review): the rdf:/rdfs:/owl: prefixes are used without PREFIX
# declarations; presumably the endpoint predefines them -- confirm before
# pointing these queries at a different SPARQL service.

# Classes with their label, direct superclass and number of direct instances.
# Placeholders: $minInstanceCount, $limit.
CLASS_QUERY_TEMPLATE = Template("""SELECT ?class ?label ?superClass (COUNT(?instance) AS ?instanceCount) 
WHERE {
   ?class rdf:type owl:Class ;
        rdfs:label ?label ;
        rdfs:subClassOf ?superClass .
   ?instance rdf:type ?class .
}
GROUP BY ?class ?label ?superClass
HAVING (COUNT(?instance) > $minInstanceCount)
LIMIT $limit
""")

# Positive examples: labelled instances of <$class> or of any class reachable
# from it through rdfs:subClassOf* (zero or more steps).
# Placeholders: $class, $limit.
E_PLUS_QUERY_TEMPLATE = Template("""SELECT ?instance ?label
WHERE {
  ?class rdfs:subClassOf* <$class> .
  ?instance rdf:type ?class ;
      rdfs:label ?label .
}
LIMIT $limit
""")

# Negative examples: labelled instances under <$superClass> that are NOT
# instances of <$class> or of any of its subclasses.
# Placeholders: $superClass, $class, $limit.
E_MINUS_QUERY_TEMPLATE = Template("""SELECT ?instance ?label
WHERE {
  ?subclassOfSuperClass rdfs:subClassOf* <$superClass> .
  ?instance rdf:type ?subclassOfSuperClass ;
       rdfs:label ?label .
 
  FILTER NOT EXISTS {
    ?subclassOfClass rdfs:subClassOf* <$class> .
    ?instance rdf:type ?subclassOfClass .
  }
}
LIMIT $limit
""")

In [145]:
# Ask the endpoint for classes with more than MIN_INSTANCE_COUNT instances,
# capped at CLASS_QUERY_LIMIT result rows.
query = CLASS_QUERY_TEMPLATE.substitute(
    minInstanceCount=MIN_INSTANCE_COUNT,
    limit=CLASS_QUERY_LIMIT,
)
response = requests.get(
    SPARQL_ENDPOINT,
    headers=QUERY_HEADERS,
    params={'query': query, 'format': 'json'},
)
# Raise immediately on an HTTP error status rather than failing later while
# parsing the body.
response.raise_for_status()

In [147]:
# Flatten each SPARQL JSON binding into a plain dict holding just the values.
candidates = [
    {
        "class": binding["class"]["value"],
        "label": binding["label"]["value"],
        "superClass": binding["superClass"]["value"],
        "instanceCount": binding["instanceCount"]["value"],
    }
    for binding in response.json()["results"]["bindings"]
]

# Order from most to fewest instances. The counts arrive as strings (e.g.
# '802'), so they are converted to int for the comparison; comparing the raw
# strings would rank '99' above '802' and '1000' below '802'. The stored
# "instanceCount" values are left as strings.
candidates.sort(key=lambda candidate: int(candidate["instanceCount"]), reverse=True)

In [173]:
# Inspect the sorted candidate class records.
candidates

[{'class': 'http://caligraph.org/ontology/England_under-21_international_footballer',
  'label': 'England under-21 international footballer',
  'superClass': 'http://caligraph.org/ontology/National_under-21_association_football_team',
  'instanceCount': '802'},
 {'class': 'http://caligraph.org/ontology/Autobiography',
  'label': 'Autobiography',
  'superClass': 'http://caligraph.org/ontology/Biography_(books)',
  'instanceCount': '776'},
 {'class': 'http://caligraph.org/ontology/Newspaper_in_New_South_Wales',
  'label': 'Newspaper in New South Wales',
  'superClass': 'http://caligraph.org/ontology/Newspaper_published_in_Australia',
  'instanceCount': '740'},
 {'class': 'http://caligraph.org/ontology/Player_of_American_football_from_Alabama',
  'label': 'Player of American football from Alabama',
  'superClass': 'http://caligraph.org/ontology/American_player_of_American_football',
  'instanceCount': '697'},
 {'class': 'http://caligraph.org/ontology/PlayStation_VR_game',
  'label': 'Play

In [149]:
def generate_NL_class_definition_from_label(label):
    """Ask GPT-4 for a dictionary-style definition of the concept named ``label``.

    Only the label is given to the model; no RDF context is supplied.

    Args:
        label: Human-readable name of the concept to define.

    Returns:
        The value returned by ``ChatOpenAI.predict`` for the prompt.
    """
    chat_model = ChatOpenAI(model="gpt-4")
    instruction = (
        f'Define the concept "{label}". '
        'Work step by step and check your facts. '
        'State your definition in the manner of a dictionary.'
    )
    return chat_model.predict(instruction)

In [150]:
# Define the first candidate class from its label alone; print() is used so
# that any line breaks in the reply are rendered as separate lines.
print(generate_NL_class_definition_from_label(candidates[0]["label"]))

"England under-21 international footballer" (noun):

1. A soccer player who is below the age of 21 and represents England at an international level in the sport of football. 
2. The player must be selected by the England under-21 national football team which is controlled by The Football Association, the governing body for football in England. 
3. This player typically demonstrates a high level of skill, potential, and commitment, and competes against under-21 teams from other nations in various championships and tournaments. 
4. However, being an "under-21" player does not necessarily mean the player is under 21 years of age. According to FIFA's eligibility rules, players can be up to 23 years old, as long as they were 21 or under at the start of the two-year campaign of the competition. 
5. Finally, an England under-21 international footballer is often seen as a prospect for future inclusion in the senior England national team.


In [151]:
# Fetch up to E_QUERY_LIMIT labelled instances of the first candidate class
# (or of any of its subclasses) to serve as positive examples.
query = E_PLUS_QUERY_TEMPLATE.substitute(
    # "class" is a Python keyword, so it has to be passed through the mapping.
    {"class": candidates[0]["class"]},
    limit=E_QUERY_LIMIT,
)
response = requests.get(
    SPARQL_ENDPOINT,
    headers=QUERY_HEADERS,
    params={'query': query, 'format': 'json'},
)
# Raise immediately on an HTTP error status.
response.raise_for_status()

In [152]:
# Keep only the IRI and the label text of each returned binding.
positives = [
    {"instance": row["instance"]["value"], "label": row["label"]["value"]}
    for row in response.json()["results"]["bindings"]
]

In [174]:
# Inspect the positive examples (instance IRI + label).
positives

[{'instance': 'http://caligraph.org/resource/Garry_Parker',
  'label': 'Garry Parker'},
 {'instance': 'http://caligraph.org/resource/Patrick_Bamford',
  'label': 'Patrick Bamford'},
 {'instance': 'http://caligraph.org/resource/Paul_Gascoigne',
  'label': 'Paul Gascoigne'},
 {'instance': 'http://caligraph.org/resource/Ray_Wilkins',
  'label': 'Ray Wilkins'},
 {'instance': 'http://caligraph.org/resource/Rod_Wallace',
  'label': 'Rod Wallace'},
 {'instance': 'http://caligraph.org/resource/Brian_Atkinson',
  'label': 'Brian Atkinson'},
 {'instance': 'http://caligraph.org/resource/Chris_Woods',
  'label': 'Chris Woods'},
 {'instance': 'http://caligraph.org/resource/Daniel_Sturridge',
  'label': 'Daniel Sturridge'},
 {'instance': 'http://caligraph.org/resource/Garry_Flitcroft',
  'label': 'Garry Flitcroft'},
 {'instance': 'http://caligraph.org/resource/Gary_Mills_(footballer,_born_1961)',
  'label': 'Gary Mills (footballer, born 1961)'},
 {'instance': 'http://caligraph.org/resource/Hayden_Mu

In [154]:
# Retrieve the endpoint's DESCRIBE result for the first positive example,
# requesting a Turtle serialisation.
# The format must be written with a forward slash: with a backslash, Python
# reads "\t" as a TAB escape and the endpoint is sent "text<TAB>urtle".
response = requests.get(
    SPARQL_ENDPOINT,
    params={'query': f'DESCRIBE <{positives[0]["instance"]}>', 'format': 'text/turtle'},
    headers=QUERY_HEADERS,
)
# Raise immediately on an HTTP error status.
response.raise_for_status()
instance_turtle = response.text

In [155]:
def generate_NL_description_from_label_and_turtle(label, turtle):
    """Ask GPT-4 for an encyclopedia-style summary of an entity's RDF description.

    Args:
        label: Human-readable name of the entity being described.
        turtle: Text of the RDF statements about the entity; it is inserted
            verbatim into the prompt after "Turtle: ".

    Returns:
        The value returned by ``ChatOpenAI.predict`` for the prompt.
    """
    llm = ChatOpenAI(model="gpt-4")
    # Wording note: the instruction reads "step by step" and "encyclopedia";
    # misspelling either sends a garbled instruction to the model.
    prompt = (
        f'Summarize the following set of RDF statements describing the entity "{label}". '
        'Work step by step and check your facts. '
        'State your summarization in the manner of the first paragraph of an '
        'encyclopedia article on the topic.'
        f'\nTurtle: {turtle}'
    )
    return llm.predict(prompt)

In [156]:
# Summarise the first positive example from its label and fetched description.
generate_NL_description_from_label_and_turtle(positives[0]["label"], instance_turtle)

'Garry Parker (born 7 September 1965 in Oxford, England) is a former English footballer and current football manager. He is known for his career as a midfielder, playing for several teams including Luton Town F.C., AC Omonia, Nottingham Forest F.C., Hull City A.F.C., the England national under-21 football team, the England national football B team, Leicester City F.C., and Aston Villa F.C. Additionally, he managed Leicester City F.C. during the 2001-2002 season. Parker also played in the FA Cup Final and was part of the England under-21 international team. He played in the Premier League and is a living individual.'

In [157]:
# Retrieve the endpoint's DESCRIBE result for the first candidate class,
# requesting a Turtle serialisation.
# The format must be written with a forward slash: with a backslash, Python
# reads "\t" as a TAB escape and the endpoint is sent "text<TAB>urtle".
response = requests.get(
    SPARQL_ENDPOINT,
    params={'query': f'DESCRIBE <{candidates[0]["class"]}>', 'format': 'text/turtle'},
    headers=QUERY_HEADERS,
)
# Raise immediately on an HTTP error status.
response.raise_for_status()
class_turtle = response.text

In [158]:
def generate_NL_class_definition_from_turtle(label, turtle):
    """Ask GPT-4 for a dictionary-style definition of a concept, grounded in RDF.

    Args:
        label: Human-readable name of the concept to define.
        turtle: Text of the RDF statements about the concept; it is inserted
            verbatim into the prompt after "Turtle: ".

    Returns:
        The value returned by ``ChatOpenAI.predict`` for the prompt.
    """
    llm = ChatOpenAI(model="gpt-4")
    # Wording note: the instruction reads "step by step"; misspelling it sends
    # a garbled instruction to the model.
    prompt = (
        f'Using the following set of RDF statements, define the concept "{label}". '
        'Work step by step and check your facts. '
        'State your summarization in the manner of a dictionary definition of the term.'
        f'\nTurtle: {turtle}'
    )
    return llm.predict(prompt)

In [159]:
# Define the first candidate class using its label plus its fetched description.
generate_NL_class_definition_from_turtle(candidates[0]['label'], class_turtle)

'An "England under-21 international footballer" is a defined class in the Caligraph ontology, indicating an individual who has been a part of the England national under-21 football team. This individual is a subclass of the broader categories of players in National under-21 association football teams and England youth international footballers. The concept was derived from the Wikipedia category of England under-21 international footballers. In simpler terms, an England under-21 international footballer is a football player who has represented England at the under-21 level in international competitions.'

In [160]:
# Define the same class from its label alone (no RDF statements in the prompt).
generate_NL_class_definition_from_label(candidates[0]['label'])

'An "England under-21 international footballer" refers to a soccer player who represents England at an international level in competitions and matches specifically designated for athletes under the age of 21. The player must be eligible under FIFA\'s rules, primarily possessing English nationality, and must be selected by the coaching staff of England\'s Under-21 National Football Team. This status is often seen as a stepping stone towards being selected for the senior England National Football Team.'

In [165]:
# Fetch up to E_QUERY_LIMIT labelled instances that fall under the first
# candidate's superclass but not under the candidate class itself, to serve as
# negative examples.
query = E_MINUS_QUERY_TEMPLATE.substitute(
    # "class" is a Python keyword, so it has to be passed through the mapping.
    {"class": candidates[0]["class"]},
    superClass=candidates[0]["superClass"],
    limit=E_QUERY_LIMIT,
)
response = requests.get(
    SPARQL_ENDPOINT,
    headers=QUERY_HEADERS,
    params={'query': query, 'format': 'json'},
)
# Raise immediately on an HTTP error status.
response.raise_for_status()

In [167]:
# Keep only the IRI and the label text of each returned binding.
negatives = [
    {"instance": row["instance"]["value"], "label": row["label"]["value"]}
    for row in response.json()["results"]["bindings"]
]

In [172]:
# Inspect the negative examples (instance IRI + label).
negatives

[{'instance': 'http://caligraph.org/resource/Alexander_Prass',
  'label': 'Alexander Prass'},
 {'instance': 'http://caligraph.org/resource/Christian_Ramsebner',
  'label': 'Christian Ramsebner'},
 {'instance': 'http://caligraph.org/resource/Davit_Terteryan',
  'label': 'Davit Terteryan'},
 {'instance': 'http://caligraph.org/resource/Shaqir_Stafa',
  'label': 'Shaqir Stafa'},
 {'instance': 'http://caligraph.org/resource/Andreas_Kyriakou',
  'label': 'Andreas Kyriakou'},
 {'instance': 'http://caligraph.org/resource/Christopher_Drazan',
  'label': 'Christopher Drazan'},
 {'instance': 'http://caligraph.org/resource/Darvydas_%C5%A0ernas',
  'label': 'Darvydas Šernas'},
 {'instance': 'http://caligraph.org/resource/Dominik_Yankov',
  'label': 'Dominik Yankov'},
 {'instance': 'http://caligraph.org/resource/Gentian_Mu%C3%A7a',
  'label': 'Gentian Muça'},
 {'instance': 'http://caligraph.org/resource/Harallamb_Qaqi',
  'label': 'Harallamb Qaqi'},
 {'instance': 'http://caligraph.org/resource/Jorgo

In [170]:
# Retrieve the endpoint's DESCRIBE result for the first negative example,
# requesting a Turtle serialisation.
# The format must be written with a forward slash: with a backslash, Python
# reads "\t" as a TAB escape and the endpoint is sent "text<TAB>urtle".
response = requests.get(
    SPARQL_ENDPOINT,
    params={'query': f'DESCRIBE <{negatives[0]["instance"]}>', 'format': 'text/turtle'},
    headers=QUERY_HEADERS,
)
# Raise immediately on an HTTP error status.
response.raise_for_status()
# NOTE: this rebinds instance_turtle, which previously held the description
# of the first positive example.
instance_turtle = response.text

In [171]:
# Summarise the first negative example from its label and fetched description.
generate_NL_description_from_label_and_turtle(negatives[0]["label"], instance_turtle)

'Alexander Prass (born May 26, 2001 in Austria) is a living professional footballer who plays as a midfielder. He has been a player for the Austrian Football Bundesliga and 2. Liga (Austria), and has also been a part of several teams including SK Sturm Graz, FC Liefering, LASK, FC Juniors OÖ, and Austria\'s national under-18, under-19, and under-21 football teams. He is also recognized under the nickname "Prass" and is known for his height of 1.8 meters and his shirt number "8". Prass has also been categorized as an Austrian under-21 international footballer and a youth international footballer of Austria.'