In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from llama_cpp.llama import Llama, LlamaGrammar


In [3]:
# Settings for loading the GGUF model, gathered in one place so they are easy
# to find and tweak before the (slow) load below.
_model_options = dict(
    repo_id="NousResearch/Hermes-3-Llama-3.1-8B-GGUF",
    filename="*.Q8_0.gguf",
    n_batch=1024,
    n_ctx=10000,
    n_gpu_layers=-1,
)
model = Llama.from_pretrained(**_model_options)


llama_load_model_from_file: using device Metal (Apple M2 Pro) - 21845 MiB free
llama_model_loader: loaded meta data with 27 key-value pairs and 292 tensors from /Users/benedikt/.cache/huggingface/hub/models--NousResearch--Hermes-3-Llama-3.1-8B-GGUF/snapshots/307a5dfb59aa38d88b6cfd32f44b8ad7c1da9fb8/./Hermes-3-Llama-3.1-8B.Q8_0.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.type str              = model
llama_model_loader: - kv   2:                               general.name str              = Hermes 3 Llama 3.1 8B
llama_model_loader: - kv   3:                       general.organization str              = NousResearch
llama_model_loader: - kv   4:                           general.basename str              = Hermes-3-Llama-3.1
llama_model_loa

In [4]:
import sys
import os

sys.path.insert(0, os.path.abspath("../.."))
sys.path.append(os.path.abspath("../../backend"))
sys.path.append(os.path.abspath(""))


from rdflib.plugins.stores.sparqlstore import SPARQLStore
from backend.model import *
from backend.ontology import *
from backend.datasetmatcher import *
from backend.explorative_support import *

# Connection settings for the local SPARQL endpoint backing the ontology graph.
SPARQL_ENDPOINT = "http://localhost:7012/"
SPARQL_PARAMS = {"infer": False, "sameAs": False}

store = SPARQLStore(SPARQL_ENDPOINT, method="POST_FORM", params=SPARQL_PARAMS)
graph = Graph(store=store)

# Wire the ontology manager and the topic-modelling helper on top of the graph.
config = OntologyConfig()
ontology_manager = OntologyManager(config, graph)
topic_man = TopicModelling(ontology_manager)


In [5]:
import pydantic
from llama_cpp_agent.gbnf_grammar_generator.gbnf_grammar_from_pydantic_models import (
    generate_gbnf_grammar_from_pydantic_models,
)
import json


class Constraint(pydantic.BaseModel):
    """A restriction on a single property of an entity, as extracted from a query."""
    # Name of the constrained property (the sample prompt uses "birth_date").
    property: str
    # Value the property is compared against; null is allowed.
    value: str | None
    # Qualifier for the comparison (the sample prompt uses "greater_than"); null is allowed.
    modifier: str | None


class Entity(pydantic.BaseModel):
    """An entity mentioned in a query, together with the constraints placed on it."""
    # Label used to refer to this entity (e.g. "person 1" in the sample prompt).
    identifier: str
    # Kind of entity (e.g. "person", "work", "place" in the sample prompt).
    type: str
    # Constraints attached to this entity; the sample uses [] when there are none.
    constraints: list[Constraint]


class Relation(pydantic.BaseModel):
    """A named link from one entity to another."""
    # Source side of the link; the sample prompt uses an entity identifier ("person 1").
    entity: str
    # Name of the relation (e.g. "author", "birth place" in the sample prompt).
    relation: str
    # Target side of the link; the sample prompt uses an entity identifier ("work 1").
    target: str


class EntitiesRelations(pydantic.BaseModel):
    """Top-level structure the model is asked to emit: all relations plus all entities."""
    # Links between entities.
    relations: list[Relation]
    # Every entity, each carrying its own constraints.
    entities: list[Entity]


# Derive a GBNF grammar from the EntitiesRelations model, print it for
# inspection, and compile it so it can be passed as `grammar=` to the model.
gbnf_erl = generate_gbnf_grammar_from_pydantic_models(
    [EntitiesRelations], "EntitiesRelations", add_inner_thoughts=False
)
print(gbnf_erl)
grammar_erl = LlamaGrammar.from_string(gbnf_erl)
# Shown as the cell result. NOTE(review): `_grammar` is a private attribute.
grammar_erl._grammar

root ::= entities-relations
entities-relations ::= (" "| "\n") "{" ws "\"EntitiesRelations\""  ": " ws grammar-models
grammar-models ::= entities-relations-grammar-model
entities-relations-grammar-model ::= "\"EntitiesRelations\"" "," ws "\"None\"" ": " entities-relations

entities-relations ::= "{"  ws "\"relations\"" ": " entities-relations-relations ","  ws "\"entities\"" ": " entities-relations-entities  ws "}"
relation ::= "{"  ws "\"entity\"" ": " string ","  ws "\"relation\"" ": " string ","  ws "\"target\"" ": " string  ws "}"
entities-relations-relations ::= "[" ws (relation)? ("," ws relation)* ws "]" 
entity ::= "{"  ws "\"identifier\"" ": " string ","  ws "\"type\"" ": " string ","  ws "\"constraints\"" ": " entity-constraints  ws "}"
constraint ::= "{"  ws "\"property\"" ": " string ","  ws "\"value\"" ": " constraint-value-optional ","  ws "\"modifier\"" ": " constraint-modifier-optional  ws "}"
constraint-value-optional ::= string | null
constraint-modifier-optional ::= 

'root ::= entities-relations\nentities-relations ::= (" "| "\\n") "{" ws "\\"EntitiesRelations\\""  ": " ws grammar-models\ngrammar-models ::= entities-relations-grammar-model\nentities-relations-grammar-model ::= "\\"EntitiesRelations\\"" "," ws "\\"None\\"" ": " entities-relations\n\nentities-relations ::= "{"  ws "\\"relations\\"" ": " entities-relations-relations ","  ws "\\"entities\\"" ": " entities-relations-entities  ws "}"\nrelation ::= "{"  ws "\\"entity\\"" ": " string ","  ws "\\"relation\\"" ": " string ","  ws "\\"target\\"" ": " string  ws "}"\nentities-relations-relations ::= "[" ws (relation)? ("," ws relation)* ws "]" \nentity ::= "{"  ws "\\"identifier\\"" ": " string ","  ws "\\"type\\"" ": " string ","  ws "\\"constraints\\"" ": " entity-constraints  ws "}"\nconstraint ::= "{"  ws "\\"property\\"" ": " string ","  ws "\\"value\\"" ": " constraint-value-optional ","  ws "\\"modifier\\"" ": " constraint-modifier-optional  ws "}"\nconstraint-value-optional ::= string 

In [6]:
# Instruction sent as the system message of the unconstrained extraction run.
ERL_PROMPT_SYSTEM = """
Return all the entity relations and constraint within the prompt in the form of JSON output. The output should be a list of all entities and their constraints, as well as the relations between them. Make sure to include all entities and targets in the list of entities. Constraints should only be included in the list of entities they are associated with.
"""

# Hand-written reference answer for the worked example below, built piece by piece.
_sample_relations = [
    Relation(entity="person 1", relation="author", target="work 1"),
    Relation(entity="person 1", relation="birth place", target="place 1"),
]
_sample_person = Entity(
    identifier="person 1",
    type="person",
    constraints=[
        Constraint(property="birth_date", value="1990", modifier="greater_than")
    ],
)
_sample_entities = [
    _sample_person,
    Entity(identifier="work 1", type="work", constraints=[]),
    Entity(identifier="place 1", type="place", constraints=[]),
]
ERL_SAMPLE = EntitiesRelations(relations=_sample_relations, entities=_sample_entities)

# Worked example as a pair: (user query, expected assistant reply as JSON text).
ERL_PROMPT_EXAMPLE = (
    "the birth place of an author of a work where the author is born after 1990",
    ERL_SAMPLE.model_dump_json(),
)

In [7]:
query = "a person who is the ceo of a company and the birthplace of the ceo"

# Conversation layout: system instruction, one worked example (question and
# answer), then the actual query.
_example_question, _example_answer = ERL_PROMPT_EXAMPLE
_erl_messages = [
    {"role": "system", "content": ERL_PROMPT_SYSTEM},
    {"role": "user", "content": _example_question},
    {"role": "assistant", "content": _example_answer},
    {"role": "user", "content": query},
]

# The grammar restricts the reply to the EntitiesRelations JSON shape.
response = model.create_chat_completion(
    messages=_erl_messages,
    grammar=grammar_erl,
    max_tokens=-1,
)
response_msg = response["choices"][0]["message"]["content"]

llama_perf_context_print:        load time =    2297.49 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   222 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /   237 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =   18305.25 ms /   459 tokens


In [8]:
# Show the raw JSON text returned by the unconstrained extraction run.
print(response_msg)

{
  "relations": [
    {
      "entity": "person 1",
      "relation": "CEO",
      "target": "company 1"
    },
    {
      "entity": "person 1",
      "relation": "birth place",
      "target": "place 1"
    }
  ],
  "entities": [
    {
      "identifier": "person 1",
      "type": "person",
      "constraints": [
        {
          "property": "role",
          "value": "CEO",
          "modifier": "of company 1"
        }
      ]
    },
    {
      "identifier": "company 1",
      "type": "company",
      "constraints": [
        {
          "property": "CEO",
          "value": "person 1",
          "modifier": null
        }
      ]
    },
    {
      "identifier": "place 1",
      "type": "place",
      "constraints": [
        {
          "property": "birth place",
          "value": "person 1",
          "modifier": null
        }
      ]
    }
  ]
}


In [9]:


# Parse the raw JSON reply back into the typed EntitiesRelations model and
# display the parsed object as the cell result.
erl_llm = EntitiesRelations.model_validate_json(response_msg)
erl_llm

EntitiesRelations(relations=[Relation(entity='person 1', relation='CEO', target='company 1'), Relation(entity='person 1', relation='birth place', target='place 1')], entities=[Entity(identifier='person 1', type='person', constraints=[Constraint(property='role', value='CEO', modifier='of company 1')]), Entity(identifier='company 1', type='company', constraints=[Constraint(property='CEO', value='person 1', modifier=None)]), Entity(identifier='place 1', type='place', constraints=[Constraint(property='birth place', value='person 1', modifier=None)])])

In [10]:
class CandidateRelation(Relation):
    """A relation suggested by the ontology search, with the score of the hit."""
    # Score taken from the search result.
    # NOTE(review): whether higher or lower is better is not visible here — confirm.
    score: float


class CandidateConstraint(Constraint):
    """A property suggested by the ontology search as a possible constraint."""
    # Score taken from the search result.
    score: float
    # Property type of the matched link (the example data uses "sko:DateTime").
    type: str

class CandidateEntity(pydantic.BaseModel):
    """An entity type suggested by the ontology search.

    Unlike Entity, this carries no identifier or constraints.
    """
    # Score taken from the search result.
    score: float
    # Label of the matched ontology subject (e.g. "person").
    type: str


class Candidates(pydantic.BaseModel):
    """All search suggestions collected for one query, grouped by kind."""
    # Suggested relations between ontology subjects.
    relations: list[CandidateRelation]
    # Suggested entity types.
    entities: list[CandidateEntity]
    # Suggested constraint properties.
    constraints: list[CandidateConstraint]


# Compile a grammar for the Candidates model.
# NOTE(review): `grammar_candidates` is not referenced again in the visible
# part of the notebook — confirm it is still needed.
gbnf_candidates = generate_gbnf_grammar_from_pydantic_models(
    [Candidates], "Candidates", add_inner_thoughts=False
)
grammar_candidates = LlamaGrammar.from_string(gbnf_candidates)

In [11]:
# Maximum number of hits requested from each fuzzy search.
example_limit = 3
relation_candidates: list[CandidateRelation] = []
constraint_candidates: list[CandidateConstraint] = []
entity_candidates: list[CandidateEntity] = []


def _fuzzy_results(text: str, return_type, relation_type):
    """Run one fuzzy ontology search and return its results.

    Args:
        text: Natural-language search text.
        return_type: RETURN_TYPE member forwarded as the query's `type`.
        relation_type: RELATION_TYPE member forwarded as the query's `relation_type`.

    Returns:
        The `results` attribute of the search response.
    """
    found = topic_man.search_fuzzy(
        query=FuzzyQuery(
            q=text,
            limit=example_limit,
            type=return_type,
            relation_type=relation_type,
        )
    )
    return found.results


def _constraint_query_text(entity: Entity, constraint: Constraint) -> str:
    """Build the search sentence for one constraint of an entity.

    `modifier` and `value` are optional; missing ones are left out instead of
    being rendered as the literal text "None". The entity type fills the noun
    slot after "of", which the earlier template left empty ("... of is ...").
    """
    parts = [
        f"The {constraint.property} of {entity.type} is",
        constraint.modifier,
        constraint.value,
    ]
    return " ".join(part for part in parts if part)


# Relations: search for ontology links resembling each extracted relation.
for relation in erl_llm.relations:
    relation_candidates.extend(
        CandidateRelation(
            entity=res.link.from_subject.label,
            relation=res.link.label,
            target=res.link.to_subject.label,
            score=res.score,
        )
        for res in _fuzzy_results(
            f"A {relation.entity} is {relation.relation} of {relation.target}",
            RETURN_TYPE.LINK,
            RELATION_TYPE.INSTANCE,
        )
    )

# Entities: search for subjects matching each entity type, then for properties
# matching each of the entity's constraints.
for entity in erl_llm.entities:
    entity_candidates.extend(
        CandidateEntity(type=res.subject.label, score=res.score)
        for res in _fuzzy_results(
            f"A {entity.type}",
            RETURN_TYPE.SUBJECT,
            RELATION_TYPE.INSTANCE,
        )
    )
    for constraint in entity.constraints:
        constraint_candidates.extend(
            CandidateConstraint(
                property=res.link.label,
                # Only the property itself is suggested; value/modifier stay empty.
                value=None,
                modifier=None,
                score=res.score,
                type=res.link.to_proptype,
            )
            for res in _fuzzy_results(
                _constraint_query_text(entity, constraint),
                RETURN_TYPE.LINK,
                RELATION_TYPE.PROPERTY,
            )
        )

candidates = Candidates(
    relations=relation_candidates,
    entities=entity_candidates,
    constraints=constraint_candidates,
)

Some weights of the model checkpoint at dunzhang/stella_en_400M_v5 were not used when initializing NewModel: ['new.pooler.dense.bias', 'new.pooler.dense.weight']
- This IS expected if you are initializing NewModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing NewModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [12]:
# Pretty-print every collected candidate as indented JSON for inspection.
print(candidates.model_dump_json(indent=2))

{
  "relations": [
    {
      "entity": "organisation",
      "relation": "chief executive officer",
      "target": "person",
      "score": 0.33333299950285533
    },
    {
      "entity": "organisation",
      "relation": "chairperson",
      "target": "person",
      "score": 0.4677414181306755
    },
    {
      "entity": "person",
      "relation": "employer",
      "target": "organisation",
      "score": 0.4793328955147912
    },
    {
      "entity": "person",
      "relation": "place of burial",
      "target": "place",
      "score": 0.4045288449592698
    },
    {
      "entity": "person",
      "relation": "education place",
      "target": "place",
      "score": 0.4122799761982181
    },
    {
      "entity": "animal",
      "relation": "birth place",
      "target": "place",
      "score": 0.41459560255855243
    }
  ],
  "entities": [
    {
      "score": 0.3762859689649488,
      "type": "person"
    },
    {
      "score": 0.3820784245203156,
      "type": "business

In [48]:
import types


def build_constrained_classes(candidates: Candidates) -> type[pydantic.BaseModel]:
    """Build an EntitiesRelations-shaped model restricted to the given candidates.

    Three enums are created from the candidates (entity types, constraint
    properties, relation names) and used as field types, so a grammar generated
    from the returned model only admits those labels.

    Args:
        candidates: Search suggestions whose labels define the allowed values.

    Returns:
        The dynamically created ``ConstrainedEntitiesRelations`` model class
        (a class, not an instance).
    """
    # NOTE(review): `Enum` is not imported in this cell; it is presumably
    # provided by one of the notebook's star imports — confirm.
    # Dict comprehensions de-duplicate labels that occur more than once.
    ALLOWED_ENTITY_TYPES = Enum(
        "ALLOWED_ENTITY_TYPES",
        {e.type: e.type for e in candidates.entities},
    )
    ALLOWED_CONSTRAINT_TYPES = Enum(
        "ALLOWED_CONSTRAINT_TYPES",
        {c.property: c.property for c in candidates.constraints},
    )
    ALLOWED_RELATION_TYPES = Enum(
        "ALLOWED_RELATION_TYPES",
        {r.relation: r.relation for r in candidates.relations},
    )

    # Field order is kept exactly as before because it determines the order of
    # the keys in the generated grammar.
    ConstrainedRelation = pydantic.create_model(
        "ConstrainedRelation",
        entity=(str, ...),
        relation=(ALLOWED_RELATION_TYPES, ...),
        target=(str, ...),
    )
    ConstrainedConstraint = pydantic.create_model(
        "ConstrainedConstraint",
        property=(ALLOWED_CONSTRAINT_TYPES, ...),
        # Required but nullable, matching Constraint, so that "no value" can be
        # expressed as null rather than as an empty string.
        value=(str | None, ...),
        modifier=(str | None, ...),
    )
    ConstrainedEntity = pydantic.create_model(
        "ConstrainedEntity",
        type=(ALLOWED_ENTITY_TYPES, ...),
        identifier=(str, ...),
        constraints=(list[ConstrainedConstraint], []),
    )
    ConstrainedEntitiesRelations = pydantic.create_model(
        "ConstrainedEntitiesRelations",
        relations=(list[ConstrainedRelation], []),
        entities=(list[ConstrainedEntity], []),
    )

    return ConstrainedEntitiesRelations


# Build the restricted schema from the candidates collected for the current
# query and compile the matching grammar.
ConstrainedEntitiesRelations = build_constrained_classes(candidates)
grammar_constrained = LlamaGrammar.from_string(
    generate_gbnf_grammar_from_pydantic_models(
        [ConstrainedEntitiesRelations], "ConstrainedEntitiesRelations", add_inner_thoughts=False
    )
)
# NOTE(review): `_grammar` is a private attribute, read here only for inspection.
print(grammar_constrained._grammar)

root ::= constrained-entities-relations
constrained-entities-relations ::= (" "| "\n") "{" ws "\"ConstrainedEntitiesRelations\""  ": " ws grammar-models
grammar-models ::= constrained-entities-relations-grammar-model
constrained-entities-relations-grammar-model ::= "\"ConstrainedEntitiesRelations\"" "," ws "\"None\"" ": " constrained-entities-relations

constrained-entities-relations ::= "{"  ws "\"relations\"" ": " constrained-entities-relations-relations ","  ws "\"entities\"" ": " constrained-entities-relations-entities  ws "}"
constrained-relation ::= "{"  ws "\"entity\"" ": " string ","  ws "\"relation\"" ": " constrained-relation-relation ","  ws "\"target\"" ": " string  ws "}"
constrained-relation-relation ::= "\"chief executive officer\"" | "\"chairperson\"" | "\"employer\"" | "\"place of burial\"" | "\"education place\"" | "\"birth place\""
constrained-entities-relations-relations ::= "[" ws (constrained-relation)? ("," ws constrained-relation)* ws "]" 
constrained-entity ::=

In [49]:
# Instruction sent as the system message of the constrained extraction run.
RAG_PROMPT_SYSTEM = """
Return all the entities, relations, and constraints within the query in the form of JSON output. The output should be a list of all entities and their relations between them, with additional constraints if they are present in the query.
"""

# --- Example candidate vocabulary for the worked example ---------------------
_example_candidate_relations = [
    CandidateRelation(entity="person 1", relation="author", target="work 1", score=0.9),
]
_example_candidate_entities = [
    CandidateEntity(type=label, score=score)
    for label, score in (
        ("person", 0.9),
        ("composer", 0.7),
        ("work", 0.9),
        ("song", 0.6),
    )
]
_example_candidate_constraints = [
    CandidateConstraint(
        property="birth date",
        value="1990",
        modifier="greater_than",
        score=0.9,
        type="sko:DateTime",
    ),
    # NOTE(review): "180cm" is an odd value for a "death year" of type
    # sko:DateTime — looks like a copy-paste slip; confirm before relying on it.
    CandidateConstraint(
        property="death year",
        value="180cm",
        modifier="greater_than",
        score=0.8,
        type="sko:DateTime",
    ),
]
RAG_PROMPT_EXAMPLE_CANDIDATES = Candidates(
    relations=_example_candidate_relations,
    entities=_example_candidate_entities,
    constraints=_example_candidate_constraints,
)

# --- Expected answer for the worked example -----------------------------------
_expected_relations = [
    Relation(entity="person 1", relation="author", target="work 1"),
    Relation(entity="person 1", relation="birth place", target="place 1"),
]
_expected_person = Entity(
    identifier="person 1",
    type="person",
    constraints=[
        Constraint(property="birth date", value="1.1.1990", modifier="greater_than"),
    ],
)
RAG_PROMPT_EXPECTED_OUTPUT = EntitiesRelations(
    relations=_expected_relations,
    entities=[
        _expected_person,
        Entity(identifier="work 1", type="work", constraints=[]),
        Entity(identifier="place 1", type="place", constraints=[]),
    ],
)

# Worked example as a triple: (user query, candidates JSON, expected reply JSON).
RAG_PROMPT_EXAMPLE = (
    "the birth place of an author of a work where the author is born after 1990",
    RAG_PROMPT_EXAMPLE_CANDIDATES.model_dump_json(),
    RAG_PROMPT_EXPECTED_OUTPUT.model_dump_json(),
)

In [50]:
# Display the worked example triple (query, candidates JSON, expected reply JSON).
RAG_PROMPT_EXAMPLE

('the birth place of an author of a work where the author is born after 1990',
 '{"relations":[{"entity":"person 1","relation":"author","target":"work 1","score":0.9}],"entities":[{"score":0.9,"type":"person"},{"score":0.7,"type":"composer"},{"score":0.9,"type":"work"},{"score":0.6,"type":"song"}],"constraints":[{"property":"birth date","value":"1990","modifier":"greater_than","score":0.9,"type":"sko:DateTime"},{"property":"death year","value":"180cm","modifier":"greater_than","score":0.8,"type":"sko:DateTime"}]}',
 '{"relations":[{"entity":"person 1","relation":"author","target":"work 1"},{"entity":"person 1","relation":"birth place","target":"place 1"}],"entities":[{"identifier":"person 1","type":"person","constraints":[{"property":"birth date","value":"1.1.1990","modifier":"greater_than"}]},{"identifier":"work 1","type":"work","constraints":[]},{"identifier":"place 1","type":"place","constraints":[]}]}')

In [51]:
print(query)

# Same conversation layout as the unconstrained run: system instruction, one
# worked example, then the real query.
# NOTE(review): RAG_PROMPT_EXAMPLE[1] (the candidates JSON) is not sent to the
# model; only the grammar carries the candidate vocabulary — confirm intended.
_rag_messages = [
    {"role": "system", "content": RAG_PROMPT_SYSTEM},
    {"role": "user", "content": RAG_PROMPT_EXAMPLE[0]},
    {"role": "assistant", "content": RAG_PROMPT_EXAMPLE[2]},
    {"role": "user", "content": query},
]
response = model.create_chat_completion(
    messages=_rag_messages,
    grammar=grammar_constrained,
    max_tokens=-1,
)
response_msg = response["choices"][0]["message"]["content"]

a person who is the ceo of a company and the birthplace of the ceo


Llama.generate: 203 prefix-match hit, remaining 1 prompt tokens to eval
llama_perf_context_print:        load time =    2297.49 ms
llama_perf_context_print: prompt eval time =       0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /   244 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =   21511.02 ms /   245 tokens


In [52]:
# Show the raw JSON text returned by the grammar-constrained run.
print(response_msg)

{
  "relations": [
    {
      "entity": "person 1",
      "relation": "chief executive officer",
      "target": "company 1"
    },
    {
      "entity": "person 1",
      "relation": "birth place",
      "target": "place 1"
    }
  ],
  "entities": [
    {
      "type": "person",
      "identifier": "person 1",
      "constraints": [
        {
          "property": "birth name",
          "value": "",
          "modifier": ""
        }
      ]
    },
    {
      "type": "company",
      "identifier": "company 1",
      "constraints": [
        {
          "property": "number of entities of Person class born in the place",
          "value": "",
          "modifier": ""
        }
      ]
    },
    {
      "type": "place",
      "identifier": "place 1",
      "constraints": [
        {
          "property": "number of entities of Person class born in the place",
          "value": "",
          "modifier": ""
        }
      ]
    }
  ]
}


In [53]:
# Parse the constrained reply with the dynamically built model.
# NOTE(review): the result is an instance of ConstrainedEntitiesRelations,
# which is created without EntitiesRelations as a base, so the annotation
# below only describes the shape of the data, not its actual class.
query_constrained:EntitiesRelations=ConstrainedEntitiesRelations.model_validate_json(response_msg)

In [54]:
class EnrichedConstraint(Constraint):
    """A constraint together with the ontology link looked up for its property."""
    # Ontology link matched on the property label; the type allows None.
    constraint: SubjectLink|None
class EnrichedEntity(Entity):
    """An entity together with the ontology subject looked up for its type."""
    # Ontology subject resolved for this entity.
    subject: Subject
    # Overrides the inherited field so the constraints are the enriched variant.
    constraints: list[EnrichedConstraint]
class EnrichedRelation(Relation):
    """A relation together with the ontology link looked up for its name."""
    # Ontology link matched on the relation label; the type allows None.
    link: SubjectLink|None

class EnrichedEntitiesRelations(pydantic.BaseModel):
    """Same shape as EntitiesRelations, but holding the enriched variants."""
    # Relations with their ontology links attached.
    relations: list[EnrichedRelation]
    # Entities with their ontology subjects and enriched constraints attached.
    entities: list[EnrichedEntity]
    

In [56]:
def enrich_entities_relations(erl: EntitiesRelations) -> EnrichedEntitiesRelations:
    """Attach ontology records to every relation, entity and constraint.

    Relations and constraints are matched to a ``SubjectLinkDB`` row by label,
    entities to a ``SubjectInDB`` row by their type label. Only the first
    matching row is used in each case.

    Args:
        erl: Extracted entities and relations to enrich.

    Returns:
        A structure mirroring ``erl`` with the looked-up records attached.
        Relations and constraints without a matching link get ``None``.

    Raises:
        LookupError: If no subject row matches an entity's type.
    """
    with topic_man.engine.begin() as session:

        def find_link(label) -> SubjectLink | None:
            """Return the first link whose label equals `label`, or None if there is none."""
            link_db = session.execute(
                select(SubjectLinkDB)
                .where(
                    SubjectLinkDB.label == label,
                )
                .limit(1)
            ).first()
            # `.first()` yields None when nothing matches; the enriched models
            # allow a missing link, so do not hand None to `from_db`.
            if link_db is None:
                return None
            return SubjectLink.from_db(link_db, topic_man.oman)

        def enrich_relation(relation: Relation) -> EnrichedRelation:
            """Pair a relation with the link matching its relation name."""
            return EnrichedRelation(
                link=find_link(relation.relation), **relation.model_dump()
            )

        def enrich_constraint(constraint: Constraint) -> EnrichedConstraint:
            """Pair a constraint with the link matching its property name."""
            return EnrichedConstraint(
                constraint=find_link(constraint.property), **constraint.model_dump()
            )

        def enrich_entity(entity: Entity) -> EnrichedEntity:
            """Pair an entity with the subject matching its type and enrich its constraints."""
            subject_db = session.execute(
                select(SubjectInDB)
                .where(
                    SubjectInDB.label == entity.type,
                )
                .limit(1)
            ).first()
            # The subject is mandatory on EnrichedEntity, so fail with a clear
            # message instead of an AttributeError on None.
            if subject_db is None:
                raise LookupError(
                    f"no ontology subject found with label {entity.type!r}"
                )
            return EnrichedEntity(
                subject=topic_man.oman.enrich_subject(subject_db.subject_id),
                constraints=[enrich_constraint(c) for c in entity.constraints],
                # `exclude` is documented as a set or dict, not a list.
                **entity.model_dump(exclude={"constraints"}),
            )

        return EnrichedEntitiesRelations(
            relations=[enrich_relation(r) for r in erl.relations],
            entities=[enrich_entity(e) for e in erl.entities],
        )
# Enrich the grammar-constrained extraction result with ontology records.
enriched_erl= enrich_entities_relations(query_constrained)

In [57]:
# Display the enriched structure as the cell result.
enriched_erl

EnrichedEntitiesRelations(relations=[EnrichedRelation(entity='person 1', relation='chief executive officer', target='company 1', link=SubjectLink(link_id=1096, label='chief executive officer', from_id='<http://dbpedia.org/ontology/Organisation>', link_type='ObjectProperty', to_id='<http://dbpedia.org/ontology/Person>', to_proptype=None, property_id='<http://dbpedia.org/ontology/ceo>', from_subject=Subject(subject_id='<http://dbpedia.org/ontology/Organisation>', label='organisation', spos={'rdfs:label': Property(property='rdfs:label', label=None, values=[PropertyValue(value='organisation', label=None)]), 'rdfs:subClassOf': Property(property='rdfs:subClassOf', label=None, values=[PropertyValue(value='<http://dbpedia.org/ontology/Agent>', label='agent')])}, subject_type='class', refcount=0, descendants={}, total_descendants=0, properties={}, instance_count=352864), to_subject=Subject(subject_id='<http://dbpedia.org/ontology/Person>', label='person', spos={'rdfs:label': Property(property='