In [25]:
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

In [26]:

# Run on the GPU when one is present, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
    print("GPU available")
else:
    device = torch.device("cpu")

# Load the KnowGL checkpoint and place the model on the selected device.
tokenizer = AutoTokenizer.from_pretrained("ibm/knowgl-large")
model = AutoModelForSeq2SeqLM.from_pretrained("ibm/knowgl-large").to(device)

input_text = "A maidservant pours milk, entirely absorbed in her work. Except for the stream of milk, everything else is still."

# The tokenized inputs must live on the same device as the model.
inputs = tokenizer(input_text, return_tensors="pt").to(device)

num_beams = 10
output = model.generate(**inputs, max_length=1000, num_beams=num_beams)

# Decode the first returned sequence. The generated ids are already on
# `device`; forcing them onto "cuda" here would crash on CPU-only machines,
# which defeats the fallback selected above.
decoded_output = tokenizer.decode(output[0], skip_special_tokens=True)

print(decoded_output)

GPU available
[(maidservant#Maidservant#profession)|field of this occupation|(milk#Milk#food)]$[(milk#Milk#food)|practiced by|(maidservant#Maidservant#profession)]


In [28]:
def _parse_entity(part):
    """Turn one ``(a#b#c)`` group into ``{"name": a, "category": c}``.

    The first ``#``-separated field is used as the name and the third as the
    category. ``category`` is ``None`` when fewer than three fields exist.
    """
    part = part.strip()
    # Remove only the single pair of wrapping parentheses. str.strip("()")
    # would also eat parentheses that belong to the first or last field.
    if part.startswith("(") and part.endswith(")"):
        part = part[1:-1]
    fields = part.split("#")
    return {
        "name": fields[0],
        "category": fields[2] if len(fields) > 2 else None,
    }


def parse_string(s):
    """Parse the first triple of a ``$``-separated statement string.

    The expected shape of one statement is
    ``[(name#label#category)|relation|(name#label#category)]``. Only the
    statement before the first ``$`` is parsed; any further ones are ignored.

    Args:
        s: The linearised statement string.

    Returns:
        A dict with the keys ``"subject"``, ``"verb"`` and ``"object"`` (in
        that order). Subject and object map to ``{"name", "category"}``;
        verb maps to ``{"name"}``.

    Raises:
        ValueError: If the first statement does not consist of exactly three
            ``|``-separated parts (for example when ``s`` is empty).
    """
    first_statement = s.split("$")[0].strip().strip("[]")
    parts = first_statement.split("|")
    if len(parts) != 3:
        raise ValueError(
            f"Expected '[subject|relation|object]', got {first_statement!r}"
        )
    subject_part, verb_part, object_part = parts
    return {
        "subject": _parse_entity(subject_part),
        # The relation is not wrapped in parentheses, so it is taken verbatim
        # (minus surrounding whitespace) rather than stripped of "(" / ")".
        "verb": {"name": verb_part.strip()},
        "object": _parse_entity(object_part),
    }


# Parse the decoded model output into a subject/verb/object dict and show it.
parsed_statement = parse_string(decoded_output)
print(parsed_statement)

{'subject': {'name': 'maidservant', 'category': 'profession'}, 'verb': {'name': 'field of this occupation'}, 'object': {'name': 'milk', 'category': 'food'}}


In [30]:
import requests
import json

# Endpoints of the Wikidata reconciliation service used below.
RECONCILE_URL = "https://wikidata.reconci.link/en/api"
PROPERTY_SUGGEST_URL = "https://wikidata.reconci.link/en/suggest/property"
# Seconds to wait per request so a stalled connection cannot hang the notebook.
REQUEST_TIMEOUT = 10

# Enrich every part of the parsed statement, in place, with its best Wikidata
# match: an item for subject/object, a property for the verb.
for key, value in parsed_statement.items():
    if "name" not in value:
        continue
    query = value["name"]

    if key == "subject" or key == "object":
        print("Querying Wikidata for: " + query)
        # Let json.dumps and requests do the escaping. Interpolating the raw
        # name into a pre-encoded URL breaks on quotes, '&', '#', etc.
        response = requests.get(
            RECONCILE_URL,
            params={"queries": json.dumps({"q0": {"query": query, "limit": 1}})},
            timeout=REQUEST_TIMEOUT,
        )
        response.raise_for_status()
        candidates = response.json()["q0"]["result"]
        if not candidates:
            # The graph cell needs an id for both entities, so fail here with
            # a readable message instead of an IndexError on [0].
            raise LookupError(f"No Wikidata entity found for {query!r}")
        best = candidates[0]
        value["result"] = {
            # Tolerate a candidate that carries no description.
            "description": best.get("description"),
            "id": best["id"],
            "wd_name": best["name"],
        }
    elif key == "verb":
        print("Querying Wikidata for: " + query)
        response = requests.get(
            PROPERTY_SUGGEST_URL,
            params={"prefix": query},
            timeout=REQUEST_TIMEOUT,
        )
        response.raise_for_status()
        suggestions = response.json()["result"]
        # Test the list itself: indexing [0] on an empty list raises before
        # the fallback branch can ever run.
        if suggestions:
            # Kept as a 1-tuple to preserve the shape this cell has always
            # produced (json.dumps renders it as a one-element list).
            value["result"] = (suggestions[0],)
        else:
            value["result"] = "No matching property found."


print(parsed_statement)

Querying Wikidata for: maidservant
Querying Wikidata for: field of this occupation
Querying Wikidata for: milk
{'subject': {'name': 'maidservant', 'category': 'profession', 'result': {'description': "female employee household work in the employer's home", 'id': 'Q833860', 'wd_name': 'maid'}}, 'verb': {'name': 'field of this occupation', 'result': ({'description': 'field corresponding to this occupation or profession (use only for occupations/professions - for people use Property:P101, for companies use P425)', 'id': 'P425', 'name': 'field of this occupation'},)}, 'object': {'name': 'milk', 'category': 'food', 'result': {'description': 'white liquid produced by the mammary glands of mammals', 'id': 'Q8495', 'wd_name': 'milk'}}}


In [31]:
# Pretty-print the statement as JSON with a four-space indent.
print(json.dumps(parsed_statement, indent=4))


{
    "subject": {
        "name": "maidservant",
        "category": "profession",
        "result": {
            "description": "female employee household work in the employer's home",
            "id": "Q833860",
            "wd_name": "maid"
        }
    },
    "verb": {
        "name": "field of this occupation",
        "result": [
            {
                "description": "field corresponding to this occupation or profession (use only for occupations/professions - for people use Property:P101, for companies use P425)",
                "id": "P425",
                "name": "field of this occupation"
            }
        ]
    },
    "object": {
        "name": "milk",
        "category": "food",
        "result": {
            "description": "white liquid produced by the mammary glands of mammals",
            "id": "Q8495",
            "wd_name": "milk"
        }
    }
}


In [None]:
from pyvis.network import Network

# Draw the resolved triple as a two-node graph: the Wikidata ids are the node
# keys, the Wikidata names the node labels, and the relation name the edge label.
graph = Network(height="800px", width="100%", notebook=True)

subject_hit = parsed_statement["subject"]["result"]
graph.add_node(subject_hit["id"], label=subject_hit["wd_name"])

object_hit = parsed_statement["object"]["result"]
graph.add_node(object_hit["id"], label=object_hit["wd_name"])

graph.add_edge(
    subject_hit["id"],
    object_hit["id"],
    label=parsed_statement["verb"]["name"],
)

# Kept as the cell's last expression so Jupyter displays whatever show() returns.
graph.show("knowledge_graph.html")
