In [6]:
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

In [7]:
if torch.cuda.is_available():
    device = torch.device("cuda")
    print("GPU available")
elif torch.backends.mps.is_available():
    device = torch.device("mps")
    print("MPS available")
else:
    device = torch.device("cpu")

tokenizer = AutoTokenizer.from_pretrained("ibm/knowgl-large")
model = AutoModelForSeq2SeqLM.from_pretrained("ibm/knowgl-large").to(device)

GPU available


In [8]:
input_text = """The painting shows the Last Judgement, with God the Father at the top centre and Jesus just below him. 
To Jesus' right is his mother the Virgin Mary and to his left is Moses, holding the tablets showing the Ten Commandments. 
Rising up the left-hand side of the painting (at Jesus' right hand) are the blessed, whilst the damned fall into hell on the right-hand side. 
At the base of the painting are the dead, dying and those just resurrected. """

In [9]:
import nltk

nltk.download("punkt") 
from nltk.tokenize import sent_tokenize

# Split the input text into sentences using nltk tokenizer
sentences = sent_tokenize(input_text)
print(sentences)



['The painting shows the Last Judgement, with God the Father at the top centre and Jesus just below him.', "To Jesus' right is his mother the Virgin Mary and to his left is Moses, holding the tablets showing the Ten Commandments.", "Rising up the left-hand side of the painting (at Jesus' right hand) are the blessed, whilst the damned fall into hell on the right-hand side.", 'At the base of the painting are the dead, dying and those just resurrected.']


[nltk_data] Downloading package punkt to /home/kristbaum/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [10]:
decoded_outputs = []

for sentence in sentences:
    inputs = tokenizer(sentence, return_tensors="pt").to(device)

    num_beams = 15
    output = model.generate(**inputs, max_length=1000, num_beams=num_beams)

    decoded_output = tokenizer.decode(output[0].to(device), skip_special_tokens=True)
    decoded_outputs.append(decoded_output)

print(decoded_outputs)

['[(God the Father#God the Father#God)|child|(Jesus#Jesus#human biblical figure)]', '[(Jesus#Jesus#human biblical figure)|mother|(Virgin Mary#Mary, mother of Jesus#human biblical figure)]$[(Moses#Moses#human biblical figure)|mother|(Virgin Mary#Mary, mother of Jesus#human biblical figure)]$[(Ten Commandments#Ten Commandments#commandment)|author|(Moses#Moses#human biblical figure)]', '[(blessed#Blessed#None)|opposite of|(damned#Damnation#punishment)]$[(damned#Damnation#punishment)|opposite of|(blessed#Blessed#None)]', '[(resurrection#Resurrection#occurrence)|opposite of|(dead#Death#biological process)]']


In [17]:
def parse_string(s):
    s = s.strip("[]")
    # Split into subject, relation, object
    parts = s.split("|")
    result = {}
    for i, part in enumerate(parts):
        part = part.strip("()")
        mention_label_type = part.split("#")
        if i == 0:
            result["subject"] = {
                "mention": mention_label_type[0],
                "label": mention_label_type[1],
                "type": mention_label_type[2],
            }
        elif i == 1:
            result["relation"] = {"label": mention_label_type[0]}
        else:
            result["object"] = {
                "mention": mention_label_type[0],
                "label": mention_label_type[1],
                "type": mention_label_type[2],
            }
    return result


statements = []
for decoded_output in decoded_outputs:
    single_statement_text = decoded_output.split("$")
    for statement_text in single_statement_text:
        parsed_statement = parse_string(statement_text)
        statements.append(parsed_statement)
        print(parsed_statement)




{'subject': {'mention': 'God the Father', 'label': 'God the Father', 'type': 'God'}, 'relation': {'label': 'child'}, 'object': {'mention': 'Jesus', 'label': 'Jesus', 'type': 'human biblical figure'}}
{'subject': {'mention': 'Jesus', 'label': 'Jesus', 'type': 'human biblical figure'}, 'relation': {'label': 'mother'}, 'object': {'mention': 'Virgin Mary', 'label': 'Mary, mother of Jesus', 'type': 'human biblical figure'}}
{'subject': {'mention': 'Moses', 'label': 'Moses', 'type': 'human biblical figure'}, 'relation': {'label': 'mother'}, 'object': {'mention': 'Virgin Mary', 'label': 'Mary, mother of Jesus', 'type': 'human biblical figure'}}
{'subject': {'mention': 'Ten Commandments', 'label': 'Ten Commandments', 'type': 'commandment'}, 'relation': {'label': 'author'}, 'object': {'mention': 'Moses', 'label': 'Moses', 'type': 'human biblical figure'}}
{'subject': {'mention': 'blessed', 'label': 'Blessed', 'type': 'None'}, 'relation': {'label': 'opposite of'}, 'object': {'mention': 'damned',

In [18]:
import requests
import json

for key, value in statements.items():
    if "name" in value:
        if key == "subject" or key == "object":
            query = value["name"]
            print("Querying Wikidata for: " + query)
            url = f"https://wikidata.reconci.link/en/api?queries=%7B%22q0%22%3A%7B%22query%22%3A%22{query}%22%2C%22limit%22%3A1%7D%7D"
            response = requests.get(url)
            data = json.loads(response.text)
            value["result"] = {
                "description": data["q0"]["result"][0]["description"],
                "id": data["q0"]["result"][0]["id"],
                "wd_name": data["q0"]["result"][0]["name"],
            }
        elif key == "relation":
            query = value["name"]
            print("Querying Wikidata for: " + query)
            url = f"https://wikidata.reconci.link/en/suggest/property?prefix={query}"
            response = requests.get(url)
            data = json.loads(response.text)
            if data["result"][0]:
                value["result"] = (data["result"][0],)
            else:
                value["result"] = "No matching property found."


print(parsed_statement)

AttributeError: 'list' object has no attribute 'items'

In [None]:
print(json.dumps(parsed_statement, indent=4))


{
    "subject": {
        "name": "maidservant",
        "category": "profession",
        "result": {
            "description": "female employee household work in the employer's home",
            "id": "Q833860",
            "wd_name": "maid"
        }
    },
    "verb": {
        "name": "field of this occupation",
        "result": [
            {
                "description": "field corresponding to this occupation or profession (use only for occupations/professions - for people use Property:P101, for companies use P425)",
                "id": "P425",
                "name": "field of this occupation"
            }
        ]
    },
    "object": {
        "name": "milk",
        "category": "food",
        "result": {
            "description": "white liquid produced by the mammary glands of mammals",
            "id": "Q8495",
            "wd_name": "milk"
        }
    }
}


In [None]:
from pyvis.network import Network

graph = Network(height="800px", width="100%", notebook=True)

graph.add_node(parsed_statement["subject"]["result"]["id"], label=parsed_statement["subject"]["result"]["wd_name"])
graph.add_node(parsed_statement["object"]["result"]["id"], label=parsed_statement["object"]["result"]["wd_name"])
graph.add_edge(parsed_statement["subject"]["result"]["id"], parsed_statement["object"]["result"]["id"], label=parsed_statement["relation"]["name"])

graph.show("knowledge_graph.html")


knowledge_graph.html
