In [25]:
import csv
import os

import rdflib

import networkx as nx
from matplotlib import pyplot as plt

In [26]:
# TODO: Add comments and text blocks
SAVE_PATH = f"{os.getcwd()}/flat_kg.csv"
ROW_COL_HEADER = "node name"
TYPE_COL_HEADER = "type"
HEADER_IDX = 0
PRED_TYPE = "Predicate"
NODE_NAME_IDX = -1

In [27]:
def get_all_triples(kg, allow_literals: bool = False):
    optional_literal_filter = ""
    if not allow_literals:
        optional_literal_filter = "FILTER(!isLiteral(?object)) ."
    all_triple_query = f"""
    PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>

    SELECT DISTINCT ?subject ?predicate ?object ?predIsType
    WHERE {{
        ?subject ?predicate ?object .
        BIND(rdf:type = ?predicate AS ?predIsType) .
        {optional_literal_filter}
}}
"""
    return kg.query(all_triple_query)

In [28]:
def flatten_kg(kg):
    triples = get_all_triples(kg, allow_literals=False)
    type_dict = {}
    flat_graph = nx.DiGraph()
    for triple in triples:
        # Get the last bit after the last '/' because that is more readable and easier to work with
        subj = triple.subject.split("/")[NODE_NAME_IDX]
        pred = triple.predicate.split("/")[NODE_NAME_IDX]
        obj = triple.object.split("/")[NODE_NAME_IDX]
                
        if triple.predIsType:
            type_dict[subj] = obj
            continue
        if pred not in type_dict:
            type_dict[pred] = "Predicate"
        
        flat_graph.add_edge(subj, pred)
        flat_graph.add_edge(pred, obj)
    return flat_graph, type_dict

In [29]:
def get_kg():
    # TODO: Implement a real kg here, not a test one
    g = rdflib.Graph()

    rdf_text = """
    @prefix schema: <http://schema.org/> .
    @prefix dbo:   <http://dbpedia.org/ontology/> .
    @prefix :      <http://foo.com/> .
    @prefix rdf:   <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
    @prefix xsd:   <http://www.w3.org/2001/XMLSchema#> .
    @prefix rdfs:  <http://www.w3.org/2000/01/rdf-schema#> .
    @prefix foaf: <http://xmlns.com/foaf/0.1/> .
    
    :IDS  a            dbo:Organisation ;
            dbo:foundedBy  :Michel ;
            dbo:location   :Maastricht .
    
    :Michel  a            schema:Person ;
            dbo:birthDate   "1975-04-19"^^xsd:date;
            dbo:birthPlace  :Winnipeg .
    
    :Shervin  a            schema:Person ;
            dbo:birthPlace  :Ottawa .
    
    :Shervin foaf:knows :Michel .
    
    :Winnipeg  rdfs:label   "Winnipeg" ;
            a dbo:City ;
            dbo:isCityOf  :Canada .
    
    :Canada  rdfs:label           "Canada" ;
            a dbo:Country ;
            dbo:capital          :Ottawa ;
            dbo:populationTotal  "40000000"^^xsd:int .
    
    :Ottawa  rdfs:label   "Ottawa" ;
            a dbo:City ;
            dbo:isCityOf  :Canada .
    
    :Maastricht  rdfs:label   "Maastricht" ;
                 a dbo:City .
    """ 
    
    g.parse(data=rdf_text,format="turtle")
    return g

In [30]:
def display_flattened_graph(flat_graph):
    nx.draw(flat_graph, with_labels=True)
    plt.show()

In [31]:
knowledge_graph = get_kg()
flattened_graph, type_map = flatten_kg(knowledge_graph)

Ottawa isCityOf Canada
Shervin birthPlace Ottawa
Ottawa 22-rdf-syntax-ns#type City
Winnipeg 22-rdf-syntax-ns#type City
Maastricht 22-rdf-syntax-ns#type City
Canada 22-rdf-syntax-ns#type Country
Winnipeg isCityOf Canada
Canada capital Ottawa
IDS location Maastricht
Michel 22-rdf-syntax-ns#type Person
Shervin knows Michel
Shervin 22-rdf-syntax-ns#type Person
Michel birthPlace Winnipeg
IDS foundedBy Michel
IDS 22-rdf-syntax-ns#type Organisation


In [32]:
display_flattened_graph(flattened_graph)

ValueError: object __array__ method not producing an array

<Figure size 640x480 with 1 Axes>

In [33]:
def make_readable_adj_matrix(graph, type_dict):    
    adj_matrix = nx.adjacency_matrix(graph).todense().tolist()
    
    graph_nodes = list(graph.nodes)
    
    headers = [ROW_COL_HEADER]
    headers.extend(graph_nodes)
    headers.append(TYPE_COL_HEADER)
    
    adj_matrix.insert(HEADER_IDX, headers)
        
    for i, row in enumerate(adj_matrix):
        # The headers are in row 0
        if i == HEADER_IDX:
            continue
        # Subtract 1 because a header index is added
        node_idx = i - 1
        curr_node = graph_nodes[node_idx]
        row.insert(HEADER_IDX, curr_node)
        
        node_type = type_dict[curr_node]
        row.append(node_type)
    print(adj_matrix)
    return adj_matrix

In [34]:
def save_graph(flat_graph, type_dict):
    readable_adj_matrix = make_readable_adj_matrix(flat_graph, type_dict)
    
    with open(SAVE_PATH, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerows(readable_adj_matrix)

In [35]:
save_graph(flattened_graph, type_map)

[['node name', 'Ottawa', 'isCityOf', 'Canada', 'Shervin', 'birthPlace', 'Winnipeg', 'capital', 'IDS', 'location', 'Maastricht', 'knows', 'Michel', 'foundedBy', 'type'], ['Ottawa', 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 'City'], ['isCityOf', 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 'Predicate'], ['Canada', 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 'Country'], ['Shervin', 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 'Person'], ['birthPlace', 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 'Predicate'], ['Winnipeg', 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 'City'], ['capital', 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 'Predicate'], ['IDS', 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 'Organisation'], ['location', 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 'Predicate'], ['Maastricht', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 'City'], ['knows', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 'Predicate'], ['Michel', 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 'Person'], ['foundedBy', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 'Predicate']]
