In [1]:
import os
import io
import bz2
import gzip
import urllib
from tqdm import tqdm, trange
from itertools import combinations
import pandas as pd
from rdflib import Graph, URIRef, BNode, Literal
from rdflib.store import NO_STORE, VALID_STORE
from SPARQLWrapper import SPARQLWrapper, JSON

# 1. Extract properties to materialize

In [2]:
# Input locations, relative to the notebook's working directory.
# The four CSV files hold the property tables read in section 1.
subproperties_file = "../Data/interim/DBpedia/properties/DBpedia_subproperties.csv"
inverse_properties_file = "../Data/interim/DBpedia/properties/DBpedia_Wikidata_mapped_inverse_prop.csv"
symmetric_properties_file = "../Data/interim/DBpedia/properties/DBpedia_Wikidata_mapped_symmetric_prop.csv"
transitive_properties_file = "../Data/interim/DBpedia/properties/DBpedia_Wikidata_mapped_transitive_prop.csv"
# NOTE(review): ontology_file is not referenced in the visible part of the notebook — confirm it is still needed.
ontology_file = "../Data/raw/DBpedia/dbpedia_2016-10.nt"

In [3]:
def run_query(query):
    """Run a SPARQL query against the public DBpedia endpoint.

    Parameters
    ----------
    query : str
        SPARQL query text sent to ``http://dbpedia.org/sparql``.

    Returns
    -------
    pandas.DataFrame
        One row per result binding, flattened from the JSON response
        (``results["results"]["bindings"]``).
    """
    sparql = SPARQLWrapper("http://dbpedia.org/sparql")
    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    results = sparql.query().convert()
    # ``pd.json_normalize`` is the public entry point since pandas 1.0;
    # ``pd.io.json.json_normalize`` is the deprecated alias. Fall back to the
    # old location only when running on an older pandas.
    normalize = getattr(pd, "json_normalize", None) or pd.io.json.json_normalize
    return normalize(results["results"]["bindings"])

## 1.1. Extract subproperties

In [4]:
# Read the subproperty table from disk and preview its first rows.
subproperties = pd.read_csv(filepath_or_buffer=subproperties_file)
subproperties.head(n=5)

Unnamed: 0,Subproperty,SubpropertyLabel,Property,PropertyLabel
0,http://dbpedia.org/ontology/championInDoubleFe...,champion in double female,http://dbpedia.org/ontology/championInDouble,champion in double
1,http://dbpedia.org/ontology/championInDouble,champion in double,http://dbpedia.org/ontology/champion,champion
2,http://dbpedia.org/ontology/maximumDepth,maximum depth (μ),http://dbpedia.org/ontology/depth,depth (μ)
3,http://dbpedia.org/ontology/eastPlace,east place,http://dbpedia.org/ontology/closeTo,is close to
4,http://dbpedia.org/ontology/meshNumber,MeSH number,http://dbpedia.org/ontology/code,code


In [5]:
# Row count of the subproperty table (one row per subproperty/property pair).
subproperties_count = subproperties.shape[0]
print("Queried {} subproperties from DBPedia.".format(subproperties_count))

Queried 75 subproperties from DBPedia.


## 1.2. Extract inverse properties

In [6]:
# Read the inverse-property table from disk and preview its first rows.
inverse_properties = pd.read_csv(filepath_or_buffer=inverse_properties_file)
inverse_properties.head(n=5)

Unnamed: 0,DBpediaProp,DBPediaPropLabel,WikidataProp,WikidataPropLabel,DBpediaInvProp,DBpediaInvPropLabel,WikidataInvProp,WikidataInvPropLabel
0,http://dbpedia.org/ontology/child,child,http://www.wikidata.org/entity/P40,child,http://dbpedia.org/ontology/father,father,http://www.wikidata.org/entity/P22,father
1,http://dbpedia.org/ontology/child,child,http://www.wikidata.org/entity/P40,child,http://dbpedia.org/ontology/mother,mother,http://www.wikidata.org/entity/P25,mother
2,http://dbpedia.org/ontology/father,father,http://www.wikidata.org/entity/P22,father,http://dbpedia.org/ontology/child,child,http://www.wikidata.org/entity/P40,child
3,http://dbpedia.org/ontology/mother,mother,http://www.wikidata.org/entity/P25,mother,http://dbpedia.org/ontology/child,child,http://www.wikidata.org/entity/P40,child
4,http://dbpedia.org/ontology/followedBy,followed by,http://www.wikidata.org/entity/P156,followed by,http://dbpedia.org/ontology/follows,follows,http://www.wikidata.org/entity/P155,follows


In [7]:
# Row count of the inverse-property table (one row per property/inverse pair).
inverse_properties_count = inverse_properties.shape[0]
print("Queried {} inverse properties from DBPedia.".format(inverse_properties_count))

Queried 8 inverse properties from DBPedia.


## 1.3. Extract symmetric properties

In [8]:
# Read the symmetric-property table from disk and preview its first rows.
symmetric_properties = pd.read_csv(filepath_or_buffer=symmetric_properties_file)
symmetric_properties.head(n=5)

Unnamed: 0,DBpediaProp,DBpediaPropLabel,WikidataProp,WikidataPropLabel


In [9]:
# Row count of the symmetric-property table as loaded from the CSV.
symmetric_properties_count = symmetric_properties.shape[0]
print("Queried {} symmetric properties from DBPedia.".format(symmetric_properties_count))

Queried 0 symmetric properties from DBPedia.


In [10]:
# Hand-written list of symmetric DBpedia properties. It replaces the table
# loaded from the CSV (which printed 0 rows when this notebook was run).
sym_prop = [
    "http://dbpedia.org/ontology/neighboringMunicipality",
    "http://dbpedia.org/ontology/sisterCollege",
    "http://dbpedia.org/ontology/currentPartner",
]
symmetric_properties = pd.DataFrame({"DBpediaProp": sym_prop})
symmetric_properties

Unnamed: 0,DBpediaProp
0,http://dbpedia.org/ontology/neighboringMunicip...
1,http://dbpedia.org/ontology/sisterCollege
2,http://dbpedia.org/ontology/currentPartner


In [11]:
# Row count of the symmetric-property table currently bound to the name.
symmetric_properties_count = symmetric_properties.shape[0]
print("Queried {} symmetric properties from DBPedia.".format(symmetric_properties_count))

Queried 3 symmetric properties from DBPedia.


## 1.4. Extract transitive properties

In [12]:
# Read the transitive-property table from disk and preview its first rows.
transitive_properties = pd.read_csv(filepath_or_buffer=transitive_properties_file)
transitive_properties.head(n=5)

Unnamed: 0,DBpediaProp,DBpediaPropLabel,WikidataProp,WikidataPropLabel
0,http://dbpedia.org/ontology/city,city,http://www.wikidata.org/entity/P131,located in the administrative territorial entity
1,http://dbpedia.org/ontology/county,county,http://www.wikidata.org/entity/P131,located in the administrative territorial entity
2,http://dbpedia.org/ontology/district,district,http://www.wikidata.org/entity/P131,located in the administrative territorial entity
3,http://dbpedia.org/ontology/isPartOf,is part of,http://www.wikidata.org/entity/P361,part of
4,http://dbpedia.org/ontology/locatedInArea,located in area,http://www.wikidata.org/entity/P131,located in the administrative territorial entity


In [13]:
# Row count of the transitive-property table.
transitive_properties_count = transitive_properties.shape[0]
print("Queried {} transitive properties from DBPedia.".format(transitive_properties_count))

Queried 7 transitive properties from DBPedia.


# 2. Enrich graph with extracted properties

## 2.1. Construct graph

In [14]:
# bz2-compressed DBpedia dumps that are read line by line into the graph.
dbpedia_file_instances = "../Data/raw/DBpedia/instance_types_transitive_en.ttl.bz2"
dbpedia_file_objects = "../Data/raw/DBpedia/mappingbased_objects_en.ttl.bz2"
# Destination of the gzip-compressed N-Triples serialization written in section 2.2.
dbpedia_graph = "../Data/interim/DBpedia/graph/dbpedia_graph.nt.gz"
# NOTE(review): not referenced in the visible part of the notebook; presumably the
# output path for the graph after materialization — confirm.
dbpedia_enriched_graph = "../Data/interim/DBpedia/graph/dbpedia_enriched_graph.nt.gz"
# Directory of the on-disk rdflib store opened by the graph-loading cell.
path_store = "../Data/interim/DBpedia/graph/DBpediaLibStore"

In [15]:
def quote(string):
    """Percent-encode ``string`` as UTF-8 so it can be used as a URI.

    The characters ``:``, ``/``, ``%`` and ``#`` are left untouched, so
    scheme separators, path separators, existing percent-escapes and
    fragment markers survive unchanged.

    Parameters
    ----------
    string : str
        Raw IRI text (without surrounding angle brackets).

    Returns
    -------
    str
        The percent-encoded text.
    """
    # ``import urllib`` alone does not guarantee that the ``parse`` submodule
    # is loaded; import it explicitly instead of relying on another library
    # having imported it as a side effect.
    import urllib.parse

    return urllib.parse.quote(string, encoding = "utf-8", safe = ":/%#")

def create_node(string):
    """Convert one N-Triples term into the corresponding rdflib node.

    Parameters
    ----------
    string : str
        A single term as it appears in a dump line: ``<iri>``, ``"literal"...``
        or ``_:blank``.

    Returns
    -------
    URIRef, Literal or BNode
        * ``<...>``  -> ``URIRef`` of the percent-encoded text between the angle
          brackets; a ``/`` directly before the closing bracket is dropped too.
        * ``"..."``  -> ``Literal`` of the text between the first and the last
          double quote. Anything after the last quote (such as a language tag
          or datatype suffix) is discarded.
        * ``_:...``  -> ``BNode`` named by the text after ``_:``.

    Raises
    ------
    ValueError
        If ``string`` starts with none of the recognised prefixes. Previously
        the plain string ``"Invalid node type"`` was returned, which is not an
        RDF term and would only surface later, when added to the graph.
    """
    if string.startswith("<"):
        # Second-to-last character is "/" when the IRI ends in "/>".
        iri = string[1:-2] if string[-2] == "/" else string[1:-1]
        return URIRef(quote(iri))
    if string.startswith('"'):
        return Literal(string[1:string.rindex('"')])
    if string.startswith("_:"):
        return BNode(string[2:])
    raise ValueError("Invalid node type: {!r}".format(string))

In [17]:
# Load file into graph
def _add_dump_to_graph(target_graph, dump_path, total_size):
    """Stream one bz2-compressed dump into ``target_graph``, one triple per line.

    Parameters
    ----------
    target_graph : rdflib.Graph
        Graph that receives the triples.
    dump_path : str
        Path of the bz2-compressed dump, read as UTF-8 text.
    total_size : int
        Total used for the progress bar. The bar advances by ``len(line)``,
        i.e. by decoded characters.
        NOTE(review): presumably a pre-measured size of the decompressed dump — confirm.
    """
    with tqdm(desc="Reading triples and adding to graph: ", total = total_size, unit = "bytes") as pbar:
        with bz2.open(filename=dump_path, encoding="utf-8", mode="rt") as f:
            for line in f:
                pbar.update(len(line))
                # Skip blank lines and the "# started" / "# completed" marker lines.
                if line in ('\n', '\r\n') or "# started " in line or "# completed" in line:
                    continue
                subj_text, pred_text, rest = line.split(" ", maxsplit=2)
                # Cut only at the LAST " ." (the statement terminator). Splitting
                # on every " ." would truncate an object that itself contains
                # " ." (for example inside a literal).
                obj_text = rest.rsplit(" .", 1)[0]
                target_graph.add((create_node(subj_text),
                                  create_node(pred_text),
                                  create_node(obj_text)))


print("Reading file and creating graph.")

# Open previously created store, or create it if it doesn't exist yet
graph = Graph("Sleepycat")

rt = graph.open(path_store, create=False)

if rt == NO_STORE:
    # There is no underlying Sleepycat infrastructure, create it
    graph.open(path_store, create = True)
else:
    assert rt == VALID_STORE, "The underlying store is corrupt."

print('Triples in graph before add: ', len(graph))

_add_dump_to_graph(graph, dbpedia_file_instances, 4529682753)

print('Triples in graph after adding instances: ', len(graph))

_add_dump_to_graph(graph, dbpedia_file_objects, 2576292481)

print('Triples in graph after adding objects: ', len(graph))

Reading triples and adding to graph:   0%|          | 0/4529682753 [00:00<?, ?bytes/s]

Reading file and creating graph.
Triples in graph before add:  0


Reading triples and adding to graph: 100%|██████████| 4529682753/4529682753 [3:34:24<00:00, 352100.50bytes/s]  
Reading triples and adding to graph:   0%|          | 0/2576292481 [00:00<?, ?bytes/s]

Triples in graph after adding instances:  31254270


Reading triples and adding to graph: 100%|██████████| 2576292481/2576292481 [2:12:39<00:00, 323670.65bytes/s]  


Triples in graph after adding objects:  50000412


## 2.2. Save graph

In [18]:
# Write the loaded graph to `dbpedia_graph` as gzip-compressed N-Triples.
print("Serializing and saving graph.")
with gzip.open(dbpedia_graph, mode="wb") as gz_out:
    graph.serialize(format="nt", destination=gz_out)
print("Graph saved.")

Serializing and saving graph.
Graph saved.


## 2.3. Materialize subproperties

In [35]:
# Materialize rdfs:subPropertyOf: every statement (s, subProp, o) entails
# (s, prop, o) for the super-property `prop`.
#
# The earlier version of this cell added (o, prop, s) — the inverse direction —
# which is not what a subproperty relation entails.
# NOTE(review): a store already enriched by that version contains the reversed
# triples; rebuild it from the dumps before relying on the result.
count_subproperties = 0
property_pairs = list(zip(subproperties["Subproperty"], subproperties["Property"]))
for sub_uri, super_uri in tqdm(property_pairs, desc = "Iterating over subproperties: ", unit = "subproperties"):
    subProp = URIRef(sub_uri)
    prop = URIRef(super_uri)
    # Snapshot the matches first: the graph is modified inside the loop.
    instances = list(graph.subject_objects(predicate=subProp))
    for subj, obj in instances:
        entailed = (subj, prop, obj)
        if entailed not in graph:
            graph.add(entailed)
            count_subproperties += 1

Iterating over subproperties: 100%|██████████| 75/75 [00:17<00:00,  4.50subproperties/s]


In [36]:
# Report the number of triples added by the subproperty pass and the graph size.
graph_size = len(graph)
print("Added {} new triples by materializing subproperties.".format(count_subproperties))
print("The graph contains {} triples after materializing subproperties.".format(graph_size))

Added 0 new triples by materializing subproperties.
The graph contains 50506250 triples after materializing subproperties.


## 2.4. Materialize inverse properties

In [37]:
# Materialize inverse properties: (s, source, o) entails (o, target, s).
#
# Rows whose property is dbo:child are handled in the opposite direction:
# instead of deriving father/mother from child statements, child statements
# are derived from the father/mother statements.
# NOTE(review): presumably because a child statement alone does not say
# whether the parent is the father or the mother — confirm.
#
# Two fixes compared to the earlier version of this cell:
#   * the child check compared a URIRef with a plain str; rdflib terms are only
#     equal to terms of the same type, so that branch never ran;
#   * the never-executed branch referenced an undefined name `g`.
child_prop = URIRef("http://dbpedia.org/ontology/child")

count_inverse = 0
inverse_pairs = list(zip(inverse_properties["DBpediaProp"], inverse_properties["DBpediaInvProp"]))
for prop_uri, inverse_uri in tqdm(inverse_pairs, desc="Iterating over inverse properties: ", unit = "inverse properties"):
    prop = URIRef(prop_uri)
    inverse_prop = URIRef(inverse_uri)
    if prop == child_prop:
        source_prop, target_prop = inverse_prop, prop
    else:
        source_prop, target_prop = prop, inverse_prop
    # Snapshot the matches first: the graph is modified inside the loop.
    instances = list(graph.subject_objects(predicate=source_prop))
    for subj, obj in instances:
        entailed = (obj, target_prop, subj)
        if entailed not in graph:
            graph.add(entailed)
            count_inverse += 1

Iterating over inverse properties: 100%|██████████| 8/8 [00:15<00:00,  1.69s/inverse properties]


In [38]:
# Report the number of triples added by the inverse-property pass and the graph size.
graph_size = len(graph)
print("Added {} new triples by materializing inverse properties.".format(count_inverse))
print("The graph contains {} triples after materializing inverse properties.".format(graph_size))

Added 0 new triples by materializing inverse properties.
The graph contains 50506250 triples after materializing inverse properties.


## 2.5. Materialize symmetric properties

In [39]:
# Materialize symmetric properties: for every (s, prop, o) make sure the
# mirrored statement (o, prop, s) is present as well.
count_symmetric = 0
for symProp in symmetric_properties["DBpediaProp"]:
    prop = URIRef(symProp)
    print(prop)
    # Snapshot the matches first: the graph is modified inside the loop.
    for subj, obj in list(graph.subject_objects(predicate = prop)):
        mirrored = (URIRef(obj), prop, URIRef(subj))
        if mirrored in graph:
            continue
        graph.add(mirrored)
        count_symmetric += 1

http://dbpedia.org/ontology/neighboringMunicipality
http://dbpedia.org/ontology/sisterCollege
http://dbpedia.org/ontology/currentPartner


In [40]:
# Report the number of triples added by the symmetric-property pass and the graph size.
graph_size = len(graph)
print("Added {} new triples by materializing symmetric properties.".format(count_symmetric))
print("The graph contains {} triples after materializing symmetric properties.".format(graph_size))

Added 0 new triples by materializing symmetric properties.
The graph contains 50506250 triples after materializing symmetric properties.


## 2.6. Materialize transitive properties

In [41]:
# Materialize transitive properties: connect every subject of `prop` directly
# to each node it can reach by following `prop` repeatedly.
count_transitive = 0
for row in trange(len(transitive_properties), desc="Iterating over transitive properties: ", unit="transitive properties"):
    prop = URIRef(transitive_properties["DBpediaProp"].loc[row])
    print(prop)
    # Distinct subjects that have at least one `prop` statement.
    instances = set(graph.subjects(predicate=prop))
    with tqdm(desc="Iterating over instances: ", total = len(instances), unit = "instances") as pbar:
        for root in instances:
            pbar.update(1)
            # Collect the reachable nodes before adding anything to the graph.
            reachable = list(graph.transitive_objects(property=prop, subject=root))
            for new_node in reachable:
                # Skip self-links and statements that already exist.
                if root == new_node or (root, prop, new_node) in graph:
                    continue
                graph.add((root, prop, new_node))
                count_transitive += 1

Iterating over transitive properties:   0%|          | 0/7 [00:00<?, ?transitive properties/s]

http://dbpedia.org/ontology/city



Iterating over instances:   0%|          | 0/87306 [00:00<?, ?instances/s][A
Iterating over instances:   0%|          | 260/87306 [00:00<00:33, 2598.16instances/s][A
Iterating over instances:   1%|          | 516/87306 [00:00<00:33, 2586.53instances/s][A
Iterating over instances:   1%|          | 781/87306 [00:00<00:33, 2604.02instances/s][A
Iterating over instances:   1%|          | 1051/87306 [00:00<00:32, 2629.98instances/s][A
Iterating over instances:   2%|▏         | 1319/87306 [00:00<00:32, 2642.67instances/s][A
Iterating over instances:   2%|▏         | 1589/87306 [00:00<00:32, 2655.88instances/s][A
Iterating over instances:   2%|▏         | 1843/87306 [00:00<00:32, 2617.15instances/s][A
Iterating over instances:   2%|▏         | 2119/87306 [00:00<00:32, 2657.93instances/s][A
Iterating over instances:   3%|▎         | 2375/87306 [00:00<00:32, 2627.17instances/s][A
Iterating over instances:   3%|▎         | 2631/87306 [00:01<00:32, 2605.01instances/s][A
Iterating over

Iterating over instances:  53%|█████▎    | 45941/87306 [00:18<00:15, 2641.66instances/s][A
Iterating over instances:  53%|█████▎    | 46209/87306 [00:18<00:15, 2651.48instances/s][A
Iterating over instances:  53%|█████▎    | 46487/87306 [00:18<00:15, 2687.64instances/s][A
Iterating over instances:  54%|█████▎    | 46756/87306 [00:18<00:15, 2677.48instances/s][A
Iterating over instances:  54%|█████▍    | 47024/87306 [00:19<00:15, 2676.54instances/s][A
Iterating over instances:  54%|█████▍    | 47292/87306 [00:19<00:15, 2632.63instances/s][A
Iterating over instances:  54%|█████▍    | 47556/87306 [00:19<00:15, 2629.00instances/s][A
Iterating over instances:  55%|█████▍    | 47820/87306 [00:19<00:15, 2614.31instances/s][A
Iterating over instances:  55%|█████▌    | 48082/87306 [00:19<00:15, 2614.87instances/s][A
Iterating over instances:  55%|█████▌    | 48344/87306 [00:19<00:15, 2575.11instances/s][A
Iterating over instances:  56%|█████▌    | 48608/87306 [00:19<00:14, 2594.07inst

http://dbpedia.org/ontology/county



Iterating over instances:   0%|          | 0/8879 [00:00<?, ?instances/s][A
Iterating over instances:   3%|▎         | 306/8879 [00:00<00:02, 3057.42instances/s][A
Iterating over instances:   7%|▋         | 627/8879 [00:00<00:02, 3101.04instances/s][A
Iterating over instances:  11%|█         | 936/8879 [00:00<00:02, 3094.95instances/s][A
Iterating over instances:  14%|█▍        | 1251/8879 [00:00<00:02, 3109.80instances/s][A
Iterating over instances:  18%|█▊        | 1576/8879 [00:00<00:02, 3149.99instances/s][A
Iterating over instances:  21%|██▏       | 1899/8879 [00:00<00:02, 3171.30instances/s][A
Iterating over instances:  25%|██▍       | 2217/8879 [00:00<00:02, 3173.58instances/s][A
Iterating over instances:  28%|██▊       | 2510/8879 [00:01<00:05, 1185.88instances/s][A
Iterating over instances:  31%|███▏      | 2792/8879 [00:01<00:04, 1435.42instances/s][A
Iterating over instances:  35%|███▍      | 3080/8879 [00:01<00:03, 1688.77instances/s][A
Iterating over instances:

http://dbpedia.org/ontology/district



Iterating over instances:   0%|          | 0/45699 [00:00<?, ?instances/s][A
Iterating over instances:   1%|          | 304/45699 [00:00<00:14, 3038.24instances/s][A
Iterating over instances:   1%|▏         | 613/45699 [00:00<00:14, 3052.50instances/s][A
Iterating over instances:   2%|▏         | 928/45699 [00:00<00:14, 3080.75instances/s][A
Iterating over instances:   3%|▎         | 1239/45699 [00:00<00:14, 3088.82instances/s][A
Iterating over instances:   3%|▎         | 1553/45699 [00:00<00:14, 3103.73instances/s][A
Iterating over instances:   4%|▍         | 1870/45699 [00:00<00:14, 3121.74instances/s][A
Iterating over instances:   5%|▍         | 2196/45699 [00:00<00:13, 3159.86instances/s][A
Iterating over instances:   6%|▌         | 2516/45699 [00:00<00:13, 3169.50instances/s][A
Iterating over instances:   6%|▌         | 2833/45699 [00:00<00:13, 3169.23instances/s][A
Iterating over instances:   7%|▋         | 3152/45699 [00:01<00:13, 3173.47instances/s][A
Iterating over

http://dbpedia.org/ontology/isPartOf



Iterating over instances:   0%|          | 0/373292 [00:00<?, ?instances/s][A
Iterating over instances:   0%|          | 65/373292 [00:00<12:04, 515.50instances/s][A
Iterating over instances:   0%|          | 139/373292 [00:00<10:58, 566.75instances/s][A
Iterating over instances:   0%|          | 226/373292 [00:00<09:49, 632.64instances/s][A
Iterating over instances:   0%|          | 332/373292 [00:00<08:39, 717.61instances/s][A
Iterating over instances:   0%|          | 437/373292 [00:00<07:50, 791.67instances/s][A
Iterating over instances:   0%|          | 552/373292 [00:00<07:07, 870.98instances/s][A
Iterating over instances:   0%|          | 680/373292 [00:00<06:27, 961.10instances/s][A
Iterating over instances:   0%|          | 792/373292 [00:00<06:17, 985.79instances/s][A
Iterating over instances:   0%|          | 894/373292 [00:00<06:14, 994.56instances/s][A
Iterating over instances:   0%|          | 1016/373292 [00:01<05:54, 1050.97instances/s][A
Iterating over inst

Iterating over instances:   3%|▎         | 9627/373292 [00:10<06:24, 945.48instances/s][A
Iterating over instances:   3%|▎         | 9729/373292 [00:10<06:21, 953.77instances/s][A
Iterating over instances:   3%|▎         | 9830/373292 [00:10<06:27, 937.13instances/s][A
Iterating over instances:   3%|▎         | 9928/373292 [00:10<07:15, 835.17instances/s][A
Iterating over instances:   3%|▎         | 10017/373292 [00:11<08:05, 748.19instances/s][A
Iterating over instances:   3%|▎         | 10134/373292 [00:11<07:13, 837.53instances/s][A
Iterating over instances:   3%|▎         | 10233/373292 [00:11<06:54, 876.42instances/s][A
Iterating over instances:   3%|▎         | 10326/373292 [00:11<06:49, 886.25instances/s][A
Iterating over instances:   3%|▎         | 10444/373292 [00:11<06:19, 957.17instances/s][A
Iterating over instances:   3%|▎         | 10544/373292 [00:11<06:28, 933.08instances/s][A
Iterating over instances:   3%|▎         | 10641/373292 [00:11<07:29, 807.57instance

Iterating over instances:   8%|▊         | 28986/373292 [00:40<06:22, 899.96instances/s][A
Iterating over instances:   8%|▊         | 29087/373292 [00:40<06:10, 928.96instances/s][A
Iterating over instances:   8%|▊         | 29199/373292 [00:40<05:52, 977.36instances/s][A
Iterating over instances:   8%|▊         | 29322/373292 [00:40<05:30, 1040.02instances/s][A
Iterating over instances:   8%|▊         | 29430/373292 [00:40<07:49, 731.78instances/s] [A
Iterating over instances:   8%|▊         | 29519/373292 [00:40<07:25, 771.86instances/s][A
Iterating over instances:   8%|▊         | 29608/373292 [00:40<07:11, 795.76instances/s][A
Iterating over instances:   8%|▊         | 29696/373292 [00:41<07:49, 731.63instances/s][A
Iterating over instances:   8%|▊         | 29800/373292 [00:41<07:07, 802.85instances/s][A
Iterating over instances:   8%|▊         | 29887/373292 [00:41<08:14, 695.09instances/s][A
Iterating over instances:   8%|▊         | 29976/373292 [00:41<07:42, 742.89in

Iterating over instances:  13%|█▎        | 47970/373292 [01:04<05:16, 1029.15instances/s][A
Iterating over instances:  13%|█▎        | 48094/373292 [01:04<05:00, 1083.94instances/s][A
Iterating over instances:  13%|█▎        | 48220/373292 [01:04<04:47, 1131.20instances/s][A
Iterating over instances:  13%|█▎        | 48335/373292 [01:04<04:55, 1098.50instances/s][A
Iterating over instances:  13%|█▎        | 48466/373292 [01:04<04:41, 1152.41instances/s][A
Iterating over instances:  13%|█▎        | 48583/373292 [01:04<05:00, 1081.66instances/s][A
Iterating over instances:  13%|█▎        | 48694/373292 [01:04<05:16, 1025.01instances/s][A
Iterating over instances:  13%|█▎        | 48799/373292 [01:04<06:27, 837.77instances/s] [A
Iterating over instances:  13%|█▎        | 48893/373292 [01:05<06:14, 865.96instances/s][A
Iterating over instances:  13%|█▎        | 48985/373292 [01:05<06:51, 788.80instances/s][A
Iterating over instances:  13%|█▎        | 49105/373292 [01:05<06:08, 87

Iterating over instances:  18%|█▊        | 67380/373292 [01:24<06:08, 830.03instances/s][A
Iterating over instances:  18%|█▊        | 67496/373292 [01:24<05:37, 907.02instances/s][A
Iterating over instances:  18%|█▊        | 67608/373292 [01:24<05:17, 961.43instances/s][A
Iterating over instances:  18%|█▊        | 67708/373292 [01:24<05:14, 972.42instances/s][A
Iterating over instances:  18%|█▊        | 67834/373292 [01:24<04:52, 1043.25instances/s][A
Iterating over instances:  18%|█▊        | 67942/373292 [01:24<06:13, 817.83instances/s] [A
Iterating over instances:  18%|█▊        | 68063/373292 [01:24<05:37, 903.96instances/s][A
Iterating over instances:  18%|█▊        | 68167/373292 [01:24<05:27, 930.40instances/s][A
Iterating over instances:  18%|█▊        | 68278/373292 [01:24<05:12, 977.21instances/s][A
Iterating over instances:  18%|█▊        | 68382/373292 [01:25<05:28, 927.97instances/s][A
Iterating over instances:  18%|█▊        | 68497/373292 [01:25<05:09, 983.98in

Iterating over instances:  23%|██▎       | 86495/373292 [01:44<04:10, 1144.91instances/s][A
Iterating over instances:  23%|██▎       | 86611/373292 [01:44<04:09, 1147.06instances/s][A
Iterating over instances:  23%|██▎       | 86727/373292 [01:45<05:12, 918.03instances/s] [A
Iterating over instances:  23%|██▎       | 86827/373292 [01:45<05:07, 931.76instances/s][A
Iterating over instances:  23%|██▎       | 86926/373292 [01:45<05:05, 938.05instances/s][A
Iterating over instances:  23%|██▎       | 87024/373292 [01:45<05:03, 943.97instances/s][A
Iterating over instances:  23%|██▎       | 87153/373292 [01:45<04:38, 1026.00instances/s][A
Iterating over instances:  23%|██▎       | 87261/373292 [01:45<04:34, 1041.13instances/s][A
Iterating over instances:  23%|██▎       | 87389/373292 [01:45<04:19, 1100.29instances/s][A
Iterating over instances:  23%|██▎       | 87509/373292 [01:45<04:13, 1125.57instances/s][A
Iterating over instances:  23%|██▎       | 87630/373292 [01:45<04:08, 114

Iterating over instances:  28%|██▊       | 105712/373292 [02:07<05:44, 775.85instances/s][A
Iterating over instances:  28%|██▊       | 105803/373292 [02:07<05:29, 811.39instances/s][A
Iterating over instances:  28%|██▊       | 105923/373292 [02:07<04:57, 898.62instances/s][A
Iterating over instances:  28%|██▊       | 106021/373292 [02:07<05:37, 791.76instances/s][A
Iterating over instances:  28%|██▊       | 106152/373292 [02:07<04:57, 897.98instances/s][A
Iterating over instances:  28%|██▊       | 106277/373292 [02:07<04:32, 979.36instances/s][A
Iterating over instances:  28%|██▊       | 106385/373292 [02:08<04:33, 974.12instances/s][A
Iterating over instances:  29%|██▊       | 106489/373292 [02:08<04:41, 946.67instances/s][A
Iterating over instances:  29%|██▊       | 106589/373292 [02:08<04:55, 903.88instances/s][A
Iterating over instances:  29%|██▊       | 106716/373292 [02:08<04:29, 987.93instances/s][A
Iterating over instances:  29%|██▊       | 106830/373292 [02:08<04:56,

Iterating over instances:  33%|███▎      | 124034/373292 [02:30<06:31, 636.88instances/s][A
Iterating over instances:  33%|███▎      | 124111/373292 [02:30<06:34, 631.48instances/s][A
Iterating over instances:  33%|███▎      | 124183/373292 [02:30<06:44, 616.16instances/s][A
Iterating over instances:  33%|███▎      | 124285/373292 [02:30<05:56, 699.01instances/s][A
Iterating over instances:  33%|███▎      | 124364/373292 [02:30<06:24, 647.64instances/s][A
Iterating over instances:  33%|███▎      | 124445/373292 [02:30<06:01, 688.92instances/s][A
Iterating over instances:  33%|███▎      | 124532/373292 [02:31<05:38, 733.99instances/s][A
Iterating over instances:  33%|███▎      | 124627/373292 [02:31<05:16, 786.14instances/s][A
Iterating over instances:  33%|███▎      | 124752/373292 [02:31<04:45, 871.82instances/s][A
Iterating over instances:  33%|███▎      | 124866/373292 [02:31<04:32, 911.60instances/s][A
Iterating over instances:  33%|███▎      | 124962/373292 [02:31<06:02,

Iterating over instances:  38%|███▊      | 142636/373292 [02:53<04:11, 915.60instances/s][A
Iterating over instances:  38%|███▊      | 142752/373292 [02:53<03:55, 977.33instances/s][A
Iterating over instances:  38%|███▊      | 142884/373292 [02:53<03:37, 1058.36instances/s][A
Iterating over instances:  38%|███▊      | 142994/373292 [02:53<03:58, 967.20instances/s] [A
Iterating over instances:  38%|███▊      | 143095/373292 [02:53<04:09, 924.22instances/s][A
Iterating over instances:  38%|███▊      | 143206/373292 [02:53<03:56, 972.99instances/s][A
Iterating over instances:  38%|███▊      | 143319/373292 [02:53<03:47, 1012.06instances/s][A
Iterating over instances:  38%|███▊      | 143425/373292 [02:53<03:44, 1025.36instances/s][A
Iterating over instances:  38%|███▊      | 143543/373292 [02:54<03:35, 1066.14instances/s][A
Iterating over instances:  38%|███▊      | 143652/373292 [02:54<03:48, 1006.96instances/s][A
Iterating over instances:  39%|███▊      | 143760/373292 [02:54<

Iterating over instances:  43%|████▎     | 161240/373292 [03:13<03:43, 948.98instances/s][A
Iterating over instances:  43%|████▎     | 161342/373292 [03:14<03:56, 897.73instances/s][A
Iterating over instances:  43%|████▎     | 161439/373292 [03:14<03:50, 917.48instances/s][A
Iterating over instances:  43%|████▎     | 161535/373292 [03:14<03:48, 928.69instances/s][A
Iterating over instances:  43%|████▎     | 161631/373292 [03:14<04:00, 880.46instances/s][A
Iterating over instances:  43%|████▎     | 161754/373292 [03:14<03:40, 961.47instances/s][A
Iterating over instances:  43%|████▎     | 161873/373292 [03:14<03:27, 1019.04instances/s][A
Iterating over instances:  43%|████▎     | 162002/373292 [03:14<03:14, 1084.45instances/s][A
Iterating over instances:  43%|████▎     | 162128/373292 [03:14<03:06, 1129.33instances/s][A
Iterating over instances:  43%|████▎     | 162251/373292 [03:14<03:02, 1157.09instances/s][A
Iterating over instances:  43%|████▎     | 162370/373292 [03:15<03

Iterating over instances:  48%|████▊     | 179551/373292 [03:35<03:56, 820.45instances/s][A
Iterating over instances:  48%|████▊     | 179680/373292 [03:35<03:30, 919.16instances/s][A
Iterating over instances:  48%|████▊     | 179781/373292 [03:35<03:43, 864.82instances/s][A
Iterating over instances:  48%|████▊     | 179912/373292 [03:35<03:20, 962.63instances/s][A
Iterating over instances:  48%|████▊     | 180048/373292 [03:35<03:03, 1053.47instances/s][A
Iterating over instances:  48%|████▊     | 180167/373292 [03:35<03:00, 1069.69instances/s][A
Iterating over instances:  48%|████▊     | 180280/373292 [03:35<03:49, 842.46instances/s] [A
Iterating over instances:  48%|████▊     | 180384/373292 [03:36<03:36, 892.40instances/s][A
Iterating over instances:  48%|████▊     | 180483/373292 [03:36<03:39, 878.28instances/s][A
Iterating over instances:  48%|████▊     | 180578/373292 [03:36<03:49, 840.05instances/s][A
Iterating over instances:  48%|████▊     | 180697/373292 [03:36<03:

Iterating over instances:  53%|█████▎    | 198291/373292 [03:55<03:09, 922.32instances/s][A
Iterating over instances:  53%|█████▎    | 198398/373292 [03:55<03:02, 960.29instances/s][A
Iterating over instances:  53%|█████▎    | 198506/373292 [03:56<03:19, 877.25instances/s][A
Iterating over instances:  53%|█████▎    | 198604/373292 [03:56<03:13, 905.00instances/s][A
Iterating over instances:  53%|█████▎    | 198707/373292 [03:56<03:07, 929.03instances/s][A
Iterating over instances:  53%|█████▎    | 198802/373292 [03:56<03:14, 895.30instances/s][A
Iterating over instances:  53%|█████▎    | 198894/373292 [03:56<04:24, 659.67instances/s][A
Iterating over instances:  53%|█████▎    | 198971/373292 [03:56<04:22, 665.00instances/s][A
Iterating over instances:  53%|█████▎    | 199073/373292 [03:56<03:54, 741.74instances/s][A
Iterating over instances:  53%|█████▎    | 199158/373292 [03:56<03:46, 770.43instances/s][A
Iterating over instances:  53%|█████▎    | 199264/373292 [03:57<03:27,

Iterating over instances:  58%|█████▊    | 216655/373292 [04:18<02:52, 906.43instances/s][A
Iterating over instances:  58%|█████▊    | 216748/373292 [04:18<03:11, 815.76instances/s][A
Iterating over instances:  58%|█████▊    | 216864/373292 [04:18<02:54, 895.48instances/s][A
Iterating over instances:  58%|█████▊    | 216959/373292 [04:18<03:02, 857.39instances/s][A
Iterating over instances:  58%|█████▊    | 217074/373292 [04:18<02:48, 927.12instances/s][A
Iterating over instances:  58%|█████▊    | 217171/373292 [04:18<02:47, 934.57instances/s][A
Iterating over instances:  58%|█████▊    | 217289/373292 [04:18<02:36, 996.15instances/s][A
Iterating over instances:  58%|█████▊    | 217392/373292 [04:18<02:38, 983.11instances/s][A
Iterating over instances:  58%|█████▊    | 217520/373292 [04:18<02:27, 1055.32instances/s][A
Iterating over instances:  58%|█████▊    | 217629/373292 [04:19<02:47, 930.10instances/s] [A
Iterating over instances:  58%|█████▊    | 217727/373292 [04:19<02:4

Iterating over instances:  63%|██████▎   | 235530/373292 [04:38<02:22, 966.02instances/s][A
Iterating over instances:  63%|██████▎   | 235629/373292 [04:38<02:50, 809.12instances/s][A
Iterating over instances:  63%|██████▎   | 235716/373292 [04:39<02:46, 826.15instances/s][A
Iterating over instances:  63%|██████▎   | 235818/373292 [04:39<02:38, 867.71instances/s][A
Iterating over instances:  63%|██████▎   | 235909/373292 [04:39<03:07, 732.57instances/s][A
Iterating over instances:  63%|██████▎   | 235989/373292 [04:39<03:29, 654.43instances/s][A
Iterating over instances:  63%|██████▎   | 236093/373292 [04:39<03:06, 735.92instances/s][A
Iterating over instances:  63%|██████▎   | 236222/373292 [04:39<02:42, 844.43instances/s][A
Iterating over instances:  63%|██████▎   | 236318/373292 [04:39<02:37, 869.64instances/s][A
Iterating over instances:  63%|██████▎   | 236429/373292 [04:39<02:27, 929.19instances/s][A
Iterating over instances:  63%|██████▎   | 236529/373292 [04:40<02:26,

Iterating over instances:  68%|██████▊   | 254899/373292 [04:58<02:12, 896.81instances/s][A
Iterating over instances:  68%|██████▊   | 254992/373292 [04:58<02:18, 855.95instances/s][A
Iterating over instances:  68%|██████▊   | 255080/373292 [04:58<02:17, 861.82instances/s][A
Iterating over instances:  68%|██████▊   | 255186/373292 [04:58<02:09, 912.61instances/s][A
Iterating over instances:  68%|██████▊   | 255280/373292 [04:59<02:15, 869.02instances/s][A
Iterating over instances:  68%|██████▊   | 255392/373292 [04:59<02:08, 914.03instances/s][A
Iterating over instances:  68%|██████▊   | 255486/373292 [04:59<02:08, 919.26instances/s][A
Iterating over instances:  68%|██████▊   | 255602/373292 [04:59<02:00, 978.34instances/s][A
Iterating over instances:  68%|██████▊   | 255702/373292 [04:59<02:06, 928.49instances/s][A
Iterating over instances:  69%|██████▊   | 255797/373292 [04:59<02:16, 861.16instances/s][A
Iterating over instances:  69%|██████▊   | 255915/373292 [04:59<02:05,

Iterating over instances:  73%|███████▎  | 273091/373292 [05:19<01:36, 1037.09instances/s][A
Iterating over instances:  73%|███████▎  | 273218/373292 [05:19<01:31, 1096.87instances/s][A
Iterating over instances:  73%|███████▎  | 273340/373292 [05:19<01:28, 1129.40instances/s][A
Iterating over instances:  73%|███████▎  | 273457/373292 [05:19<01:32, 1081.98instances/s][A
Iterating over instances:  73%|███████▎  | 273583/373292 [05:20<01:28, 1128.30instances/s][A
Iterating over instances:  73%|███████▎  | 273700/373292 [05:20<01:43, 960.80instances/s] [A
Iterating over instances:  73%|███████▎  | 273820/373292 [05:20<01:42, 970.66instances/s][A
Iterating over instances:  73%|███████▎  | 273922/373292 [05:20<01:54, 871.53instances/s][A
Iterating over instances:  73%|███████▎  | 274015/373292 [05:20<01:55, 856.68instances/s][A
Iterating over instances:  73%|███████▎  | 274107/373292 [05:20<01:53, 874.44instances/s][A
Iterating over instances:  73%|███████▎  | 274197/373292 [05:20<

Iterating over instances:  78%|███████▊  | 291812/373292 [05:40<01:51, 731.39instances/s][A
Iterating over instances:  78%|███████▊  | 291919/373292 [05:40<01:42, 790.69instances/s][A
Iterating over instances:  78%|███████▊  | 292005/373292 [05:40<01:42, 789.57instances/s][A
Iterating over instances:  78%|███████▊  | 292119/373292 [05:40<01:33, 868.84instances/s][A
Iterating over instances:  78%|███████▊  | 292217/373292 [05:40<01:30, 897.68instances/s][A
Iterating over instances:  78%|███████▊  | 292311/373292 [05:40<01:39, 809.95instances/s][A
Iterating over instances:  78%|███████▊  | 292403/373292 [05:40<01:36, 836.59instances/s][A
Iterating over instances:  78%|███████▊  | 292520/373292 [05:40<01:28, 914.44instances/s][A
Iterating over instances:  78%|███████▊  | 292644/373292 [05:41<01:21, 991.36instances/s][A
Iterating over instances:  78%|███████▊  | 292755/373292 [05:41<01:18, 1022.92instances/s][A
Iterating over instances:  78%|███████▊  | 292873/373292 [05:41<01:15

Iterating over instances:  83%|████████▎ | 310523/373292 [06:01<01:02, 1004.08instances/s][A
Iterating over instances:  83%|████████▎ | 310630/373292 [06:01<01:03, 983.59instances/s] [A
Iterating over instances:  83%|████████▎ | 310733/373292 [06:01<01:05, 953.75instances/s][A
Iterating over instances:  83%|████████▎ | 310832/373292 [06:01<01:06, 941.87instances/s][A
Iterating over instances:  83%|████████▎ | 310952/373292 [06:01<01:01, 1006.63instances/s][A
Iterating over instances:  83%|████████▎ | 311056/373292 [06:01<01:07, 926.79instances/s] [A
Iterating over instances:  83%|████████▎ | 311156/373292 [06:01<01:05, 946.71instances/s][A
Iterating over instances:  83%|████████▎ | 311270/373292 [06:01<01:02, 995.79instances/s][A
Iterating over instances:  83%|████████▎ | 311374/373292 [06:02<01:01, 1008.42instances/s][A
Iterating over instances:  83%|████████▎ | 311494/373292 [06:02<00:59, 1034.96instances/s][A
Iterating over instances:  83%|████████▎ | 311601/373292 [06:02<

Iterating over instances:  88%|████████▊ | 329467/373292 [06:23<00:41, 1052.25instances/s][A
Iterating over instances:  88%|████████▊ | 329577/373292 [06:23<00:43, 1001.62instances/s][A
Iterating over instances:  88%|████████▊ | 329700/373292 [06:23<00:43, 1013.75instances/s][A
Iterating over instances:  88%|████████▊ | 329805/373292 [06:23<00:45, 956.48instances/s] [A
Iterating over instances:  88%|████████▊ | 329921/373292 [06:24<00:43, 1008.50instances/s][A
Iterating over instances:  88%|████████▊ | 330025/373292 [06:24<00:44, 970.45instances/s] [A
Iterating over instances:  88%|████████▊ | 330143/373292 [06:24<00:44, 971.18instances/s][A
Iterating over instances:  88%|████████▊ | 330249/373292 [06:24<00:44, 959.81instances/s][A
Iterating over instances:  88%|████████▊ | 330347/373292 [06:24<00:45, 939.85instances/s][A
Iterating over instances:  89%|████████▊ | 330474/373292 [06:24<00:42, 1017.26instances/s][A
Iterating over instances:  89%|████████▊ | 330579/373292 [06:24

Iterating over instances:  93%|█████████▎| 348454/373292 [06:43<00:26, 938.14instances/s][A
Iterating over instances:  93%|█████████▎| 348575/373292 [06:44<00:25, 960.97instances/s][A
Iterating over instances:  93%|█████████▎| 348675/373292 [06:44<00:25, 956.67instances/s][A
Iterating over instances:  93%|█████████▎| 348799/373292 [06:44<00:23, 1025.22instances/s][A
Iterating over instances:  93%|█████████▎| 348930/373292 [06:44<00:22, 1095.92instances/s][A
Iterating over instances:  94%|█████████▎| 349044/373292 [06:44<00:23, 1048.48instances/s][A
Iterating over instances:  94%|█████████▎| 349161/373292 [06:44<00:22, 1076.03instances/s][A
Iterating over instances:  94%|█████████▎| 349275/373292 [06:44<00:21, 1092.50instances/s][A
Iterating over instances:  94%|█████████▎| 349392/373292 [06:44<00:22, 1079.79instances/s][A
Iterating over instances:  94%|█████████▎| 349519/373292 [06:44<00:21, 1128.84instances/s][A
Iterating over instances:  94%|█████████▎| 349634/373292 [06:45

Iterating over instances:  98%|█████████▊| 367500/373292 [07:04<00:08, 686.09instances/s][A
Iterating over instances:  98%|█████████▊| 367624/373292 [07:04<00:07, 791.96instances/s][A
Iterating over instances:  99%|█████████▊| 367720/373292 [07:04<00:06, 835.67instances/s][A
Iterating over instances:  99%|█████████▊| 367830/373292 [07:04<00:06, 784.06instances/s][A
Iterating over instances:  99%|█████████▊| 367918/373292 [07:04<00:06, 790.96instances/s][A
Iterating over instances:  99%|█████████▊| 368035/373292 [07:04<00:06, 875.63instances/s][A
Iterating over instances:  99%|█████████▊| 368163/373292 [07:05<00:05, 967.03instances/s][A
Iterating over instances:  99%|█████████▊| 368267/373292 [07:05<00:05, 916.41instances/s][A
Iterating over instances:  99%|█████████▊| 368382/373292 [07:05<00:05, 975.67instances/s][A
Iterating over instances:  99%|█████████▊| 368485/373292 [07:05<00:05, 903.22instances/s][A
Iterating over instances:  99%|█████████▊| 368580/373292 [07:05<00:06,

http://dbpedia.org/ontology/locatedInArea



Iterating over instances:   0%|          | 0/24161 [00:00<?, ?instances/s][A
Iterating over instances:   1%|          | 188/24161 [00:00<00:12, 1873.47instances/s][A
Iterating over instances:   2%|▏         | 377/24161 [00:00<00:12, 1876.79instances/s][A
Iterating over instances:   2%|▏         | 565/24161 [00:00<00:12, 1877.12instances/s][A
Iterating over instances:   3%|▎         | 758/24161 [00:00<00:12, 1892.64instances/s][A
Iterating over instances:   4%|▍         | 955/24161 [00:00<00:12, 1913.97instances/s][A
Iterating over instances:   5%|▍         | 1150/24161 [00:00<00:11, 1922.86instances/s][A
Iterating over instances:   6%|▌         | 1355/24161 [00:00<00:11, 1958.75instances/s][A
Iterating over instances:   6%|▋         | 1554/24161 [00:00<00:11, 1965.67instances/s][A
Iterating over instances:   7%|▋         | 1752/24161 [00:00<00:11, 1967.01instances/s][A
Iterating over instances:   8%|▊         | 1942/24161 [00:01<00:11, 1913.16instances/s][A
Iterating over i

http://dbpedia.org/ontology/province



Iterating over instances:   0%|          | 0/11245 [00:00<?, ?instances/s][A
Iterating over instances:   3%|▎         | 327/11245 [00:00<00:03, 3262.12instances/s][A
Iterating over instances:   6%|▌         | 644/11245 [00:00<00:03, 3232.79instances/s][A
Iterating over instances:   9%|▊         | 972/11245 [00:00<00:03, 3246.26instances/s][A
Iterating over instances:  12%|█▏        | 1306/11245 [00:00<00:03, 3272.39instances/s][A
Iterating over instances:  15%|█▍        | 1642/11245 [00:00<00:02, 3296.51instances/s][A
Iterating over instances:  18%|█▊        | 1969/11245 [00:00<00:02, 3284.48instances/s][A
Iterating over instances:  20%|██        | 2303/11245 [00:00<00:02, 3298.45instances/s][A
Iterating over instances:  24%|██▎       | 2646/11245 [00:00<00:02, 3335.76instances/s][A
Iterating over instances:  27%|██▋       | 2986/11245 [00:00<00:02, 3353.73instances/s][A
Iterating over instances:  30%|██▉       | 3319/11245 [00:01<00:02, 3345.53instances/s][A
Iterating over

http://dbpedia.org/ontology/settlement





In [42]:
# Summarize the transitive materialization step: the number of triples it
# added, followed by the resulting size of the graph.
added_template = "Added {} new triples by materializing transitive properties."
size_template = "The graph contains {} triples after materializing transitive properties."
print(added_template.format(count_transitive))
print(size_template.format(len(graph)))

Added 0 new triples by materializing transitive properties.
The graph contains 50506250 triples after materializing transitive properties.


## 2.7. Save enriched graph

In [43]:
# Report the size of the enriched graph.
# NOTE(review): this message says "saved", but the graph is only serialized to
# disk in the next cell - confirm whether the wording or the cell order is intended.
enriched_template = "Enriched graph saved. The graph contains {} triples."
print(enriched_template.format(len(graph)))

Enriched graph saved. The graph contains 50506250 triples.


In [44]:
# Write the enriched graph as gzip-compressed N-Triples to the path held in
# `dbpedia_enriched_graph`; the context manager closes the archive afterwards.
print("Serializing and saving graph.")
with gzip.open(dbpedia_enriched_graph, mode="wb") as compressed_out:
    graph.serialize(format="nt", destination=compressed_out)
print("Graph saved.")

Serializing and saving graph.
Graph saved.


In [45]:
# Close the graph now that it has been serialized.
# NOTE(review): presumably this releases the on-disk store under `path_store`
# so that its files can be deleted in the next cell - confirm against the cell
# that opens the graph.
graph.close()

In [46]:
# Remove the temporary store directory: delete every entry directly inside
# `path_store`, then remove the (now empty) directory itself.
print("Deleting temporary files.")
for store_entry in os.listdir(path_store):
    # Build the path with os.path.join rather than a hard-coded '/' separator.
    # os.unlink only removes files; a nested directory here would raise.
    os.unlink(os.path.join(path_store, store_entry))
os.rmdir(path_store)

Deleting temporary files.
