In [1]:
import os
import gzip
import pickle
import pandas as pd
from tqdm import tqdm
from collections import defaultdict
from subprocess import Popen, PIPE
from SPARQLWrapper import SPARQLWrapper, JSON

# 1. Extract object properties

In [2]:
def run_query(query, endpoint="http://dbpedia.org/sparql"):
    """Run a SPARQL query and return its result bindings as a DataFrame.

    Parameters
    ----------
    query : str
        SPARQL query text to send to the endpoint.
    endpoint : str, optional
        URL of the SPARQL endpoint. Defaults to the public DBpedia endpoint.

    Returns
    -------
    pandas.DataFrame
        One row per entry of ``results["results"]["bindings"]``, flattened by
        ``json_normalize`` so nested keys become dotted column names
        (for example ``prop.value``).
    """
    sparql = SPARQLWrapper(endpoint)
    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    results = sparql.query().convert()

    # ``pd.io.json.json_normalize`` has been deprecated since pandas 1.0 in favour
    # of the top-level ``pd.json_normalize`` and is missing from newer releases.
    # Prefer the top-level function and only fall back on old pandas versions.
    normalize = getattr(pd, "json_normalize", None)
    if normalize is None:
        normalize = pd.io.json.json_normalize
    return normalize(results["results"]["bindings"])

In [3]:
# Query DBpedia for every owl:ObjectProperty that has an English label,
# leaving out properties whose rdfs:domain is owl:Thing.
query = """
    SELECT DISTINCT ?prop  ?label
    WHERE {
     ?prop a owl:ObjectProperty .
     ?prop rdfs:label ?label .
     FILTER(langmatches(lang(?label), 'en'))
     FILTER NOT EXISTS {?prop rdfs:domain owl:Thing}
    }
"""
results_df = run_query(query)

# Keep only the URI and label columns, rename them, and drop every row whose
# property value does not contain "http".
object_properties = (
    results_df[["prop.value", "label.value"]]
    .rename(columns={"prop.value": "DBpediaObjProp", "label.value": "DBPediaPropLabel"})
    .loc[lambda frame: frame["DBpediaObjProp"].str.contains("http")]
)
object_properties.head()

Unnamed: 0,DBpediaObjProp,DBPediaPropLabel
0,http://dbpedia.org/ontology/deathPlace,death place
1,http://dbpedia.org/ontology/birthPlace,birth place
2,http://dbpedia.org/ontology/wikiPageWikiLink,Link from a Wikipage to another Wikipage
3,http://dbpedia.org/ontology/wikiPageExternalLink,Link from a Wikipage to an external page
4,http://dbpedia.org/ontology/wikiPageRedirects,Wikipage redirect


In [4]:
# Row count of the property table; also used as the progress-bar total further down.
object_properties_count = object_properties.shape[0]
print(
    "Queried {} object properties from DBpedia.".format(object_properties_count)
)

Queried 1105 object properties from DBpedia.


In [5]:
# Write the queried object properties to CSV (row index omitted).
object_properties_file = "../Data/interim/DBpedia/properties/DBpedia_object_properties.csv"
# to_csv does not create directories, so make sure the target folder (and any
# missing parents) exists before writing.
os.makedirs(os.path.dirname(object_properties_file), exist_ok=True)
object_properties.to_csv(object_properties_file, index = False)

# 2. DL-Learner

## 2.1. Functions

In [6]:
def _run_dllearner(results_file, object_property,
                   executable="C:/_Andreea/HiwiJob/RDF2Vec/dllearner-1.3.0/bin/enrichment.bat",
                   endpoint="http://dbpedia.org/sparql",
                   graph="http://dbpedia.org"):
    """Run the DL-Learner enrichment script for one property and wait for it to finish.

    Parameters
    ----------
    results_file : str
        Path passed to the script with ``-o``.
    object_property : str
        Property URI passed to the script with ``-r``.
    executable : str, optional
        Path of the enrichment script to launch. The default is the original
        machine-specific location; pass your own installation path elsewhere.
    endpoint : str, optional
        Value passed with ``-e``. Defaults to the public DBpedia SPARQL endpoint.
    graph : str, optional
        Value passed with ``-g``. Defaults to ``http://dbpedia.org``.

    Returns
    -------
    int
        Exit code of the launched process. Earlier versions returned ``None``;
        callers that ignore the return value are unaffected.

    Notes
    -----
    The script is also given ``-f n-triples`` and ``-t 0``. The exact meaning of
    each flag is defined by the DL-Learner enrichment CLI -- TODO confirm against
    its help output.
    """
    command = [
        executable,
        "-e", endpoint,
        "-g", graph,
        "-r", object_property,
        "-o", results_file,
        "-f", "n-triples",
        "-t", "0",
    ]
    proc = Popen(command, stderr = PIPE, stdout = PIPE)
    # communicate() reads both pipes to the end and waits for the process to exit;
    # the captured output itself is not used.
    proc.communicate()
    return proc.returncode

In [7]:
def _get_symmetry_transitivity(file):
    """Read the symmetry and transitivity scores from a DL-Learner results file.

    The file is scanned line by line. A line containing ``Symmetric: <`` or
    ``Transitive: <`` marks an axiom, and the line right after it is expected to
    carry the score: the text between the last pair of double quotes that
    precedes ``^^``. If an axiom appears more than once, the last score wins.

    Parameters
    ----------
    file : str
        Path of the UTF-8 encoded results file.

    Returns
    -------
    tuple
        ``(symmetry_score, transitivity_score)``. Each entry is a float, or
        ``None`` when the file has no such axiom or ends before its score line.
    """
    symmetric_statement = "Symmetric: <"
    transitive_statement = "Transitive: <"
    symmetry_score = None
    transitivity_score = None
    with open(file, encoding="utf-8", mode="rt") as f:
        for line in f:
            if symmetric_statement in line:
                # A bare next(f) would raise StopIteration out of this function
                # when the axiom is the last line of a truncated file.
                next_line = next(f, None)
                if next_line is None:
                    break
                symmetry_score = float(next_line.split("^^")[0].split('"')[-2])
            if transitive_statement in line:
                next_line = next(f, None)
                if next_line is None:
                    break
                transitivity_score = float(next_line.split("^^")[0].split('"')[-2])
    return symmetry_score, transitivity_score

In [8]:
def _get_inverses(file):
    """Collect inverse-property candidates and their scores from a results file.

    Every line containing ``InverseOf`` names a candidate: the text between
    ``InverseOf <`` and the following ``>``. The line right after it is expected
    to carry the score: the text between the last pair of double quotes that
    precedes ``^^``.

    Parameters
    ----------
    file : str
        Path of the UTF-8 encoded results file.

    Returns
    -------
    dict
        Maps each candidate inverse property to its float score. A candidate on
        the very last line of the file, with no score line after it, is left out.
    """
    inverses_dict = dict()
    inverse_statement = "InverseOf"
    with open(file, encoding="utf-8", mode="rt") as f:
        for line in f:
            if inverse_statement in line:
                inverse = line.split("InverseOf <")[-1].split(">")[0]
                # A bare next(f) would raise StopIteration out of this function
                # when the statement is the last line of a truncated file.
                next_line = next(f, None)
                if next_line is None:
                    break
                inverse_score = float(next_line.split("^^")[0].split('"')[-2])
                inverses_dict[inverse] = inverse_score
    return inverses_dict

In [9]:
def _get_subproperties(file):
    """Collect ``SubPropertyOf`` targets and their scores from a results file.

    Every line containing ``SubPropertyOf`` names a property: the text between
    ``SubPropertyOf <`` and the following ``>``. The line right after it is
    expected to carry the score: the text between the last pair of double quotes
    that precedes ``^^``.

    Parameters
    ----------
    file : str
        Path of the UTF-8 encoded results file.

    Returns
    -------
    dict
        Maps each extracted property to its float score. A statement on the very
        last line of the file, with no score line after it, is left out.
    """
    subproperties_dict = dict()
    subproperty_statement = "SubPropertyOf"
    with open(file, encoding="utf-8", mode="rt") as f:
        for line in f:
            if subproperty_statement in line:
                subproperty = line.split("SubPropertyOf <")[-1].split(">")[0]
                # A bare next(f) would raise StopIteration out of this function
                # when the statement is the last line of a truncated file.
                next_line = next(f, None)
                if next_line is None:
                    break
                subproperty_score = float(next_line.split("^^")[0].split('"')[-2])
                subproperties_dict[subproperty] = subproperty_score
    return subproperties_dict

## 2.2. Run DL-Learner

In [10]:
# Result containers filled by the DL-Learner loop, keyed by property URI.
# No default factory is given, so each one behaves like a plain dict.
symmetries, transitivities, inverses = (defaultdict() for _ in range(3))

In [11]:
# Folder that receives the per-property DL-Learner result files.
tmp_folder = "../Data/interim/DBpedia/properties/DLLearner/"
# makedirs also creates missing parent folders (os.mkdir would fail on them), and
# exist_ok=True makes the call a no-op when the folder is already there.
os.makedirs(tmp_folder, exist_ok=True)

In [13]:
# Run DL-Learner once per object property and harvest the scores from the
# results file it leaves behind.
property_uris = list(object_properties["DBpediaObjProp"])
with tqdm(desc="Running DLLearner for object properties: ", total = object_properties_count, unit = "properties") as pbar:
    for object_property in property_uris:
        # The last URI segment names the temporary results file.
        object_property_name = object_property.rsplit("/", 1)[-1]
        results_file = tmp_folder + object_property_name + ".nt"

        # Run the DL-Learner
        _run_dllearner(results_file, object_property)

        # No results file means there is nothing to parse for this property.
        if not os.path.isfile(results_file):
            pbar.update(1)
            continue

        # Record the symmetry and transitivity scores that are present.
        symmetry_score, transitivity_score = _get_symmetry_transitivity(results_file)
        if symmetry_score is not None:
            symmetries[object_property] = symmetry_score
        if transitivity_score is not None:
            transitivities[object_property] = transitivity_score

        # Record the inverse candidates and their corresponding scores, if any.
        inverses_dict = _get_inverses(results_file)
        if inverses_dict:
            inverses[object_property] = pd.DataFrame(
                list(inverses_dict.items()),
                columns=["InverseProperty", "InverseScore"],
            )

        # Delete the temporary results file once it has been parsed.
        os.remove(results_file)
        pbar.update(1)

Running DLLearner for object properties: 100%|█| 1105/1105 [89:50:47<00:00, 12.73s/properties]     


## 2.3. Save results

In [48]:
# Tabulate the collected symmetry scores, highest score first.
symmetries_df = (
    pd.DataFrame(list(symmetries.items()), columns=["Property", "SymmetryScore"])
    .sort_values(by="SymmetryScore", ascending=False)
)
symmetries_df.head()

Unnamed: 0,Property,SymmetryScore
162,http://dbpedia.org/ontology/currentPartner,0.81388
403,http://dbpedia.org/ontology/neighboringMunicip...,0.760849
237,http://dbpedia.org/ontology/formerPartner,0.737923
93,http://dbpedia.org/ontology/capitalCountry,0.5
97,http://dbpedia.org/ontology/capitalRegion,0.428571


In [49]:
# Store the symmetry score of every property as CSV (row index omitted).
symmetries_file = "../Data/interim/DBpedia/properties/DLLearner/all_symmetries.csv"
symmetries_df.to_csv(path_or_buf=symmetries_file, index=False)

In [50]:
# Tabulate the collected transitivity scores, highest score first.
transitivities_df = (
    pd.DataFrame(list(transitivities.items()), columns=["Property", "TransitivityScore"])
    .sort_values(by="TransitivityScore", ascending=False)
)
transitivities_df.head()

Unnamed: 0,Property,TransitivityScore
47,http://dbpedia.org/ontology/associate,0.865232
307,http://dbpedia.org/ontology/isPartOfWineRegion,0.590566
304,http://dbpedia.org/ontology/isPartOf,0.580795
92,http://dbpedia.org/ontology/capitalCountry,0.5
512,http://dbpedia.org/ontology/relative,0.47506


In [51]:
# Store the transitivity score of every property as CSV (row index omitted).
transitivities_file = "../Data/interim/DBpedia/properties/DLLearner/all_transitivities.csv"
transitivities_df.to_csv(path_or_buf=transitivities_file, index=False)

In [52]:
# Order each property's inverse candidates from highest to lowest score, in place.
for key, candidates in inverses.items():
    candidates.sort_values(by="InverseScore", ascending=False, inplace=True)

In [53]:
# Pickle the mapping from property URI to its inverse-candidate table.
inverses_file = "../Data/interim/DBpedia/properties/DLLearner/all_inverses.pkl"
with open(inverses_file, mode="wb") as pickle_out:
    pickle.dump(inverses, pickle_out)

# 3. Process results

In [84]:
# Output locations of the three curated property lists written in this section.
(
    symmetric_properties_file,
    transitive_properties_file,
    inverse_properties_file,
) = (
    "../Data/interim/DBpedia/properties/DBpedia_dllearner_symmetric_properties.csv",
    "../Data/interim/DBpedia/properties/DBpedia_dllearner_transitive_properties.csv",
    "../Data/interim/DBpedia/properties/DBpedia_dllearner_inverse_properties.csv",
)

## 3.1. Symmetric properties

In [85]:
# Properties whose symmetry score exceeds 0.53, followed by four properties
# that are added by hand.
symmetric_properties = symmetries_df.loc[
    symmetries_df["SymmetryScore"] > 0.53, "Property"
].tolist()
symmetric_properties += [
    "http://dbpedia.org/ontology/spouse",
    "http://dbpedia.org/ontology/sisterStation",
    "http://dbpedia.org/ontology/sisterCollege",
    "http://dbpedia.org/ontology/sisterNewspaper",
]
symmetric_properties_df = pd.DataFrame(data=symmetric_properties, columns=["SymmetricProperty"])
symmetric_properties_df

Unnamed: 0,SymmetricProperty
0,http://dbpedia.org/ontology/currentPartner
1,http://dbpedia.org/ontology/neighboringMunicip...
2,http://dbpedia.org/ontology/formerPartner
3,http://dbpedia.org/ontology/spouse
4,http://dbpedia.org/ontology/sisterStation
5,http://dbpedia.org/ontology/sisterCollege
6,http://dbpedia.org/ontology/sisterNewspaper


In [86]:
# Write the symmetric-property list to CSV, leaving out the row index.
symmetric_properties_df.to_csv(path_or_buf=symmetric_properties_file, index=False)

## 3.2. Transitive properties

In [92]:
# Properties whose transitivity score exceeds 0.45.
transitive_properties = transitivities_df.loc[
    transitivities_df["TransitivityScore"] > 0.45, "Property"
].tolist()
transitive_properties_df = pd.DataFrame(data=transitive_properties, columns=["TransitiveProperty"])
transitive_properties_df

Unnamed: 0,TransitiveProperty
0,http://dbpedia.org/ontology/associate
1,http://dbpedia.org/ontology/isPartOfWineRegion
2,http://dbpedia.org/ontology/isPartOf
3,http://dbpedia.org/ontology/capitalCountry
4,http://dbpedia.org/ontology/relative
5,http://dbpedia.org/ontology/related


In [128]:
# Write the transitive-property list to CSV, leaving out the row index.
transitive_properties_df.to_csv(path_or_buf=transitive_properties_file, index=False)

## 3.3. Inverse properties

In [181]:
# For each property take the first row of its inverse-candidate table, which is
# the top-scoring candidate once the tables have been sorted by score, and list
# the resulting pairs from highest to lowest score.
inverses_list = [
    (prop, candidates["InverseProperty"].iloc[0], candidates["InverseScore"].iloc[0])
    for prop, candidates in inverses.items()
]
inverses_temp_df = (
    pd.DataFrame(inverses_list, columns=["Property", "InverseProperty", "InverseScore"])
    .sort_values(by="InverseScore", ascending=False)
)
inverses_temp_df

Unnamed: 0,Property,InverseProperty,InverseScore
86,http://dbpedia.org/ontology/countySeat,http://dbpedia.org/ontology/isPartOf,0.900993
39,http://dbpedia.org/ontology/binomial,http://dbpedia.org/ontology/wikiPageRedirects,0.869663
205,http://dbpedia.org/ontology/lastWin,http://dbpedia.org/ontology/lastWin,0.852632
138,http://dbpedia.org/ontology/followingEvent,http://dbpedia.org/ontology/previousEvent,0.821658
201,http://dbpedia.org/ontology/largestCity,http://dbpedia.org/ontology/largestCity,0.787079
260,http://dbpedia.org/ontology/nextEvent,http://dbpedia.org/ontology/previousEvent,0.723178
240,http://dbpedia.org/ontology/mission,http://dbpedia.org/property/crewMembers,0.619269
56,http://dbpedia.org/ontology/capitalMountain,http://dbpedia.org/ontology/timeZone,0.585293
57,http://dbpedia.org/ontology/capitalPlace,http://dbpedia.org/ontology/state,0.585293
58,http://dbpedia.org/ontology/capitalRegion,http://dbpedia.org/ontology/state,0.571429


In [209]:
# Hand-picked (property, inverse property) pairs.
inverse_properties = [
    ("http://dbpedia.org/ontology/successor", "http://dbpedia.org/ontology/predecessor"),
    ("http://dbpedia.org/ontology/rightChild", "http://dbpedia.org/ontology/leftChild"),
    ("http://dbpedia.org/ontology/parentCompany", "http://dbpedia.org/ontology/subsidiary"),
    ("http://dbpedia.org/ontology/childOrganisation", "http://dbpedia.org/ontology/parentOrganisation"),
    ("http://dbpedia.org/ontology/influenced", "http://dbpedia.org/ontology/influencedBy"),
    ("http://dbpedia.org/ontology/child", "http://dbpedia.org/ontology/parent"),
    ("http://dbpedia.org/ontology/previousInfrastructure", "http://dbpedia.org/ontology/subsequentInfrastructure"),
    ("http://dbpedia.org/ontology/branchFrom", "http://dbpedia.org/ontology/branchTo"),
    ("http://dbpedia.org/ontology/predecessor", "http://dbpedia.org/ontology/successor"),
    ("http://dbpedia.org/ontology/nextEvent", "http://dbpedia.org/ontology/previousEvent"),
    ("http://dbpedia.org/ontology/doctoralAdvisor", "http://dbpedia.org/ontology/doctoralStudent"),
    ("http://dbpedia.org/ontology/doctoralStudent", "http://dbpedia.org/ontology/doctoralAdvisor"),
    ("http://dbpedia.org/ontology/previousEvent", "http://dbpedia.org/ontology/nextEvent"),
    ("http://dbpedia.org/ontology/followingEvent", "http://dbpedia.org/ontology/previousEvent"),
]
inverse_properties

[('http://dbpedia.org/ontology/successor',
  'http://dbpedia.org/ontology/predecessor'),
 ('http://dbpedia.org/ontology/rightChild',
  'http://dbpedia.org/ontology/leftChild'),
 ('http://dbpedia.org/ontology/parentCompany',
  'http://dbpedia.org/ontology/subsidiary'),
 ('http://dbpedia.org/ontology/childOrganisation',
  'http://dbpedia.org/ontology/parentOrganisation'),
 ('http://dbpedia.org/ontology/influenced',
  'http://dbpedia.org/ontology/influencedBy'),
 ('http://dbpedia.org/ontology/child', 'http://dbpedia.org/ontology/parent'),
 ('http://dbpedia.org/ontology/previousInfrastructure',
  'http://dbpedia.org/ontology/subsequentInfrastructure'),
 ('http://dbpedia.org/ontology/branchFrom',
  'http://dbpedia.org/ontology/branchTo'),
 ('http://dbpedia.org/ontology/predecessor',
  'http://dbpedia.org/ontology/successor'),
 ('http://dbpedia.org/ontology/nextEvent',
  'http://dbpedia.org/ontology/previousEvent'),
 ('http://dbpedia.org/ontology/doctoralAdvisor',
  'http://dbpedia.org/ontol

In [210]:
# Turn the (property, inverse property) pairs into a two-column table.
inverse_properties_df = pd.DataFrame(
    data=inverse_properties,
    columns=["Property", "InverseProperty"],
)
inverse_properties_df

Unnamed: 0,Property,InverseProperty
0,http://dbpedia.org/ontology/successor,http://dbpedia.org/ontology/predecessor
1,http://dbpedia.org/ontology/rightChild,http://dbpedia.org/ontology/leftChild
2,http://dbpedia.org/ontology/parentCompany,http://dbpedia.org/ontology/subsidiary
3,http://dbpedia.org/ontology/childOrganisation,http://dbpedia.org/ontology/parentOrganisation
4,http://dbpedia.org/ontology/influenced,http://dbpedia.org/ontology/influencedBy
5,http://dbpedia.org/ontology/child,http://dbpedia.org/ontology/parent
6,http://dbpedia.org/ontology/previousInfrastruc...,http://dbpedia.org/ontology/subsequentInfrastr...
7,http://dbpedia.org/ontology/branchFrom,http://dbpedia.org/ontology/branchTo
8,http://dbpedia.org/ontology/predecessor,http://dbpedia.org/ontology/successor
9,http://dbpedia.org/ontology/nextEvent,http://dbpedia.org/ontology/previousEvent


In [211]:
# Write the inverse-property pairs to CSV, leaving out the row index.
inverse_properties_df.to_csv(path_or_buf=inverse_properties_file, index=False)