In [1]:
from SPARQLWrapper import SPARQLWrapper, JSON
import pandas as pd
import os

In [2]:
dbpedia_sparql = "http://dbpedia.org/sparql"
wikidata_sparql = "https://query.wikidata.org/sparql"
dbpedia_instances_count = 5489629

In [3]:
def run_query(sparql_endpoint, query):
    sparql = SPARQLWrapper(sparql_endpoint)
    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    results = sparql.query().convert()
    results_df = pd.io.json.json_normalize(results["results"]["bindings"])
    return results_df

# 1. DBpedia instances with properties mapped to Wikidata properties

## 1.1. Retrieve DBpedia properties mapped to Wikidata

Query and save mapped properties

In [4]:
query = """
    PREFIX       owl:  <http://www.w3.org/2002/07/owl#>
    PREFIX      rdfs:  <http://www.w3.org/2000/01/rdf-schema#>

    SELECT DISTINCT ?DBpediaProp ?DBpediaPropLabel ?WikidataProp ?WikidataPropLabel
    WHERE
      {
        ?DBpediaProp  owl:equivalentProperty  ?WikidataProp .
                      FILTER ( CONTAINS ( str(?WikidataProp) , 'wikidata' ) ) .
        ?DBpediaProp  rdfs:label              ?DBpediaPropLabel .
                      FILTER (lang(?DBpediaPropLabel) = 'en')
        ?WikidataProp  rdfs:label              ?WikidataPropLabel .
                      FILTER (lang(?WikidataPropLabel) = 'en')
      }
    ORDER BY  ?DBpediaProp
    """
results_df = run_query(dbpedia_sparql, query)
mapped_properties = results_df[["DBpediaProp.value", "DBpediaPropLabel.value", "WikidataProp.value", "WikidataPropLabel.value"]]
mapped_properties.columns = ["DBpediaProp", "DBpediaPropLabel", "WikidataProp", "WikidataPropLabel"]
mapped_properties.head()

Unnamed: 0,DBpediaProp,DBpediaPropLabel,WikidataProp,WikidataPropLabel
0,http://dbpedia.org/ontology/absoluteMagnitude,absolute magnitude,http://www.wikidata.org/entity/P1457,absolute magnitude
1,http://dbpedia.org/ontology/address,address,http://www.wikidata.org/entity/P969,located at street address
2,http://dbpedia.org/ontology/alias,alias,http://www.wikidata.org/entity/P742,pseudonym
3,http://dbpedia.org/ontology/almaMater,alma mater,http://www.wikidata.org/entity/P69,educated at
4,http://dbpedia.org/ontology/amgid,amgId,http://www.wikidata.org/entity/P1562,AllMovie movie ID


In [5]:
mapped_properties_count = len(mapped_properties)
print("Queried {} properties from Dbpedia mapped to properties in Wikidata".format(mapped_properties_count))

Queried 177 properties from Dbpedia mapped to properties in Wikidata


In [6]:
mapped_properties_file = "../Data/interim/DBpedia/properties/DBpedia_Wikidata_mapped_properties.csv"
mapped_properties.to_csv(mapped_properties_file, index = False)

Compute percentage of DBpedia properties mapped

In [7]:
#Extract object properties
query = """
    SELECT DISTINCT ?prop  ?label
    WHERE {
     ?prop a owl:ObjectProperty .
     ?prop rdfs:label ?label .
     FILTER(langmatches(lang(?label), 'en'))
     FILTER NOT EXISTS {?prop rdfs:domain owl:Thing}
    }
"""
results_df = run_query(dbpedia_sparql, query)
object_properties = results_df[["prop.value", "label.value"]]
object_properties.columns = ["DBpediaObjProp", "DBPediaPropLabel"]
object_properties.head()

Unnamed: 0,DBpediaObjProp,DBPediaPropLabel
0,http://dbpedia.org/ontology/deathPlace,death place
1,http://dbpedia.org/ontology/birthPlace,birth place
2,http://dbpedia.org/ontology/wikiPageWikiLink,Link from a Wikipage to another Wikipage
3,http://dbpedia.org/ontology/wikiPageExternalLink,Link from a Wikipage to an external page
4,http://dbpedia.org/ontology/wikiPageRedirects,Wikipage redirect


In [8]:
object_properties_count = len(object_properties)
print("Queried {} object properties from DBpedia.".format(object_properties_count))

Queried 1657 object properties from DBpedia.


In [9]:
mapped_obj_prop = [prop for prop in list(mapped_properties["DBpediaProp"]) if prop in list(object_properties["DBpediaObjProp"])]
mapped_object_properties = mapped_properties[mapped_properties["DBpediaProp"].isin(mapped_obj_prop)]
mapped_object_properties.reset_index(drop = True, inplace = True)
print("There are {} DBpedia object properties mapped to Wikidata.".format(len(mapped_object_properties)))
mapped_object_properties.head()

There are 98 DBpedia object properties mapped to Wikidata.


Unnamed: 0,DBpediaProp,DBpediaPropLabel,WikidataProp,WikidataPropLabel
0,http://dbpedia.org/ontology/almaMater,alma mater,http://www.wikidata.org/entity/P69,educated at
1,http://dbpedia.org/ontology/anthem,anthem,http://www.wikidata.org/entity/P85,anthem
2,http://dbpedia.org/ontology/architect,architect,http://www.wikidata.org/entity/P84,architect
3,http://dbpedia.org/ontology/architecturalStyle,architectural style,http://www.wikidata.org/entity/P149,architectural style
4,http://dbpedia.org/ontology/artist,performer,http://www.wikidata.org/entity/P175,performer


In [10]:
mapped_obj_properties_file = "../Data/interim/DBpedia/properties/DBpedia_Wikidata_mapped_obj_prop.csv"
mapped_object_properties.to_csv(mapped_obj_properties_file, index = False)

## 1.2. Compute percentage of instances with mapped properties

In [11]:
mapped_instances = set()
for mapped_prop in mapped_properties["DBpediaProp"]:
    prop = mapped_prop.split("/")[-1]
    print(prop)
    query = """
        SELECT DISTINCT ?item
        WHERE {
            ?item dbo:""" + prop + """ [] .
        }
    """
    results_df = run_query(dbpedia_sparql, query)
    if not results_df.empty:
        mapped_instances.update(list(results_df["item.value"]))

absoluteMagnitude
address
alias
almaMater
amgid
anthem
apparentMagnitude
architect
architecturalStyle
areaCode
artist
atomicNumber
author
award
bSide
basedOn
battle
bibsysId
birthDate
birthName
birthPlace
birthYear
bnfId
builder
capital
causeOfDeath
ceo
child
chromosome
cinematography
citizenship
city
coatOfArms
coden
colour
composer
compressionRatio
computingPlatform
constellation
constructionMaterial
continent
cosparId
country
county
coverArtist
cpu
creator
crewMember
crosses
currency
currencyCode
date
dcc
deathCause
deathDate
deathPlace
designer
developer
diocese
director
discipline
discovered
discoverer
diseasesDb
distributor
district
doctoralAdvisor
doctoralStudent
ecNumber
editing
editor
education
einecsNumber
elo
emblem
employer
endDate
ethnicity
eyeColor
father
flag
followedBy
follows
formationDate
foundedBy
founder
foundingDate
fuelSystem
gameEngine
generalManager
genre
giniCoefficient
handedness
headquarter
highestPoint
highwaySystem
homeport
iataAirlineCode
iataLocationIdent

In [12]:
mapped_instances_count = len(mapped_instances)
print("{} instances from DBpedia have a property matched to a property in Wikidata.".format(mapped_instances_count))
print("Percentage of all DBpedia instances: {}".format(100 * mapped_instances_count/dbpedia_instances_count))

624384 instances from DBpedia have a property matched to a property in Wikidata.
Percentage of all DBpedia instances: 11.373883371717834


In [13]:
mapped_obj_instances = set()
for mapped_prop in mapped_object_properties["DBpediaProp"]:
    prop = mapped_prop.split("/")[-1]
    print(prop)
    query = """
        SELECT DISTINCT ?item
        WHERE {
            ?item dbo:""" + prop + """ [] .
        }
    """
    results_df = run_query(dbpedia_sparql, query)
    if not results_df.empty:
        mapped_obj_instances.update(list(results_df["item.value"]))

almaMater
anthem
architect
architecturalStyle
artist
author
award
basedOn
battle
birthPlace
builder
capital
ceo
child
cinematography
citizenship
city
coatOfArms
colour
composer
computingPlatform
constellation
constructionMaterial
continent
country
county
coverArtist
cpu
creator
crewMember
crosses
currency
deathCause
deathPlace
designer
developer
diocese
director
discipline
discoverer
distributor
district
doctoralAdvisor
doctoralStudent
editing
editor
education
employer
ethnicity
father
followedBy
follows
foundedBy
founder
fuelSystem
gameEngine
generalManager
genre
handedness
headquarter
highestPoint
highwaySystem
homeport
ideology
illustrator
industry
inflow
instrument
isPartOf
launchSite
launchVehicle
league
license
locatedInArea
maintainedBy
manager
manufacturer
mother
movement
musicBy
musicalArtist
namedAfter
occupation
originalLanguage
owner
party
placeOfBurial
primeMinister
province
recordLabel
religion
residence
settlement
spouse
starring
taoiseach
team
terytCode


In [14]:
mapped_obj_instances_count = len(mapped_obj_instances)
print("{} instances from DBpedia have an object property matched to a property in Wikidata.".format(mapped_obj_instances_count))
print("Percentage of all DBpedia instances: {}".format(100 * mapped_obj_instances_count/dbpedia_instances_count))

466866 instances from DBpedia have an object property matched to a property in Wikidata.
Percentage of all DBpedia instances: 8.504509138960028


# 2. Inverse, Symmetric, Transitive Properties

## 2.1. Inverse properties

### 2.1.1. Determine inverse properties

Query inverseOf properties from Wikidata

In [15]:
#Retrieve all inverse properties from Wikidata
query = """
    PREFIX  wikibase:  <http://wikiba.se/ontology#>
    PREFIX        bd:  <http://www.bigdata.com/rdf#>
    
    SELECT ?prop ?invProp 
    WHERE{
      ?prop wdt:P1696 ?invProp .
      FILTER (?prop != ?invProp)
      SERVICE wikibase:label { bd:serviceParam  wikibase:language  "en" } .
      }
"""
results_df = run_query(wikidata_sparql, query)
wiki_inverse_prop = results_df[["prop.value", "invProp.value"]]
wiki_inverse_prop.columns = ["WikidataProperty", "WikidataInvProperty"]
wiki_inverse_prop.head()

Unnamed: 0,WikidataProperty,WikidataInvProperty
0,http://www.wikidata.org/entity/P2499,http://www.wikidata.org/entity/P2500
1,http://www.wikidata.org/entity/P2502,http://www.wikidata.org/entity/P2501
2,http://www.wikidata.org/entity/P2501,http://www.wikidata.org/entity/P2502
3,http://www.wikidata.org/entity/P3148,http://www.wikidata.org/entity/P2568
4,http://www.wikidata.org/entity/P2579,http://www.wikidata.org/entity/P2578


In [16]:
wiki_inverse_prop_count = len(wiki_inverse_prop)
print("Queried {} inverseOf pairs of properties from Wikidata.".format(wiki_inverse_prop_count))

Queried 136 inverseOf pairs of properties from Wikidata.


Compute how many DBpedia properties are mapped to inverseOf properties in Wikidata

In [17]:
dbpedia_inverse_prop = list()
for idx in range(len(wiki_inverse_prop)):
    wiki_prop = wiki_inverse_prop["WikidataProperty"].iloc[idx]
    wiki_inv_prop = wiki_inverse_prop["WikidataInvProperty"].iloc[idx]
    dbpedia_prop_idx = mapped_object_properties[mapped_object_properties["WikidataProp"]==wiki_prop]["DBpediaProp"].index.tolist()
    dbpedia_inv_prop_idx = mapped_object_properties[mapped_object_properties["WikidataProp"]==wiki_inv_prop]["DBpediaProp"].index.tolist()
    if dbpedia_prop_idx and dbpedia_inv_prop_idx:
        dbpedia_prop_idx = int(dbpedia_prop_idx[0])
        dbpedia_inv_prop_idx = int(dbpedia_inv_prop_idx[0])
        dbpedia_inverse_prop.append(list(mapped_object_properties.iloc[dbpedia_prop_idx]) +
              list(mapped_object_properties.iloc[dbpedia_inv_prop_idx]))
        
mapped_inverse_properties = pd.DataFrame(dbpedia_inverse_prop,
                                        columns = ["DBpediaProp", "DBPediaPropLabel", 
                                                  "WikidataProp", "WikidataPropLabel",
                                                  "DBpediaInvProp", "DBpediaInvPropLabel",
                                                  "WikidataInvProp", "WikidataInvPropLabel"])

mapped_inverse_properties.reset_index(inplace=True, drop=True)
mapped_inverse_properties.head()

Unnamed: 0,DBpediaProp,DBPediaPropLabel,WikidataProp,WikidataPropLabel,DBpediaInvProp,DBpediaInvPropLabel,WikidataInvProp,WikidataInvPropLabel
0,http://dbpedia.org/ontology/child,child,http://www.wikidata.org/entity/P40,child,http://dbpedia.org/ontology/father,father,http://www.wikidata.org/entity/P22,father
1,http://dbpedia.org/ontology/child,child,http://www.wikidata.org/entity/P40,child,http://dbpedia.org/ontology/mother,mother,http://www.wikidata.org/entity/P25,mother
2,http://dbpedia.org/ontology/father,father,http://www.wikidata.org/entity/P22,father,http://dbpedia.org/ontology/child,child,http://www.wikidata.org/entity/P40,child
3,http://dbpedia.org/ontology/mother,mother,http://www.wikidata.org/entity/P25,mother,http://dbpedia.org/ontology/child,child,http://www.wikidata.org/entity/P40,child
4,http://dbpedia.org/ontology/followedBy,followed by,http://www.wikidata.org/entity/P156,followed by,http://dbpedia.org/ontology/follows,follows,http://www.wikidata.org/entity/P155,follows


In [18]:
mapped_inverse_prop_count = len(mapped_inverse_properties)
print("There are {} DBpedia properties mapped to a property in Wikidata marked as inverseOf.".format(
    mapped_inverse_prop_count))

There are 8 DBpedia properties mapped to a property in Wikidata marked as inverseOf.


In [19]:
mapped_inverse_properties_file = "../Data/interim/DBpedia/properties/DBpedia_Wikidata_mapped_inverse_prop.csv"
mapped_inverse_properties.to_csv(mapped_inverse_properties_file, index = False)

### 2.1.2. Compute percentage of mapped instances with inverse properties

Compute percentage of instances with mapped inverse properties

In [20]:
mapped_inverse_instances = set()
for inverse_prop in mapped_inverse_properties["DBpediaProp"]:
    prop = inverse_prop.split("/")[-1]
    print(prop)
    query = """
        SELECT DISTINCT ?item
        WHERE {
            ?item dbo:""" + prop + """ [] .
        }
     """
    results_df = run_query(dbpedia_sparql, query)
    if not results_df.empty:
        mapped_inverse_instances.update(list(results_df["item.value"]))

child
child
father
mother
followedBy
follows
doctoralStudent
doctoralAdvisor


In [21]:
mapped_inverse_instances_count = len(mapped_inverse_instances)
print("{} instances from DBpedia have an object property matched to an inverse property in Wikidata.".format(
    mapped_inverse_instances_count))
print("Percentage of DBpedia instances with mapped inverse properties (out of all DBpedia instances with mapped properties): {}"\
      .format(100 * mapped_inverse_instances_count/mapped_instances_count))
print("Percentage of DBpedia instances with mapped inverse properties (out of all DBpedia instances with mapped object properties) : {}"\
      .format(100 * mapped_inverse_instances_count/mapped_obj_instances_count))
print("Percentage of DBpedia instances with mapped inverse properties (out of all DBpedia instances) : {}"\
      .format(100 * mapped_inverse_instances_count/dbpedia_instances_count))

16507 instances from DBpedia have an object property matched to an inverse property in Wikidata.
Percentage of DBpedia instances with mapped inverse properties (out of all DBpedia instances with mapped properties): 2.64372565600656
Percentage of DBpedia instances with mapped inverse properties (out of all DBpedia instances with mapped object properties) : 3.535704034990768
Percentage of DBpedia instances with mapped inverse properties (out of all DBpedia instances) : 0.3006942727823684


## 2.2. Symmetric properties

### 2.2.1. Determine symmetric properties

Query symmetric properties from Wikidata

In [22]:
#Retrieve all symmetric properties from Wikidata
query = """
    PREFIX  wikibase:  <http://wikiba.se/ontology#>
    PREFIX        bd:  <http://www.bigdata.com/rdf#>
    
    SELECT ?prop 
    WHERE{
      ?prop wdt:P31 wd:Q18647518 .
      SERVICE wikibase:label { bd:serviceParam  wikibase:language  "en" } .
      }
"""
results_df = run_query(wikidata_sparql, query)
wiki_symmetric_prop = results_df[["prop.value"]]
wiki_symmetric_prop.columns = ["WikidataProperty"]
wiki_symmetric_prop.head()

Unnamed: 0,WikidataProperty
0,http://www.wikidata.org/entity/P460
1,http://www.wikidata.org/entity/P1560
2,http://www.wikidata.org/entity/P1889
3,http://www.wikidata.org/entity/P2743
4,http://www.wikidata.org/entity/P2959


In [23]:
wiki_symmetric_prop_count = len(wiki_symmetric_prop)
print("Queried {} symmetric properties from Wikidata.".format(wiki_symmetric_prop_count))

Queried 8 symmetric properties from Wikidata.


Compute how many DBpedia properties are mapped to symmetric properties in Wikidata

In [24]:
mapped_symmetric_properties = mapped_object_properties[mapped_object_properties["WikidataProp"].isin(
    wiki_symmetric_prop["WikidataProperty"])]
mapped_symmetric_properties.reset_index(inplace=True, drop=True)
mapped_symmetric_properties.head()

Unnamed: 0,DBpediaProp,DBpediaPropLabel,WikidataProp,WikidataPropLabel


In [25]:
mapped_symmetric_prop_count = len(mapped_symmetric_properties)
print("There are {} DBpedia properties mapped to a property in Wikidata marked as symmetric.".format(
    mapped_symmetric_prop_count))

There are 0 DBpedia properties mapped to a property in Wikidata marked as symmetric.


In [26]:
mapped_symmetric_properties_file = "../Data/interim/DBpedia/properties/DBpedia_Wikidata_mapped_symmetric_prop.csv"
mapped_symmetric_properties.to_csv(mapped_symmetric_properties_file, index = False)

### 2.2.2. Compute percentage of mapped instances with symmetric properties

Compute percentage of instances with mapped symmetric properties

In [27]:
mapped_symmetric_instances = set()
for symmetric_prop in mapped_symmetric_properties["DBpediaProp"]:
    prop = symmetric_prop.split("/")[-1]
    print(prop)
    query = """
        SELECT DISTINCT ?item
        WHERE {
            ?item dbo:""" + prop + """ [] .
        }
     """
    results_df = run_query(dbpedia_sparql, query)
    if not results_df.empty:
        mapped_symmetric_instances.update(list(results_df["item.value"]))

In [28]:
mapped_symmetric_instances_count = len(mapped_symmetric_instances)
print("{} instances from DBpedia have an object property matched to a symmetric property in Wikidata.".format(
    mapped_symmetric_instances_count))
print("Percentage of DBpedia instances with mapped symmetric properties (out of all DBpedia instances with mapped properties): {}"\
      .format(100 * mapped_symmetric_instances_count/mapped_instances_count))
print("Percentage of DBpedia instances with mapped symmetric properties (out of all DBpedia instances with mapped object properties) : {}"\
      .format(100 * mapped_symmetric_instances_count/mapped_obj_instances_count))
print("Percentage of DBpedia instances with mapped symmetric properties (out of all DBpedia instances): {}"\
      .format(100 * mapped_symmetric_instances_count/dbpedia_instances_count))

0 instances from DBpedia have an object property matched to a symmetric property in Wikidata.
Percentage of DBpedia instances with mapped symmetric properties (out of all DBpedia instances with mapped properties): 0.0
Percentage of DBpedia instances with mapped symmetric properties (out of all DBpedia instances with mapped object properties) : 0.0
Percentage of DBpedia instances with mapped symmetric properties (out of all DBpedia instances): 0.0


## 2.3. Transitive properties

### 2.3.1. Determine transitive properties

Query transitive properties from Wikidata

In [29]:
#Retrieve all symmetric properties from Wikidata
query = """
    PREFIX  wikibase:  <http://wikiba.se/ontology#>
    PREFIX        bd:  <http://www.bigdata.com/rdf#>
    
    SELECT ?prop 
    WHERE{
      ?prop wdt:P31 wd:Q18647515 .
      SERVICE wikibase:label { bd:serviceParam  wikibase:language  "en" } .
      }
"""
results_df = run_query(wikidata_sparql, query)
wiki_transitive_prop = results_df[["prop.value"]]
wiki_transitive_prop.columns = ["WikidataProperty"]
wiki_transitive_prop.head()

Unnamed: 0,WikidataProperty
0,http://www.wikidata.org/entity/P131
1,http://www.wikidata.org/entity/P276
2,http://www.wikidata.org/entity/P279
3,http://www.wikidata.org/entity/P361
4,http://www.wikidata.org/entity/P403


In [30]:
wiki_transitive_prop_count = len(wiki_transitive_prop)
print("Queried {} symmetric properties from Wikidata.".format(wiki_transitive_prop_count))

Queried 15 symmetric properties from Wikidata.


Compute how many DBpedia properties are mapped to transitive properties in Wikidata

In [31]:
mapped_transitive_properties = mapped_object_properties[mapped_object_properties["WikidataProp"].isin(
    wiki_transitive_prop["WikidataProperty"])]
mapped_transitive_properties.reset_index(inplace=True, drop=True)
mapped_transitive_properties.head()

Unnamed: 0,DBpediaProp,DBpediaPropLabel,WikidataProp,WikidataPropLabel
0,http://dbpedia.org/ontology/city,city,http://www.wikidata.org/entity/P131,located in the administrative territorial entity
1,http://dbpedia.org/ontology/county,county,http://www.wikidata.org/entity/P131,located in the administrative territorial entity
2,http://dbpedia.org/ontology/district,district,http://www.wikidata.org/entity/P131,located in the administrative territorial entity
3,http://dbpedia.org/ontology/isPartOf,is part of,http://www.wikidata.org/entity/P361,part of
4,http://dbpedia.org/ontology/locatedInArea,located in area,http://www.wikidata.org/entity/P131,located in the administrative territorial entity


In [32]:
mapped_transitive_prop_count = len(mapped_transitive_properties)
print("There are {} DBpedia properties mapped to a property in Wikidata marked as transitive.".format(
    mapped_transitive_prop_count))

There are 7 DBpedia properties mapped to a property in Wikidata marked as transitive.


In [33]:
mapped_transitive_properties_file = "../Data/interim/DBpedia/properties/DBpedia_Wikidata_mapped_transitive_prop.csv"
mapped_transitive_properties.to_csv(mapped_transitive_properties_file, index = False)

### 2.3.2. Compute percentage of mapped instances with transitive properties

Compute percentage of instances with mapped transitive properties

In [34]:
mapped_transitive_instances = set()
for transitive_prop in mapped_transitive_properties["DBpediaProp"]:
    prop = transitive_prop.split("/")[-1]
    print(prop)
    query = """
        SELECT DISTINCT ?item
        WHERE {
            ?item dbo:""" + prop + """ [] .
        }
     """
    results_df = run_query(dbpedia_sparql, query)
    if not results_df.empty:
        mapped_transitive_instances.update(list(results_df["item.value"]))

city
county
district
isPartOf
locatedInArea
province
settlement


In [35]:
mapped_transtive_instances_count = len(mapped_transitive_instances)
print("{} instances from DBpedia have an object property matched to a transitive property in Wikidata.".format(
    mapped_transtive_instances_count))
print("Percentage of DBpedia instances with mapped transitive properties (out of all DBpedia instances with mapped properties): {}"\
      .format(100 * mapped_transtive_instances_count/mapped_instances_count))
print("Percentage of DBpedia instances with mapped transitive properties (out of all DBpedia instances with mapped object properties : {}"\
      .format(100 * mapped_transtive_instances_count/mapped_obj_instances_count))
print("Percentage of DBpedia instances with mapped symmetric properties (out of all DBpedia instances): {}"\
      .format(100 * mapped_transtive_instances_count/dbpedia_instances_count))

57667 instances from DBpedia have an object property matched to a transitive property in Wikidata.
Percentage of DBpedia instances with mapped transitive properties (out of all DBpedia instances with mapped properties): 9.23582282697827
Percentage of DBpedia instances with mapped transitive properties (out of all DBpedia instances with mapped object properties : 12.351938243521696
Percentage of DBpedia instances with mapped symmetric properties (out of all DBpedia instances): 1.0504717167590014


# 3. Subproperties

## 3.1. Query DBpedia subproperties

In [36]:
dbpedia_ontology_file = "../Data/raw/DBpedia/dbpedia_2016-10.nt"

In [37]:
import rdflib

In [38]:
from rdflib import Graph
graph = rdflib.Graph()
graph.parse(dbpedia_ontology_file, format = "nt")

<Graph identifier=N9d4b1a26565144f1999d1eacb6afd8ed (<class 'rdflib.graph.Graph'>)>

In [39]:
query = """
    PREFIX      rdfs:  <http://www.w3.org/2000/01/rdf-schema#>

    SELECT DISTINCT ?subProp ?subPropLabel ?prop ?propLabel
    WHERE
      {
        ?subProp  rdfs:subPropertyOf  ?prop .
        ?subProp  rdfs:label          ?subPropLabel .
        ?prop     rdfs:label          ?propLabel .
        FILTER (lang(?subPropLabel) = 'en')
        FILTER (lang(?propLabel) = 'en')
      }
    """
results = graph.query(query, processor="sparql")
results.serialize(format="json")

b'{"results": {"bindings": [{"subProp": {"type": "uri", "value": "http://dbpedia.org/ontology/championInDoubleFemale"}, "prop": {"type": "uri", "value": "http://dbpedia.org/ontology/championInDouble"}, "propLabel": {"type": "literal", "value": "champion in double", "xml:lang": "en"}, "subPropLabel": {"type": "literal", "value": "champion in double female", "xml:lang": "en"}}, {"subProp": {"type": "uri", "value": "http://dbpedia.org/ontology/championInDouble"}, "prop": {"type": "uri", "value": "http://dbpedia.org/ontology/champion"}, "propLabel": {"type": "literal", "value": "champion", "xml:lang": "en"}, "subPropLabel": {"type": "literal", "value": "champion in double", "xml:lang": "en"}}, {"subProp": {"type": "uri", "value": "http://dbpedia.org/ontology/maximumDepth"}, "prop": {"type": "uri", "value": "http://dbpedia.org/ontology/depth"}, "propLabel": {"type": "literal", "value": "depth (\xce\xbc)", "xml:lang": "en"}, "subPropLabel": {"type": "literal", "value": "maximum depth (\xce\x

In [40]:
results_list = list()
for row in range(len(results)):
    results_list.append([
        str(results.bindings[row]["subProp"]),
        str(results.bindings[row]["subPropLabel"]),
        str(results.bindings[row]["prop"]),
        str(results.bindings[row]["propLabel"])
    ])
dbpedia_subproperties = pd.DataFrame(results_list,
                         columns = ["Subproperty", "SubpropertyLabel", "Property", "PropertyLabel"])
dbpedia_subproperties.head()

Unnamed: 0,Subproperty,SubpropertyLabel,Property,PropertyLabel
0,http://dbpedia.org/ontology/championInDoubleFe...,champion in double female,http://dbpedia.org/ontology/championInDouble,champion in double
1,http://dbpedia.org/ontology/championInDouble,champion in double,http://dbpedia.org/ontology/champion,champion
2,http://dbpedia.org/ontology/maximumDepth,maximum depth (μ),http://dbpedia.org/ontology/depth,depth (μ)
3,http://dbpedia.org/ontology/eastPlace,east place,http://dbpedia.org/ontology/closeTo,is close to
4,http://dbpedia.org/ontology/meshNumber,MeSH number,http://dbpedia.org/ontology/code,code


In [41]:
dbpedia_subproperties_count = len(dbpedia_subproperties)
print("Queried {} subproperties from DBpedia.".format(dbpedia_subproperties_count))

Queried 75 subproperties from DBpedia.


In [42]:
dbpedia_subproperties_file = "../Data/interim/DBpedia/properties/DBpedia_subproperties.csv"
dbpedia_subproperties.to_csv(dbpedia_subproperties_file, index = False)

## 3.2. Compute percentage of instances with subproperties

In [43]:
instances_with_subprop = set()
for sub_prop in dbpedia_subproperties["Subproperty"]:
    prop = sub_prop.split("/")[-1]
    print(prop)
    query = """
        SELECT DISTINCT ?item
        WHERE {
            ?item dbo:""" + prop + """ [] .
        }
     """
    results_df = run_query(dbpedia_sparql, query)
    if not results_df.empty:
        instances_with_subprop.update(list(results_df["item.value"]))

championInDoubleFemale
championInDouble
maximumDepth
eastPlace
meshNumber
politicGovernmentDepartment
olympicOathSwornByJudge
codeNationalMonument
nameInWadeGilesChinese
northPlace
latinName
ofsCode
dutchPPNCode
isPartOfMilitaryConflict
northEastPlace
ekatteCode
chorusCharacterInPlay
owningOrganisation
nextEvent
isPartOfWineRegion
southPlace
codeIndex
senator
championInMixedDouble
communityIsoCode
commandant
isPartOfAnatomicalStructure
messierName
dutchArtworkCode
playRole
championInDoubleMale
nameInJapanese
premiereDate
nameInCantoneseChinese
provinceIsoCode
dutchWinkelID
awayColourHexCode
westPlace
southWestPlace
championInSingleFemale
numberOfPixels
locationCity
ngcName
percentageLiteracyMen
nameInPinyinChinese
olympicOathSwornByAthlete
nameInHangulKorean
locationCountry
championInSingle
nextMission
nameInSimplifiedChinese
averageDepth
inseeCode
northWestPlace
southEastPlace
nameInHanjaKorean
subClassis
codeListOfHonour
capital
codeProvincialMonument
greekName
championInSingleMale
o

In [44]:
instances_with_subprop_count = len(instances_with_subprop)
print("{} instances from DBpedia have an object property matched to a transitive property in Wikidata.".format(
    instances_with_subprop_count))
print("Percentage of DBpedia instances with  subproperties (out of all DBpedia instances): {}"\
      .format(100 * instances_with_subprop_count/dbpedia_instances_count))

82314 instances from DBpedia have an object property matched to a transitive property in Wikidata.
Percentage of DBpedia instances with  subproperties (out of all DBpedia instances): 1.4994455909497708
