In [None]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
import pandas as pd
import numpy as np
import requests
import time
pd.options.mode.chained_assignment = None
import json
from SPARQLWrapper import SPARQLWrapper, JSON

### class_instances
__Input__: a wikidata type

__Output__: the number of entities (members) that have this type

In [None]:
def class_instances(c):
    """Count the entities that are direct instances (``wdt:P31``) of a Wikidata class.

    Args:
        c: Wikidata class identifier such as ``"Q40231"``; it is substituted
            into the SPARQL query as ``wd:<c>``.

    Returns:
        A DataFrame with the columns ``type`` (the input identifier) and
        ``count`` (the value delivered by the endpoint, not converted), or
        ``False`` when the response contains no bindings.

    Raises:
        requests.HTTPError: if the endpoint answers with an error status.
    """
    rq="""select  (count(?x) AS ?cnt)
{{
 ?x wdt:P31 wd:{0}
 }}"""
    sparql = "https://query.wikidata.org/sparql"
    r = requests.get(sparql, params = {'format': 'json', 'query': rq.format(c)})
    # Surface throttling / server errors directly instead of failing later
    # with an opaque JSON decoding error.
    r.raise_for_status()
    bindings = r.json()['results']['bindings']

    # pd.json_normalize is the supported spelling; the pd.io.json alias was
    # deprecated in pandas 1.0 and is no longer available in pandas 2.x.
    result = pd.json_normalize(bindings)
    if result.empty:
        return False

    result = result.rename(columns={"cnt.value": "count"})
    result["type"] = c
    return result[["type", "count"]]

### Example:
Input wikidata types are __Q40231__:election and __Q175331__:demonstration


In [None]:
#Examples:
types=["Q40231","Q175331"]

# DataFrame.append was removed in pandas 2.0, so the per-type frames are
# collected first and concatenated once. class_instances returns False when a
# query yields nothing; those results are skipped instead of being appended.
frames = [df for df in map(class_instances, types) if isinstance(df, pd.DataFrame)]
final_results = pd.concat(frames) if frames else pd.DataFrame()
print(final_results)

### get_superclass
__Input__: a wikidata type

__Output__: all superclasses of the wikidata type up to the highest available class in the hierarchy

In [None]:
def get_superclass(c):
    """List the superclasses of a Wikidata class by following ``wdt:P279*``.

    The property path uses ``*`` (zero or more steps), so the query also
    matches the class itself.

    Args:
        c: Wikidata class identifier such as ``"Q40231"``.

    Returns:
        A DataFrame with the columns ``type`` (the input identifier) and
        ``superclass`` (the text after the last ``/`` of each returned value),
        or ``False`` when the response contains no bindings.

    Raises:
        requests.HTTPError: if the endpoint answers with an error status.
    """
    rq="""  
select ?superclass 
{{
wd:{0} wdt:P279* ?superclass

    }}
    """
    sparql = "https://query.wikidata.org/sparql"
    r = requests.get(sparql, params = {'format': 'json', 'query': rq.format(c)})
    # Surface throttling / server errors directly instead of failing later
    # with an opaque JSON decoding error.
    r.raise_for_status()

    # pd.json_normalize replaces the pd.io.json alias that pandas 2.x dropped.
    result = pd.json_normalize(r.json()['results']['bindings'])
    if result.empty:
        return False

    result["type"] = c
    result = result.rename(columns={"superclass.value": "superclass"})
    # Keep only the trailing identifier of each value (text after the last "/").
    result["superclass"] = result["superclass"].str.rsplit("/", n=1).str[-1]
    return result[["type", "superclass"]]

In [None]:
#Examples:
types=["Q40231","Q175331"]

# DataFrame.append was removed in pandas 2.0, so the per-type frames are
# collected first and concatenated once. get_superclass returns False when a
# query yields nothing; those results are skipped instead of being appended.
frames = [df for df in map(get_superclass, types) if isinstance(df, pd.DataFrame)]
final_results = pd.concat(frames) if frames else pd.DataFrame()
print(final_results)

### get_subclass
__Input__: a wikidata type 

__Output__: all subclasses of the wikidata type until the lowest available class in the hierarchy

In [None]:
def get_subclass(c):
    """List the subclasses of a Wikidata class by following ``wdt:P279*``.

    The property path uses ``*`` (zero or more steps), so the query also
    matches the class itself.

    Args:
        c: Wikidata class identifier such as ``"Q40231"``.

    Returns:
        A DataFrame with the columns ``type`` (the input identifier) and
        ``subclass`` (the text after the last ``/`` of each returned value),
        or ``False`` when the response contains no bindings.

    Raises:
        requests.HTTPError: if the endpoint answers with an error status.
    """
    rq="""  
select ?subclass 
{{
?subclass wdt:P279* wd:{0}

    }}
    """
    sparql = "https://query.wikidata.org/sparql"
    r = requests.get(sparql, params = {'format': 'json', 'query': rq.format(c)})
    # Surface throttling / server errors directly instead of failing later
    # with an opaque JSON decoding error.
    r.raise_for_status()

    # pd.json_normalize replaces the pd.io.json alias that pandas 2.x dropped.
    result = pd.json_normalize(r.json()['results']['bindings'])
    if result.empty:
        return False

    result["type"] = c
    result = result.rename(columns={"subclass.value": "subclass"})
    # Keep only the trailing identifier of each value (text after the last "/").
    result["subclass"] = result["subclass"].str.rsplit("/", n=1).str[-1]
    return result[["type", "subclass"]]

In [None]:
#Examples:
types=["Q40231","Q175331"]

# DataFrame.append was removed in pandas 2.0, so the per-type frames are
# collected first and concatenated once. get_subclass returns False when a
# query yields nothing; those results are skipped instead of being appended.
frames = [df for df in map(get_subclass, types) if isinstance(df, pd.DataFrame)]
final_results = pd.concat(frames) if frames else pd.DataFrame()
print(final_results)

### get_property_label
__Input__: a wikidata property 

__Output__: label of property

In [None]:
def get_property_label(prop):
    """Look up the English label of a Wikidata property.

    Args:
        prop: Wikidata property identifier such as ``"P585"``; it is
            substituted into the SPARQL query as ``wdt:<prop>``.

    Returns:
        A DataFrame with the columns ``property`` (the input identifier) and
        ``propLabel`` (the label exactly as returned by the label service),
        or ``False`` when the response contains no bindings.

    Raises:
        requests.HTTPError: if the endpoint answers with an error status.
    """
    property_label_rq="""

SELECT ?prop ?propLabel 
WHERE
{{  
  SERVICE wikibase:label {{ bd:serviceParam wikibase:language "en". }} 
  ?prop wikibase:directClaim wdt:{0} .
}}"""

    sparql = "https://query.wikidata.org/sparql"
    r = requests.get(sparql, params = {'format': 'json', 'query': property_label_rq.format(prop)})
    # Surface throttling / server errors directly instead of failing later
    # with an opaque JSON decoding error.
    r.raise_for_status()

    # pd.json_normalize replaces the pd.io.json alias that pandas 2.x dropped.
    result = pd.json_normalize(r.json()['results']['bindings'])
    if result.empty:
        return False

    # The label is plain text, not a URI, so it is kept whole. Cutting it at
    # the last "/" (as is done for entity URIs) would truncate any label that
    # happens to contain a slash.
    result = result.rename(columns={"propLabel.value": "propLabel"})
    result["property"] = prop
    return result[["property", "propLabel"]]

In [None]:
#Example:
properties=["P585","P569"]

# DataFrame.append was removed in pandas 2.0, so the per-property frames are
# collected first and concatenated once. get_property_label returns False when
# a query yields nothing; those results are skipped instead of being appended.
frames = [df for df in map(get_property_label, properties) if isinstance(df, pd.DataFrame)]
final_results = pd.concat(frames) if frames else pd.DataFrame()
print(final_results)

### type_label
__Input__: a wikidata class 

__Output__: English label of the wikidata class

In [None]:
def type_label(wiki_class):
    """Fetch one English ``rdfs:label`` of a Wikidata class.

    The query filters labels with ``langMatches(lang(?label), "EN")`` and is
    limited to a single row.

    Args:
        wiki_class: Wikidata class identifier such as ``"Q40231"``.

    Returns:
        A DataFrame with the columns ``type`` (the input identifier) and
        ``label``, or ``False`` when the response contains no bindings.

    Raises:
        requests.HTTPError: if the endpoint answers with an error status.
    """
    wiki_rq='''PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> 
SELECT  *
WHERE {{
        wd:{0} rdfs:label ?label .
        FILTER (langMatches( lang(?label), "EN" ) )
      }} 
limit 1'''
    sparql = "https://query.wikidata.org/sparql"
    r = requests.get(sparql, params = {'format': 'json', 'query': wiki_rq.format(wiki_class)})
    # Surface throttling / server errors directly instead of failing later
    # with an opaque JSON decoding error.
    r.raise_for_status()

    # pd.json_normalize replaces the pd.io.json alias that pandas 2.x dropped.
    result = pd.json_normalize(r.json()['results']['bindings'])
    if result.empty:
        return False

    result["type"] = wiki_class
    result = result.rename(columns={"label.value": "label"})
    return result[["type", "label"]]

In [None]:
#Examples:
types=["Q40231","Q175331"]

# DataFrame.append was removed in pandas 2.0, so the per-type frames are
# collected first and concatenated once. type_label returns False when a
# query yields nothing; those results are skipped instead of being appended.
frames = [df for df in map(type_label, types) if isinstance(df, pd.DataFrame)]
final_results = pd.concat(frames) if frames else pd.DataFrame()
print(final_results)

### get_datatypes
__Input__: a property 

__Output__: datatypes used for this property

In [None]:
def get_datatypes(prop):
    """Fetch the ``wikibase:propertyType`` of a Wikidata property.

    Args:
        prop: Wikidata property identifier such as ``"P585"``; it is
            substituted into the SPARQL query as ``wd:<prop>``.

    Returns:
        A DataFrame with the columns ``property`` (the input identifier) and
        ``datatype`` (the text after the last ``/`` of each returned value),
        or ``False`` when the response contains no bindings.

    Raises:
        requests.HTTPError: if the endpoint answers with an error status.
    """
    wiki_rq='''SELECT ?datatype 
WHERE 
{{
  wd:{0} wikibase:propertyType ?datatype .
}}'''
    sparql = "https://query.wikidata.org/sparql"
    r = requests.get(sparql, params = {'format': 'json', 'query': wiki_rq.format(prop)})
    # Surface throttling / server errors directly instead of failing later
    # with an opaque JSON decoding error.
    r.raise_for_status()

    # pd.json_normalize replaces the pd.io.json alias that pandas 2.x dropped.
    result = pd.json_normalize(r.json()['results']['bindings'])
    if result.empty:
        return False

    result = result.rename(columns={"datatype.value": "datatype"})
    # Keep the text after the last "/".
    # NOTE(review): if the values are fragment-style URIs ("...#Name"), a
    # "prefix#" part survives this split - confirm whether that is intended.
    result["datatype"] = result["datatype"].str.rsplit("/", n=1).str[-1]
    result["property"] = prop
    return result[["property", "datatype"]]

In [None]:
#Example:
properties=["P585","P569"]

# DataFrame.append was removed in pandas 2.0, so the per-property frames are
# collected first and concatenated once. get_datatypes returns False when a
# query yields nothing; those results are skipped instead of being appended.
frames = [df for df in map(get_datatypes, properties) if isinstance(df, pd.DataFrame)]
final_results = pd.concat(frames) if frames else pd.DataFrame()
print(final_results)

### constraints

__Input__: a property (p) and a Wikidata type (h), the type of the subjects

__Output__: the Wikidata types of the objects in triples that use the given property with subjects of the given type, together with the number of times each object type occurs.


In [None]:
def constraints(p,h):
    """Count the types of the objects linked by a property from subjects of a type.

    The query selects triples ``?subject wdt:<p> ?object`` whose subject is an
    instance (``wdt:P31``) of ``wd:<h>`` and groups them by the ``wdt:P31``
    type of the object.

    Args:
        p: Wikidata property identifier such as ``"P17"``.
        h: Wikidata class identifier of the subjects, such as ``"Q40231"``.

    Returns:
        A DataFrame with the columns ``tail_type`` (text after the last ``/``
        of the object type), ``cnt`` (the count as delivered by the endpoint,
        not converted), ``property`` (``p``) and ``type`` (``h``), or ``False``
        when the response contains no bindings.

    Raises:
        requests.HTTPError: if the endpoint answers with an error status.
    """
    wiki_rq="""
SELECT  ?tail_type (count(*) as ?cnt) {{
?subject wdt:{0} ?object.
?subject wdt:P31 wd:{1}.  
?object wdt:P31 ?tail_type. 
}}
GROUP BY ?tail_type 
    """
    sparql = "https://query.wikidata.org/sparql"
    r = requests.get(sparql, params = {'format': 'json', 'query': wiki_rq.format(p,h)})
    # Surface throttling / server errors directly instead of failing later
    # with an opaque JSON decoding error.
    r.raise_for_status()

    # pd.json_normalize replaces the pd.io.json alias that pandas 2.x dropped.
    result = pd.json_normalize(r.json()['results']['bindings'])
    if result.empty:
        return False

    result = result[["tail_type.value", "cnt.value"]].rename(
        columns={"tail_type.value": "tail_type", "cnt.value": "cnt"}
    )
    result["property"] = p
    result["type"] = h
    # Keep only the trailing identifier of each value (text after the last "/").
    result["tail_type"] = result["tail_type"].str.rsplit("/", n=1).str[-1]
    return result
    

In [None]:
#Example:
types=["Q40231"]
properties=["P17"]

# properties[i] is paired with types[i]. DataFrame.append was removed in
# pandas 2.0, so the frames are collected and concatenated once; constraints
# returns False when a query yields nothing, and those results are skipped.
frames = [constraints(prop, typ) for prop, typ in zip(properties, types)]
frames = [df for df in frames if isinstance(df, pd.DataFrame)]
if frames:
    final_results = pd.concat(frames)
else:
    # Keep the expected columns so the steps below also work with no results.
    final_results = pd.DataFrame(columns=["tail_type", "cnt", "property", "type"])

# Build "<tail_type>:<cnt>" per row; both columns are filled for every row
# that constraints returns.
final_results['tail_cnt'] = final_results["tail_type"].astype(str) + ":" + final_results["cnt"].astype(str)
final_results=final_results.groupby(["property","type"])["tail_cnt"].apply(list).reset_index()
print(final_results)

### type_properties
__Input__: a wikidata type

__Output__: the properties used by instances of this type, and the number of times each property has been used

In [None]:
def type_properties(c):
    """Count how often each predicate is used on instances of a Wikidata class.

    The query matches every triple ``?head ?prop ?obj`` whose ``?head`` is an
    instance (``wdt:P31``) of ``wd:<c>``, so the result is not limited to one
    kind of predicate; callers filter the ``property`` column as needed.

    Args:
        c: Wikidata class identifier such as ``"Q40231"``.

    Returns:
        A DataFrame with the columns ``count`` (as delivered by the endpoint,
        not converted), ``property`` (text after the last ``/`` of the
        predicate) and ``type`` (the input identifier), or ``False`` when the
        response contains no bindings.

    Raises:
        requests.HTTPError: if the endpoint answers with an error status.
    """
    wiki_rq='''
    SELECT  ?prop (COUNT(?prop) AS ?cnt)
WHERE 
{{  
  ?head wdt:P31 wd:{0}.
  ?head ?prop ?obj .
}} GROUP BY ?prop ?typ

    '''
    sparql = "https://query.wikidata.org/sparql"
    r = requests.get(sparql, params = {'format': 'json', 'query': wiki_rq.format(c)})
    # Surface throttling / server errors directly instead of failing later
    # with an opaque JSON decoding error.
    r.raise_for_status()

    # pd.json_normalize replaces the pd.io.json alias that pandas 2.x dropped;
    # the bindings are normalized once instead of repeatedly.
    result = pd.json_normalize(r.json()['results']['bindings'])
    if result.empty:
        return False

    result = result[["prop.value", "cnt.value"]].rename(
        columns={"prop.value": "prop", "cnt.value": "count"}
    )
    # Keep only the trailing identifier of each predicate (text after the last "/").
    result["property"] = result["prop"].str.rsplit("/", n=1).str[-1]
    result["type"] = c
    return result.drop(columns="prop")

In [None]:
#Examples:
types=["Q40231","Q175331"]

# DataFrame.append was removed in pandas 2.0, so the per-type frames are
# collected first and concatenated once. type_properties returns False when a
# query yields nothing; those results are skipped instead of being appended.
frames = [df for df in map(type_properties, types) if isinstance(df, pd.DataFrame)]
if frames:
    final_results = pd.concat(frames)
else:
    # Keep the expected columns so the filter below also works with no results.
    final_results = pd.DataFrame(columns=["count", "property", "type"])

# Keep only the rows whose predicate identifier starts with "P".
final_results=final_results.loc[final_results["property"].str.startswith("P"),]
print(final_results)