In [3]:
from tools.datasets import *

# Local file the entity document for Q4093 is stored in.
glasgow_truthy_json = "Q4093.json"
# Link for entity Q4093 in the "simple" flavor.
# NOTE(review): presumably "simple" selects truthy statements only (hence the
# variable names) -- confirm against tools.datasets.
glasgow_truthy_url = get_wikidata_link("Q4093", flavor="simple")

# Fetch the link into the local file; per the cell output below, the download
# is skipped when the file already exists.
fetch_dataset(glasgow_truthy_url, glasgow_truthy_json)

File Q4093.json was already downloaded. Skipping...


In [2]:
import json

# Parse the downloaded entity document into plain Python dicts/lists.
# NOTE(review): wrap_open is not defined in the visible cells -- presumably it
# comes from the `tools.datasets` star import; confirm.
with wrap_open(glasgow_truthy_json) as fp:
    raw_document = fp.read()
    glasgow_truthy_dict = json.loads(raw_document)

In [3]:
# The claims of Q4093 are keyed by property id ("P..."), so the keys list
# every property that has at least one statement on this entity.
predicates = glasgow_truthy_dict["entities"]["Q4093"]["claims"]

print(predicates.keys())

dict_keys(['P190', 'P131', 'P373', 'P227', 'P17', 'P94', 'P281', 'P421', 'P473', 'P300', 'P625', 'P31', 'P613', 'P910', 'P948', 'P982', 'P646', 'P1082', 'P901', 'P1036', 'P856', 'P1464', 'P1465', 'P1566', 'P1740', 'P1792', 'P605', 'P206', 'P935', 'P214', 'P47', 'P1997', 'P2633', 'P1281', 'P242', 'P2046', 'P1343', 'P1255', 'P268', 'P906', 'P998', 'P2468', 'P1617', 'P3616', 'P3120', 'P3417', 'P2347', 'P1225', 'P3219', 'P4527', 'P1549', 'P6', 'P2716', 'P1889', 'P5573', 'P949', 'P4672', 'P1151', 'P1667', 'P18', 'P1830', 'P1376', 'P244', 'P1705', 'P1448', 'P2936', 'P1296', 'P691', 'P6766', 'P6849', 'P7350', 'P836', 'P7867', 'P7818', 'P7829', 'P7827', 'P7832', 'P7859', 'P7959'])


In [4]:


def get_edges(entity_df):
    """Generate ``(subject, predicate, object)`` triples from an entity document.

    Only statements whose main snak holds a value that links to another
    entity (``datavalue["type"] == "wikibase-entityid"``) produce a triple.
    Literal values and snaks whose ``snaktype`` is not ``"value"`` are skipped.

    Args:
        entity_df: Parsed entity JSON as a plain dict (despite the name, not a
            DataFrame) with a top-level ``"entities"`` mapping of
            entity id -> entity record.

    Returns:
        list[tuple]: One ``(subject_id, property_id, object_id)`` tuple per
        linked statement, in document order. Entity records without any
        claims contribute no tuples.
    """
    triples = []

    for subject, entity in entity_df["entities"].items():
        # Tolerate records that carry no "claims" key (or an empty container,
        # which may be serialized as [] rather than {}) instead of raising.
        claims = entity.get("claims") or {}

        for claim in claims:
            for statement in claims[claim]:
                mainsnak = statement["mainsnak"]
                # Test the snaktype before touching "datavalue": only
                # "value" snaks are expected to carry one.
                if mainsnak["snaktype"] != "value":
                    continue
                datavalue = mainsnak["datavalue"]
                if datavalue["type"] == "wikibase-entityid":
                    triples.append((subject, claim, datavalue["value"]["id"]))

    return triples

# Extract the entity-to-entity triples of the loaded document; as the cell's
# last expression, the resulting list is displayed in full.
get_edges(glasgow_truthy_dict)

[('Q4093', 'P190', 'Q5776'),
 ('Q4093', 'P190', 'Q2090'),
 ('Q4093', 'P190', 'Q23482'),
 ('Q4093', 'P190', 'Q908'),
 ('Q4093', 'P190', 'Q495'),
 ('Q4093', 'P190', 'Q1563'),
 ('Q4093', 'P190', 'Q11739'),
 ('Q4093', 'P190', 'Q216479'),
 ('Q4093', 'P131', 'Q55934339'),
 ('Q4093', 'P131', 'Q68826097'),
 ('Q4093', 'P131', 'Q77606319'),
 ('Q4093', 'P131', 'Q77607750'),
 ('Q4093', 'P17', 'Q145'),
 ('Q4093', 'P17', 'Q174193'),
 ('Q4093', 'P17', 'Q161885'),
 ('Q4093', 'P17', 'Q230791'),
 ('Q4093', 'P421', 'Q6574'),
 ('Q4093', 'P31', 'Q515'),
 ('Q4093', 'P31', 'Q1549591'),
 ('Q4093', 'P910', 'Q6789903'),
 ('Q4093', 'P1464', 'Q8050662'),
 ('Q4093', 'P1465', 'Q9218504'),
 ('Q4093', 'P1740', 'Q8458102'),
 ('Q4093', 'P1792', 'Q6791290'),
 ('Q4093', 'P206', 'Q19721'),
 ('Q4093', 'P206', 'Q183161'),
 ('Q4093', 'P47', 'Q2746326'),
 ('Q4093', 'P47', 'Q209142'),
 ('Q4093', 'P2633', 'Q5535147'),
 ('Q4093', 'P1343', 'Q4114391'),
 ('Q4093', 'P1343', 'Q4173137'),
 ('Q4093', 'P1343', 'Q19180675'),
 ('Q4093', 

In [7]:
# Ask the SPARQL endpoint for the English rdfs:label of wd:Q4093.
# NOTE(review): `wikidata_sparql` is not defined in any visible cell --
# presumably it comes from `from tools.datasets import *` or a cell not shown;
# confirm this cell survives Restart & Run All.
wikidata_sparql.setQuery("""
SELECT ?label
WHERE
{
    wd:Q4093 rdfs:label ?label.
    FILTER(LANG(?label) = "en").
}
""")
# convert() yields a dict; the rows are read from results['results']['bindings'] later on.
results = wikidata_sparql.query().convert()

In [13]:
import pandas as pd
# Flatten the result bindings into a DataFrame; nested keys become dotted
# column names such as 'label.value'.
results_df = pd.json_normalize(results['results']['bindings'])

In [15]:
# Display only the label text column of the flattened results.
results_df[['label.value']].head()

Unnamed: 0,label.value
0,Glasgow


In [31]:
# Let's generalize the single-entity label lookup to arbitrary lists of entities and properties.

def sparql_values_in(l, prefix="wd:"):
    """Build the space-separated term list for a SPARQL ``VALUES`` block.

    Args:
        l: Iterable of identifier strings (e.g. ``["Q42", "Q43"]``).
        prefix: String prepended to every identifier; defaults to ``"wd:"``.

    Returns:
        str: The prefixed identifiers joined by single spaces; an empty
        string when ``l`` is empty.
    """
    prefixed_terms = (prefix + identifier for identifier in l)
    return " ".join(prefixed_terms)

def annotate_entity(entity_list):
    """Look up the English ``rdfs:label`` of each entity in ``entity_list``.

    Args:
        entity_list: Iterable of entity ids without prefix
            (e.g. ``["Q42", "Q43"]``); ``sparql_values_in`` adds ``wd:``.

    Returns:
        The SPARQL result bindings flattened with ``pd.json_normalize``
        (one row per binding).
    """
    query_list = sparql_values_in(entity_list)

    # A plain template plus str.replace is used instead of an f-string: the
    # literal SPARQL braces below would have to be doubled ({{ }}) inside an
    # f-string. The braces around the placeholder are kept on purpose -- they
    # delimit the VALUES block.
    query = """
    SELECT ?entity ?label
    WHERE
    {
        VALUES ?entity {query_list}.
        ?entity rdfs:label ?label.
        FILTER(LANG(?label) = "en").
    }"""

    query = query.replace("query_list", query_list)
    wikidata_sparql.setQuery(query)
    results = wikidata_sparql.query().convert()
    return pd.json_normalize(results['results']['bindings'])
    

def annotate_property(property_list):
    """Look up the English label of each property in ``property_list``.

    Args:
        property_list: Iterable of property ids without prefix
            (e.g. ``["P6", "P10"]``); ``sparql_values_in`` adds ``wd:``.

    Returns:
        The SPARQL result bindings flattened with ``pd.json_normalize``
        (one row per binding).
    """
    # The "?property_list" token is a placeholder that is swapped for the
    # prefixed ids; the surrounding braces stay and delimit the VALUES block.
    sparql_template = """
    SELECT ?property ?propertyLabel WHERE {
        ?property a wikibase:Property .
        VALUES ?property {?property_list}
        SERVICE wikibase:label {
            bd:serviceParam wikibase:language "en" .
        }
    }
    """

    values_terms = sparql_values_in(property_list)
    wikidata_sparql.setQuery(sparql_template.replace("?property_list", values_terms))

    bindings = wikidata_sparql.query().convert()['results']['bindings']
    return pd.json_normalize(bindings)
    
    
# Smoke test of the helpers defined above. The entity variant is left
# disabled; uncomment it to try annotate_entity on two sample ids.
#annotate_entity(["Q42", "Q43"])
annotate_property(["P6", "P10"])


Unnamed: 0,property.type,property.value,propertyLabel.xml:lang,propertyLabel.type,propertyLabel.value
0,uri,http://www.wikidata.org/entity/P6,en,literal,head of government
1,uri,http://www.wikidata.org/entity/P10,en,literal,video
