We need a couple of known things for this setup to work

Initially, that will be a property that can be used to state what other properties map to.

We also need a property that can be used to say that Items and Properties map to Wikidata.

In [4]:
from SPARQLWrapper import SPARQLWrapper, JSON
from wikidataintegrator import wdi_core, wdi_login
import pandas as pd
import re
import json

# Execute the shared notebook first; presumably it is what defines
# init_wb_sparql() and init_wb() used below -- TODO confirm in Shared.ipynb.
%run 'Shared.ipynb'
# Presumably a SPARQL client for the Wikibase; it is not used in the cells visible here.
wbsparql = init_wb_sparql()
# wb_endpoint is passed to WDItemEngine as mediawiki_api_url and wb_login as the
# login argument of entity.write() in the entity creation cell.
wb_endpoint, wb_login = init_wb()

In [10]:
# Entities used to describe the data.
# Each top-level key is the local handle for an entity; the nested dict holds
# its entity type, label, aliases and description (plus a datatype for properties).
entities = {
    # --- Properties ---
    "source": {
        "entity_type": "property",
        "label": "Source",
        "aliases": ["Data Source", "source"],
        "description": "Source of the data or mapping. For example 'wikidata'",
        "datatype": "string",
    },
    "source-id": {
        "entity_type": "property",
        "label": "Source ID",
        "aliases": ["ID at Source", "source-id", "ID @ Source"],
        "description": "ID of the item at the source. For example for 'wikidata' perhaps 'P123' or 'Q567'",
        "datatype": "string",
    },
    # --- Items ---
    "wikidata": {
        "entity_type": "item",
        "label": "Wikidata",
        "aliases": ["wikidata", "wikidata.org"],
        "description": "Wikidata is a free and open knowledge base that can be read and edited by both humans and machines.",
    },
}

In [13]:
# Mapping of local entity key -> ID of the entity in the Wikibase,
# persisted in ./map.json so already-created entities are skipped on later runs.
known_entities = {}
try:
    with open('./map.json') as f:
        known_entities = json.load(f)
except FileNotFoundError:
    # First run: no map has been saved yet, so start from the empty mapping
    # instead of failing before anything can be created.
    pass

def save_known_map():
    """Overwrite ./map.json with the current contents of ``known_entities`` as JSON."""
    with open('./map.json', mode='w') as map_file:
        json.dump(known_entities, map_file)

# Create Entities
# For every entry in `entities` that is not yet recorded in `known_entities`,
# create it in the Wikibase and record its ID in ./map.json straight away, so a
# failure part-way through does not lose the IDs created so far.
for key, value in entities.items():
    if key in known_entities:
        print("Entity already exists: " + value['label'] + " is " + known_entities[key])
        continue

    # Validate the type before touching the API. Raise rather than call exit():
    # an exception stops the cell immediately and keeps the kernel available.
    entity_type = value['entity_type']
    if entity_type not in ('item', 'property'):
        raise ValueError("Unknown entity type: " + entity_type)

    entity = wdi_core.WDItemEngine(mediawiki_api_url=wb_endpoint)
    entity.set_label(value['label'])
    if 'aliases' in value:
        entity.set_aliases(value['aliases'])
    if 'description' in value:
        entity.set_description(value['description'])

    # Write and catch errors, as wikibase.cloud can be a bit slow in allowing us to lookup existing things sometimes
    try:
        if entity_type == 'item':
            entity.write(login=wb_login)
        else:
            entity.write(login=wb_login, entity_type=entity_type, property_datatype=value['datatype'])
    except Exception as e:
        message = str(e)
        if 'label-conflict' not in message:
            print("Error creating entity: " + message)
            raise

        # A label conflict means the entity already exists; the error text is
        # expected to link to it as [[Property:P123|...]] or [[Item:Q123|...]].
        conflict = re.search(r'\[\[(Property|Item):([^|]+)\|', message)
        if conflict is None:
            # The existing ID cannot be recovered from the message, so surface
            # the original error instead of failing on `None.group`.
            print("Error creating entity: " + message)
            raise

        known_entities[key] = conflict.group(2)
        save_known_map()
        print("Entity already exists: " + key + " is " + known_entities[key])
    else:
        # Only reached when the write succeeded.
        known_entities[key] = entity.wd_item_id
        save_known_map()
        print("Created entity: " + key + " is " + entity.wd_item_id)

print("These entities should now all exist")


Entity already exists: Source is P347
Entity already exists: Source ID is P348
Created entity: wikidata is Q1
These entities should now all exist
