# Bulk import all Wikidata properties into a Wikibase instance

This is a notebook from https://github.com/SuLab/WikidataIntegrator/blob/main/notebooks/CreateWikidataPropertiesParallel.ipynb

It gets all property IDs from Wikidata and imports them into another Wikibase instance.

In [1]:
%%capture
!pip install wikidataintegrator
from wikidataintegrator import wdi_core, wdi_login

## Get all Wikidata properties

In [2]:
# SPARQL query: every entity that has a wikibase:directClaim predicate, together with its
# wikibase:propertyType, label and description. Both FILTERs keep only English text, so an
# entity lacking an English label or an English description yields no row.
query = """
SELECT DISTINCT ?property ?propertyLabel ?propertyDescription ?propType WHERE {
   ?property wikibase:directClaim ?p ;
             wikibase:propertyType ?propType ;
             schema:description ?propertyDescription ;
             rdfs:label ?propertyLabel .
   FILTER (lang(?propertyLabel) = 'en')
   FILTER (lang(?propertyDescription) = 'en')}
"""
# No endpoint is passed here, so WikidataIntegrator's default endpoint is used
# (presumably the public Wikidata query service - confirm). as_dataframe=True asks for
# a DataFrame, which the later cells rely on (iterrows / itertuples).
propertiesSparql = wdi_core.WDItemEngine.execute_sparql_query(query, as_dataframe=True)
propertiesSparql
# Maps the ontology IRI returned in ?propType to the datatype id that Wikibase expects
# when a property is created.
# The Lexeme/Form/Sense ids carry the "wikibase-" prefix, like 'wikibase-item' and
# 'wikibase-property'; the bare 'lexeme' / 'form' / 'sense' are not registered datatype ids.
datatype_map = {'http://wikiba.se/ontology#CommonsMedia': 'commonsMedia',
                'http://wikiba.se/ontology#ExternalId': 'external-id',
                'http://wikiba.se/ontology#GeoShape': 'geo-shape',
                'http://wikiba.se/ontology#GlobeCoordinate': 'globe-coordinate',
                'http://wikiba.se/ontology#Math': 'math',
                'http://wikiba.se/ontology#Monolingualtext': 'monolingualtext',
                'http://wikiba.se/ontology#Quantity': 'quantity',
                'http://wikiba.se/ontology#String': 'string',
                'http://wikiba.se/ontology#TabularData': 'tabular-data',
                'http://wikiba.se/ontology#Time': 'time',
                'http://wikiba.se/ontology#Url': 'url',
                'http://wikiba.se/ontology#WikibaseItem': 'wikibase-item',
                'http://wikiba.se/ontology#WikibaseLexeme': 'wikibase-lexeme',
                'http://wikiba.se/ontology#WikibaseForm': 'wikibase-form',
                'http://wikiba.se/ontology#WikibaseSense': 'wikibase-sense',
                'http://wikiba.se/ontology#MusicalNotation': 'musical-notation',
                'http://wikiba.se/ontology#WikibaseProperty': 'wikibase-property'}
# Derive each property's Wikibase datatype id from its ontology IRI.
# The column is assigned in one go: rows yielded by DataFrame.iterrows() may be copies, so
# writing into them (as a row-by-row loop would) is not guaranteed to reach the frame.
# Indexing datatype_map directly (rather than Series.map) keeps the KeyError for an
# unknown property type instead of silently producing NaN.
propertiesSparql['datatype'] = [datatype_map[prop_type] for prop_type in propertiesSparql['propType']]
propertiesSparql

Unnamed: 0,property,propType,propertyLabel,propertyDescription,datatype
0,http://www.wikidata.org/entity/P364,http://wikiba.se/ontology#WikibaseItem,original language of film or TV show,language in which a film or a performance work...,wikibase-item
1,http://www.wikidata.org/entity/P360,http://wikiba.se/ontology#WikibaseItem,is a list of,common element between all listed items,wikibase-item
2,http://www.wikidata.org/entity/P611,http://wikiba.se/ontology#WikibaseItem,religious order,order of monks or nuns to which an individual ...,wikibase-item
3,http://www.wikidata.org/entity/P612,http://wikiba.se/ontology#WikibaseItem,mother house,principal house or community for a religious i...,wikibase-item
4,http://www.wikidata.org/entity/P361,http://wikiba.se/ontology#WikibaseItem,part of,object of which the subject is a part (if this...,wikibase-item
...,...,...,...,...,...
9713,http://www.wikidata.org/entity/P7314,http://wikiba.se/ontology#ExternalId,TDV İslam Ansiklopedisi ID,identifier for the Islamic Encyclopedia create...,external-id
9714,http://www.wikidata.org/entity/P7310,http://wikiba.se/ontology#ExternalId,Maine Trail Finder ID,identifier for a trail on the Maine Trail Find...,external-id
9715,http://www.wikidata.org/entity/P7311,http://wikiba.se/ontology#ExternalId,Aozora Bunko author ID,identifier of a list page of works by author o...,external-id
9716,http://www.wikidata.org/entity/P7315,http://wikiba.se/ontology#String,IP Code,identifier which classifies and rates the degr...,string


In [25]:
# Show the column names of the query result, including the 'datatype' column added above.
propertiesSparql.columns

## Login to Wikibase

In [26]:
from getpass import getpass
import pprint
import os

# Endpoints of the target Wikibase: wiki root, MediaWiki API and its SPARQL service.
wikibase = "http://removena.katharinabrunner.de/"
api = "http://removena.katharinabrunner.de/api.php"
sparql = "http://removena.katharinabrunner.de:8834/proxy/wdqs/bigdata/namespace/wdq/sparql"
# Entity prefix derived from the wiki root, with an https scheme rewritten to http.
entityUri = "".join((wikibase.replace("https:", "http:"), "entity/"))

# Credentials are read from the environment; a missing variable raises KeyError.
WBUSER = os.environ["WIKIBASE_USER"]
WBPASS = os.environ["WIKIBASE_USER_PASSWORD"]
# Session object handed to every write against the target Wikibase.
login = wdi_login.WDLogin(WBUSER, WBPASS, mediawiki_api_url=api)

#### Retrieve all properties from wikibase

In [18]:
def get_properties():
    """Return the properties that already exist in the target Wikibase.

    Queries the SPARQL endpoint held in the module-level ``sparql`` for every
    ``wikibase:Property`` that has an English label.

    Returns:
        dict: English label -> property ID (the last path segment of the
        property IRI, e.g. ``"P12"``). If several properties share a label,
        the one returned last by the endpoint wins.
    """
    query = """
    SELECT ?property ?label
    WHERE {
        ?property a wikibase:Property .
        ?property rdfs:label ?label .
        FILTER (LANG(?label) = "en" )
    }"""

    results = wdi_core.WDItemEngine.execute_sparql_query(query=query, endpoint=sparql)

    # The label is used verbatim as the key. Only the property IRI is reduced to its last
    # path segment; doing the same to the label would truncate any label containing "/"
    # and make lookups by full label miss it.
    return {
        binding["label"]["value"]: binding["property"]["value"].split("/")[-1]
        for binding in results["results"]["bindings"]
    }

# Properties already present in the target Wikibase, as returned by get_properties():
# key = English name (label) of the property, value = its ID (Pxxx).
# Used further down to skip properties that were imported before.
property_lookup = get_properties()

#### Function to create properties

In [19]:
def createProperty(login=login, wdprop=None, label="", description="", property_datatype="", wikidata_prop_nr="P1"):
  """Create one property in the target Wikibase and print the result of the write.

  Args:
    login: Login object used for the write (defaults to the notebook-level ``login``
      as it was when this function was defined).
    wdprop: URI of the corresponding Wikidata property. When given, a URL statement
      pointing to it is attached to the new property; when ``None``, no statement is added.
    label: Label set on the new property.
    description: Description set on the new property.
    property_datatype: Wikibase datatype id of the new property (e.g. ``"url"``).
    wikidata_prop_nr: Property number in the target Wikibase that holds the link back
      to Wikidata. Defaults to ``"P1"``, the value that used to be hard-coded.

  Returns:
    The value returned by ``item.write`` (the notebook output shows property IDs such
    as ``P2``). It is also printed, as before.
  """
  # Link back to the Wikidata original only when a source URI was supplied.
  statements = [] if wdprop is None else [wdi_core.WDUrl(wdprop, prop_nr=wikidata_prop_nr)]
  localEntityEngine = wdi_core.WDItemEngine.wikibase_item_engine_factory(api, sparql)
  item = localEntityEngine(data=statements)
  item.set_label(label)
  item.set_description(description)
  new_id = item.write(login, entity_type="property", property_datatype=property_datatype)
  print(new_id)
  return new_id

# Bootstrap: make sure the url-typed property that links a local property to its Wikidata
# counterpart exists. createProperty attaches that link under "P1", so this is presumably
# meant to run first on an empty Wikibase so that it receives P1 - confirm.
if not ("property in wikidata" in property_lookup):
  createProperty(
    login,
    label="property in wikidata",
    description="The same property in Wikidata",
    property_datatype="url",
  )

P1


#### Create Wikidata properties

In [21]:
# Ontology IRI (?propType) -> Wikibase datatype id used when creating the property.
# This definition replaces any earlier `datatype_map`, so it must cover every property
# type the Wikidata query can return: a missing entry raises KeyError inside
# createPropertyStage and the property is skipped as "Failed".
datatype_map = {'http://wikiba.se/ontology#CommonsMedia': 'commonsMedia',
                'http://wikiba.se/ontology#ExternalId': 'external-id',
                'http://wikiba.se/ontology#GeoShape': 'geo-shape',
                'http://wikiba.se/ontology#GlobeCoordinate': 'globe-coordinate',
                'http://wikiba.se/ontology#Math': 'math',
                'http://wikiba.se/ontology#Monolingualtext': 'monolingualtext',
                'http://wikiba.se/ontology#Quantity': 'quantity',
                'http://wikiba.se/ontology#String': 'string',
                'http://wikiba.se/ontology#TabularData': 'tabular-data',
                'http://wikiba.se/ontology#Time': 'time',
                'http://wikiba.se/ontology#Url': 'url',
                'http://wikiba.se/ontology#WikibaseItem': 'wikibase-item',
                'http://wikiba.se/ontology#WikibaseLexeme': 'wikibase-lexeme',
                'http://wikiba.se/ontology#WikibaseForm': 'wikibase-form',
                'http://wikiba.se/ontology#WikibaseSense': 'wikibase-sense',
                'http://wikiba.se/ontology#MusicalNotation': 'musical-notation',
                'http://wikiba.se/ontology#WikibaseProperty': 'wikibase-property'}


In [22]:
from joblib import Parallel, delayed
import multiprocessing
     
# NOTE(review): `inputs` is not referenced by any other line visible in this notebook;
# it looks like a leftover from a joblib usage example - confirm before removing.
inputs = range(10) 
def createPropertyStage(row, property_lookup):
    """Create the property described by ``row`` unless its label already exists.

    Args:
        row: One record of the Wikidata query result; must expose ``property``,
            ``propertyLabel``, ``propertyDescription`` and ``propType`` attributes
            (e.g. a tuple from ``DataFrame.itertuples()``).
        property_lookup: Mapping whose keys are the labels of the properties that
            already exist in the target Wikibase.

    Returns:
        None. Failures are reported on stdout and never raised, so one bad property
        does not abort the whole batch.
    """
    if row.propertyLabel in property_lookup:
        return

    try:
        createProperty(
            login=login,
            wdprop=row.property,
            label=row.propertyLabel,
            description=row.propertyDescription,
            property_datatype=datatype_map[row.propType],
        )
    except Exception as exc:
        # `Exception` rather than a bare except: KeyboardInterrupt/SystemExit still stop
        # the run. The message names the property and the cause so failures can be traced.
        print("Failed: {!r} ({}): {!r}".format(row.propertyLabel, row.property, exc))
 
# Use one joblib worker per CPU core reported by the OS.
num_cores = multiprocessing.cpu_count()

# Each row of the Wikidata result becomes one createPropertyStage call; `results`
# collects the return value of every call.
results = Parallel(n_jobs=num_cores)(
    delayed(createPropertyStage)(wd_row, property_lookup)
    for wd_row in propertiesSparql.itertuples()
)

Please set P2302 and Q21502410 in your wikibase or set `core_props` manually.
Continuing with no core_props
Please set P2302 and Q21502410 in your wikibase or set `core_props` manually.
Continuing with no core_props
Please set P2302 and Q21502410 in your wikibase or set `core_props` manually.
Continuing with no core_props
Please set P2302 and Q21502410 in your wikibase or set `core_props` manually.
Continuing with no core_props
Please set P2302 and Q21502410 in your wikibase or set `core_props` manually.
Continuing with no core_props
Please set P2302 and Q21502410 in your wikibase or set `core_props` manually.
Continuing with no core_props
Please set P2302 and Q21502410 in your wikibase or set `core_props` manually.
Continuing with no core_props
Please set P2302 and Q21502410 in your wikibase or set `core_props` manually.
Continuing with no core_props
Please set P2302 and Q21502410 in your wikibase or set `core_props` manually.
Continuing with no core_props
Please set P2302 and Q215024

P2
P3
P4
P5
P6
P8
P7
P9
P10
P11
P12
P13
P14
P15
P16
P17
P18
P19
P20
P21
P22
P24
P23
P25
P26
P27
P28
P29
P30
P31
P32
P34
P33
P36
P37
P35
P38
P39
P40
P42
P41
P43
P44
P45
P46
P47
P48
P49
P50
P51
P52
P53
P54
P55
P56
P57
P59
P58
P60
P61
P62
P63
P64
P65
P66
P67
P69
P68
P70
P71
P72
P74
P73
P75
P76
P77
P78
P79
P80
P83
P81
P82
P84
P85
P86
P87
P88
P89
P91
P90
P92
P93
P94
P95
P96
P97
P98
P100
P99
P101
P102
P103
P105
P104
P106
P107
P108
P109
P110
P111
P113
P112
P114
P115
P116
P117
P118
P119
P120
P122
P121
P124
P125
P123
P127
P126
P128
P129
P130
P131
P132
P133
P134
P135
P136
P137
P138
P139
P141
P140
P142
P143
P144
P145
P146
P147
P148
P150
P149
P151
P152
P153
P154
P155
P156
P157
P158
P160
P159
P161
P162
P163
P164
P165
P166
P167
P168
P169
P170
P171
P172
P173
P175
P174
P176
P177
P178
P180
P179
P181
P182
P183
P184
P185
P186
P187
P188
P189
P190
P191
P192
P193
P194
P195
P197
P196
P199
P198
P200
P201
P202
P203
P204
P205
P206
P207
P208
P209
P210
P211
P212
P213
P216
P215
P214
P217
P218
P219
P221
P220
P222
P