In [42]:
# Supplement Wikidata items about Met's objects

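# Script to supplement the P31 (instance of) statements of Met objects in
# Wikidata with a P1932 qualifier holding the object name from the Met database.
# For each item: start by getting all of its P31 statements, then
# look up the objectName in the Met database using the item's Met ID (P3634).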

# SPARQL query to list Met items without the P1932 set:
# https://w.wiki/6NiH

# For running in JupyterLab at https://paws.wmcloud.org
# !pip install wikitables
# !pip install tabulate

import pywikibot
import pandas as pd
from pywikibot import pagegenerators as pg
from pywikibot.data.sparql import SparqlQuery
import logging

from types import SimpleNamespace

import requests
import re
import numpy as np

from tqdm.notebook import tqdm

wikidata_api_url = 'https://www.wikidata.org/w/api.php'

def stringify_pywikibot_target(intarget):
    '''
    Take pywikibot target object and return a string version, or QID, or printed date object

    Parameters:
        intarget: a claim or qualifier target. Handled types are str,
            pywikibot.ItemPage and pywikibot.WbTime.

    Returns:
        The value itself for a str, the entity id (QID) for an ItemPage,
        str(intarget) for a WbTime, and None for anything else
        (including a None target).
    '''
    if isinstance(intarget, str):
        return intarget
    # Use the public pywikibot.ItemPage name instead of reaching into the
    # private pywikibot.page._wikibase module.
    if isinstance(intarget, pywikibot.ItemPage):
        return intarget.id
    if isinstance(intarget, pywikibot.WbTime):
        return str(intarget)
    return None

def retrieve_claim_propqual(item, inclaimprop, inclaimtarget=None, inqualprop=None, inqualvalue=None):
    ''' Retrieve an entire Wikidata claim if property/qualifier match
        Only works with returning strings or items, which get returned as QIDs

        Parameters:
            item: object exposing get(force=True) and a dict-like `claims`
                attribute (a pywikibot ItemPage in this script).
            inclaimprop: property id of the statements to inspect, e.g. 'P31'.
            inclaimtarget: optional QID; when given, only statements whose
                target has this id are considered.
            inqualprop: optional qualifier property id to read from the
                matching statements.
            inqualvalue: optional string; when given, only qualifier values
                equal to it are returned.

        Returns:
            A list of stringified values:
            - no inclaimtarget: every statement target of inclaimprop;
            - inclaimtarget without inqualprop: the target of each match;
            - inclaimtarget with inqualprop: the matching qualifier values.
            The list is empty when nothing matches.
    '''
    returnlist = []

    # Always re-fetch so the claims reflect the current state of the item
    item.get(force=True)

    for statement in item.claims.get(inclaimprop) or []:
        if not inclaimtarget:
            returnlist.append(stringify_pywikibot_target(statement.target))
            continue

        # A target can be None (unknown/no value) or a non-item value such as
        # a string; neither has an `.id`, so treat them as "no match" instead
        # of raising AttributeError.
        if getattr(statement.target, 'id', None) != inclaimtarget:
            continue

        if not inqualprop:
            # inclaimprop-inclaimtarget matched, but no qualifier specified
            # Then just return the inclaimtarget QID
            returnlist.append(stringify_pywikibot_target(statement.target))
        elif inqualprop in statement.qualifiers:
            # Collect the qualifier values, filtered by inqualvalue if given
            for qual in statement.qualifiers[inqualprop]:
                qualstring = stringify_pywikibot_target(qual.target)
                if not inqualvalue or inqualvalue == qualstring:
                    returnlist.append(qualstring)
    return returnlist

def metid_to_objectName (in_df, metid:str):
    '''Lookup met id (integer) in Dataframe loaded from Met CSV

    Parameters:
        in_df: DataFrame with 'Object ID' and 'Object Name' columns.
        metid: the Met object id, as a string of digits.

    Returns:
        The 'Object Name' of the first row whose 'Object ID' equals
        int(metid). None when metid is not an integer, when no row matches,
        or when the matching row has no object name (NaN).
    '''
    try:
        object_id = int(metid)
    except (TypeError, ValueError):
        # Malformed id: report "not found" rather than crash the whole run
        return None

    names = in_df.loc[in_df['Object ID'] == object_id, 'Object Name']
    if names.empty:
        return None

    name = names.iloc[0]
    # A blank CSV cell is loaded as NaN, which is truthy and would slip past
    # a caller's `if not object_name` check, so normalise it to None.
    return None if pd.isna(name) else name

def handle_item (item:pywikibot.page.ItemPage):
    '''Add the Met object name as a P1932 qualifier to an item's P31 statements

    Reads the item's single Met ID (P3634), looks up the object name in the
    module-level `metdf` DataFrame and adds it as a P1932 qualifier to every
    P31 statement that does not already carry one. Progress is reported via
    tqdm and the module-level `pbar`; the module-level `counter` is updated:
      - counter.total: every item passed in
      - counter.statements: every qualifier added
      - counter.supplemented: every item that received at least one qualifier

    Parameters:
        item: the Wikidata item to process.

    Returns:
        None.
    '''
    counter.total += 1

    tqdm.write(f"{item.id}")

    # Grab Met ID, which should return exactly one value.
    # This call also (re)loads the item, so item.claims is populated below.
    metidlist = retrieve_claim_propqual(item, 'P3634')
    if len(metidlist) != 1:
        tqdm.write(f"  {item.id}: Error, Met ID should be exactly one. Instead: {metidlist}")
        return

    metid = metidlist[0]  # Extract the lone Met ID

    # Look up object name
    object_name = metid_to_objectName (metdf, metid)
    if not object_name:
        tqdm.write(f"  {item.id}: Error, object name lookup with Met database returned empty")
        return

    added = 0
    # `or []` guards against an item without any P31 statement
    for statement in item.claims.get('P31') or []:  # Get instance of
        # Target is None for unknown/no-value statements
        target_id = getattr(statement.target, 'id', None)

        if 'P1932' in statement.qualifiers:
            # Statement already has a P1932 qualifier: report and leave it alone
            for qual in statement.qualifiers['P1932']: #iterate over all
                tqdm.write(f"{item.id}: Error, found unexpected P31|{target_id}|P1932 claims: {qual}")
            continue

        # Output status message
        pbar.set_postfix_str(f"{item.id}: {metid}, should be {object_name}")

        tqdm.write(f"  Adding {item.id}: {metid}, {target_id}, '{object_name}'")

        # Add qualifier with objectName string from Met
        qualifier = pywikibot.Claim(repo, 'P1932')
        try:
            qualifier.setTarget(object_name)
        except ValueError:
            tqdm.write(f"  could not set object name to {object_name}")
            break
        statement.addQualifier(qualifier) # summary=u'Adding a qualifier.'

        added += 1
        counter.statements += 1

    # Only count the item as supplemented if something was actually written
    if added:
        counter.supplemented += 1
    return

if __name__ == '__main__':
    # Driver: load the Met CSV, query Wikidata for candidate items, process
    # each one with handle_item and print a summary of the counters.

    # Init counters
    counterdict = {
        'total': 0,
        'supplemented': 0,
        'statements': 0
    }
    counter = SimpleNamespace(**counterdict)

    # LOAD Met CSV
    # Reuse a populated metdf left in the namespace by an earlier run of this
    # cell (the CSV load is slow); load it when it is missing or empty.
    # Looking it up through globals() avoids a NameError in a fresh kernel.
    metdf = globals().get('metdf')
    if metdf is None or metdf.empty:
        metdf = pd.read_csv('MetObjects.csv',low_memory=False)

    # Do SPARQL query to grab items that have a Met ID (P3634) and a P31
    # statement without a P1932 qualifier
    # Return QID, P31 value, Met ID
    QUERY = '''
    SELECT ?item ?thing ?metid WHERE {
      ?item wdt:P3634 ?metid .
      MINUS { ?item p:P31 [ps:P31 ?thing; pq:P1932 [] ] }
      ?item wdt:P31 ?thing .
    } LIMIT 29
    '''

    # For SPARQL generator
    wikidata_site = pywikibot.Site("wikidata", "wikidata")
    repo = wikidata_site.data_repository()
    generator = pg.WikidataSPARQLPageGenerator(QUERY, site=wikidata_site)

    pbar = tqdm(generator) # For progress bar
    for item in pbar:
        handle_item(item)

    # Output final report
    tqdm.write("Finished")
    tqdm.write(f"Items examined: {counter.total}")
    tqdm.write(f"Items supplemented: {counter.supplemented}")
    tqdm.write(f"Statements added: {counter.statements}")
    
# Consider adding references
# retrieved - 14 May 2015
# reference URL - http://www.metmuseum.org/collection/the-collection-online/search/435976
# Example: https://www.wikidata.org/wiki/Q18177495

0it [00:00, ?it/s]

Q116370778
  Adding Q116370778: 2977, Q50386939, 'Curtain knob'
Q116331065
  Adding Q116331065: 10014, Q16934861, 'Sewing table'
Q19924863
  Adding Q19924863: 10453, Q3305213, 'Painting'
Q19925264
  Adding Q19925264: 10607, Q3305213, 'Painting'
Q20169619
  Adding Q20169619: 12799, Q3305213, 'Painting'
Q20175025
  Adding Q20175025: 11278, Q3305213, 'Painting'
Q19925429
  Adding Q19925429: 10219, Q3305213, 'Painting, miniature'
Q19930644
  Adding Q19930644: 12825, Q3305213, 'Painting'
Q83560008
  Adding Q83560008: 247, Q11285759, 'Armchair'
Q20177796
  Adding Q20177796: 12684, Q3305213, 'Painting, miniature'
Q29382893
  Adding Q29382893: 1674, Q63495428, 'Side Chair'
Q116370738
  Adding Q116370738: 3069, Q3917316, 'Decanter'
Q29383186
  Adding Q29383186: 24927, Q1907525, 'Pair of flintlock pistols'
Q19925002
  Adding Q19925002: 14316, Q3305213, 'Painting, miniature'
Q116295966
  Adding Q116295966: 246164, Q831869, 'Oinochoe'
Q116241017
  Adding Q116241017: 19875, Q191851, 'Vase'
Q2017260

In [30]:
print (type(item))

<class 'pywikibot.page._wikibase.ItemPage'>
