# In [5]:
# GLAM Double Vision browser
# Jupyter notebook to display Met Museum objects and associated Wikidata item for comparison
# Uses ipywidgets 

# !pip install --upgrade pip
# !pip install jsonpath-ng

import hashlib
import html
from typing import Optional
from urllib.parse import quote

import requests
import ipywidgets as widgets
from IPython.display import display, HTML
from jsonpath_ng import jsonpath, parse

# Search API template: finds items holding a P3634 (Met object ID) statement
# with the value substituted for {}.
wikidata_haswbstatement_query_url = (
    'https://www.wikidata.org/w/api.php'
    '?action=query&format=json&list=search'
    '&srsearch=haswbstatement:P3634={}'
)
# Entity JSON template: {} is replaced by the item's Q number.
wikidata_json_url = 'https://www.wikidata.org/wiki/Special:EntityData/{}.json'

def get_titles(data) -> list:
    '''
    Collect the "title" of every entry under data['query']['search'].

    data is the decoded JSON of a MediaWiki search API response (the
    "action=query&list=search" form). Entries without a "title" key are
    skipped. Returns an empty list when "query" or "search" is missing.
    '''
    if 'query' not in data or 'search' not in data['query']:
        return []
    return [hit['title'] for hit in data['query']['search'] if 'title' in hit]

def check_query_url(url) -> list:
    '''
    Fetch a MediaWiki search API URL and return the titles found in it.

    url: full search API URL returning JSON.

    Returns the list produced by get_titles(). Any failure (network error,
    timeout, HTTP error status, or a body that is not JSON) yields an empty
    list rather than None, so callers can always treat the result as a list.
    '''
    try:
        # Timeout keeps a stalled connection from hanging the notebook UI
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        json_data = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        print('Wikidata search lookup fail:', str(e))
        return []
    return get_titles(json_data)

def check_metid(metid) -> list:
    '''
    Look up a Met Object ID on Wikidata via its P3634 statement.

    Fills the haswbstatement search URL template with metid and passes it
    to check_query_url(), returning that function's result unchanged:
    the matching Q numbers (normally at most one).
    '''
    search_url = wikidata_haswbstatement_query_url.format(metid)
    return check_query_url(search_url)

def commonsfile_to_url(filename:str) -> Optional[str]:
    '''
    Convert a Commons file name such as 'Foo.jpg' to its full upload URL:
    https://upload.wikimedia.org/wikipedia/commons/<a>/<ab>/Foo.jpg
    where <a> and <ab> are the first one and two hex digits of the MD5 sum
    of the file name.

    Spaces are replaced by underscores before hashing. The MD5 is taken over
    the raw UTF-8 name; only the file name placed in the URL is
    percent-encoded, so characters such as '?', '#', '%' or quotes cannot
    break the URL or the HTML attribute it is embedded in.

    Returns None when filename is empty or None.
    '''
    if not filename:
        return None
    normalfilename = str(filename).replace(" ", "_")
    hashvalue = hashlib.md5(normalfilename.encode('utf-8')).hexdigest()
    return (
        "https://upload.wikimedia.org/wikipedia/commons/"
        f"{hashvalue[:1]}/{hashvalue[:2]}/{quote(normalfilename)}"
    )

def on_button_clicked(b):
    '''
    Handle interface button click to read the textarea widget, and populate Select widget.
    These items can be one per line, or separated by commas on a single line.

    b: the Button instance that was clicked (unused).

    Empty entries (blank lines, trailing or doubled commas, an empty
    textarea) are dropped so the Select widget never offers an empty ID.
    '''
    content = textbox.value
    identifiers = []
    for line in content.splitlines():
        for part in line.split(','):
            identifier = part.strip()
            if identifier:
                identifiers.append(identifier)
    listbox.options = identifiers

def on_list_item_selected(change):
    '''
    Handle a selection event for the Select widget.

    Looks up the selected Met object ID in the Met collection API and on
    Wikidata (via P3634), then renders the Met record in col1 and the
    Wikidata/Commons record in col2.

    change: the change dict delivered by observe(); only change['new']
    (the newly selected object ID) is read.

    Lookup failures are reported inside the affected column instead of
    raising, and missing values are shown as plain "None" rather than as
    links to non-existent pages.
    '''
    def esc(value) -> str:
        # API values are external text (or bool/None): make them HTML-safe
        return html.escape(str(value), quote=True)

    col1.clear_output() # Col 1 is Met content
    col2.clear_output() # Col 2 is Wikidata content

    objectID = change['new']
    if not objectID:
        # The Select widget reports None when its options are emptied
        return
    objectID = str(objectID)

    # Grab Met API info for this object
    data = {}
    met_error = None
    try:
        response = requests.get(apiurlbox.value + objectID, timeout=30)  # Craft Met API URL
        response.raise_for_status() # In case of 404 or other odd error
        data = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        met_error = f'Met API lookup fail: {e}'
    if not isinstance(data, dict):
        met_error = 'Met API lookup fail: unexpected response format'
        data = {}

    # Fetch from Wikidata: info for this Met object ID
    wd_qid = None
    wd_imagefilename = None
    wd_error = None
    try:
        # Should be 0 or 1 results. If more, then weird...
        # "or []" tolerates a None result from the lookup helper.
        wd_qids = check_metid(objectID) or []
        wd_qid = wd_qids[0] if wd_qids else None
        if wd_qid:
            wd_response = requests.get(wikidata_json_url.format(wd_qid), timeout=30)
            wd_response.raise_for_status() # In case of 404 or other odd error
            content = wd_response.json()
            # jsonpath digs the first P18 (image) value out of the entity JSON
            jsonpath_expr = parse(f"$.entities.{wd_qid}.claims.P18[0].mainsnak.datavalue.value")
            matches = [match.value for match in jsonpath_expr.find(content)]
            wd_imagefilename = matches[0] if matches else None
    except (requests.exceptions.RequestException, ValueError) as e:
        wd_error = f'Wikidata EntityData lookup fail: {e}'

    with col1:  # Met content
        display(widgets.HTML("<H2>Met Museum</H2>"))
        if met_error:
            print(met_error)
        primaryImage = data.get('primaryImage')
        if primaryImage:
            display(widgets.HTML(f"<img style='max-width: 256px; height: auto; ' src='{esc(primaryImage)}' />"))
        display(widgets.HTML(f"<p><b>Object ID:</b> <a href='https://www.metmuseum.org/art/collection/search/{quote(objectID, safe='')}'>{esc(objectID)}</a></p>"))
        if not met_error:
            # Safely retrieve these fields if they exist from Met API
            display(widgets.HTML(f"<p><b>Title:</b> {esc(data.get('title'))}</p>"))
            display(widgets.HTML(f"<p><b>Department:</b> {esc(data.get('department'))}</p>"))
            display(widgets.HTML(f"<p><b>Public domain:</b> {esc(data.get('isPublicDomain'))}</p>"))
    with col2:  # Wikidata content
        display(widgets.HTML("<H2>Wikidata/Commons</H2>"))
        if wd_error:
            print(wd_error)
        commonsurl = commonsfile_to_url(wd_imagefilename)
        if commonsurl:
            display(widgets.HTML(f"<img style='max-width: 256px; height: auto; ' src='{esc(commonsurl)}' />"))
        else:
            display(widgets.HTML(f"<p><b>GLAMingest link:</b> <a href='https://glamingest.toolforge.org/metid/{quote(objectID, safe='')}'>{esc(objectID)}</a></p>"))
        if wd_qid:
            display(widgets.HTML(f"<p><b>Wikidata item:</b> <a href='https://wikidata.org/wiki/{quote(str(wd_qid), safe='')}'>{esc(wd_qid)}</a></p>"))
        else:
            display(widgets.HTML("<p><b>Wikidata item:</b> None</p>"))
        if wd_imagefilename:
            # Underscores and percent-encoding keep the file name a single valid URL path
            commons_page = quote(str(wd_imagefilename).replace(' ', '_'))
            display(widgets.HTML(f"<p><b>Image:</b> <a href='https://commons.wikimedia.org/wiki/File:{commons_page}'>{esc(wd_imagefilename)}</a></p>"))
        else:
            display(widgets.HTML("<p><b>Image:</b> None</p>"))

if __name__ == '__main__':
    # Output panes that on_list_item_selected writes into
    col1 = widgets.Output()
    col2 = widgets.Output()

    # Input controls read by the event handlers
    textbox = widgets.Textarea(description="Met IDs:")
    apiurlbox = widgets.Text(
        description="API URL",
        value="https://collectionapi.metmuseum.org/public/collection/v1/objects/",
    )
    button = widgets.Button(description="Populate list")
    listbox = widgets.Select(options=[], description="Object ID:", rows=15)

    # Wire the controls to their handlers
    button.on_click(on_button_clicked)
    listbox.observe(on_list_item_selected, names='value')

    # Layout: controls on the left, the two comparison columns on the right
    left_box = widgets.VBox(
        [textbox, apiurlbox, button, listbox],
        layout=widgets.Layout(flex='1 1 auto'),
    )
    right_columns = widgets.HBox(
        [col1, col2],
        layout=widgets.Layout(flex='1 1 auto'),
    )
    right_box = widgets.VBox(
        [right_columns],
        layout=widgets.Layout(flex='1 1 auto'),
    )
    ui = widgets.HBox(
        [left_box, right_box],
        layout=widgets.Layout(justify_content='space-between'),
    )

    display(ui)

    # Simple query in Wikidata to generate 100 random Met IDs to work with:
    # https://w.wiki/6J$C

# Output: HBox(children=(VBox(children=(Textarea(value='', description='Met IDs:'), Text(value='https://collectionapi.me…