In [None]:
import requests
import json
from time import sleep
import csv
import urllib
import datetime

# URL of the SPARQL endpoint that the search functions in this file send their
# queries to (via HTTP GET with the query text in the `query` parameter).
endpointUrl = 'https://wiki.bgbm.org/proxy/wdqs/bigdata/namespace/bdi/sparql'

# If the value passed is '' (empty string), the value of each matching statement is retrieved and returned.
# Otherwise, the value is used as a filter: only statements having that value are returned.
def searchStatementAtWikidata(qIds, prop, value, refPropList):
    """Look up statements (and optionally their references) for a set of items.

    A single SPARQL SELECT query is sent to ``endpointUrl`` asking for every
    statement that uses property ``prop`` on any of the items in ``qIds``.

    Parameters
    ----------
    qIds : iterable of str
        Item identifiers (without prefix); each is written into the query's
        VALUES block as ``wd:<qId>``.
    prop : str
        Property identifier (without prefix); used as ``p:<prop>`` and
        ``ps:<prop>`` in the query.
    value : str
        If ``''``, the statement value is retrieved and returned for each
        statement.  Otherwise it is inserted as ``wd:<value>`` and only
        statements having that item as their value are matched.
    refPropList : list of str
        Reference property identifiers (without prefix).  When non-empty, the
        query gains one OPTIONAL group that matches the statement's reference
        together with a ``pr:<property>`` value for every listed property.

    Returns
    -------
    list of dict
        One dict per result row, with keys ``qId`` and ``statementUuid``;
        plus ``statementValue`` when ``value`` was ``''``; plus
        ``referenceHash`` and ``referenceValues`` (one entry per reference
        property, ``''`` where unbound) when ``refPropList`` is non-empty.

    Notes
    -----
    A statement with more than one matching reference yields several rows,
    so the same qId / statement may appear repeatedly with different
    reference hashes; this is not consolidated here.
    The function pauses for 0.25 s after each query so that the endpoint is
    not hit too rapidly.
    """
    # One "wd:Q..." line per item for the VALUES block.
    subjectLines = ''.join('wd:' + qId + '\n' for qId in qIds)

    retrieveValue = value == ''
    hasReferences = len(refPropList) != 0
    # SPARQL variable names (without the leading "?") for the reference values,
    # which double as the keys of the JSON result bindings.
    refVarNames = ['refVal' + str(position) for position in range(len(refPropList))]

    # --- SELECT clause ---
    selectVars = ['?id', '?statement']
    if retrieveValue:
        selectVars.append('?statementValue')
    if hasReferences:
        selectVars.append('?reference')
    selectVars.extend('?' + name for name in refVarNames)
    queryParts = ['select distinct ' + ' '.join(selectVars) + ' ']

    # --- WHERE clause: subjects and the statement itself ---
    queryParts.append('where {\n  VALUES ?id\n{\n' + subjectLines + '}\n')
    queryParts.append('  ?id p:' + prop + ' ?statement.\n')
    queryParts.append('  ?statement ps:' + prop)
    if retrieveValue:
        queryParts.append(' ?statementValue.')
    else:
        queryParts.append(' wd:' + value + '.')

    # --- WHERE clause: optional reference and its property values ---
    if hasReferences:
        queryParts.append('\n  optional {\n    ?statement prov:wasDerivedFrom ?reference.')
        for refProp, name in zip(refPropList, refVarNames):
            queryParts.append('\n    ?reference pr:' + refProp + ' ?' + name + '.')
        queryParts.append('\n        }')
    queryParts.append('\n  }')
    query = ''.join(queryParts)

    # Send the query and pull out the rows of the JSON result.
    response = requests.get(
        endpointUrl,
        params={'query': query},
        headers=generateHeaderDictionary('application/json'),
    )
    bindings = response.json()['results']['bindings']

    matches = []
    for binding in bindings:
        # Per the original notes, position 4 of the entity IRI is the part left
        # after stripping 'http://www.wikidata.org/entity/'.
        qNumber = extractFromIri(binding['id']['value'], 4)
        # ...and position 5 of the statement IRI is what follows
        # 'http://www.wikidata.org/entity/statement/'.
        statementName = extractFromIri(binding['statement']['value'], 5)
        # The statement name is "<qNumber>-<UUID>".  Drop everything up to the
        # first hyphen rather than matching on qNumber, because the prefixed
        # Q number has been seen with a lower-case "q" and then fails to match.
        statementUuid = '-'.join(statementName.split('-')[1:])

        match = {'qId': qNumber, 'statementUuid': statementUuid}
        if retrieveValue:
            match['statementValue'] = binding['statementValue']['value']
        if hasReferences:
            if 'reference' in binding:
                # What follows 'http://www.wikidata.org/reference/' (per original notes).
                match['referenceHash'] = extractFromIri(binding['reference']['value'], 4)
            else:
                match['referenceHash'] = ''
            # NOTE: values are passed through exactly as delivered; dates arrive
            # like 2019-12-05T00:00:00Z and are NOT trimmed to 2019-12-05 here.
            match['referenceValues'] = [
                binding[name]['value'] if name in binding else ''
                for name in refVarNames
            ]
        matches.append(match)

    # Wait a quarter of a second so the SPARQL endpoint is not hit too rapidly.
    sleep(0.25)

    return matches

# search for any of the "label" types: label, alias, description
def searchLabelsDescriptionsAtWikidata(qIds, labelType, language):
    """Fetch labels, aliases, or descriptions in one language for a set of items.

    Parameters
    ----------
    qIds : iterable of str
        Item identifiers (without prefix); each is written into the query's
        VALUES block as ``wd:<qId>``.
    labelType : str
        ``'label'`` (rdfs:label), ``'alias'`` (skos:altLabel), or
        ``'description'`` (schema:description).  Any other value is treated
        like ``'label'``.
    language : str
        Language tag the returned strings must carry; it is compared with
        ``lang(?string)`` in the query's filter.

    Returns
    -------
    list of dict
        One dict per result row with keys ``qId`` and ``string``.  An item
        can appear more than once if the query returns several strings for it.

    Notes
    -----
    The function pauses for 0.25 s after each query so that the endpoint is
    not hit too rapidly.
    """
    # One "wd:Q..." line per item for the VALUES block.
    subjectLines = ''.join('wd:' + qId + '\n' for qId in qIds)

    # Map the requested kind of text onto the predicate that carries it;
    # anything unrecognised falls back to the label predicate.
    predicateByType = {
        'label': 'rdfs:label',
        'alias': 'skos:altLabel',
        'description': 'schema:description',
    }
    predicate = predicateByType.get(labelType, 'rdfs:label')

    query = (
        'select distinct ?id ?string '
        'where {\n'
        '  VALUES ?id\n'
        '{\n'
        + subjectLines +
        '}\n'
        '  ?id ' + predicate + ' ?string.\n'
        '  filter(lang(?string)="' + language + '")\n'
        '  }'
    )

    # Send the query and pull out the rows of the JSON result.
    response = requests.get(
        endpointUrl,
        params={'query': query},
        headers=generateHeaderDictionary('application/json'),
    )
    bindings = response.json()['results']['bindings']

    # Per the original notes, position 4 of the entity IRI is the part left
    # after stripping 'http://www.wikidata.org/entity/'.
    matches = [
        {
            'qId': extractFromIri(binding['id']['value'], 4),
            'string': binding['string']['value'],
        }
        for binding in bindings
    ]

    # Wait a quarter of a second so the SPARQL endpoint is not hit too rapidly.
    sleep(0.25)

    return matches