# Function definitions and module imports

In [None]:
import requests
import json
import csv
from time import sleep
import pandas as pd

# URL of the SPARQL endpoint that all queries in this notebook are sent to
endpoint = 'https://sparql.vanderbilt.edu/sparql'

# Value of the User-Agent header; it identifies this client and gives contact details
user_agent_header = (
    'test/0.1 '
    '(https://github.com/HeardLibrary/linked-data/; mailto:steve.baskauf@vanderbilt.edu)'
)

# HTTP headers sent with every SPARQL request. JSON results are requested.
# 'application/sparql-query' is the alternative Content-Type to use when the bare
# query text is POSTed as the request body instead of being URL-encoded.
sparql_request_header = {
    'Accept': 'application/json',
    'Content-Type': 'application/x-www-form-urlencoded',
    'User-Agent': user_agent_header,
}

# Low level functions

def send_sparql_query(query_string, endpoint):
    """Send a SPARQL query to an endpoint and return the result bindings.

    The query is sent as the ``query`` parameter of an HTTP GET request, using the
    module-level ``sparql_request_header`` for the request headers.

    Args:
        query_string: text of the SPARQL query.
        endpoint: URL of the SPARQL endpoint.

    Returns:
        The list stored under ``results`` -> ``bindings`` in the JSON response.

    Raises:
        ValueError: if the response body cannot be parsed as JSON. The raw response
            text is printed first so the endpoint's own error message is visible.
    """
    # The query travels in the URL. For queries too long for a URL, POST instead, e.g.
    # requests.post(endpoint, data={'query': query_string}, headers=sparql_request_header)
    response = requests.get(endpoint, params={'query': query_string}, headers=sparql_request_header)

    try:
        data = response.json()
    except ValueError as err:
        # JSON decoding errors are ValueError subclasses. Show what the endpoint actually
        # sent, then stop here instead of failing later on an undefined variable.
        print(response.text)
        raise ValueError(
            'SPARQL endpoint did not return JSON (HTTP status {})'.format(response.status_code)
        ) from err

    # Extract the values from the response JSON
    return data['results']['bindings']

def extract_local_name(iri):
    """Return the local name of an IRI, i.e. everything after its final slash.

    For example, the Wikidata IRI http://www.wikidata.org/entity/Q6386232 yields
    'Q6386232'. A string that contains no slash is returned unchanged.
    """
    # Split once from the right; the last piece is the local name
    return iri.rsplit('/', 1)[-1]

def write_dicts_to_csv(table, filename, fieldnames):
    """Write a list of dictionaries to a CSV file.

    Args:
        table: iterable of dictionaries, one per output row.
        filename: path of the CSV file; it is created or overwritten.
        fieldnames: column names, in output order, used for the header row.
    """
    with open(filename, 'w', newline='', encoding='utf-8') as output_file:
        dict_writer = csv.DictWriter(output_file, fieldnames=fieldnames)
        dict_writer.writeheader()
        dict_writer.writerows(table)

def read_dicts_from_csv(filename):
    """Read a CSV file into a list of dictionaries.

    Args:
        filename: path of a UTF-8 encoded CSV file whose first row holds the column names.

    Returns:
        A list with one dictionary per data row, keyed by column name.
    """
    with open(filename, 'r', newline='', encoding='utf-8') as input_file:
        # Materialize the rows before the file is closed
        return list(csv.DictReader(input_file))


# Download standard views used in Bioimages images

These are the standard views defined by Baskauf and Kirchoff (2008) and expressed as RDF at http://bioimages.vanderbilt.edu/rdf/stdview.rdf

The code performs a SPARQL query and outputs the view IRIs and labels into a CSV spreadsheet.

In [None]:
# Find every standard view (?cvterm) that is used by at least one image, together with
# its label and the label of the broader part term it belongs to.
query_string = '''prefix dcterms: <http://purl.org/dc/terms/>
prefix dctype: <http://purl.org/dc/dcmitype/>
prefix Iptc4xmpExt: <http://iptc.org/std/Iptc4xmpExt/2008-02-29/>
prefix skos: <http://www.w3.org/2004/02/skos/core#>
select distinct ?cvterm ?partTerm ?partLabel ?viewLabel
from <http://bioimages.vanderbilt.edu/rdf/stdview>
from <http://bioimages.vanderbilt.edu/images>
where {
  ?image a dctype:StillImage.
  ?image Iptc4xmpExt:CVterm ?cvterm.
  ?cvterm skos:broader ?partTerm.
  ?partTerm skos:prefLabel ?partLabel.
  ?cvterm skos:prefLabel ?viewLabel.
  }
order by ?partLabel
'''

results = send_sparql_query(query_string, endpoint)

# Keep one row per result binding; ?partTerm is selected by the query but not written out
stdview_list = [
    {
        'iri': binding['cvterm']['value'],
        'part_label': binding['partLabel']['value'],
        'view_label': binding['viewLabel']['value'],
    }
    for binding in results
]

# Uncomment to inspect the rows before they are written
#print(json.dumps(stdview_list, indent=2))

# NOTE: this replaces any existing stdviews_table.csv, including columns added to it later
write_dicts_to_csv(stdview_list, 'stdviews_table.csv', ['iri', 'part_label', 'view_label'])
print('done')


# Apply mappings from standard views to AC views

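After downloading the standard views into `stdviews_table.csv`, the columns `subjectPart_cv_string` and `subjectOrientation_cv_string` were added to map the view IRIs to subjectPartLiteral and subjectOrientationLiteral values. Note that re-running the download cell above overwrites `stdviews_table.csv` and discards these added columns.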

The image IRIs from the manual test data set (`bioimages_images.csv`) were used in a SPARQL query of the Bioimages RDF to find their view IRIs. The mappings were then applied to those view IRIs to generate the AC (Audubon Core) views.

In [None]:
# Load the human-annotated test set, then the view mapping table. Every column is read
# as text and na_filter=False keeps empty cells as empty strings instead of NaN.
test_set, mapping_table = (
    pd.read_csv(csv_path, na_filter=False, dtype=str)
    for csv_path in ('bioimages_images.csv', 'stdviews_table.csv')
)

# 'iri' is left as an ordinary column (not the index) so rows can be selected by comparing it
#mapping_table.set_index('iri', inplace=True)

# Show the first rows as the cell output
mapping_table.head()

In [None]:
# Image IRIs from the test set, written as <iri> terms, one per line, for a VALUES clause
image_list = test_set['image_iri'].tolist()
image_list_text = '<{}>'.format('>\n<'.join(image_list))
#print(image_list_text)

# Ask for the standard view (?cvterm) of each listed image; %s marks where the IRIs go
query_string = '''prefix Iptc4xmpExt: <http://iptc.org/std/Iptc4xmpExt/2008-02-29/>
select distinct ?image ?cvterm
from <http://bioimages.vanderbilt.edu/images>
where {
values ?image {
%s
}
  ?image Iptc4xmpExt:CVterm ?cvterm.
  }
''' % image_list_text
#print(query_string)

results = send_sparql_query(query_string, endpoint)
#print(json.dumps(results, indent=2))


In [None]:
# Column order of the comparison file: human-assigned and mapped values side by side
fieldnames = ['image_iri', 'subjectPartHuman', 'subjectPartMap', 'subjectOrientationHuman', 'subjectOrientationMap']

output_list = []
for result in results:
    image_iri = result['image']['value']
    cvterm_iri = result['cvterm']['value']

    # Rows of the mapping table for this view IRI, and of the test set for this image.
    # Each selection is expected to hold one row; the first row is the one used.
    mapping_rows = mapping_table.loc[mapping_table.iri == cvterm_iri]
    human_rows = test_set.loc[test_set.image_iri == image_iri]

    image_dict = {
        'image_iri': image_iri,
        # Values produced by applying the view mapping to the CVterm IRI from the query
        'subjectPartMap': mapping_rows['subjectPart_cv_string'].iloc[0],
        'subjectOrientationMap': mapping_rows['subjectOrientation_cv_string'].iloc[0],
        # Values that were used in the human test
        'subjectPartHuman': human_rows['ac:subjectPartLiteral'].iloc[0],
        'subjectOrientationHuman': human_rows['ac:subjectOrientationLiteral'].iloc[0],
    }
    output_list.append(image_dict)

#print(json.dumps(output_list, indent=0))
write_dicts_to_csv(output_list, 'mapping_test.csv', fieldnames)

print('done')