# Open Context Zooarchaeology Measurements

This code gets measurement data from Open Context to hopefully do some interesting things.

In [6]:
from time import sleep
from urllib.parse import parse_qs, urlsplit

import requests

class OpenContextAPI():
    """ Interacts with the Open Context API
        to get lists of records for analysis

        See API documentation here: https://opencontext.org/about/services

        Typical use: call get_selection_attributes(url) once so the
        numeric attributes offered by the search are requested with each
        record, then call get_paged_json_records(url) to download the
        records themselves.
    """
    RECS_PER_REQUEST = 200  # number of records to retrieve per request
    FLATTEN_ATTRIBUTES = True  # make sure attributes are single value, not lists
    RESPONSE_TYPE_LIST = ['metadata', 'uri-meta']
    SLEEP_TIME = .35  # seconds to pause between requests

    def __init__(self):
        self.recs_per_request = self.RECS_PER_REQUEST
        self.sleep_time = self.SLEEP_TIME
        self.flatten_attributes = self.FLATTEN_ATTRIBUTES
        # copy, so editing one instance's list never changes the class default
        self.response_types = list(self.RESPONSE_TYPE_LIST)
        self.add_standard_num_attribs = True  # automatically add standard numeric attributes to records
        self.attribute_slugs = []  # slug identifiers of additional attributes to include with records
        self.attribute_labels = []  # one label per entry of attribute_slugs, in the same order

    def _get_json(self, url, params=None, log_prefix='GET: '):
        """ Pauses, logs the URL, then GETs it and returns the decoded JSON.

            :param url: address to request
            :param params: optional dict of extra query parameters
            :param log_prefix: text printed in front of the URL
            :raises RuntimeError: if the request, the HTTP status check or
                the JSON decoding fails; the original error is chained
                as the cause
        """
        headers = {
            'accept': 'application/json'
        }
        try:
            sleep(self.sleep_time)  # pause to not overwhelm the API
            print(log_prefix + url)
            r = requests.get(url, params=(params or None), headers=headers)
            r.raise_for_status()
            return r.json()
        except Exception as err:
            # Exception (not a bare except) so Ctrl-C still interrupts a long run
            raise RuntimeError('Request fail at: ' + url) from err

    def _missing_params(self, url):
        """ Returns the request parameters that the URL does not already carry.

            The query string is parsed rather than substring-matched, because
            'attributes=' is also the tail of 'flatten-attributes='.
        """
        wanted = {'rows': self.recs_per_request}
        if len(self.attribute_slugs) > 0:
            wanted['attributes'] = ','.join(self.attribute_slugs)
        if len(self.response_types) > 0:
            wanted['response'] = ','.join(self.response_types)
        if self.flatten_attributes:
            wanted['flatten-attributes'] = 1
        present = parse_qs(urlsplit(url).query, keep_blank_values=True)
        return {key: val for key, val in wanted.items() if key not in present}

    def get_selection_attributes(self, url):
        """ Each Open Context query selects records described by a
            different set of attributes.

            This method requests the search at "url" and records the
            numeric attributes it offers: for every facet defined by an
            Open Context vocabulary, each numeric option's slug is added
            to self.attribute_slugs and a matching label is added to
            self.attribute_labels. A label that is already taken is
            stored as label + '[' + slug + ']' so labels stay unique.

            Nothing is collected when self.add_standard_num_attribs is
            False, although the request is still made.

            :param url: Open Context search URL
            :raises RuntimeError: if the request fails
        """
        json_data = self._get_json(url, log_prefix='GET attributes for: ')
        if not self.add_standard_num_attribs or not isinstance(json_data, dict):
            return
        for facet in json_data.get('oc-api:has-facets', []):
            # only facets defined as part of an Open Context vocabulary
            # have the standard numeric attributes
            if 'rdfs:isDefinedBy' not in facet:
                continue
            if 'opencontext.org/vocabularies/' not in facet['rdfs:isDefinedBy']:
                continue
            for f_opt in facet.get('oc-api:has-numeric-options', []):
                slug = f_opt['slug']
                label = f_opt['label']
                if slug in self.attribute_slugs:
                    continue  # already known; keeps slugs and labels aligned
                self.attribute_slugs.append(slug)
                if label in self.attribute_labels:
                    # disambiguate a repeated label with its slug
                    label = label + '[' + slug + ']'
                self.attribute_labels.append(label)

    def get_paged_json_records(self, url, do_paging=True):
        """ gets data from a URL, and follows the next page
            link if "do_paging" is True

            Pages are fetched in a loop rather than by recursion, so the
            number of pages is not limited by the interpreter's recursion
            depth. Only request parameters absent from a URL are added to
            it, so parameters already present are never duplicated.

            :param url: Open Context search URL of the first page
            :param do_paging: follow each response's "next" link when True
            :return: list of the items found under 'oc-api:has-results'
                on every page fetched
            :raises RuntimeError: if any request fails
        """
        records = []
        visited = set()  # guards against a "next" link that loops back
        next_url = url
        while next_url is not None and next_url not in visited:
            visited.add(next_url)
            json_data = self._get_json(next_url, params=self._missing_params(next_url))
            if not isinstance(json_data, dict):
                break
            if 'oc-api:has-results' in json_data:
                # add the new records to the result
                records += json_data['oc-api:has-results']
            following = json_data.get('next') if do_paging else None
            next_url = following if isinstance(following, str) else None
        return records


In [7]:
# Search URL: the subjects-search endpoint filtered by three `prop` parameters
# (adjacent string literals are joined into one URL)
url = (
    'https://opencontext.org/subjects-search/'
    '?prop=biol-term-hastaxonomy---eol-p-1---eol-p-1642---eol-p-7678'
    '&prop=oc-zoo-has-anat-id---obo-uberon-0013588'
    '&prop=oc-zoo-anatomical-meas---oc-zoo-von-den-driesch-bone-meas'
)
oc_api = OpenContextAPI()
# look up the numeric attributes offered by this selection first,
# so that they are requested along with every record below
oc_api.get_selection_attributes(url)
records = oc_api.get_paged_json_records(url)
print(f'Number of records: {len(records)}')

GET attributes for: https://opencontext.org/subjects-search/?prop=biol-term-hastaxonomy---eol-p-1---eol-p-1642---eol-p-7678&prop=oc-zoo-has-anat-id---obo-uberon-0013588&prop=oc-zoo-anatomical-meas---oc-zoo-von-den-driesch-bone-meas
GET: https://opencontext.org/subjects-search/?prop=biol-term-hastaxonomy---eol-p-1---eol-p-1642---eol-p-7678&prop=oc-zoo-has-anat-id---obo-uberon-0013588&prop=oc-zoo-anatomical-meas---oc-zoo-von-den-driesch-bone-meas
GET: https://opencontext.org/subjects-search/?flatten-attributes=1&prop=biol-term-hastaxonomy---eol-p-1---eol-p-1642---eol-p-7678&prop=oc-zoo-has-anat-id---obo-uberon-0013588&prop=oc-zoo-anatomical-meas---oc-zoo-von-den-driesch-bone-meas&attributes=oc-zoo-bd%2Coc-zoo-bp%2Coc-zoo-dd%2Coc-zoo-dp%2Coc-zoo-bfd%2Coc-zoo-sd%2Coc-zoo-gl%2Coc-zoo-dd-2%2Coc-zoo-lep%2Coc-zoo-ll%2Coc-zoo-ld%2Coc-zoo-cd%2Coc-zoo-slc%2Coc-zoo-gb%2Coc-zoo-gll&rows=200&start=200&response=metadata%2Curi-meta
GET: https://opencontext.org/subjects-search/?flatten-attributes=1&pro

In [13]:
# Spot-check: show the value stored under the 'Bd' key of the record at index 100.
# Depends on `records` from the download cell and needs at least 101 records.
# NOTE(review): 'Bd' is presumably the label of the 'oc-zoo-bd' attribute — confirm
records[100]['Bd']
    

22.3