# Register ScienceBase services with AGOL 

This code walks through a ScienceBase community, registers usable services and files with AGOL, and attempts to assign categories to the items based on an existing [group category schema](https://developers.arcgis.com/rest/users-groups-and-items/update-group-items-with-content-categories.htm). Note that it does not check for existing items: running it again simply adds new items that differ from the earlier ones only in their item ID.

In [None]:
import sciencebasepy #https://github.com/usgs/sciencebasepy
from arcgis.gis import GIS
import requests
import json
import re
from owslib.wms import WebMapService  #https://geopython.github.io/OWSLib/
import stringcase #https://pypi.org/project/stringcase/
import urllib3 # to suppress warnings about lack of certificate verification 
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

Set variables

In [None]:
# --- ScienceBase ------------------------------------------------------------
# id of the parent ScienceBase community that will be walked
# (for example the ASC Data Backup Community, or geospatialTest/data)
sb_id = 'put id here'

# ScienceBase login parameters
sb_user = ''
sb_pw = ''

# --- ArcGIS Online (AGOL) ---------------------------------------------------
# id of the AGOL group where the items are to be created
# (for example the Geospatial Test group)
agol_id = 'put id here'
# name of the AGOL group the new services are added to
# (for example Alaska Science Center Data Releases)
AGOL_group = 'Geospatial test'
# name of the AGOL content folder used to organize the new items
content_folder = 'Services test'

# AGOL login parameters
ag_user = ''
ag_pw = ''

# --- What gets registered ---------------------------------------------------
# Maps the title of a ScienceBase distribution link to the AGOL item type it
# is registered as. Only ScienceBase WMS and WFS services are handled here;
# files are handled separately and only KML files are picked up.
# To investigate other type mappings, compare the raw json of an item
# (print(item_json)) with the AGOL item type documentation.
link_types = {
    'ScienceBase WMS Service': 'WMS',
    'ScienceBase WFS Service': 'WFS',
}
# other mappings that could be used instead:
# link_types = {'ArcGIS Rest Service': 'esri-mapServer',
#               'ArcGIS WMS Service': 'wms',
#               'ScienceBase WMS Service': 'wms-getCapabilities',
#               'ScienceBase WFS Service':'wfs-getCapabilities'
#               }

# Service layers to ignore: a service is only worth registering when it has
# at least one layer that is not in this list, i.e. a layer with data.
black_list = [
    'sb:children',
    'sb:childrenBoundingBox',
    'sb:boundingBox',
    'sb:footprint',
]

It is not possible to assign categories to items through the ArcGIS Python API. This can only be done by sending a list of ```itemid : category``` pairs in JSON format to the ```updateCategories``` REST operation. The URL takes the form below:

In [None]:
# URL of the updateCategories REST operation for the AGOL group.
# HTTPS is used because the request sent to this URL carries the user's token.
cs_url = 'https://usgs.maps.arcgis.com/sharing/rest/content/groups/{}/updateCategories'.format(agol_id)

Functions

In [None]:
def sb_url(id):
    """Return the ScienceBase catalog URL of the item with the given id."""
    item_url_template = 'https://www.sciencebase.gov/catalog/item/{}'
    return item_url_template.format(id)

In [None]:
def lineage(this_json, nibbles):
    """Build the AGOL category path of a ScienceBase item from its ancestors.

    Walks from ``this_json`` up through its parents until an item is reached
    whose ``parentId`` is the community root ``sb_id``, fetching each parent
    from ScienceBase and collecting its converted title.

    Args:
        this_json: item json (a dict with at least a ``parentId`` key).
        nibbles: names already collected, deepest level first; callers in
            this file pass an empty list. The list is not modified.

    Returns:
        A string of the form ``/Categories/<top>/<next>/<next>`` holding at
        most the three top-most levels, top-most ancestor first. The item
        itself and the community root are not part of the path.
    """
    # work on a copy so the caller's list is left untouched
    names = list(nibbles)

    # Walk upwards iteratively. The names are collected deepest-first and
    # reversed exactly once afterwards; reversing on every level of a
    # recursive walk flips the order back and forth and yields a bottom-up
    # path for some depths.
    current = this_json
    while current['parentId'] != sb_id:
        current = sb.get_json(sb_url(current['parentId']))
        names.append(convert(current['title']))

    # top-most ancestor first
    names.reverse()

    # /Categories is needed at the beginning of the string to match the AGOL
    # schema, and only 3 levels of categories are wanted: the same depth that
    # was used when the categories were built with sb-items-2-agol-categories
    return '/Categories/' + '/'.join(names[0:3])

In [None]:
def convert(name):
    """Turn a camelCase name such as ``aerialImagery`` into ``Aerial Imagery``."""
    # put a space in front of every capitalised word
    # (an upper-case letter followed by lower-case letters)
    spaced = re.sub('(.)([A-Z][a-z]+)', r'\1 \2', name)
    # split the remaining lower-case-or-digit / upper-case boundaries
    spaced = re.sub('([a-z0-9])([A-Z])', r'\1 \2', spaced)
    # a word that already had a space in front of it now has two
    single_spaced = spaced.replace('  ', ' ')
    return stringcase.capitalcase(single_spaced)

In [None]:
def getToken(user, pw):
    """Request an ArcGIS Online token for the given credentials.

    Args:
        user: AGOL user name.
        pw: AGOL password.

    Returns:
        A tuple ``(token, ssl)`` taken from the generateToken response;
        ``ssl`` is ``None`` when the response does not contain that key.

    Raises:
        requests.HTTPError: if the server answers with an HTTP error status.
        RuntimeError: if the response does not contain a token, for example
            because the credentials were rejected.
    """
    data = {'username': user,
            'password': pw,
            'referer': 'https://www.arcgis.com',
            'f': 'json'}
    url = 'https://www.arcgis.com/sharing/rest/generateToken'

    # NOTE(review): verify=False switches off TLS certificate verification
    # while the password is being sent. It is kept because this notebook
    # deliberately suppresses the matching urllib3 warning; remove it if the
    # network in use allows normal certificate verification.
    response = requests.post(url, data=data, verify=False)
    response.raise_for_status()
    jres = response.json()

    # a rejected request carries an 'error' object instead of a token; report
    # it rather than failing with a bare KeyError
    if 'token' not in jres:
        raise RuntimeError(
            'AGOL token request failed: {}'.format(jres.get('error', jres)))

    return jres['token'], jres.get('ssl')

In [None]:
def check_bb(link_d):
    """Return the layers of a service that are more than bounding boxes.

    Opens the service at ``link_d['uri']`` and returns the names of its
    layers that are not listed in ``black_list``. An empty list means the
    service has no layer worth registering.
    """
    service = WebMapService(link_d['uri'])
    data_layers = []
    for layer_name in service.contents:
        if layer_name in black_list:
            continue
        data_layers.append(layer_name)

    return data_layers

In [None]:
def add_links(links_list):
    """Register the supported service links of the current item with AGOL.

    For every link whose title is a key of ``link_types`` and whose service
    has at least one layer outside ``black_list``, an AGOL item is created in
    ``content_folder``, shared with ``AGOL_group`` and recorded in
    ``categorize_list`` as ``{item id: [category path]}``.

    Reads the module-level ``item_json`` and ``child_name`` set by the main
    loop, so it must be called while those describe the item that
    ``links_list`` belongs to.

    Args:
        links_list: the ``distributionLinks`` list of a ScienceBase item.
    """
    # The category path depends only on item_json, and resolving it costs one
    # ScienceBase request per ancestor: resolve it at most once per call, and
    # only when a link is actually going to be registered.
    up_path = None

    for link in links_list:
        # only the service types named in link_types are registered
        if link.get('title') not in link_types:
            continue
        # skip services that offer nothing but bounding-box layers
        if not check_bb(link):
            continue

        print(child_name)
        if up_path is None:
            up_path = lineage(item_json, [])

        params = {
            'type': link_types[link['title']],
            'title': child_name,
            'url': link['uri'],
            # the category levels without the leading '/Categories'
            'tags': up_path.split('/')[2:]
        }
        item = gis.content.add(params, folder=content_folder)
        item.share(groups=[AGOL_group])

        # one-entry dictionary of itemid: category-based-on-lineage, added to
        # the global list of items to categorize
        categorize_list.append({str(item.id): [up_path]})

In [None]:
def add_file_links(files_list):
    """Register the KML files attached to the current item with AGOL.

    The urls of attached files live in a different part of the item json
    than the service links, so they are handled separately. Only files whose
    name ends in ``.kml`` (in any letter case) are registered; each becomes an
    AGOL item of type ``KML`` in ``content_folder``, is shared with
    ``AGOL_group`` and is recorded in ``categorize_list`` as
    ``{item id: [category path]}``.

    Reads the module-level ``item_json`` set by the main loop, so it must be
    called while that describes the item ``files_list`` belongs to.

    Args:
        files_list: the ``files`` list of a ScienceBase item.
    """
    # The category path depends only on item_json, and resolving it costs one
    # ScienceBase request per ancestor: resolve it at most once per call, and
    # only when a file is actually going to be registered.
    up_path = None

    for file in files_list:
        f_name = file['name']
        # compare case-insensitively so that e.g. 'TRACK.KML' is not missed
        if not f_name.lower().endswith('.kml'):
            continue

        if up_path is None:
            up_path = lineage(item_json, [])

        params = {
            'type': 'KML',
            'title': f_name,
            'url': file['url'],
            # the category levels without the leading '/Categories'
            'tags': up_path.split('/')[2:]
        }
        item = gis.content.add(params, folder=content_folder)
        item.share(groups=[AGOL_group])

        # one-entry dictionary of itemid: category-based-on-lineage, added to
        # the global list of items to categorize
        categorize_list.append({str(item.id): [up_path]})

Begin by starting a ScienceBase session. It is not necessary to log in for public items, but the connection seems more robust if you do. Use ```sb.loginc(sb_user)``` to log in interactively if you don't want to save the password in the script.

In [None]:
# ScienceBase session used for every catalog request in this notebook;
# lineage() and the main loop read it as the module-level name `sb`
sb = sciencebasepy.SbSession()
# log in with the ScienceBase credentials from the variables cell
sb.login(sb_user, sb_pw)

Log in to AGOL through the Python API. Use username to get a prompt for the password or pass both variables: ```GIS(username=ag_user, password=ag_pw)```

In [None]:
# AGOL connection used by add_links() and add_file_links() to create items;
# both read it as the module-level name `gis`
gis = GIS(username=ag_user, password=ag_pw)

Create an empty list to which we'll append dictionaries of ```{itemid: categories}```. After the list has been built we'll post it to the update group url

In [None]:
# global list of one-entry dictionaries {item id: [category path]};
# add_links() and add_file_links() append to it
categorize_list = []

Start at the root id of the ScienceBase community and look for services

In [None]:
# Visit every id that ScienceBase returns for the community and register what
# can be registered.
# NOTE(review): despite its name, get_ancestor_ids() is used here to list the
# items below sb_id - confirm against the sciencebasepy documentation.
for child_id in sb.get_ancestor_ids(sb_id):
    # item_json and child_name are module-level on purpose: add_links() and
    # add_file_links() read them as globals
    item_json = sb.get_json(sb_url(child_id))
    child_name = convert(item_json['title'])

    # service links first, then attached files; a section is only handled
    # when the item json actually has it
    for section, register in (('distributionLinks', add_links),
                              ('files', add_file_links)):
        if section in item_json:
            register(item_json[section])

Check the results

In [None]:
# show the {item id: [category path]} entries collected for the new AGOL items
print(categorize_list)

Log in to AGOL by getting a token.

In [None]:
# getToken() returns a tuple (token, ssl); the token string itself is token[0]
token = getToken(ag_user, ag_pw)

Create the parameters for our POST request

In [None]:
# parameters of the updateCategories request
params = {
    'f': 'json',
    # getToken() returns (token, ssl); only the token string is sent
    'token': token[0],
    # the operation expects JSON: json.dumps() emits double-quoted, properly
    # escaped JSON, whereas str() would emit a Python repr with single quotes
    'items': json.dumps(categorize_list)
}
# Upload the list in the body of the POST request rather than in the query
# string: this keeps the token out of the URL and avoids URL length limits
# when many items are categorized at once.
data = requests.post(cs_url, data=params)

Check the results. Note that if the JSON is valid and the operation can find the itemid but the category doesn't match up with anything at the group, the success report will still be true.

In [None]:
# `data` is the response of the updateCategories request; show its JSON body
print(data.json())