# Build a TerriaJS data catalog file from items in a ScienceBase community
This code uses [sciencebasepy](https://github.com/usgs/sciencebasepy) to walk through the parent-child tree at a ScienceBase community looking for items with services and files to include in a [TerriaJS](https://terria.io/) JSON data catalog initialization file. 

I had originally hoped to use the [CSW server](https://my.usgs.gov/confluence/display/sciencebase/Catalog+Services) at ScienceBase to build the catalog but found that 1) the parent-child path of an item on ScienceBase is not represented through nested groups in the resulting catalog and 2) once TerriaJS finds a usable service or file at an item, it stops looking and does not add any more (not to mention that I couldn't get it to find anything other than WMS endpoints). This is a problem for items which have different facets for multiple types of features.

The example below starts at the [Alaska Science Center Data Backup community](https://www.sciencebase.gov/catalog/item/56b3ee22e4b0cc79997fb64b). Change `repo_id` to the item ID of the community in which you are interested.

This code currently only checks for ScienceBase WMS and WFS services and CSV files. Investigate other type mappings by comparing the raw json of the item (`print(sb.get_json(sb_url(item_id)))`) and the [TerriaJS documentation](https://docs.terria.io/guide/)

In [None]:
import sciencebasepy #https://github.com/usgs/sciencebasepy
import requests
import json
import re
from owslib.wms import WebMapService  #https://geopython.github.io/OWSLib/
import stringcase #https://pypi.org/project/stringcase/

In [None]:
# ID of the parent ScienceBase item whose descendants are walked.
# Default: ASC Data Backup Community
repo_id = '56b3ee22e4b0cc79997fb64b'
# ScienceBase user name; only needed if the login call further down is enabled.
user = ''

# Mapping between ScienceBase link types (the 'title' of a distribution link)
# and TerriaJS catalog item types.
# Other candidate mappings, kept for reference but not currently used:
# link_types = {'ArcGIS Rest Service': 'esri-mapServer',
#               'ArcGIS WMS Service': 'wms',
#               'ScienceBase WMS Service': 'wms-getCapabilities',
#               'ScienceBase WFS Service':'wfs-getCapabilities'
#               }

# This code currently only checks for ScienceBase WMS and WFS services and CSV files.
# Investigate other type mappings by comparing the raw json of the item (print(item_json))
# and the TerriaJS documentation - https://docs.terria.io/guide/
link_types = {
    'ScienceBase WMS Service': 'wms',
    'ScienceBase WFS Service': 'wfs',
}

# Service layers to skip: we only want layers with data, not the
# children/bounding-box/footprint layers listed here.
black_list = ['sb:children', 'sb:childrenBoundingBox', 'sb:boundingBox', 'sb:footprint']

# Location of the output JSON file.
# A raw string keeps the Windows backslashes literal; without it, '\T' and '\A'
# are invalid escape sequences that Python warns about.
out_json = r'C:\Temp\ASC_groups.json'

In [None]:
def sb_url(id):
    """Return the ScienceBase catalog URL for the item with the given id."""
    return f'https://www.sciencebase.gov/catalog/item/{id}'

In [None]:
def convert(name):
    """Turn a camelCase name into spaced, capitalised words.

    For example, a name like aerialImagery becomes Aerial Imagery.
    """
    spaced = name
    # First split before a capitalised word, then between a lowercase letter
    # or digit and any remaining capital; both passes insert a single space.
    for pattern in ('(.)([A-Z][a-z]+)', '([a-z0-9])([A-Z])'):
        spaced = re.sub(pattern, r'\1 \2', spaced)
    return stringcase.capitalcase(spaced)

In [None]:
def lineage(this_json):
    """Return the '/'-separated titles from below repo_id down to this item.

    this_json is the JSON dictionary of a ScienceBase item; it must carry
    'title' and 'parentId'. Each ancestor is fetched through the module-level
    session `sb` until the parent id equals `repo_id`, whose own title is
    not included in the path.
    """
    path = this_json['title']
    ancestor_id = this_json['parentId']
    while ancestor_id != repo_id:
        ancestor = sb.get_json(sb_url(ancestor_id))
        # Prepend, so the oldest ancestor ends up left-most.
        path = f"{ancestor['title']}/{path}"
        ancestor_id = ancestor['parentId']
    return path

In [None]:
def build_group(n):
    """Return a new TerriaJS group dictionary named n with no items yet."""
    return {'name': n, 'type': 'group', 'items': []}

In [None]:
def make_item_dict(name, layers, uri):
    """Build a TerriaJS item dictionary.

    Parameters:
        name: a file name (for CSV files) or a ScienceBase link title that is
            a key of the module-level `link_types` mapping.
        layers: an iterable of layer-name strings, joined with commas into the
            'layers' entry; a falsy value (e.g. '' or []) omits the entry.
        uri: the URL of the file or service.

    Returns:
        A dictionary with the keys 'name', 'type', optionally 'layers',
        and 'url'.

    Raises:
        KeyError: if name is neither a CSV file name nor a key of link_types.
    """
    g = {'name': name}
    # Compare the extension case-insensitively: add_csvs selects files by
    # content type, so a file called 'DATA.CSV' must not fall through to the
    # link_types lookup and raise KeyError.
    if name.lower().endswith('.csv'):
        g['type'] = 'csv'
    else:
        g['type'] = link_types[name]
    if layers:
        g['layers'] = ','.join(layers)
    g['url'] = uri

    return g

In [None]:
def check_bb(link_d):
    """Return the layer names of a service that are not in black_list.

    link_d is a link dictionary whose 'uri' is opened with OWSLib's
    WebMapService. The result is empty when the service only offers the
    black-listed layers.
    """
    service = WebMapService(link_d['uri'])
    data_layers = []
    for layer_name in service.contents:
        if layer_name in black_list:
            continue
        data_layers.append(layer_name)

    return data_layers
    

In [None]:
def add_csvs(files_list, items_dict):
    """Append an item for every CSV file in files_list to items_dict['items'].

    A file counts as CSV when its 'contentType' is 'text/csv'. The same
    items_dict object is returned after being updated in place.
    """
    csv_files = (entry for entry in files_list if entry['contentType'] == 'text/csv')
    for entry in csv_files:
        # CSV items carry no layers, hence the empty second argument.
        csv_item = make_item_dict(entry['name'], '', entry['downloadUri'])
        items_dict['items'].append(csv_item)

    return items_dict

In [None]:
def add_links(links_list, items_dict):
    """Append an item for every supported service link to items_dict['items'].

    A link is supported when its 'title' is a key of link_types. Links whose
    service offers nothing but black-listed layers (see check_bb) are skipped.
    The same items_dict object is returned after being updated in place.
    """
    for link in links_list:
        if link['title'] not in link_types:
            continue
        data_layers = check_bb(link)
        if not data_layers:
            continue
        service_item = make_item_dict(link['title'], data_layers, link['uri'])
        items_dict['items'].append(service_item)

    return items_dict

In [None]:
def work_children(item_id, l):
    """Recursively append a nested group for every descendant of item_id to l.

    For each child of item_id a TerriaJS group is built from the child's
    title; supported service links and CSV files found on the child are added
    as items of that group; the group is appended to l; and the function then
    recurses into the child with the group's own 'items' list. The recursion
    ends at items that have no children. Nothing is returned; l is updated
    in place.
    """
    # Item JSON keys that may hold content, paired with the function that adds it.
    collectors = (('distributionLinks', add_links), ('files', add_csvs))

    for child_id in sb.get_child_ids(item_id):
        child_json = sb.get_json(sb_url(child_id))

        # Titles in camelCase are shown as Capital Case in the catalog.
        group = build_group(convert(child_json['title']))

        for key, collect in collectors:
            if key in child_json:
                group = collect(child_json[key], group)

        l.append(group)
        work_children(child_id, group['items'])

In [None]:
# Start a ScienceBase session. The helper functions in this notebook
# (lineage, work_children) use this module-level `sb` object for their requests.
sb = sciencebasepy.SbSession()
# No need to log in for public items. To log in, set `user` in the settings
# cell and uncomment the line below.
#sb.loginc(user)

In [None]:
# Skeleton of the TerriaJS initialization file.
# Edit the values here to change the starting condition of the map; the
# 'items' list of the single top-level group is filled in by work_children.
config = {
    'catalog': [{'name': 'ASC Data Releases', 'type': 'group', 'items': []}],
    'corsDomains': ['sciencebase.gov', 'corsproxy.com'],
    'initialCamera': {
        "west": -154.47081370130456,
        "east": -140.1702524470959,
        "north": 63.654356255141536,
        "south": 57.252224232856754,
    },
    'homeCamera': {"west": -175, "east": -135, "north": 72, "south": 55},
    'baseMapName': "Bing Maps Roads",
    'initialViewerMode': '2d',
}

In [None]:
# Walk every descendant of repo_id, filling the top-level catalog group,
# then save the finished configuration to out_json.
print('Running...\n')

work_children(repo_id, config['catalog'][0]['items'])

with open(out_json, 'w') as outfile:
    outfile.write(json.dumps(config))
print('Done!')

In [None]:
# Print the json here if you want to take a look.
# Keys are sorted and indented for this display only; the file written to
# out_json is unsorted and looks a mess, but TerriaJS sorts it, and the groups
# and items will be listed alphabetically.
print(json.dumps(config, sort_keys=True, indent=4, separators=(',', ': ')))