# CSV to GeoBlacklight JSON

### This script takes an input CSV of metadata and converts it to a GeoBlacklight JSON

Import necessary modules

In [1]:
import csv
import json
import os
from datetime import datetime

This is a dictionary to translate single-value fields into GBLJson

In [2]:
# Single-valued fields: each CSV column name maps to the list of GeoBlacklight
# field names that receive that column's value.
single_dict = {
    "identifier": ["layer_slug_s", "dc_identifier_s"],
    "title": ["dc_title_s"],
    "description": ["dc_description_s"],
    "dateIssued": ["dct_issued_s"],
    "type": ["dc_type_s"],
    "format": ["dc_format_s"],
    "geometryType": ["layer_geom_type_s"],
    "code": ["b1g_code_s"],
    "solrYear": ["solr_year_i"],
    "provenance": ["dct_provenance_s"],
}

And this is a dictionary to translate multivalue fields into GBLJson

In [3]:
# Multivalued fields: each CSV column name maps to the list of GeoBlacklight
# field names that receive that column's values as a list.
multiple_dict = {
    "subject": ["dc_subject_sm"],
    "keyword": ["b1g_keyword_sm"],
    "temporalCoverage": ["dct_temporal_sm"],
    "spatialCoverage": ["dct_spatial_sm"],
    "publisher": ["dc_publisher_sm"],
    "creator": ["dc_creator_sm"],
}

This statement will create a folder to store the jsons if one does not already exist

In [4]:
# Create the output folder for the JSON files. exist_ok=True makes this a
# no-op when the folder is already there, without a separate existence check.
os.makedirs("json", exist_ok=True)

Open the CSV with the GBL data. Change the string inside the open statement to match your file name

In [5]:
# Open the input CSV. newline='' lets the csv module do its own line-ending
# handling, so quoted fields that contain line breaks are parsed correctly.
# NOTE: the handle stays open while the reader is in use; call csvfile.close()
# once every row has been converted.
csvfile = open('hennepin-DCAT.csv', 'r', newline='')

Creates a reader that yields each CSV row as a dictionary keyed by the column headers, and records the current time as the date modified.

In [6]:
from datetime import timezone

# DictReader yields one dictionary per CSV row, keyed by the header row.
reader = csv.DictReader(csvfile)

# Timestamp stored in layer_modified_dt for every record. The trailing "Z"
# marks the value as UTC, so the clock is read once, in UTC, and formatted
# with fixed directives (the previous '%X' varies with the system locale, and
# two separate clock reads could disagree around midnight).
date_modified = datetime.now(timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ')

### The script creates a Python dictionary and adds values from the CSV.

1. A dictionary is created first with default values that are the same for all records


2. Each row is examined for an identifying code. This code separates the records into collections. A folder for each code is created in the json folder so that the jsons can be sorted into their respective collection.


3. The script then goes through the single and multiple dictionaries that were defined above and writes them into the starting dictionary.

### Certain fields need to be reordered or combined before being added to the dictionary

4. Next, the script looks for the Bounding Box field and splits the W,S,E,N values into their own variables. These are reordered as W,E,N,S and put inside a well-known text format using ENVELOPE().


5. The references field needs to be created, so the script looks for relevant fields, writes them to a list if they have a value, and then formats and adds the list to the small dictionary before writing the json file.


6. Finally, the unique identifier is pulled out, the output filename is named according to that unique identifier, and the output json file is written. This happens for every row in the CSV, so each record will be written to its own JSON file.

In [7]:
# CSV columns that hold link/service URLs, mapped to the key each one is
# stored under in the dct_references_s JSON object.
reference_keys = {
    "landingPage": "http://schema.org/url",
    "downloadURL": "http://schema.org/downloadUrl",
    "mapServer": "urn:x-esri:serviceType:ArcGIS#DynamicMapLayer",
    "featureServer": "urn:x-esri:serviceType:ArcGIS#FeatureLayer",
    "imageServer": "urn:x-esri:serviceType:ArcGIS#ImageMapLayer",
}

# Convert every CSV row into its own GeoBlacklight JSON file, written to
# json/<code>/<identifier>.json.
for row in reader:
    code = ""
    references = {}

    # Starting dictionary with default values that are the same for all records.
    small_dict = {
        "geoblacklight_version": "1.0",
        "dc_rights_s": "Public",
        "layer_modified_dt": date_modified,
    }

    for key, val in row.items():
        # Create a folder for each unique code so the JSON files are sorted
        # into their collections. makedirs also creates "json" if it is missing.
        if key == "code":
            code = val
            os.makedirs("json/" + val, exist_ok=True)

        # Single-valued fields: copy the value to every mapped field name.
        if key in single_dict:
            for fieldname in single_dict[key]:
                small_dict[fieldname] = val

        # Multivalued fields are pipe-delimited ('|') in the CSV and are
        # stored as lists.
        if key in multiple_dict:
            for fieldname in multiple_dict[key]:
                small_dict[fieldname] = val.split('|')

        # The bounding box arrives as W,S,E,N and is re-ordered to W,E,N,S
        # inside ENVELOPE().
        if key == "spatial":
            bbox = val.split(',')
            if len(bbox) == 4:
                west, south, east, north = bbox
                small_dict["solr_geom"] = (
                    "ENVELOPE(" + west + "," + east + "," + north + "," + south + ")"
                )
            else:
                # Without all four coordinates, write the placeholder string.
                small_dict["solr_geom"] = "NULL"

        # Collect every non-empty link/service URL, in CSV column order.
        if key in reference_keys and val:
            references[reference_keys[key]] = val

    # dct_references_s is a JSON object serialized to a string. json.dumps
    # escapes quotes and backslashes in the URLs; the compact separators and
    # ensure_ascii=False keep the text identical to plain concatenation for
    # values that need no escaping.
    small_dict["dct_references_s"] = json.dumps(
        references, separators=(',', ':'), ensure_ascii=False
    )

    # Write the record to a JSON file named after its identifier.
    filename = row['identifier'] + ".json"
    with open("json/" + code + "/" + filename, 'w') as jsonfile:
        json.dump(small_dict, jsonfile, indent=2)

*Script authored by Emily Ruetz @ruetz007; Updated by Karen Majewicz @karenmajewicz*