In [42]:
# ####################################### Gonna get us some DATA #######################################################
# ################ Data about Hubway Stations, Electric Charging Stations, and Open Spaces in Boston ###################


import urllib.request
import json
import dml
import prov.model
import datetime
import uuid
import geojson
import csv


def fetch_json(url, item):
    print("Downloading " + str(item) + " Dataset from: " + str(url))
    response = urllib.request.urlopen(url).read().decode("utf-8")
    r = json.loads(response)
    return r


def fetch_geojson(url, item):
    print("Downloading " + str(item) + " Dataset from: " + str(url))
    response = urllib.request.urlopen(url).read().decode("utf-8")
    r = geojson.loads(response)
    rdict = dict(r)
    rlist = rdict['features']
    return rlist


class getdata(dml.Algorithm):
    contributor = 'jhs2018_rpm1995'
    reads = []
    writes = ['jhs2018_rpm1995.hubway',
              'jhs2018_rpm1995.charge',
              'jhs2018_rpm1995.trees',
              'jhs2018_rpm1995.budget',
              'jhs2018_rpm1995.openspaces']

    @staticmethod
    def execute(trial=False):
        # Retrieve datasets
        startTime = datetime.datetime.now()

        # Set up the database connection.
        client = dml.pymongo.MongoClient()
        repo = client.repo
        repo.authenticate('jhs2018_rpm1995', 'jhs2018_rpm1995')

        # This fetches a dataset with details about Hubway stations in Boston
        r = fetch_geojson('http://bostonopendata-boston.opendata.arcgis.com/datasets'
                          '/ee7474e2a0aa45cbbdfe0b747a5eb032_0.geojson', "Hubway Stations")
        repo.dropCollection("hubway")
        repo.createCollection("hubway")
        repo['jhs2018_rpm1995.hubway'].insert_many(r)

#         # This fetches a dataset with details about Trees in Boston
#         r = fetch_geojson('http://datamechanics.io/data/Trees%20(1).geojson', "Trees")
#         repo.dropCollection("trees")
#         repo.createCollection("trees")
#         repo['jhs2018_rpm1995.trees'].insert_many(r)

        # This fetches a dataset with details about Charging Stations in Boston
        r = fetch_json('https://boston.opendatasoft.com/explore/dataset/charging-stations/download/?format=json'
                       '&timezone=America/New_York', "Charging Stations")
        repo.dropCollection("charge")
        repo.createCollection("charge")
        repo['jhs2018_rpm1995.charge'].insert_many(r)

#         # This fetches a dataset with details about Open Spaces in Boston
#         r = fetch_geojson('http://bostonopendata-boston.opendata.arcgis.com/datasets/2868d370c55d4d458d4ae2224ef8cddd_7'
#                           '.geojson', "Open Spaces")
#         repo.dropCollection("openspaces")
#         repo.createCollection("openspaces")
#         repo['jhs2018_rpm1995.openspaces'].insert_many(r)

        # This fetches a dataset with details about Budget Facilities in Boston
#         r = fetch_geojson('http://bostonopendata-boston.opendata.arcgis.com/datasets/106ab2544b3d4038ad110b531777931e_0.geojson', "Budget Facilities")
        url = 'http://datamechanics.io/data/Budget_Facilities_FY2017.csv'
        response = urllib.request.urlopen(url).read().decode("utf-8")
        reader = csv.DictReader(response.splitlines())
        dicky = []
        for row in reader:
            print(row)
            dicky.append(row)
        repo.dropCollection("budget")
        repo.createCollection("budget")
        repo['jhs2018_rpm1995.budget'].insert_many(dicky)

        repo.logout()

        endTime = datetime.datetime.now()

        return {"start": startTime, "end": endTime}

    @staticmethod
    def provenance(doc=prov.model.ProvDocument(), startTime=None, endTime=None):

            # Create the provenance document describing everything happening
            # in this script. Each run of the script will generate a new
            # document describing that invocation event.

        client = dml.pymongo.MongoClient()
        repo = client.repo
        repo.authenticate('jhs2018_rpm1995', 'jhs2018_rpm1995')
        doc.add_namespace('alg', 'http://datamechanics.io/algorithm/')  # The scripts are in <folder>#<filename> format.
        doc.add_namespace('dat', 'http://datamechanics.io/data/')  # The data sets are in <user>#<collection> format.
        doc.add_namespace('ont', 'http://datamechanics.io/ontology#')  # 'Extension', 'DataResource', 'DataSet',
        # 'Retrieval', 'Query', or 'Computation'.
        doc.add_namespace('log', 'http://datamechanics.io/log/')  # The event log.
        doc.add_namespace('bwod', 'https://boston.opendatasoft.com/explore/dataset/boston-neighborhoods/')  # Boston
        # Wicked Open Data
        doc.add_namespace('hub', 'http://bostonopendata-boston.opendata.arcgis.com/datasets')  # Boston Open Data
        doc.add_namespace('tree', 'http://datamechanics.io/data')
        doc.add_namespace('charge', 'http://bostonopendata-boston.opendata.arcgis.com/datasets')
        doc.add_namespace('openspace', 'http://bostonopendata-boston.opendata.arcgis.com/datasets')
        doc.add_namespace('budget', 'http://bostonopendata-boston.opendata.arcgis.com/datasets')

        this_script = doc.agent('alg:jhs2018_rpm1995#getdata',
                                {prov.model.PROV_TYPE: prov.model.PROV['SoftwareAgent'], 'ont:Extension': 'py'})

# #######

        resource_hubway = doc.entity('hub: geojson', {'prov:label': 'Hubway stations in Boston',
                                                      prov.model.PROV_TYPE: 'ont:DataResource', 'ont:Extension':
                                                          'geojson'})
        get_hubway = doc.activity('log:uuid' + str(uuid.uuid4()), startTime, endTime)
        doc.wasAssociatedWith(get_hubway, this_script)
        doc.usage(get_hubway, resource_hubway, startTime, None, {prov.model.PROV_TYPE: 'ont:Retrieval'})

        resource_trees = doc.entity('tree: geojson', {'prov:label': 'Trees in Boston',
                                                prov.model.PROV_TYPE: 'ont:DataResource', 'ont:Extension': 'geojson'})
        get_trees = doc.activity('log:uuid' + str(uuid.uuid4()), startTime, endTime)
        doc.wasAssociatedWith(get_trees, this_script)
        doc.usage(get_trees, resource_trees, startTime, None, {prov.model.PROV_TYPE: 'ont:Retrieval'})

        resource_charge = doc.entity('charge: json', {'prov:label': 'Charging Stations in Boston',
                                                      prov.model.PROV_TYPE: 'ont:DataResource', 'ont:Extension':
                                                      'json'})
        get_charge = doc.activity('log:uuid' + str(uuid.uuid4()), startTime, endTime)
        doc.wasAssociatedWith(get_charge, this_script)
        doc.usage(get_charge, resource_charge, startTime, None, {prov.model.PROV_TYPE: 'ont:Retrieval'})
        
        resource_budget = doc.entity('budget: geojson', {'prov:label': 'Budget Facilities in Boston',
                                                      prov.model.PROV_TYPE: 'ont:DataResource', 'ont:Extension':
                                                      'geojson'})
        get_budget = doc.activity('log:uuid' + str(uuid.uuid4()), startTime, endTime)
        doc.wasAssociatedWith(get_budget, this_script)
        doc.usage(get_budget, resource_budget, startTime, None, {prov.model.PROV_TYPE: 'ont:Retrieval'})

        resource_openspaces = doc.entity('openspace: geojson', {'prov:label': 'Open Spaces in Boston',
                                                                prov.model.PROV_TYPE: 'ont:DataResource',
                                                                'ont:Extension':
                                                                'geojson'})
        get_openspaces = doc.activity('log:uuid' + str(uuid.uuid4()), startTime, endTime)
        doc.wasAssociatedWith(get_openspaces, this_script)
        doc.usage(get_openspaces, resource_openspaces, startTime, None, {prov.model.PROV_TYPE: 'ont:Retrieval'})
# #######

        hubway = doc.entity('dat:jhs2018_rpm1995hubway', {prov.model.PROV_LABEL: 'hubway', prov.model.PROV_TYPE: 'ont:DataSet'})
        doc.wasAttributedTo(hubway, this_script)
        doc.wasGeneratedBy(hubway, get_hubway, endTime)
        doc.wasDerivedFrom(hubway, resource_hubway, get_hubway, get_hubway, get_hubway)

        trees = doc.entity('dat:jhs2018_rpm1995trees', {prov.model.PROV_LABEL: 'trees', prov.model.PROV_TYPE: 'ont:DataSet'})
        doc.wasAttributedTo(trees, this_script)
        doc.wasGeneratedBy(trees, get_trees, endTime)
        doc.wasDerivedFrom(trees, resource_trees, get_trees, get_trees, get_trees)

        charge = doc.entity('dat:jhs2018_rpm1995charge',
                            {prov.model.PROV_LABEL: 'charge', prov.model.PROV_TYPE: 'ont:DataSet'})
        doc.wasAttributedTo(charge, this_script)
        doc.wasGeneratedBy(charge, get_charge, endTime)
        doc.wasDerivedFrom(charge, resource_charge, get_charge, get_charge, get_charge)
        
        budget = doc.entity('dat:jhs2018_rpm1995budget',
                            {prov.model.PROV_LABEL: 'budget', prov.model.PROV_TYPE: 'ont:DataSet'})
        doc.wasAttributedTo(budget, this_script)
        doc.wasGeneratedBy(budget, get_budget, endTime)
        doc.wasDerivedFrom(budget, resource_budget, get_budget, get_budget, get_budget)

        openspaces = doc.entity('dat:jhs2018_rpm1995openspaces',
                                {prov.model.PROV_LABEL: 'openspaces', prov.model.PROV_TYPE: 'ont:DataSet'})
        doc.wasAttributedTo(openspaces, this_script)
        doc.wasGeneratedBy(openspaces, get_openspaces, endTime)
        doc.wasDerivedFrom(openspaces, resource_openspaces, get_openspaces, get_openspaces, get_openspaces)

        repo.logout()

        return doc


getdata.execute()
doc = getdata.provenance()
print(doc.get_provn())
print(json.dumps(json.loads(doc.serialize()), indent=4))

# eof


Downloading Hubway Stations Dataset from: http://bostonopendata-boston.opendata.arcgis.com/datasets/ee7474e2a0aa45cbbdfe0b747a5eb032_0.geojson
Downloading Charging Stations Dataset from: https://boston.opendatasoft.com/explore/dataset/charging-stations/download/?format=json&timezone=America/New_York
OrderedDict([('\ufeffX', '-71.06962854207255'), ('Y', '42.339609596150176'), ('OBJECTID', '1'), ('Longitude', '-71.0696279488279'), ('Latitude', '42.3396009272291'), ('FID', '0'), ('OBJECTID_1', '2'), ('OBJECTID_12', '100'), ('ID', '56'), ('City_Department', 'Police Department'), ('Capital_Project_Number', 'CPD23867'), ('Capital_Asset_Name', 'Area D-4 Station'), ('Capital_Project_Title', 'Area A-1 and Area D-4 Stations'), ('ID1', '56'), ('Project_Description', 'Install new roofs at two police stations. Replace windows at Area A-1 Station and make foundation and terrace masonry repairs.'), ('Neighborhood', 'South End'), ('Project_Status', 'To Be Scheduled'), ('Operating_Impact', '0'), ('FY17

document
  prefix alg <http://datamechanics.io/algorithm/>
  prefix dat <http://datamechanics.io/data/>
  prefix ont <http://datamechanics.io/ontology#>
  prefix log <http://datamechanics.io/log/>
  prefix bwod <https://boston.opendatasoft.com/explore/dataset/boston-neighborhoods/>
  prefix hub <http://bostonopendata-boston.opendata.arcgis.com/datasets>
  prefix tree <http://datamechanics.io/data>
  
  agent(alg:jhs2018_rpm1995#getdata, [prov:type='prov:SoftwareAgent', ont:Extension="py"])
  entity(hub: geojson, [prov:label="Hubway stations in Boston", prov:type="ont:DataResource", ont:Extension="geojson"])
  activity(log:uuid74fc51a1-03ac-4374-a32f-eb209235f7a0, -, -)
  wasAssociatedWith(log:uuid74fc51a1-03ac-4374-a32f-eb209235f7a0, alg:jhs2018_rpm1995#getdata, -)
  used(log:uuid74fc51a1-03ac-4374-a32f-eb209235f7a0, hub: geojson, -, [prov:type="ont:Retrieval"])
  entity(tree: geojson, [prov:label="Trees in Boston", prov:type="ont:DataResource", ont:Extension="geojson"])
  activity(log



In [48]:
# ############################## Projecting Coordinates and leaving out the boring data ################################

import dml
import prov.model
import datetime
import json
import uuid


class project_coordinates(dml.Algorithm):
    contributor = 'jhs2018_rpm1995'
    reads = ['jhs2018_rpm1995.hubway',                  # We will combine 3 datasets into dataset greenobjects, which
#              'jhs2018_rpm1995.trees',                   # will have just the type of objects (tree, charging station, etc.) and
             'jhs2018_rpm1995.charge',                  # its geographical coordinates
             'jhs2018_rpm1995.budget']
    writes = ['jhs2018_rpm1995.greenobjects']

    @staticmethod
    def extract(cursor, type, megalist):        # This function extracts the type of object we are looking at (eg: tree)
                                                # and its coordinates, and adds them to the list that we will write
        for elements in cursor:                 # into dataset "greenobjects"
            megalist.append({"Type": type, "Location": elements['geometry']['coordinates']})
        return megalist

    @staticmethod
    def execute(trial=False):
        # Retrieve datasets
        startTime = datetime.datetime.now()

        # Set up the database connection.
        client = dml.pymongo.MongoClient()
        repo = client.repo
        repo.authenticate('jhs2018_rpm1995', 'jhs2018_rpm1995')

        print("Now running project_coordinates.py")

        objects = []

        hubway = repo.jhs2018_rpm1995.hubway.find()
#         trees = repo.jhs2018_rpm1995.trees.find()
        charge = repo.jhs2018_rpm1995.charge.find()
        # openspaces = repo.jhs2018_rpm1995.openspaces.find()
        budget = repo.jhs2018_rpm1995.budget.find()

        objects = project_coordinates.extract(hubway, "hubway", objects)
#         objects = project_coordinates.extract(trees, "tree", objects)
        objects = project_coordinates.extract(charge, "charge", objects)

        for items in budget:
            if items['City_Department'] == "School Department":
                objects.append({"Type": "budget", "Location": [float(items['Longitude']), float(items['Latitude'])], "Budget": items['Total_Project_Budget']})
        repo.dropCollection("greenobjects")
        repo.createCollection("greenobjects")
        repo['jhs2018_rpm1995.greenobjects'].insert_many(objects)

        repo.logout()

        endTime = datetime.datetime.now()

        return {"start": startTime, "end": endTime}

    @staticmethod
    def provenance(doc=prov.model.ProvDocument(), startTime=None, endTime=None):

            # Create the provenance document describing everything happening
            # in this script. Each run of the script will generate a new
            # document describing that invocation event.

        client = dml.pymongo.MongoClient()
        repo = client.repo
        repo.authenticate('jhs2018_rpm1995', 'jhs2018_rpm1995')
        doc.add_namespace('alg', 'http://datamechanics.io/algorithm/')  # The scripts are in <folder>#<filename> format.
        doc.add_namespace('dat', 'http://datamechanics.io/data/')  # The data sets are in <user>#<collection> format.
        doc.add_namespace('ont', 'http://datamechanics.io/ontology#')  # 'Extension', 'DataResource', 'DataSet',
        # 'Retrieval', 'Query', or 'Computation'.
        doc.add_namespace('log', 'http://datamechanics.io/log/')  # The event log.
        doc.add_namespace('bwod', 'https://boston.opendatasoft.com/explore/dataset/boston-neighborhoods/')  # Boston
        # Wicked Open Data
        doc.add_namespace('ab', 'https://data.boston.gov/dataset/boston-neighborhoods')   # Analyze Boston

        this_script = doc.agent('alg:jhs2018_rpm1995#project_coordinates',
                                {prov.model.PROV_TYPE: prov.model.PROV['SoftwareAgent'], 'ont:Extension': 'py'})

# #######
        resource_hubway = doc.entity('bwod: hubstations', {'prov:label': 'Boston Hubway Stations',
                                                           prov.model.PROV_TYPE: 'ont:DataResource', 'ont:Extension':
                                                           'geojson'})

        resource_trees = doc.entity('alg: trees', {'prov:label': 'Trees in Boston', prov.model.PROV_TYPE:
                                                   'ont:DataResource', 'ont:Extension': 'json'})

        resource_charges = doc.entity('bwod: charging', {'prov:label': 'Charging Stations in Boston',
                                                         prov.model.PROV_TYPE: 'ont:DataResource', 'ont:Extension':
                                                         'json'})

        get_greenobjects = doc.activity('log:uuid' + str(uuid.uuid4()), startTime, endTime,
                                          {
                                                    prov.model.PROV_LABEL: "Locations of Hubway, Charging Stations "
                                                                           "and Trees in Boston",
                                                    prov.model.PROV_TYPE: 'ont:Computation'})

        doc.wasAssociatedWith(get_greenobjects, this_script)

        doc.usage(get_greenobjects, resource_hubway, startTime)
        doc.usage(get_greenobjects, resource_charges, startTime)
        doc.usage(get_greenobjects, resource_trees, startTime)

# #######
        greenobjects = doc.entity('dat:jhs2018_rpm1995_greenobjects',
                                  {prov.model.PROV_LABEL: 'Coordinates of Environment Friendly Assets in Boston',
                                   prov.model.PROV_TYPE: 'ont:DataSet'})
        doc.wasAttributedTo(greenobjects, this_script)
        doc.wasGeneratedBy(greenobjects, get_greenobjects, endTime)
        doc.wasDerivedFrom(greenobjects, resource_hubway, get_greenobjects, get_greenobjects,
                           get_greenobjects)
        doc.wasDerivedFrom(greenobjects, resource_trees, get_greenobjects, get_greenobjects,
                           get_greenobjects)
        doc.wasDerivedFrom(greenobjects, resource_charges, get_greenobjects, get_greenobjects,
                           get_greenobjects)

        repo.logout()

        return doc


project_coordinates.execute()
doc = project_coordinates.provenance()
print(doc.get_provn())
print(json.dumps(json.loads(doc.serialize()), indent=4))

# eof


Now running project_coordinates.py
document
  prefix alg <http://datamechanics.io/algorithm/>
  prefix dat <http://datamechanics.io/data/>
  prefix ont <http://datamechanics.io/ontology#>
  prefix log <http://datamechanics.io/log/>
  prefix bwod <https://boston.opendatasoft.com/explore/dataset/boston-neighborhoods/>
  prefix ab <https://data.boston.gov/dataset/boston-neighborhoods>
  
  agent(alg:jhs2018_rpm1995#project_coordinates, [prov:type='prov:SoftwareAgent', ont:Extension="py"])
  entity(bwod: hubstations, [prov:label="Boston Hubway Stations", prov:type="ont:DataResource", ont:Extension="geojson"])
  entity(alg: trees, [prov:label="Trees in Boston", prov:type="ont:DataResource", ont:Extension="json"])
  entity(bwod: charging, [prov:label="Charging Stations in Boston", prov:type="ont:DataResource", ont:Extension="json"])
  activity(log:uuidb1719a85-9cf1-4b38-a633-ca3141c84634, -, -, [prov:label="Locations of Hubway, Charging Stations and Trees in Boston", prov:type="ont:Computati



In [71]:
import dml
import prov.model
import datetime
import json
import uuid
import folium
import os
from math import cos, asin, sqrt


class display(dml.Algorithm):
    contributor = 'jhs2018_rpm1995'
    reads = ['jhs2018_rpm1995.greenassets']
    writes = ['jhs2018_rpm1995.kmeansdata']

    @staticmethod
    def findcell(value, axis):
        # return min(axis, key=lambda x: abs(x - value))    # Wrong logic... Spent an entire night on this line
        for i in range(1, len(axis)):
            if axis[i] > value:
                return axis[i-1]
            
    @staticmethod
    def distance(lat1, lon1, lat2, lon2):
        p = 0.017453292519943295
        a = 0.5 - cos((lat2-lat1)*p)/2 + cos(lat1*p)*cos(lat2*p) * (1-cos((lon2-lon1)*p)) / 2
        return 12742 * asin(sqrt(a))
    
    @staticmethod
    def closestpoint(cells, long, lat):     # Will never be able to recreate this again
        return min(cells, key=lambda x: display.distance(lat, long, x[0][1], x[0][0]))
        
    @staticmethod
    def execute(trial=False):
        # Retrieve datasets
        startTime = datetime.datetime.now()

        # Set up the database connection.
        client = dml.pymongo.MongoClient()
        repo = client.repo
        repo.authenticate('jhs2018_rpm1995', 'jhs2018_rpm1995')

        print("Now running display.py")

#         dir_path = os.path.dirname(os.path.abspath(__file__))
        dir_path = "C://Users//H//Desktop//course-2018-spr-proj//jhs2018_rpm1995"
        filenamemap = os.path.join(dir_path, "assets.html")
        map_osm = folium.Map(location=[39, -98.1], zoom_start=4)

        assets = repo.jhs2018_rpm1995.greenassets.find()
        budgets = repo.jhs2018_rpm1995.greenobjects.find({"Type": {"$eq": "budget"}})

        # for asset in assets:                                  # Uncomment these four lines to see assets on the map
        #     coords = (asset['Location'][1], asset['Location'][0])
        #     folium.Marker(coords, popup=str(coords)).add_to(map_osm)
        # map_osm.save(filenamemap)

        grid = {}       # This will contain coordinates of the grid as keys, and assets assigned to that grid as values
        cells = []      # To hold just the coordinates of the grid
        megalist = []   # Will hold data to write to database

        i = -71.189
        while i < -70.878:
            j = 42.234
            while j < 42.406:
                coords = (j, i)
                # folium.Marker(coords, popup=str(coords)).add_to(map_osm)      # Uncomment to see grid on map
                # grid[coords] = 0                                              # For overall counts
                grid[coords] = [[0], [0], [0], [0], [0]]                     # [[charge], [hubway], [open spaces], [trees], [budget]]
                cells.append(coords)
                j += 0.01
            i += 0.01
        # map_osm.save(filenamemap)                                             #

        xaxis = []                                                              # Adjust scale of grid here
        i = -71.189
        while i < -70.878:
            xaxis.append(i)
            i += 0.01

        yaxis = []                                                              # Adjust scale of grid here
        i = 42.234
        while i < 42.406:
            yaxis.append(i)
            i += 0.01
            
        budget_coords = []                      # To store coordinates of budgets
        for budget in budgets:
            budget_coords.append([budget['Location'], budget['Budget']])

#         for coords in budget_coords:
#             print("For coords: " + str(coords))
#             cell = display.closestcell(cells, coords[0], coords[1])
#             folium.Marker(cell, icon=folium.Icon(color='green')).add_to(map_osm)
            
        for cell in cells:
            answer = display.closestpoint(budget_coords, cell[1], cell[0])   # Hallelujah
            grid[cell][4][0] += float(answer[1])                              # Storing budget
        
        for asset in assets:                    # This loop finds the cell that the asset belongs to and correspondingly
            y = asset['Location'][1]            # ...increases the count of that asset type in the dictionary
            x = asset['Location'][0]            # ...representation
            typekind = asset['Type']
            ycell = display.findcell(y, yaxis)
            xcell = display.findcell(x, xaxis)
            if (ycell, xcell) in grid:      # O(1) lookup time. Hire me, Google
                # grid[(ycell, xcell)] += 1     # for overall counts
                if typekind == "charge":
                    grid[(ycell, xcell)][0][0] += 1
                elif typekind == "hubway":
                    grid[(ycell, xcell)][1][0] += 1
                elif typekind == "openspace":
                    grid[(ycell, xcell)][2][0] += 1
                elif typekind == "tree":
                    grid[(ycell, xcell)][3][0] += 1

        for coords, counts in grid.items():     # Gonna save to database and display on map
            megalist.append({"coordinates": coords, "charge_count": counts[0][0], "hubway_count": counts[1][0],
                             "open_count": counts[2][0], "tree_count": counts[3][0], "budget": counts[4][0]})
            folium.Marker(coords, popup=str(counts)).add_to(map_osm)

        repo.dropCollection("kmeansdata")
        repo.createCollection("kmeansdata")
        repo['jhs2018_rpm1995.kmeansdata'].insert_many(megalist)
        map_osm.save(filenamemap)

        repo.logout()

        endTime = datetime.datetime.now()

        return {"start": startTime, "end": endTime}

    @staticmethod
    def provenance(doc=prov.model.ProvDocument(), startTime=None, endTime=None):

            # Create the provenance document describing everything happening
            # in this script. Each run of the script will generate a new
            # document describing that invocation event.

        client = dml.pymongo.MongoClient()
        repo = client.repo
        repo.authenticate('jhs2018_rpm1995', 'jhs2018_rpm1995')
        doc.add_namespace('alg', 'http://datamechanics.io/algorithm/')  # The scripts are in <folder>#<filename> format.
        doc.add_namespace('dat', 'http://datamechanics.io/data/')  # The data sets are in <user>#<collection> format.
        doc.add_namespace('ont', 'http://datamechanics.io/ontology#')  # 'Extension', 'DataResource', 'DataSet',
        # 'Retrieval', 'Query', or 'Computation'.
        doc.add_namespace('log', 'http://datamechanics.io/log/')  # The event log.
        doc.add_namespace('bwod', 'https://boston.opendatasoft.com/explore/dataset/boston-neighborhoods/')  # Boston
        # Wicked Open Data
        doc.add_namespace('ab', 'https://data.boston.gov/dataset/boston-neighborhoods')   # Analyze Boston

        this_script = doc.agent('alg:jhs2018_rpm1995#Combine_Coordinates',
                                {prov.model.PROV_TYPE: prov.model.PROV['SoftwareAgent'], 'ont:Extension': 'py'})

# #######
        resource_greenassets = doc.entity('dat:jhs2018_rpm1995_greenassets',
                                  {prov.model.PROV_LABEL: 'Coordinates of Environment Friendly Assets in Boston',
                                   prov.model.PROV_TYPE: 'ont:DataSet'})

        get_grid = doc.activity('log:uuid' + str(uuid.uuid4()), startTime, endTime,
                                          {
                                                    prov.model.PROV_LABEL: "Locations of Green Assets in Boston in a "
                                                                           "grid representation",
                                                    prov.model.PROV_TYPE: 'ont:Computation'})

        doc.wasAssociatedWith(get_grid, this_script)

        doc.usage(get_grid, resource_greenassets, startTime)

# #######
        grid = doc.entity('dat:jhs2018_rpm1995_kmeans',
                                 {prov.model.PROV_LABEL: 'Coordinates of All Environment Friendly Assets in Grid',
                                  prov.model.PROV_TYPE: 'ont:DataSet'})
        doc.wasAttributedTo(grid, this_script)
        doc.wasGeneratedBy(grid, get_grid, endTime)
        doc.wasDerivedFrom(grid, resource_greenassets, get_grid, get_grid, get_grid)

        repo.logout()

        return doc


# display.execute()
# doc = display.provenance()
# print(doc.get_provn())
# print(json.dumps(json.loads(doc.serialize()), indent=4))

# eof


Now running display.py
document
  prefix alg <http://datamechanics.io/algorithm/>
  prefix dat <http://datamechanics.io/data/>
  prefix ont <http://datamechanics.io/ontology#>
  prefix log <http://datamechanics.io/log/>
  prefix bwod <https://boston.opendatasoft.com/explore/dataset/boston-neighborhoods/>
  prefix ab <https://data.boston.gov/dataset/boston-neighborhoods>
  
  agent(alg:jhs2018_rpm1995#Combine_Coordinates, [prov:type='prov:SoftwareAgent', ont:Extension="py"])
  entity(dat:jhs2018_rpm1995_greenassets, [prov:label="Coordinates of Environment Friendly Assets in Boston", prov:type="ont:DataSet"])
  activity(log:uuidd739b1c6-2aba-4e54-8ea7-673a7893162a, -, -, [prov:label="Locations of Green Assets in Boston in a grid representation", prov:type="ont:Computation"])
  wasAssociatedWith(log:uuidd739b1c6-2aba-4e54-8ea7-673a7893162a, alg:jhs2018_rpm1995#Combine_Coordinates, -)
  used(log:uuidd739b1c6-2aba-4e54-8ea7-673a7893162a, dat:jhs2018_rpm1995_greenassets, -)
  entity(dat:jhs2

