In [1]:
#
# This notebook is written in a way intended to be view
# with cell inputs hidden. If this comment is visible,
# the data is at the end of the report, you may just ignore
# all the code.
#
# intended usage: convert to html with cell execution enabled
# > jupyter nbconvert --to=html --no-input --ExecutePreprocessor.enabled=True TM-status-report.ipynb
#
# if working in the gis repository, run:
# > make
# to automatically run, convert, and publish of github pages


# Tasking Manager COVID-19 projects summary


This is a Jupyter notebook for quickly summarizing the status of a HOTOSM Tasking Manager campaing using the TM API.


In [2]:
from datetime import datetime
import json
import os

TOKEN = os.environ.get('HOTOSM_TM_TOKEN', '')
print('token:',TOKEN)
# set the text search keyword
run_params= dict(
    textSearch= 'covid',
    mapperLevel='ALL',
    projectStatuses='DRAFT,PUBLISHED,ARCHIVED',
    instance = 'tasks.hotosm.org',
)
# for notebooks used for reporting, it's nice to
# timestamp of the data, since it can get old quickly
now = datetime.utcnow()
print(f'data last updated: {now} UTC')
print(json.dumps(run_params, indent=2))

token: Token TVRRek5qUXlOQS5FWHVKUUEuNEZfMnFZWkpqWjFDTHQ0M245UGdlZXFySm5j
data last updated: 2020-04-24 20:55:15.010666 UTC
{
  "textSearch": "covid",
  "mapperLevel": "ALL",
  "projectStatuses": "DRAFT,PUBLISHED,ARCHIVED",
  "instance": "tasks.hotosm.org"
}


In [3]:

import requests
import json
import pandas as pd

# make sure pandas doesn't truncate the dataframe when rendered
pd.set_option('display.max_rows', 500)

def getAllPages(instance='tasks.hotosm.org', textSearch="", mapperLevel='ALL', projectStatuses='', page = 1, token=''):
    
    # both must be empty, or both must be non-empty
    assert (projectStatuses == '') == (token == ''), "DRAFT and ARCHIVED require auth token"
    request_count = 0
    while True:
    
        url = f"https://{instance}/api/v1/project/search?textSearch={textSearch}&mapperLevel={mapperLevel}&projectStatuses={projectStatuses}&page={page}"
        r = requests.get(url, 
            headers = {
                'Accept-Language': '*',
                'Content-Type': 'application/json',
                'Authorization': token
            }
        )
        print(url)
        request_count += 1
        assert request_count < 100, "api search query stuck in loop"
        
        j = json.loads(r.content)

        df = pd.DataFrame(json.loads(r.content)['results'])
        df['page'] = page
        yield df
        if j['pagination']['hasNext'] is True:
            page = j['pagination']['nextNum']
        else:
            break

            
# concatenate paginated API results into one dataframe
result = pd.concat(getAllPages(**run_params, token=TOKEN), ignore_index=True);
# search API pagination sometimes returns the same project more than once
result = result.drop_duplicates(subset='projectId');


https://tasks.hotosm.org/api/v1/project/search?textSearch=covid&mapperLevel=ALL&projectStatuses=DRAFT,PUBLISHED,ARCHIVED&page=1
https://tasks.hotosm.org/api/v1/project/search?textSearch=covid&mapperLevel=ALL&projectStatuses=DRAFT,PUBLISHED,ARCHIVED&page=2
https://tasks.hotosm.org/api/v1/project/search?textSearch=covid&mapperLevel=ALL&projectStatuses=DRAFT,PUBLISHED,ARCHIVED&page=3
https://tasks.hotosm.org/api/v1/project/search?textSearch=covid&mapperLevel=ALL&projectStatuses=DRAFT,PUBLISHED,ARCHIVED&page=4
https://tasks.hotosm.org/api/v1/project/search?textSearch=covid&mapperLevel=ALL&projectStatuses=DRAFT,PUBLISHED,ARCHIVED&page=5
https://tasks.hotosm.org/api/v1/project/search?textSearch=covid&mapperLevel=ALL&projectStatuses=DRAFT,PUBLISHED,ARCHIVED&page=6
https://tasks.hotosm.org/api/v1/project/search?textSearch=covid&mapperLevel=ALL&projectStatuses=DRAFT,PUBLISHED,ARCHIVED&page=7
https://tasks.hotosm.org/api/v1/project/search?textSearch=covid&mapperLevel=ALL&projectStatuses=DRAFT,PU

KeyError: 'results'

# Currently published projects

The table below shows the current status of all published projects that include the `covid` keyword.

Projects are sorted by project id, meaning they are in somewhat choronological order.

In [4]:
# this cells applyies some sematic style to make the
# search summary nicer to read
from matplotlib.colors import LinearSegmentedColormap

# colors from the HOTOSM media kit :)
hotosm_colors = {
  "red": "#D73F3F",
  "red-dark": "#6C2020",
  "red-light": "#FFEDED",
  "orange": "#FAA71E",
  "tan": "#F0EFEF",
  "blue-dark": "#2C3038",
  "blue-grey": "#68707F",
  "blue-light": "#929DB3",
  "grey-light": "#E1E0E0",
  "green": "#53AE62"
  }

# create a colormap for colorign mappedness and validatedness percentages
colors = [hotosm_colors['red'],hotosm_colors['orange'],hotosm_colors['green']]
colormap = LinearSegmentedColormap.from_list('hotosm-RdOrGr', colors, N=20)

# Pandas dataframe render styler functions
def format_priority(val):
    """set cell color based on proejct priority in TM"""
    lookup = {
        'LOW': hotosm_colors['grey-light'],
        'MEDIUM': hotosm_colors['green'],
        'HIGH': hotosm_colors['orange'],
        'URGENT': hotosm_colors['red']
    }
    color = lookup[val]
    return f'background-color: {color}'

def format_status(val: str):
    """set cell color based on proejct status in TM"""
    lookup = {
        'ARCHIVED': hotosm_colors['grey-light'],
        'PUBLISHED': hotosm_colors['green'],
        'DRAFT': hotosm_colors['orange']
    }
    color = lookup[val]
    return f'background-color: {color}'

def format_project_link(id):
    return f"https://{search_params['instance']}/project/{id}"

def format_clickable_project_id(id):
    url = format_project_link(id)
    return f'<a href="{url}">{id}</a>'

# Total count of projects
print(f'total number of projects: {len(result)}')

# We select only the most interesting fields from the dataframe
relevant = result[['projectId','name','percentMapped', 'percentValidated','priority','status', 'campaignTag']]

# Sorting by project id is somewhat cronological
# and also keeps the table order intact as new project are created
sorted_vals = relevant.sort_values('projectId')

# add a Tasking manager link
sorted_vals['hyperlink'] = sorted_vals['projectId'].apply(format_project_link)


# write the table out as CSV, so it will be accessible e.g. gy google spreadsheets
# index=False removes the pandas internal row index column from the CSV
sorted_vals.to_csv('TM-status-report.csv', index=False)

# the hyperlink text is just for the CSV
# notebook projectId is clickable, so the hyperlink is redundant
del sorted_vals['hyperlink']

# apply custom styler functions to final table
styled = sorted_vals.style\
    .background_gradient(cmap=colormap,subset=['percentMapped', 'percentValidated'])\
    .applymap(format_priority, subset=['priority'])\
    .applymap(format_status  , subset=['status'])\
    .format({'projectId': format_clickable_project_id})


display(styled)

NameError: name 'result' is not defined

Notebook maintained by @jarmokivekas (hotosm slack), feedback and development ideas welcome.