In [None]:
#
# This notebook is written in a way intended to be view
# with cell inputs hidden. If this comment is visible,
# the data is at the end of the report, you may just ignore
# all the code.
#
# intended usage: convert to html with cell execution enabled
# > jupyter nbconvert --to=html --no-input --ExecutePreprocessor.enabled=True TM-status-report.ipynb
#
# if working in the gis repository, run:
# > make
# to automatically run, convert, and publish of github pages


# Tasking Manager COVID-19 Projects Summary


This is a Jupyter notebook for quickly summarizing the status of a HOTOSM Tasking Manager campaing using the TM API.
Data show here is periodically updated by and automated bot.


In [1]:
import requests
import json
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime
from IPython.core.display import display, HTML
# make sure pandas doesn't truncate the dataframe when rendered
pd.set_option('display.max_rows', 500)

import tasking_manager_client as tm

            
# set the text search keyword
textSearch = 'covid'
projectStatuses = 'ARCHIVED,PUBLISHED,DRAFT'
instance = 'tasks.hotosm.org'

# for notebooks used for reporting, it's nice to
# timestamp of the data, since it can get old quickly
now = datetime.utcnow()
print(f'data last updated: {now} UTC')
print(f'tasking manager:   {instance}')
print(f'search keyword:    {textSearch}')

data last updated: 2020-07-16 11:46:31.663174 UTC
tasking manager:   tasks.hotosm.org
search keyword:    covid


In [3]:

# concatenate paginated API results into one dataframe
result = pd.concat(tm.v2.project_search(textSearch=textSearch, projectStatuses=projectStatuses), ignore_index=True);
# search API pagination sometimes returns the same project more than once
result = result.drop_duplicates(subset='projectId');


In [4]:
result.count()

activeMappers        270
campaigns            270
country              270
dueDate                0
lastUpdated          270
locale               270
mapperLevel          270
name                 270
organisationLogo     245
organisationName     267
percentMapped        270
percentValidated     270
priority             270
projectId            270
shortDescription     270
status               270
totalContributors    270
page                 270
dtype: int64

In [6]:
def get_summaries():
  for idx, project in result.iterrows():


    endpoint = f'/api/v2/projects/{project.projectId}/statistics/'
    r = requests.get('https://' + tm.DEFAULT_INSTANCE_API + endpoint)
    df = pd.io.json.json_normalize(r.json())
    yield df

summaries = pd.concat(get_summaries())
# summaries

JSONDecodeError: Expecting value: line 1 column 1 (char 0)

In [None]:
cols_to_use = summaries.columns.difference(result.columns)
result = result.merge(summaries, suffixes=('', '_duplicate'), how='outer')
result.drop(result.filter(regex='_duplicate$').columns.tolist(),axis=1, inplace=True)
result.columns

## Current Published and Archived Projects

In [None]:

# Total count of projects
print('Published projects:', len(result[result.status == 'PUBLISHED']))
print('Archived projects: ', len(result[result.status == 'ARCHIVED']))
print('Total projects:    ', len(result))

fig, axs = plt.subplots(1,2, sharey=True, figsize=(10,5))
ax = axs[0]
ax.set_ylabel('Project Count')
ax.set_xlabel('Percent completed')
ax.hist(result.percentMapped)
ax.set_title('Percent Mapped')
ax.grid()
ax = axs[1]
ax.grid()
ax.set_title('Percent Validated')
ax.hist(result.percentValidated)
plt.show()



The table below shows the current status of all published and archived projects that include the `covid` keyword.

Projects are sorted by project id, meaning they are in somewhat choronological order.

You can click the project Id to open it in TM.

In [7]:
# We select only the most interesting fields from the dataframe

result['projectArea(in sq.km)'] = result['projectArea(in sq.km)'].apply(int)
result['country'] = result['country'].apply(lambda val: val[0] if isinstance(val, list) else val)

sorted_vals = result.sort_values('projectId')
sorted_vals['hyperlink'] = sorted_vals['projectId'].apply(tm.style.format_project_link)
del sorted_vals['shortDescription']
del sorted_vals['aoiCentroid.type']
del sorted_vals['organisationLogo']
sorted_vals.to_csv('TM-status-report.csv', index=False)
del sorted_vals['hyperlink']



# apply custom styler functions to final table
styled = sorted_vals.style\
    .background_gradient(cmap=tm.style.hotosm_cmap(),subset=['percentMapped', 'percentValidated'])\
    .applymap(tm.style.format_priority, subset=['priority'])\
    .applymap(tm.style.format_status, subset=['status'])\
    .format({'projectId': tm.style.format_clickable_project_id})


display(styled)

KeyError: 'projectArea(in sq.km)'

Notebook maintained by @jarmokivekas (GitHub & HOTOSM Slack), feedback and development ideas welcome.