***Execute export-countermeasure-tags.ipynb before running this notebook.***

In [1]:
import re
import json

import pandas as pd
from notion.block import CollectionViewBlock

import helpers

In [2]:
# Notion collection-view URLs of the two databases this notebook reads.
MEASURES_DB_URL = 'https://www.notion.so/977d5e5be0434bf996704ec361ad621d?v=fe54f89ca9e04ac799af42b39e1efc4b'
COMPLETENESS_DB_URL = 'https://www.notion.so/dbf0f4ffc03f4417a16d65a6d08eea69?v=845ef4baafd44e72b9c483381b90caae'

In [3]:
# Notion client from the project-local `helpers` module; used below to fetch both databases.
client = helpers.get_notion_client()

In [4]:
# Tag table; get_default_quantities() reads its 'tag' and 'quantity' columns.
# Presumably written by export-countermeasure-tags.ipynb -- TODO confirm.
tags = pd.read_csv('data/various/countermeasure-tags.csv')

In [5]:
# Resolve both database URLs to Notion blocks.
measures_db = client.get_block(MEASURES_DB_URL)
completeness_db = client.get_block(COMPLETENESS_DB_URL)

In [6]:
# Load the measures database into a DataFrame via the project helper.
# The Notion API seems to take a long time with large tables, but it caches results.
measures = helpers.df_from_notion_table(measures_db)

In [7]:
# Per-country completeness info; rows missing either column are dropped.
completeness = (
    helpers.df_from_notion_table(completeness_db)
    .loc[:, ['country', 'complete_up_to_date']]
    .dropna()
)

In [8]:
# Attach the completeness columns to every measure. This is pandas' default
# inner join, so measures whose country has no completeness row are dropped.
measures = pd.merge(measures, completeness, on='country')

In [9]:
# Keep only measures that carry at least one keyword (tag).
measures = measures.loc[measures['keywords'].map(len) > 0].copy()

In [10]:
# Total number of (measure, keyword) pairs across all remaining measures.
measures['keywords'].map(len).sum()

1973

In [11]:
# Number of measures left after the keyword filter.
len(measures)

1293

In [12]:
# Spot-check a single measure by index label. The label 262 is hard-coded:
# if that row did not survive the merge/filter above, this raises KeyError.
measures.loc[262]

target_region                                                                        []
source                                https://www.hongkongfp.com/2020/03/21/breaking...
keywords                              [public mask and hygiene supply, social distan...
country                                                                       Hong Kong
date_end_intended                     <notion.collection.NotionDate object at 0x11e2...
implementing_state_province                                                          []
target_state                                                                           
implementing_city_municipality                                                       []
target_city                                                                            
applies_to                                                                             
date_start                                                          2020-03-21 00:00:00
exceptions                      

In [13]:
# List the measures whose `id` is non-empty: empty ids become None and are dropped.
measures['id'].map(lambda value: value if len(value) != 0 else None).dropna()

0       163
3       772
4       760
5       761
6       759
       ... 
1324    298
1325    354
1326    343
1327    344
1328    347
Name: id, Length: 722, dtype: object

In [14]:
# (column in the Notion table, column name in the exported CSV), in output order.
field_pairs = [
    # ('id', 'id'),  # not reliable
    ('country', 'country'),
    # maybe worth adding these:
    # ('implementing_state_province', 'state_or_province'),
    # ('implementing_city_municipality', 'municipality'),
    # ('target_country', 'target_country'),
    ('description_of_measure_implemented', 'description'),
    ('source', 'source'),
    ('date_start', 'start_date'),
    ('date_end_intended', 'end_date'),
    ('quantity', 'quantity'),
    ('keywords', 'tags'),
]
fields_to_use = [notion_name for notion_name, _ in field_pairs]
renamed_fields = [export_name for _, export_name in field_pairs]

# Other columns were not included because too few rows have
# data to bother programming them into the frontend.

In [15]:
# Keep only the exported columns and give them their CSV names.
measures = measures[fields_to_use].rename(
    columns=dict(zip(fields_to_use, renamed_fields)))

In [16]:
def expand_tags(measures):
    """Return one row per (measure, tag) pair.

    Parameters
    ----------
    measures : pandas.DataFrame
        Must have a 'tags' column whose cells are iterables of tags.

    Returns
    -------
    pandas.DataFrame
        A 'tag' column followed by every input column except 'tags'.
        A measure with N tags contributes N rows; a measure with no tags
        contributes none. The index is a fresh RangeIndex.
    """
    # Fix the output columns up front so that an input with no rows (or no
    # tags at all) still yields a frame with a 'tag' column; otherwise a later
    # merge on 'tag' would fail on a column-less frame.
    columns = ['tag'] + [col for col in measures.columns if col != 'tags']

    tagged_rows = []
    for _, measure in measures.iterrows():
        shared = measure.to_dict()
        measure_tags = shared.pop('tags')
        tagged_rows.extend(dict(tag=tag, **shared) for tag in measure_tags)
    return pd.DataFrame(tagged_rows, columns=columns)

In [17]:
# Replace the per-measure frame with one row per (measure, tag) pair.
measures = expand_tags(measures)

In [19]:
def get_default_quantities(tag_table=None):
    """Build a lookup of each tag's default numeric quantity.

    Parameters
    ----------
    tag_table : pandas.DataFrame, optional
        Table with 'tag' and 'quantity' columns. Defaults to the
        notebook-level `tags` frame.

    Returns
    -------
    pandas.DataFrame
        Columns 'tag' and 'quantity', one row per distinct tag (the last
        occurrence wins). 'quantity' is a float where the source value is a
        plain non-negative decimal number (e.g. '3', '2.5', '.5') and NaN
        otherwise.
    """
    if tag_table is None:
        tag_table = tags

    quant_map = (
        tag_table[['tag', 'quantity']]
        .drop_duplicates(subset=['tag'], keep='last')
        .copy()
    )

    # Require the WHOLE value to be a number. A start-anchored match alone would
    # accept values such as '3 weeks' or '.', which then fail float conversion.
    raw = quant_map['quantity'].astype(str).str.strip()
    is_number = raw.str.match(r'(?:\d+(?:\.\d*)?|\.\d+)$', na=False)

    # Assign the column directly instead of an in-place fillna on
    # `quant_map.quantity`: chained in-place updates are not guaranteed to
    # write through to the frame (they do not under pandas Copy-on-Write).
    quant_map['quantity'] = pd.to_numeric(
        raw.where(is_number), errors='coerce').astype(float)
    return quant_map

In [20]:
# Attach each tag's default quantity. The measure's own value is kept as
# 'quantity_measure'; the tag default takes the plain 'quantity' name.
# This is an inner join, so rows whose tag is absent from the tag table are dropped.
measures = measures.merge(get_default_quantities(), on='tag', suffixes=('_measure', ''))

# Where the tag has no default quantity, fall back to the measure's own value.
# Assign the result back explicitly: an in-place fillna on `measures.quantity`
# is a chained update and does not modify the frame under pandas Copy-on-Write.
# NOTE(review): the tag default wins over the measure's own quantity here --
# confirm that this priority is intended.
measures['quantity'] = measures['quantity'].fillna(measures['quantity_measure'])
measures = measures.drop(columns=['quantity_measure'])

In [22]:
# Write the per-tag measures table to CSV, without the DataFrame index.
measures.to_csv('data/various/countermeasures-by-tag.csv', index=False)