In [1]:
import re
import json

import pandas as pd
from notion.block import CollectionViewBlock

import helpers

In [2]:
# Notion page whose collection-view children are read as the per-category tag tables.
TAG_HIERARCHY_URL = 'https://www.notion.so/Tag-hierarchy-Features-db9799312efa4f88851e8d49393bbb16'

In [3]:
# Notion client shared by the functions below (page fetching and API metadata).
client = helpers.get_notion_client()

In [4]:
# fetch & compile features df from Notion

def get_tag_tables():
    """Collect the tag tables found on the tag hierarchy page.

    Returns:
        list: the page's children that are ``CollectionViewBlock`` instances
        and whose title is not 'Unhandled', in the order the page lists them.
    """
    page = client.get_block(TAG_HIERARCHY_URL)

    tables = []
    for block in page.children:
        # Only collection views are tables; other block types are skipped.
        if not isinstance(block, CollectionViewBlock):
            continue
        # The 'Unhandled' table is deliberately left out.
        if block.title == 'Unhandled':
            continue
        tables.append(block)
    return tables

def get_feature_df_from_table(tag_table):
    """Turn one Notion tag table into a features DataFrame.

    The table title is stored in a ``category`` column, rows with an empty
    ``tags`` value are dropped, and only the columns used downstream are kept.

    Args:
        tag_table: Notion table block; its ``title`` becomes the category.

    Returns:
        DataFrame with columns category, feature, tags, description,
        quantities and aggregation.
    """
    wanted_columns = ['category', 'feature', 'tags', 'description', 'quantities', 'aggregation']

    table_df = helpers.df_from_notion_table(tag_table)
    table_df['category'] = tag_table.title

    # leave out rows that have no tags
    has_tags = table_df['tags'].apply(len) != 0
    return table_df.loc[has_tags, wanted_columns]

def get_features_df():
    """Build a single features DataFrame from every tag table.

    Returns:
        DataFrame: the per-table frames stacked in table order, with a fresh
        0..n-1 index.
    """
    per_table_frames = [get_feature_df_from_table(table) for table in get_tag_tables()]
    stacked = pd.concat(per_table_frames)
    return stacked.reset_index(drop=True)

In [5]:
# parse out individual tags from features df

def get_tag_description(tag, description):
    """Return the part of a feature description that applies to one tag.

    Some descriptions are subdivided per tag with labels wrapped in ``*`` / ``_``
    characters, e.g. ``**tag name:** text``. A label ``all others`` is a
    fallback for tags that have no label of their own.

    Args:
        tag: tag name; matched literally and case-insensitively.
        description: the feature's full description text, or None.

    Returns:
        - the text after the tag's own label, if present;
        - otherwise the text after an ``all others`` label, if present;
        - otherwise the whole description when it contains no labels at all;
        - None when nothing applies, or when the result is '' or '?'.
    """
    if description is None:
        return None

    # The tag's own label is tried first so that an "all others" entry listed
    # earlier in the text cannot shadow it. re.escape keeps tag names that
    # contain regex metacharacters (+, parentheses, dots, ...) literal.
    for label in (re.escape(tag), 'all others'):
        match = re.search(rf'[*_]{{2}}{label}[*_:]{{3}}(.+)', description, re.IGNORECASE)
        if match:
            parsed_description = match[1].strip()
            break
    else:
        # No label for this tag: a description that is subdivided for other
        # tags says nothing about this one; an undivided one applies as a whole.
        tag_specific_descriptions = re.search(r'[*_]{2}[\w ]+[*_:]{3}', description)
        parsed_description = '' if tag_specific_descriptions else description

    return None if parsed_description in ('', '?') else parsed_description

def get_tag_quantity(tag, quantities):
    """Parse the quantity that applies to one tag.

    Args:
        tag: tag name; matched literally and case-insensitively.
        quantities: the feature's quantities text (e.g. ``'1 for all'`` or
            ``'tag a: 2, all others: 1'``), or None.

    Returns:
        - 1 when ``quantities`` is ``'1'`` or ``'1 for all'``;
        - the number after ``<tag>: `` as a float, if present;
        - otherwise the number after ``all others: `` as a float, if present;
        - otherwise the raw ``quantities`` text, or None when it is empty/None.

    Raises:
        ValueError: if the captured run of digits, dots and spaces is not a
            valid number (e.g. ``'1.2.3'``).
    """
    if quantities is None:
        return None

    if quantities in ['1', '1 for all']:
        return 1

    # The tag's own entry takes precedence over the "all others" fallback even
    # when the fallback is listed first. re.escape keeps tag names that contain
    # regex metacharacters literal.
    for label in (re.escape(tag), 'all others'):
        match = re.search(rf'{label}: ([ \d\.]+)', quantities, re.IGNORECASE)
        if match:
            return float(match[1])

    # Nothing parseable: hand back the raw text so it can be inspected.
    return quantities if len(quantities) > 0 else None

def get_tags_df(features):
    """Expand the features table into one row per (feature, tag) pair.

    Args:
        features: DataFrame with columns category, feature, tags (an iterable
            of tag names per row), description, quantities and aggregation.

    Returns:
        DataFrame with columns category, feature, tag, description, quantity
        and aggregation. The columns are present even when there are no rows,
        so downstream column access keeps working on empty input.
    """
    columns = ['category', 'feature', 'tag', 'description', 'quantity', 'aggregation']

    rows = [
        {
            'category': feature.category,
            'feature': feature.feature,
            'tag': tag,
            # description and quantity are narrowed to the part for this tag
            'description': get_tag_description(tag, feature.description),
            'quantity': get_tag_quantity(tag, feature.quantities),
            'aggregation': feature.aggregation,
        }
        for feature in features.itertuples()
        for tag in feature.tags
    ]
    # Explicit columns: an empty row list would otherwise give a column-less frame.
    return pd.DataFrame(rows, columns=columns)

In [6]:
def tags_to_api_object(tags):
    """Wrap the tags table in the structure used for the API export.

    The ``quantity`` column is dropped and ``tag`` is renamed to ``name``.
    Each remaining row becomes a dict keyed by its name; if a name occurs
    more than once, the last row wins.

    Args:
        tags: tags DataFrame as produced by ``get_tags_df``.

    Returns:
        dict: the API metadata from ``helpers`` plus a ``data`` entry holding
        the rows under ``containment_tags``.
    """
    api_rows = tags.drop(columns=['quantity']).rename(columns={'tag': 'name'})

    rows_by_name = {row['name']: dict(row) for _, row in api_rows.iterrows()}

    metadata = helpers.get_api_metadata(client)
    return {**metadata, 'data': {'containment_tags': rows_by_name}}

In [7]:
def store_tags(tags, path='data/various/countermeasure-tags.csv'):
    """Write the tags table to a CSV file, without the index column.

    Args:
        tags: tags DataFrame to store.
        path: destination CSV file; defaults to the location this notebook
            has always written to, so existing calls are unaffected.
    """
    tags.to_csv(path, index=False)
    # JSON export of the API object is currently disabled:
#     with open('data/tags.json', 'w') as fp:
#         json.dump(tags_to_api_object(tags), fp, separators=(',', ':'))

In [8]:
# Fetch the feature tables from Notion, then expand them to one row per tag.
features = get_features_df()
tags = get_tags_df(features)

In [9]:
# Persist the tags table to disk as CSV.
store_tags(tags)

In [10]:
# Sanity check: show every row whose tag name occurs more than once.
tags.groupby('tag').filter(lambda x: len(x) > 1)

Unnamed: 0,category,feature,tag,description,quantity,aggregation


In [11]:
# Rows still missing a description, aggregation or quantity; gaps are shown as '???'.
tags[tags[['description', 'aggregation', 'quantity']].isnull().any(axis=1)].fillna('???')

Unnamed: 0,category,feature,tag,description,quantity,aggregation
34,Public Hygiene,Public cleaning,public facility cleaning,???,1,sum unique
43,Public Hygiene,Hygiene and distancing advice,coronavirus educational activities,"Adverts, announcements and other activities ai...",???,???
44,Public Hygiene,Hygiene and distancing advice,handwashing encouragement,any measures aimed at getting the general publ...,???,???
45,Public Hygiene,Hygiene and distancing advice,public mask encouragement,???,???,???
46,Public Hygiene,Hygiene and distancing advice,stay home advice,advice for people to stay home if possible,???,???
47,Public Hygiene,Hygiene and distancing advice,space minimum,advice for people to maintain a minimum amount...,???,???
48,Public Hygiene,Hygiene and distancing advice,social distancing advise,???,???,???
49,Public Hygiene,Hygiene and distancing advice,outdoor person density,advice on maximum safe density of people in ou...,???,???
50,Public Hygiene,Hygiene and distancing advice,indoor person density,advice on maximum safe density of people in in...,???,???
51,Public Hygiene,Hygiene and distancing advice,handshakes banned,banning of handshakes as a greeting,???,???
