In [1]:
import pandas as pd
import numpy as np
import re

In [3]:
from notion.client import NotionClient
from notion.block import CollectionViewBlock

In [4]:
# A Notion session token is required to run this notebook. While logged
# into Notion, use the web console to find the cookie named "token_v2" and
# save its contents to a file called .notion-token in this directory.

with open('.notion-token') as token_handle:
    NOTION_TOKEN = token_handle.read().strip()

# Authenticated client used for every Notion request below.
client = NotionClient(token_v2=NOTION_TOKEN)

In [93]:
# Fetch the Notion page whose child tables are read in the following cells.
tag_hierarchy_url = 'https://www.notion.so/Tag-hierarchy-Features-db9799312efa4f88851e8d49393bbb16'
tag_hierarchy_page = client.get_block(tag_hierarchy_url)

In [94]:
# Keep every collection-view child of the page, except the table titled
# 'Unhandled'.
tag_tables = [
    block
    for block in tag_hierarchy_page.children
    if isinstance(block, CollectionViewBlock) and block.title != 'Unhandled'
]

In [95]:
def get_feature_df(tag_table):
    """Build a DataFrame of the tagged features listed in one Notion table.

    Args:
        tag_table: Collection view block; its ``title`` becomes the
            ``category`` value and its collection rows become the features.

    Returns:
        DataFrame with the columns ``category``, ``feature``, ``tags``,
        ``description`` and ``quantities``, holding only the rows that have
        at least one tag. A table without rows yields an empty frame with
        those same columns.
    """
    columns = ['category', 'feature', 'tags', 'description', 'quantities']
    records = [row.get_all_properties() for row in tag_table.collection.get_rows()]
    if not records:
        # Without rows the frame would have no "tags" column to filter on,
        # so return an empty frame with the expected columns instead.
        return pd.DataFrame(columns=columns)

    df = pd.DataFrame(records)
    df['category'] = tag_table.title
    df = df[df.tags.apply(len) != 0]  # leave out rows that have no tags
    return df[columns]

In [96]:
# Stack the per-table frames into one frame with a fresh 0..n-1 index.
features = pd.concat(
    [get_feature_df(table) for table in tag_tables],
    ignore_index=True,
)

In [88]:
# Tally how many times each tag occurs across all features.
counts = {}
for feature in features.itertuples():
    for tag in feature.tags:
        counts[tag] = counts.get(tag, 0) + 1

# Show only the tags that occur more than once.
{tag_name: n_uses for tag_name, n_uses in counts.items() if n_uses > 1}

{'natural village quarantine': 2}

In [144]:
def get_tag_description(tag, description):
    """Return the part of ``description`` that belongs to ``tag``.

    Some descriptions are subdivided per tag, with each section introduced by
    the tag name wrapped in markdown emphasis and followed by a colon, e.g.
    ``**some tag**: text`` or ``__some tag__: text``.

    Args:
        tag: Tag name to look for; matched literally.
        description: Full description text of the feature.

    Returns:
        The stripped text that follows the tag's marker up to the end of that
        line, or ``description`` unchanged when no marker for ``tag`` exists.
    """
    # Escape the tag so characters such as "(", "+" or "." are matched
    # literally instead of being interpreted as regex syntax.
    pattern = f'[*_]{{2}}{re.escape(tag)}[*_:]{{3}}(.+)'
    match = re.search(pattern, description)
    return match[1].strip() if match else description

def get_tag_quantity(tag, quantities):
    """Work out the quantity that applies to ``tag``.

    Args:
        tag: Tag name to look up; matched literally.
        quantities: Raw text of the feature's ``quantities`` value.

    Returns:
        ``1`` when ``quantities`` is exactly ``'1'`` or ``'1 for all'``; the
        number following ``"<tag>: "`` as a float when such an entry exists
        and is a valid number; otherwise ``quantities`` unchanged.
    """
    if quantities in ['1', '1 for all']:
        return 1

    # Escape the tag so regex metacharacters in it are matched literally.
    match = re.search(f'{re.escape(tag)}: ([ \\d\\.]+)', quantities)
    if not match:
        return quantities

    try:
        return float(match[1])
    except ValueError:
        # The character class also admits strings such as " . " or "1.2.3";
        # fall back to the raw text rather than failing on them.
        return quantities

# Expand the features into one row per (feature, tag) pair, narrowing the
# description and quantity down to the individual tag where possible.
tags = pd.DataFrame([
    {
        'category': feature.category,
        'feature': feature.feature,
        'tag': tag,
        'description': get_tag_description(tag, feature.description),
        'quantity': get_tag_quantity(tag, feature.quantities),
    }
    for feature in features.itertuples()
    for tag in feature.tags
])

In [145]:
# Display the per-tag table as this cell's output.
tags

Unnamed: 0,category,feature,tag,description,quantity
0,Isolation,Symptomatic isolation - targeted,contact isolation - symptoms,Close contacts of confirmed cases directed to ...,1
1,Isolation,Symptomatic isolation - targeted,cohort isolation - symptoms,__confirmed case isolation__: Isolation of con...,1
2,Isolation,Symptomatic isolation - blanket,cluster isolation - symptoms,Entire districts or large groups (e.g. every m...,1
3,Isolation,Symptomatic isolation - blanket,blanket isolation - symptoms,Entire cities or countries directed to isolate...,2
4,Isolation,Asymptomatic isolation - targeted,contact isolation - no symptoms,close contacts (i.e. more than 15 minutes of c...,1
...,...,...,...,...,...
93,Testing and contact tracing,Tracing technology,bracelet based location tracking,Technology to improve the efficacy of contact ...,Compliance (percentage of people using the tec...
94,Testing and contact tracing,Tracing technology,phone based location tracing,Technology to improve the efficacy of contact ...,Compliance (percentage of people using the tec...
95,Testing and contact tracing,Tracing technology,public transport occupancy tracing,Technology to improve the efficacy of contact ...,Compliance (percentage of people using the tec...
96,Testing and contact tracing,Diagnostic criteria loosened,diagnostic criteria loosened,Record when changes are made to diagnostic cri...,1
