In [1]:
%pip install h3
%pip install tqdm
%pip install psycopg[binary]

Collecting h3
  Downloading h3-3.7.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m14.1 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hInstalling collected packages: h3
Successfully installed h3-3.7.4
Note: you may need to restart the kernel to use updated packages.
Collecting tqdm
  Downloading tqdm-4.64.0-py2.py3-none-any.whl (78 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.4/78.4 KB[0m [31m2.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: tqdm
Successfully installed tqdm-4.64.0
Note: you may need to restart the kernel to use updated packages.
Collecting psycopg[binary]
  Downloading psycopg-3.0.12-py3-none-any.whl (143 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m143.1/143.1 KB[0m [31m8.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting psycopg-binary==3.0.12
  Downloading psycopg_binary-3.0.12-cp310-c

In [1]:
import asyncio
import gc
import math
import os

import h3
import numpy as np
import pandas as pd
import psycopg

### Calculate Transit Accessibility Scores
#### Cells
* `-1`: geo cell, no station here
* ` 0`: bus stop
* ` 1`: tram / ferry stop
* ` 2`: metro / s-tog station
* ` 3`: train station

#### Parameters
* transit type multiplier - stops of "better" modes of transit are given a higher initial score
* urban / rural distance factor (accept higher distances in rural areas)
* reach - the area of effect for a given transit type, in hex cells (assume 40m for one res 11 hexagon)

#### Score formula
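Scores decay with distance (score on y, distance on x). The intended shape is a Gaussian; the current `calc_score` implementation uses a clipped quadratic falloff instead: `max(score - f * distance**2, 0)`.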

#### About
This notebook performs the transit accessibility score calculation on top of a postgres database.

In [13]:
# --- Run configuration -------------------------------------------------------
H3_RES = 11          # presumably the H3 resolution of the fine cells - not referenced in the cells shown here
HOURS = 7 * 24       # hourly slots in one week
TRANSIT_TYPE = 0     # transit type processed by this run (see the "Cells" legend)
MAX_DISTANCE = 8     # reach, in hex cells

# Hour-of-week slots used for the calculation: 0-23 and 96-167.
SELECTED_TIMES = [*range(24), *range(96, HOURS)]

# One res 11 hexagon is assumed to be ~40 m across.
radius_m = MAX_DISTANCE * 40
print(f"selected radius for transit type {TRANSIT_TYPE} is ~{radius_m}m")

selected radius for transit type 0 is ~320m


In [4]:
# Credentials must not be stored in the notebook (the password that used to be
# hardcoded here should be rotated). The full connection URI is read from the
# TRANSIT_DB_CONN environment variable; when it is not set we fall back to a
# password-less URI so that libpq can pick the password up from PGPASSWORD or
# ~/.pgpass.
DB_CONN = os.environ.get(
    "TRANSIT_DB_CONN",
    "postgresql://postgres@srv-captain--postgres-db-db/postgres",
)

In [21]:
# reset scores in database
async with await psycopg.AsyncConnection.connect(DB_CONN) as conn:
    async with conn.cursor() as cursor:
        await cursor.execute(f"""
        update transit
            set scores = array []::float4[], 
            visitors = array []::text[]
            where cardinality(visitors) != 0
        """)

        await conn.commit()

In [8]:
# Development aid: restrict the station query to a single h3_4 cell.
# The fragment below is appended verbatim to the WHERE clause of that query.
SELECTED_H3_4 = '841f22bffffffff'
SELECTED_H3_4_QUERY = "and h3_4 = '" + SELECTED_H3_4 + "'"
# To process every region, use an empty fragment instead:
# SELECTED_H3_4_QUERY = ""

#### Load entrypoint (station) data

In [9]:
# load all stops for a specific type from the database
async with await psycopg.AsyncConnection.connect(DB_CONN) as conn:
    async with conn.cursor() as cursor:
        await cursor.execute(f"""
        select * from transit
            where type = {TRANSIT_TYPE}
            {SELECTED_H3_4_QUERY}
        """)

        stations = await cursor.fetchall()

In [16]:
# add all stations to a queue
queue = asyncio.Queue()
for station in stations:
    # (station origin h3 cell at lower res, current h3 cell, scores array, distance)
    await queue.put((h3.h3_to_parent(station[0], 9), station[0], np.array(station[3])[SELECTED_TIMES], 0))

#### Main Functions

In [17]:
# score formula
async def calc_score(origin_scores, distance, type):
    """Attenuate each score in ``origin_scores`` for a cell ``distance`` steps away.

    Every score is reduced by ``f * distance ** 2`` and clipped at zero, where
    ``f`` is 0.01 when ``type`` is greater than 1 and 0.02 otherwise.

    Args:
        origin_scores: iterable of numeric scores.
        distance: number of steps from the origin.
        type: numeric transit type; selects the falloff factor.

    Returns:
        A new list with one attenuated score per input score.
    """
    if type > 1:
        falloff = 0.01
    else:
        falloff = 0.02
    return [max(value - falloff * distance ** 2, 0) for value in origin_scores]

In [18]:
async def update_scores(origin_h3, current_h3, scores, distance, reference_slot=31, min_score=1):
    """Propagate a station's scores from ``current_h3`` to its direct neighbours.

    The scores are attenuated for ``distance + 1`` with ``calc_score``. If the
    attenuated score in ``reference_slot`` is still at least ``min_score``,
    every neighbouring cell that exists in the ``transit`` table, has not yet
    been visited from ``origin_h3`` and has fewer than 5 visitors gets the
    scores appended, is marked as visited, and is pushed onto the global
    ``queue`` for further propagation.

    Args:
        origin_h3: identifier stored in ``visitors`` to mark where the scores
            came from.
        current_h3: h3 cell whose neighbours are processed.
        scores: sequence of scores arriving at ``current_h3``.
        distance: number of steps between the origin and ``current_h3``.
        reference_slot: index into the score sequence used for the cut-off.
        min_score: propagation stops once the reference score drops below it.
    """
    next_distance = distance + 1
    print(f"calculating {current_h3}, dist={next_distance}", end="\r")

    # The attenuated scores depend only on the distance, not on the neighbour,
    # so they are computed once. Converting to plain floats keeps the list
    # homogeneous for the database driver.
    attenuated = await calc_score(scores, next_distance, TRANSIT_TYPE)
    score = [float(value) for value in attenuated]
    if not score[reference_slot] >= min_score:
        # nothing would be written or queued, so skip the database round trip
        return

    # direct neighbours of current_h3 (k-ring of size 1 without the centre)
    neighbors = h3.k_ring(current_h3, k=1)
    neighbors.discard(current_h3)

    async with await psycopg.AsyncConnection.connect(DB_CONN) as conn:
        async with conn.cursor() as cursor:
            # neighbours that exist in the DB and may still take this origin
            await cursor.execute("""
                select h3 from transit
                    where h3 = any(%s)
                    and %s != all(visitors)
                    and cardinality(visitors) < 5
            """, [list(neighbors), origin_h3])

            db_neighbors = await cursor.fetchall()

            for (neighbor,) in db_neighbors:
                # values are bound as parameters instead of being formatted
                # into the SQL text
                await cursor.execute("""
                    update transit
                        set scores = array_cat(scores, %s::float4[]),
                        visitors = array_append(visitors, %s)
                        where h3 = %s
                """, [score, origin_h3, neighbor])

                await queue.put((origin_h3, neighbor, score, next_distance))

        await conn.commit()

#### Execution Loop

In [20]:
while not queue.empty():
    current = await queue.get()
    await asyncio.gather(
        update_scores(current[0], current[1], current[2], current[3])
    )

calculating 8b1f22a50a45fff, dist=3

CancelledError: 

#### Aggregate results and export for visualization

In [56]:
# get all h3_4 regions and perform the aggregation on a per-region basis
async with await psycopg.AsyncConnection.connect(DB_CONN) as conn:
    async with conn.cursor() as cursor:
        await cursor.execute("""
            select distinct(h3_4) from transit
        """)

        h3_4_regions = await cursor.fetchall()

In [57]:
# Unwrap the single-column result rows into plain region ids. Entries that are
# already strings are kept as they are, so re-running this cell does not
# truncate the ids to their first character.
h3_4_regions = [row if isinstance(row, str) else row[0] for row in h3_4_regions]

In [62]:
def _sum_region_scores(hexagons):
    """Collapse the fetched rows into one averaged score vector per ``h3_10`` cell.

    Args:
        hexagons: rows of ``(h3_10, freq, scores, cardinality(visitors), type)``.

    Returns:
        A list of ``[h3_10, max type, averaged score list]`` entries, one per
        ``h3_10`` group.
    """
    frame = pd.DataFrame(hexagons, columns=['h3', 'freq', 'scores', 'cardinality', 'type'])

    result = []  # at res 10
    for h3_10, cells in frame.groupby('h3'):
        # NOTE(review): the accumulator has HOURS entries, while update_scores
        # stores score vectors sliced with SELECTED_TIMES. np.add requires both
        # operands to be broadcast-compatible - confirm the stored width.
        score_sum = np.array([0] * HOURS)

        # one row per res 11 cell
        for freq, scores, cardinality in zip(cells['freq'], cells['scores'], cells['cardinality']):
            # `scores` is the concatenation of one vector per visitor;
            # overlapping contributions are summed
            for subarray in np.array_split(scores, cardinality):
                score_sum = np.add(score_sum, subarray)
            # frequency of the res 11 cell itself (if it is a stop)
            score_sum = np.add(score_sum, freq)

        # average over the rows of the group
        result.append([h3_10, max(cells['type']), list(score_sum / max(len(cells), 1))])
    return result


async def aggregate_region(h3_4):
    """Aggregate the scores of one ``h3_4`` region and export them as JSON.

    Loads every visited row of the region, averages the scores per ``h3_10``
    cell and writes the result to ``../docs/h3/<h3_4>.json``.

    Args:
        h3_4: identifier of the region to aggregate.
    """
    # load the results for a single region; the region id is bound as a query
    # parameter instead of being formatted into the SQL text
    async with await psycopg.AsyncConnection.connect(DB_CONN) as conn:
        async with conn.cursor() as cursor:
            await cursor.execute("""
                select h3_10, freq, scores, cardinality(visitors), type from transit
                    where h3_4 = %s
                    and cardinality(visitors) > 0
                    order by h3_10
            """, [h3_4])

            hexagons = await cursor.fetchall()

    # the connection is released before the CPU-bound aggregation and the export
    result = _sum_region_scores(hexagons)
    pd.DataFrame(result, columns=['h3', 'type', 'freq']).to_json(f"../docs/h3/{h3_4}.json", orient='records')


In [64]:
# export loop
for h3_4 in h3_4_regions:
    await asyncio.gather(
        aggregate_region(h3_4)
    )