In [1]:
%pip install h3
%pip install tqdm
%pip install psycopg[binary]

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


In [130]:
import asyncio
import json
import os

import h3
import numpy as np
import pandas as pd
import psycopg
import tqdm

### Calculate Transit Accessibility Scores
#### Cell types:
* `-1`: geo cell, no station here
* ` 0`: bus stop
* ` 1`: tram / ferry stop
* ` 2`: metro / s-tog station
* ` 3`: train station


In [131]:
# Tunable parameters for the score propagation.
H3_RES = 11  # presumably the H3 resolution of the stored cells (TODO confirm); unused in the visible cells
HOURS = 7 * 24  # length of the per-cell score vector: one slot per hour of a week
TRANSIT_TYPE = 3  # station type processed in this run; 3 = train station (see "Cell types")
MAX_DISTANCE = 32  # calc_score returns only zeros once the distance reaches this value

In [132]:
# The connection string comes from the TRANSIT_DB_CONN environment variable so
# that credentials never live in the notebook. The fallback deliberately carries
# no password: libpq (used by psycopg) picks it up from PGPASSWORD or ~/.pgpass.
# NOTE(review): the password that used to be committed here should be rotated.
DB_CONN = os.environ.get(
    "TRANSIT_DB_CONN",
    "postgresql://postgres@srv-captain--postgres-db-db/postgres",
)

In [133]:
# reset scores in database
async with await psycopg.AsyncConnection.connect(DB_CONN) as conn:
    async with conn.cursor() as cursor:
        await cursor.execute(f"""
        update transit
            set scores = array []::float4[], 
            visitors = array []::text[]
            where cardinality(visitors) != 0
        """)

        await conn.commit()

In [134]:
# Development aid: restrict the station query to a single h3_4 region.
# To enable the filter, swap which of the two SELECTED_H3_4_QUERY lines is active.
SELECTED_H3_4 = "841f059ffffffff"
# SELECTED_H3_4_QUERY = f"and h3_4 = '{SELECTED_H3_4}'"
SELECTED_H3_4_QUERY = ''  # empty fragment: no region filter is added to the query

#### Load entrypoint (station) data

In [135]:
# load all stops for a specific type from the database
async with await psycopg.AsyncConnection.connect(DB_CONN) as conn:
    async with conn.cursor() as cursor:
        await cursor.execute(f"""
        select * from transit
            where type = {TRANSIT_TYPE}
            {SELECTED_H3_4_QUERY}
        """)

        stations = await cursor.fetchall()

In [136]:
# add all stations to a queue
queue = asyncio.Queue()
for station in stations:
    # (station origin h3 cell, current h3 cell, scores array, distance)
    await queue.put((station[0], station[0], station[3], 0))

#### Functions

In [137]:
# define formula here
async def calc_score(origin_scores, distance, type):
    """Derive the scores handed on to a cell from the scores of the cell before it.

    Every score greater than 1 is reduced by one while ``distance`` is below
    ``MAX_DISTANCE``; all other entries become 0.

    Args:
        origin_scores: iterable of numeric scores of the previous cell.
        distance: distance of the target cell from the origin station.
        type: transit type; part of the signature but not used by the formula.

    Returns:
        A new list with one entry per entry of ``origin_scores``.
    """
    return [
        score - 1 if score > 1 and distance < MAX_DISTANCE else 0
        for score in origin_scores
    ]

In [138]:
async def update_scores(origin_h3, current_h3, scores, distance):
    """Propagate one station's scores from ``current_h3`` to its direct neighbours.

    For every neighbouring cell that exists in the ``transit`` table and has not
    yet been reached from ``origin_h3``, the propagated scores are appended to the
    cell's ``scores`` array, ``origin_h3`` is recorded in its ``visitors`` array,
    and the neighbour is put on the global ``queue`` for further expansion.

    Args:
        origin_h3: h3 index of the station the scores originate from.
        current_h3: h3 index of the cell whose neighbours are updated.
        scores: scores held by ``current_h3`` for this origin.
        distance: number of steps already taken from the origin to ``current_h3``.
    """
    print(f"calculating {current_h3}, dist={distance}")

    # The propagated scores depend only on the incoming scores and the distance,
    # so they are computed once instead of once per neighbour.
    neighbor_scores = await calc_score(scores, distance + 1, TRANSIT_TYPE)
    if not sum(neighbor_scores) > 0:
        # Nothing left to hand on: no row would be updated, so skip the database.
        return

    # get the h3 k-ring (ring size 1) around current_h3, excluding the cell itself
    neighbors = h3.k_ring(current_h3, k=1)
    neighbors.discard(current_h3)

    # calc_score mixes int zeros with decremented values; normalise to float so
    # the list adapts cleanly to a float4[] query parameter.
    score_values = [float(value) for value in neighbor_scores]

    async with await psycopg.AsyncConnection.connect(DB_CONN) as conn:
        async with conn.cursor() as cursor:
            # keep only neighbours that exist in the table and were not yet
            # visited on behalf of this origin station
            await cursor.execute("""
                select h3 from transit
                    where h3 = any(%s)
                    and %s != all(visitors)
            """, [list(neighbors), origin_h3])

            db_neighbors = await cursor.fetchall()

            for row in db_neighbors:
                neighbor = row[0]

                # Values are passed as query parameters instead of being
                # formatted into the SQL text.
                await cursor.execute("""
                    update transit
                        set scores = array_cat(scores, %s::float4[]),
                        visitors = array_append(visitors, %s::text)
                        where h3 = %s
                """, [score_values, origin_h3, neighbor])

                if distance < MAX_DISTANCE:
                    await queue.put((origin_h3, neighbor, neighbor_scores, distance + 1))

        await conn.commit()

#### Execution Loop

In [139]:
while not queue.empty():
    current = await queue.get()
    await asyncio.gather(
        update_scores(current[0], current[1], current[2], current[3])
    )

calculating 8b1f05831152fff, dist=0
calculating 8b1f266966c0fff, dist=0
calculating 8b1f230f0793fff, dist=0
calculating 8b1f240515b3fff, dist=0
calculating 8b1f232108c0fff, dist=0
calculating 8b1f3155dc66fff, dist=0
calculating 8b1f3142b7a2fff, dist=0
calculating 8b1f3155a810fff, dist=0
calculating 8b1f05b95151fff, dist=0
calculating 8b1f05b84565fff, dist=0
calculating 8b1f05ce66d0fff, dist=0
calculating 8b1f26d1b69afff, dist=0
calculating 8b1f15ad3150fff, dist=0
calculating 8b1f06440d40fff, dist=0
calculating 8b1f31623b0dfff, dist=0
calculating 8b1f15a8d126fff, dist=0
calculating 8b1f15a89156fff, dist=0
calculating 8b1f15ba50e3fff, dist=0
calculating 8b1f2e0d282efff, dist=0
calculating 8b1f2e7206aafff, dist=0
calculating 8b1f2324a00bfff, dist=0
calculating 8b1f234b3173fff, dist=0
calculating 8b1f15ad0756fff, dist=0
calculating 8b1f3162c131fff, dist=0
calculating 8b1f06ce2612fff, dist=0
calculating 8b1f06d0d066fff, dist=0
calculating 8b1f31709daafff, dist=0
calculating 8b1f25a93270fff,

#### Aggregate results and export for visualization

In [73]:
# get all h3_4 regions and perform the aggregation on a per-region basis
async with await psycopg.AsyncConnection.connect(DB_CONN) as conn:
    async with conn.cursor() as cursor:
        await cursor.execute("""
        select distinct(h3_4) from transit
        """)

        h3_4_regions = await cursor.fetchall()

In [129]:
# Unwrap the single-column rows returned by fetchall() into plain region ids.
# The isinstance guard keeps this cell idempotent: re-running it on the already
# flattened list would otherwise replace every id with its first character.
h3_4_regions = [
    row[0] if isinstance(row, (tuple, list)) else row
    for row in h3_4_regions
]

In [122]:
def _sum_hexagon_scores(hexagons):
    """Collapse fetched transit rows into one summed score vector per h3_10 cell."""
    frame = pd.DataFrame(hexagons, columns=['h3', 'freq', 'scores', 'cardinality', 'type'])

    result = []
    for h3_10, cells in frame.groupby('h3'):
        score_sum = np.array([0] * HOURS)

        for freq, scores, cardinality in zip(cells['freq'], cells['scores'], cells['cardinality']):
            # `scores` holds the concatenated contributions of all visitors;
            # split it into `cardinality` equal parts and add each one.
            for visitor_scores in np.array_split(scores, cardinality):
                score_sum = np.add(score_sum, visitor_scores)
            # the cell's own freq values are added on top
            score_sum = np.add(score_sum, freq)

        result.append([h3_10, max(cells['type']), list(score_sum)])
    return result


async def aggregate_region(h3_4):
    """Aggregate the scores of one h3_4 region and export them as JSON.

    Loads every visited row of the region, sums the scores per ``h3_10`` cell and
    writes the records (``h3``, ``type``, ``freq``) to ``../docs/h3/<h3_4>.json``.

    Args:
        h3_4: identifier of the region, as stored in the ``h3_4`` column.
    """
    # load the results for a single region; the region id is passed as a query
    # parameter instead of being formatted into the SQL text
    async with await psycopg.AsyncConnection.connect(DB_CONN) as conn:
        async with conn.cursor() as cursor:
            await cursor.execute("""
            select h3_10, freq, scores, cardinality(visitors), type from transit
                where h3_4 = %s
                and cardinality(visitors) > 0
                order by h3_10
            """, [h3_4])

            hexagons = await cursor.fetchall()

    # The connection is released before the CPU-bound aggregation and file output.
    result = _sum_hexagon_scores(hexagons)
    pd.DataFrame(result, columns=['h3', 'type', 'freq']).to_json(f"../docs/h3/{h3_4}.json", orient='records')


In [None]:
for h3_4 in h3_4_regions:
    await asyncio.gather(
        aggregate_region(h3_4)
    )