In [7]:
import duckdb
import geopandas as gpd
import jenkspy
from lonboard import BitmapTileLayer, Map, PolygonLayer
from lonboard.colormap import apply_categorical_cmap
import numpy as np
import pyarrow as pa

# DuckDB connection shared by every query in this notebook. The spatial
# extension is installed and then loaded on that same connection.
SPATIAL_EXTENSION = "spatial"

con = duckdb.connect()
con.install_extension(SPATIAL_EXTENSION)
con.load_extension(SPATIAL_EXTENSION)

In [8]:
# Basemap: Google Satellite raster tiles (the only basemap defined here; no
# OpenStreetMap layer is configured in this cell).
#
# The tile URL uses HTTPS: when the notebook itself is served over HTTPS
# (JupyterHub, hosted notebooks), browsers block plain-HTTP tile requests as
# mixed content and the basemap silently stays blank.
# NOTE(review): confirm that use of these tiles is permitted for your deployment.
basemap = BitmapTileLayer(
    data="https://mt0.google.com/vt/lyrs=s&hl=en&x={x}&y={y}&z={z}",
    tile_size=256,
    # NOTE(review): presumably a non-positive value disables request
    # throttling -- confirm against the lonboard BitmapTileLayer docs.
    max_requests=-1,
    min_zoom=0,
    max_zoom=19,
)

# % of People Making Over $100,000 For Select Cities

In [9]:
# Build `geo_data`: one row per dissemination area (DA) inside the listed
# census subdivisions, joining the tabular census counts (`cop`) to the DA
# boundary geometries (`geo`) on `da_dguid`.
#
# `percentage_over_100k` is count_total_168 as a percentage of count_total_155.
# NOTE(review): presumably 155 is the income-group total and 168 the
# "$100,000 and over" count -- confirm against the census characteristic IDs.
con.execute("""
DROP TABLE IF EXISTS geo_data;
CREATE TABLE geo_data AS
SELECT
    geo.da_dguid,
    cop.count_total_1,
    cop.count_total_155,
    cop.count_total_168,
    -- NULLIF turns a zero denominator into NULL so the division is skipped;
    -- COALESCE then maps every NULL outcome (missing or zero input) to 0.
    COALESCE(
        (cop.count_total_168 / NULLIF(cop.count_total_155, 0)) * 100,
        0
    ) AS percentage_over_100k,
    geo.geom
FROM
    'https://data-01.dataforcanada.org/processed/statistics_canada/census_of_population/2021/tabular/da_2021.parquet' AS cop
JOIN
    'https://data-01.dataforcanada.org/processed/statistics_canada/boundaries/2021/digital_boundary_files/da_2021.parquet' AS geo
    ON cop.da_dguid = geo.da_dguid
WHERE geo.csd_dguid IN (
    '2021A00056001009', -- Whitehorse, YT
    '2021A00056106023', -- Yellowknife, NT
    '2021A00056204003', -- Iqaluit, NU
    '2021A00055915022', -- Vancouver, BC
    '2021A00054806016', -- Calgary, AB
    '2021A00054706027', -- Regina, SK
    '2021A00054611040', -- Winnipeg, MB
    '2021A00053506008', -- Ottawa, ON
    '2021A00052466023', -- Montréal, QC
    '2021A00051301006', -- Saint John, NB
    '2021A00051102075', -- Charlottetown, PE
    '2021A00051209034', -- Halifax, NS
    '2021A00051001519' -- St. John's, NL
    );
""")

# Persist the table as Parquet so it can be read back with GeoPandas.
# The trailing semicolon keeps the returned connection object from being
# echoed as the cell's output.
con.execute("""
COPY geo_data TO './da_2021_characteristic.parquet' (FORMAT PARQUET);
""");

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

<duckdb.duckdb.DuckDBPyConnection at 0x7fde966d7b70>

In [10]:
# Fetch the distinct percentages from DuckDB. fetchall() returns one tuple per
# row, so only the first field of each row is kept. Because the query is
# DISTINCT, a percentage shared by many areas is passed to Jenks only once.
characteristic_values = con.execute("SELECT DISTINCT percentage_over_100k FROM geo_data").fetchall()
values = np.array([row[0] for row in characteristic_values])

# Jenks natural-breaks classification over those values.
num_classes = 5
breaks = jenkspy.jenks_breaks(values, n_classes=num_classes)

# Pair every break with its successor: one (lower, upper) tuple per bin.
bin_ranges = list(zip(breaks[:-1], breaks[1:]))

# Map a numeric value to the label of the Jenks bin that contains it.
def jenks_range(value, ranges=None) -> str:
    """Return the ``"low-high"`` label of the first bin containing ``value``.

    Args:
        value: Number to classify. ``None`` and NaN never match a bin.
        ranges: Optional iterable of ``(low, high)`` pairs to search. When
            omitted, the module-level ``bin_ranges`` is used, as looked up at
            call time.

    Returns:
        ``f"{int(low)}-{int(high)}"`` for the first pair with
        ``low <= value <= high`` (both bounds inclusive, so a value sitting
        exactly on a shared break belongs to the lower bin), or ``"unknown"``
        when no pair matches.
    """
    if value is None:
        # Comparing None with a number raises TypeError; treat it as unmatched.
        return "unknown"
    if ranges is None:
        ranges = bin_ranges
    for low, high in ranges:
        if low <= value <= high:
            return f"{int(low)}-{int(high)}"
    return "unknown"


# Read the exported table back as a GeoDataFrame and label every row with the
# Jenks bin its percentage falls into.
characteristic_df = gpd.read_parquet('./da_2021_characteristic.parquet')
characteristic_df['category'] = (
    characteristic_df["percentage_over_100k"].apply(jenks_range).astype('category')
)

# Categories to colors
# White-to-red ramp with exactly one step per Jenks bin, as integer 0-255
# channels. With five bins this gives 255, 191, 127, 63, 0 for green/blue.
colors = [
    [255, int(level), int(level)]
    for level in np.linspace(255, 0, len(bin_ranges))
]

# Tie each colour to its bin's position in `bin_ranges` rather than to the
# sorted labels found in the data: a bin keeps its colour even if another bin
# has no rows, and labels whose truncated lower bounds coincide cannot swap.
cmap = {
    f"{int(low)}-{int(high)}": color
    for (low, high), color in zip(bin_ranges, colors)
}
# jenks_range returns "unknown" for values outside every bin (e.g. NaN); give
# that label a neutral grey so the colour lookup never fails on it.
cmap.setdefault("unknown", [128, 128, 128])

In [11]:
# Translate each row's category label into a colour through the `cmap` lookup.
category_labels = pa.array(characteristic_df['category'])
get_color = apply_categorical_cmap(category_labels, cmap)

# Polygon layer over the GeoDataFrame: fill comes from the per-row colours
# above, outlines are white, and hovering highlights a polygon.
cop_layer = PolygonLayer.from_geopandas(
    gdf=characteristic_df,
    get_fill_color=get_color,
    opacity=0.4,
    stroked=True,
    get_line_color=[255, 255, 255],
    get_line_width=5,
    line_width_units="meters",
    line_width_min_pixels=0.2,
    auto_highlight=True,
)

In [None]:
# Compose the map from the basemap tiles and the census polygon layer, listed
# in that order.
layers = [basemap, cop_layer]
m = Map(layers=layers)

# Bare last expression so the notebook renders the map widget.
m

# % of People Making Over $100,000 For The Whole Country

In [14]:
# Rebuild `geo_data` for every dissemination area (DA) in the country: the
# tabular census counts (`cop`) joined to the DA boundary geometries (`geo`)
# on `da_dguid`, with no census-subdivision filter.
#
# `percentage_over_100k` is count_total_168 as a percentage of count_total_155.
# NOTE(review): presumably 155 is the income-group total and 168 the
# "$100,000 and over" count -- confirm against the census characteristic IDs.
con.execute("""
DROP TABLE IF EXISTS geo_data;
CREATE TABLE geo_data AS
SELECT
    geo.da_dguid,
    cop.count_total_1,
    cop.count_total_155,
    cop.count_total_168,
    -- NULLIF turns a zero denominator into NULL so the division is skipped;
    -- COALESCE then maps every NULL outcome (missing or zero input) to 0.
    COALESCE(
        (cop.count_total_168 / NULLIF(cop.count_total_155, 0)) * 100,
        0
    ) AS percentage_over_100k,
    geo.geom
FROM
    'https://data-01.dataforcanada.org/processed/statistics_canada/census_of_population/2021/tabular/da_2021.parquet' AS cop
JOIN
    'https://data-01.dataforcanada.org/processed/statistics_canada/boundaries/2021/digital_boundary_files/da_2021.parquet' AS geo
    ON cop.da_dguid = geo.da_dguid;
""")

# Persist the table as Parquet so it can be read back with GeoPandas.
# This is the same path the select-cities section writes to, so that earlier
# export is overwritten here.
con.execute("""
COPY geo_data TO './da_2021_characteristic.parquet' (FORMAT PARQUET);
""")

# Fetch the distinct percentages from DuckDB. fetchall() returns one tuple per
# row, so only the first field of each row is kept. Because the query is
# DISTINCT, a percentage shared by many areas is passed to Jenks only once.
characteristic_values = con.execute("SELECT DISTINCT percentage_over_100k FROM geo_data").fetchall()
values = np.array([row[0] for row in characteristic_values])

# Jenks natural-breaks classification over those values.
num_classes = 5
breaks = jenkspy.jenks_breaks(values, n_classes=num_classes)

# Pair every break with its successor: one (lower, upper) tuple per bin.
bin_ranges = list(zip(breaks[:-1], breaks[1:]))

# Map a numeric value to the label of the Jenks bin that contains it.
# (Defined in this cell as well so the cell can run on its own.)
def jenks_range(value, ranges=None) -> str:
    """Return the ``"low-high"`` label of the first bin containing ``value``.

    Args:
        value: Number to classify. ``None`` and NaN never match a bin.
        ranges: Optional iterable of ``(low, high)`` pairs to search. When
            omitted, the module-level ``bin_ranges`` is used, as looked up at
            call time.

    Returns:
        ``f"{int(low)}-{int(high)}"`` for the first pair with
        ``low <= value <= high`` (both bounds inclusive, so a value sitting
        exactly on a shared break belongs to the lower bin), or ``"unknown"``
        when no pair matches.
    """
    if value is None:
        # Comparing None with a number raises TypeError; treat it as unmatched.
        return "unknown"
    if ranges is None:
        ranges = bin_ranges
    for low, high in ranges:
        if low <= value <= high:
            return f"{int(low)}-{int(high)}"
    return "unknown"


# Read the exported table back as a GeoDataFrame and label every row with the
# Jenks bin its percentage falls into.
characteristic_df = gpd.read_parquet('./da_2021_characteristic.parquet')
characteristic_df['category'] = (
    characteristic_df["percentage_over_100k"].apply(jenks_range).astype('category')
)

# Categories to colors
# White-to-red ramp with exactly one step per Jenks bin, as integer 0-255
# channels. With five bins this gives 255, 191, 127, 63, 0 for green/blue.
colors = [
    [255, int(level), int(level)]
    for level in np.linspace(255, 0, len(bin_ranges))
]

# Tie each colour to its bin's position in `bin_ranges` rather than to the
# sorted labels found in the data: a bin keeps its colour even if another bin
# has no rows, and labels whose truncated lower bounds coincide cannot swap.
cmap = {
    f"{int(low)}-{int(high)}": color
    for (low, high), color in zip(bin_ranges, colors)
}
# jenks_range returns "unknown" for values outside every bin (e.g. NaN); give
# that label a neutral grey so the colour lookup never fails on it.
cmap.setdefault("unknown", [128, 128, 128])


# Translate each row's category label into a colour through the `cmap` lookup.
category_labels = pa.array(characteristic_df['category'])
get_color = apply_categorical_cmap(category_labels, cmap)

# Polygon layer over the GeoDataFrame: fill comes from the per-row colours
# above, outlines are white, and hovering highlights a polygon.
cop_layer = PolygonLayer.from_geopandas(
    gdf=characteristic_df,
    get_fill_color=get_color,
    opacity=0.4,
    stroked=True,
    get_line_color=[255, 255, 255],
    get_line_width=5,
    line_width_units="meters",
    line_width_min_pixels=0.2,
    auto_highlight=True,
)

In [16]:
# Compose the map from the basemap tiles and the census polygon layer, listed
# in that order.
layers = [basemap, cop_layer]
m = Map(layers=layers)

# Bare last expression so the notebook renders the map widget.
m

Map(custom_attribution='', layers=(BitmapTileLayer(data='http://mt0.google.com/vt/lyrs=s&hl=en&x={x}&y={y}&z={…