In [36]:
import buckaroo
import duckdb
from IPython.core.interactiveshell import InteractiveShell
from lonboard import viz
import pandas as pd

# Echo the value of every top-level expression in a cell, not only the last one.
InteractiveShell.ast_node_interactivity = "all"

# Lift the pandas column cap so wide frames are displayed without truncation.
pd.options.display.max_columns = None

# Directory that receives every file exported by the cells below.
output_data_folder = "/data/experiments"

In [2]:
# Open a DuckDB connection with no database path given (DuckDB's default is
# an in-memory database), shared by every query cell in this notebook.
con = duckdb.connect()

# The spatial extension supplies the ST_* functions and the GDAL COPY format
# used further down; it has to be installed first and loaded second.
for enable_extension in (con.install_extension, con.load_extension):
    enable_extension("spatial")

# 1.0 Total private dwellings and private dwellings per square kilometer for Ottawa
These values come from Statistics Canada's 2021 Census of Population, joined to the 2021 dissemination area (DA) boundaries.

In [28]:
# Build `geo_data`: one row per 2021 dissemination area in the Ottawa census
# subdivision, carrying the private-dwelling count (`count_total_4`, per the
# section heading), that count per square kilometre, and the DA geometry.
#
# Fix: the density column is named `count_total_4_per_square_km`, but it was
# being computed from `count_total_1` (a different census characteristic).
# It now divides `count_total_4`, so the value matches its name and the heading.
#
# ST_Area_Spheroid returns square metres, hence the division by 1,000,000.
# NOTE(review): DuckDB's documentation states that ST_Area_Spheroid assumes
# EPSG:4326 input in [latitude, longitude] axis order. Confirm the axis order
# of `geo.geom`; if it is [longitude, latitude], wrap it in
# ST_FlipCoordinates(...) before measuring the area.
#
# CREATE OR REPLACE keeps the cell safely re-runnable, as DROP + CREATE did.
con.execute("""
CREATE OR REPLACE TABLE geo_data AS
SELECT
    geo.da_dguid,
    cop.count_total_4,
    CAST(round(cop.count_total_4 / (ST_Area_Spheroid(geo.geom) / 1000000.0), 0) AS INTEGER) AS count_total_4_per_square_km,
    geo.geom
FROM 'https://data.dataforcanada.org/processed/statistics_canada/census_of_population/2021/tabular/da_2021.parquet' AS cop
JOIN 'https://data.dataforcanada.org/processed/statistics_canada/boundaries/2021/digital_boundary_files/da_2021.parquet' AS geo
  ON cop.da_dguid = geo.da_dguid
WHERE geo.csd_name IN ('Ottawa');
""")

<duckdb.duckdb.DuckDBPyConnection at 0x7f0704bb5e70>

## 1.1 Export result as GeoJSON

In [37]:
# Write the whole `geo_data` table to a GeoJSON file in the output folder,
# going through the spatial extension's GDAL writer.
geojson_export_sql = f"""
COPY geo_data TO '{output_data_folder}/da_2021_private_dwellings.geojson'
WITH (
  FORMAT GDAL,
  DRIVER 'GeoJSON',
  GEOMETRY_TYPE 'POLYGON',
  SRS 'EPSG:4326'
);
"""
con.execute(geojson_export_sql)

<duckdb.duckdb.DuckDBPyConnection at 0x7f0704bb5e70>

## 1.2 Export result as file geodatabase

In [38]:
# Write the whole `geo_data` table to a file geodatabase (.gdb) in the output
# folder, using GDAL's OpenFileGDB driver.
file_gdb_export_sql = f"""
COPY geo_data TO '{output_data_folder}/da_2021_private_dwellings.gdb'
WITH (
  FORMAT GDAL,
  DRIVER 'OpenFileGDB',
  GEOMETRY_TYPE 'POLYGON',
  SRS 'EPSG:4326'
);
"""
con.execute(file_gdb_export_sql)

<duckdb.duckdb.DuckDBPyConnection at 0x7f0704bb5e70>

## 1.3 Export result as GeoParquet

In [39]:
# Write the whole `geo_data` table to a Parquet file in the output folder,
# using DuckDB's native Parquet writer rather than GDAL.
parquet_export_sql = f"""
COPY geo_data TO '{output_data_folder}/da_2021_private_dwellings.parquet' (FORMAT PARQUET);
"""
con.execute(parquet_export_sql)

<duckdb.duckdb.DuckDBPyConnection at 0x7f0704bb5e70>