# Demo: Berapa banyak bangunan se-Indonesia menurut sumber open data?

Notebook ini menghitung jumlah bangunan se-Indonesia berdasarkan data Google dan Microsoft Open Buildings, baik secara total maupun per sumber data.

Sumber data: Source Cooperative (source.coop), repositori `vida/google-microsoft-open-buildings` dalam format GeoParquet.

In [1]:
%%writefile requirements.txt
# Packages installed by the `%pip install -r requirements.txt` cell of this notebook.
# NOTE(review): only duckdb is imported in the cells visible here; confirm geopandas and pyarrow are still needed.

duckdb
geopandas
pyarrow

Writing requirements.txt


In [2]:
%pip install -r requirements.txt --quiet

In [3]:
import duckdb

In [4]:
# Open an in-memory DuckDB database and enable the extensions used by this notebook.
con = duckdb.connect(database=":memory:")

# Install each extension and load it right away: httpfs first, then spatial.
for extension_name in ('httpfs', 'spatial'):
    con.install_extension(extension_name)
    con.load_extension(extension_name)

# Smoke test: build a point with st_point and display the result.
print("Checking DuckDB Installation...")
con.query('select st_point(0,0);')

Checking DuckDB Installation...


┌────────────────┐
│ st_point(0, 0) │
│    geometry    │
├────────────────┤
│ POINT (0 0)    │
└────────────────┘

In [5]:
# Dataset settings for Indonesia.
# Country code as it appears in the dataset's `country_iso=` partition folders.
country_iso = "IDN"
# NOTE(review): `partitions` is not referenced by the load query further down,
# which hard-codes "by_country_s2" in its path; confirm which layout is intended.
partitions = "by_country"
# Root location of the GeoParquet files.
prefix = "s3://us-west-2.opendata.source.coop/vida/google-microsoft-open-buildings/geoparquet"

In [6]:
# Use the S2-indexed partition and DuckDB's parquet_scan to (re)create the
# `idn_buildings` table from every Parquet file matched by the glob.
idn_parquet_glob = f"{prefix}/by_country_s2/country_iso={country_iso}/*.parquet"
load_idn_sql = f"""
    create or replace table idn_buildings as
      select * from parquet_scan('{idn_parquet_glob}')
"""
con.execute(load_idn_sql)

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

<duckdb.duckdb.DuckDBPyConnection at 0x79e3bc369170>

In [9]:
# Which columns does the table contain?
describe_sql = 'describe idn_buildings'
con.query(describe_sql)

┌────────────────┬─────────────┬─────────┬─────────┬─────────┬─────────┐
│  column_name   │ column_type │  null   │   key   │ default │  extra  │
│    varchar     │   varchar   │ varchar │ varchar │ varchar │ varchar │
├────────────────┼─────────────┼─────────┼─────────┼─────────┼─────────┤
│ geometry       │ BLOB        │ YES     │ NULL    │ NULL    │ NULL    │
│ boundary_id    │ BIGINT      │ YES     │ NULL    │ NULL    │ NULL    │
│ bf_source      │ VARCHAR     │ YES     │ NULL    │ NULL    │ NULL    │
│ confidence     │ DOUBLE      │ YES     │ NULL    │ NULL    │ NULL    │
│ area_in_meters │ DOUBLE      │ YES     │ NULL    │ NULL    │ NULL    │
│ s2_id          │ BIGINT      │ YES     │ NULL    │ NULL    │ NULL    │
│ country_iso    │ VARCHAR     │ YES     │ NULL    │ NULL    │ NULL    │
└────────────────┴─────────────┴─────────┴─────────┴─────────┴─────────┘

In [7]:
# Total number of buildings across all of Indonesia.
con.query(
    'select count(*) from idn_buildings;'
)

┌──────────────┐
│ count_star() │
│    int64     │
├──────────────┤
│    135174610 │
└──────────────┘

In [8]:
# Number of buildings per data source (Google vs. Microsoft).
# GROUP BY alone does not guarantee row order, so sort explicitly (largest
# source first) to keep the displayed table stable across re-runs.
con.query('''
    SELECT bf_source AS data_source, COUNT(*) AS buildings_count
    FROM idn_buildings
    GROUP BY data_source
    ORDER BY buildings_count DESC;
''')

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

┌─────────────┬─────────────────┐
│ data_source │ buildings_count │
│   varchar   │      int64      │
├─────────────┼─────────────────┤
│ google      │       127723559 │
│ microsoft   │         7451051 │
└─────────────┴─────────────────┘

In [13]:
# Preview ten rows that come from the Google source only.
google_preview_sql = '''
from idn_buildings where bf_source = 'google' limit 10;'''
con.query(google_preview_sql)

┌──────────────────────────┬─────────────┬───────────┬────────────┬────────────────┬─────────────────────┬─────────────┐
│         geometry         │ boundary_id │ bf_source │ confidence │ area_in_meters │        s2_id        │ country_iso │
│           blob           │    int64    │  varchar  │   double   │     double     │        int64        │   varchar   │
├──────────────────────────┼─────────────┼───────────┼────────────┼────────────────┼─────────────────────┼─────────────┤
│ \x01\x03\x00\x00\x00\x…  │         181 │ google    │     0.6875 │        66.4818 │ 3191644760922128384 │ IDN         │
│ \x01\x03\x00\x00\x00\x…  │         181 │ google    │     0.6875 │        62.4271 │ 3191644760922128384 │ IDN         │
│ \x01\x03\x00\x00\x00\x…  │         181 │ google    │     0.6875 │        55.0361 │ 3191644760922128384 │ IDN         │
│ \x01\x03\x00\x00\x00\x…  │         181 │ google    │     0.6875 │        27.7609 │ 3191644760922128384 │ IDN         │
│ \x01\x03\x00\x00\x00\x…  │    

In [None]:
# Export the Google-sourced rows to a local Parquet file. This step is slow.
export_google_sql = """
  COPY (select * from idn_buildings where bf_source = 'google') TO 'idn_buildings_google.parquet'
"""
con.query(export_google_sql)