In [1]:
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

%matplotlib inline

Let's get the database credentials we need.

In [2]:
import os

OSM_USER=os.getenv('POSTGRES_USER')
OSM_PASS=os.getenv('POSTGRES_PASS')

# This is a hack to be able to run osm2pgsql without interactively entering the password
os.environ['PGPASSWORD'] = OSM_PASS

--hstore
adds any tags not already in a conventional column to a hstore column. With the standard stylesheet this would result in tags like highway appearing in a conventional column while tags not in the style like name:en or lanes:forward would appear only in the hstore column.

--hstore-add-index
adds an index to the hstore columns

In [4]:
%%bash 

osm2pgsql --hstore --latlong \
    --database osm \
    --username $POSTGRES_USER \
    --host osm_db \
    /data/saint_kitts_and_nevis-latest.osm.pbf

osm2pgsql version 0.94.0 (64 bit id space)

Using built-in tag processing pipeline
Using projection SRS 4326 (Latlong)
Setting up table: planet_osm_point
Setting up table: planet_osm_line
Setting up table: planet_osm_polygon
Setting up table: planet_osm_roads
Allocating memory for dense node cache
Allocating dense node cache in one big chunk
Allocating memory for sparse node cache
Sharing dense sparse
Node-cache: cache=800MB, maxblocks=12800*65536, allocation method=3

Reading in file: /data/saint_kitts_and_nevis-latest.osm.pbf
Using PBF parser.
  parse time: 0s
Node stats: total(98770), max(6604079315) in 0s
Way stats: total(13403), max(703235432) in 0s
Relation stats: total(62), max(9501117) in 0s
Committing transaction for planet_osm_point
Committing transaction for planet_osm_line
Committing transaction for planet_osm_polygon
Committing transaction for planet_osm_roads
Using built-in tag processing pipeline
Using built-in tag processing pipeline
Using built-in tag processing pipeli

In [5]:
import pandas as pd
import geopandas as gpd
import sqlalchemy

Learned all of this from https://songhuiming.github.io/pages/2017/04/02/jupyter-and-pandas-display/

In [6]:
pd.set_option('display.max_rows', 500)
pd.set_option('display.width', 1000)
# Display all text in a text without truncation
pd.set_option('display.max_colwidth', -1)

In [7]:
connection_string = 'postgresql://{}:{}@osm_db:5432/osm'.format(OSM_USER, OSM_PASS)

In [8]:
connection_string

'postgresql://osm:osm@osm_db:5432/osm'

In [9]:
engine = sqlalchemy.create_engine(connection_string)

# What are the OSM Tables?
- planet_osm_point: which contains points of interest such as restaurants, hospitals, schools, supermarkets and addresses
- planet_osm_lines: contains roads and streets
- planet_osm_polygons: contains lakes, building footprints, administrative boundaries such as towns and cities

**Source: https://www.bostongis.com/PrinterFriendly.aspx?content_name=loading_osm_postgis**

# How do I extract values from the tags column?
SELECT osm_id, tags ->'website' AS website FROM planet_osm_polygon where tags -> 'website' is not null limit 20;

In [10]:
query = """
SELECT osm_id, amenity, building, leisure, name, office, ref, religion, shop, tourism, tags ->'website' AS website, way
FROM planet_osm_polygon 
WHERE name is not null or tags ->'website' IS NOT NULL
ORDER BY name, website
"""
osm_polygon = gpd.read_postgis(sql=query,
                con=engine, geom_col='way', index_col='osm_id')

In [11]:
osm_polygon.dtypes

amenity     object
building    object
leisure     object
name        object
office      object
ref         object
religion    object
shop        object
tourism     object
website     object
way         object
dtype: object

In [12]:
import qgrid

In [13]:
qgrid.show_grid(osm_polygon.drop(columns=['way']))

QgridWidget(grid_options={'fullWidthRows': True, 'syncColumnCellResize': True, 'forceFitColumns': True, 'defau…

In [14]:
query = """
SELECT osm_id, amenity, building, leisure, name, office, ref, religion, shop, tourism, tags ->'website' AS website, way
FROM planet_osm_point
WHERE name is not null or tags ->'website' IS NOT NULL
"""
osm_point = gpd.read_postgis(sql=query,
                con=engine, geom_col='way')

In [15]:
qgrid.show_grid(osm_point.drop(columns=['way']))

QgridWidget(grid_options={'fullWidthRows': True, 'syncColumnCellResize': True, 'forceFitColumns': True, 'defau…