# Dissertation Data Querying
### Gavin Rolls

This notebook contains the queries I used to save OSM and Overture data locally for Greater London and Birmingham. Reading the saved files from disk is much faster than re-running these queries every time I need the data, so the queries are kept here for reference but are only intended to be run once.

## Library Imports & Database Configuration

In [14]:
#Library Imports - using DuckDB for Overture Import

#Database
import duckdb

#Basics
import pandas as pd
import geopandas as gpd
from geopy.geocoders import Nominatim

#OpenStreetMap
import osmnx as ox

In [15]:
#Config SQL
# Install the SQL magic packages and the DuckDB driver from inside the notebook.
# NOTE: the captured output below says a kernel restart may be needed before
# freshly installed packages are picked up.
%pip install ipython-sql duckdb duckdb-engine jupysql --quiet
# grpcio is upgraded separately (reason not recorded in this notebook)
%pip install --upgrade grpcio --quiet
# Load the extension that provides the %sql / %%sql magics used in later cells
%load_ext sql

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
There's a new jupysql version available (0.10.11), you're running 0.10.10. To upgrade: pip install jupysql --upgrade
Deploy Shiny apps for free on Ploomber Cloud! Learn more: https://ploomber.io/s/signup


In [16]:
# SqlMagic options: autopandas on, feedback off, displaycon off.
# Presumably: results come back as pandas DataFrames and the per-query
# status / connection-string printouts are suppressed - confirm in the jupysql docs.
%config SqlMagic.autopandas = True
%config SqlMagic.feedback = False
%config SqlMagic.displaycon = False
# Connect the %sql magic to an in-memory DuckDB database
%sql duckdb:///:memory:

In [17]:
%%sql
INSTALL httpfs;

-- httpfs is loaded for the remote parquet reads in the Overture cells further down
LOAD httpfs;

INSTALL spatial;

-- spatial supplies ST_GeomFromWKB and the GDAL writer used by the COPY statements
LOAD spatial;

-- Region matching the overturemaps-us-west-2 bucket queried below
SET s3_region='us-west-2';

Unnamed: 0,Success


## Get Bounding Boxes

In [25]:
#Get London and Birmingham Bounding Boxes

# Initialize the geolocator
geolocator = Nominatim(user_agent="geoapi")


def _geocode_or_fail(query):
    """Geocode `query` with the shared geolocator, failing loudly on no match.

    geocode() returns None when nothing is found; raising here gives a clear
    message instead of an AttributeError on `.raw` further down.
    """
    match = geolocator.geocode(query)
    if match is None:
        raise ValueError(f"Nominatim returned no result for {query!r}")
    return match


def _print_bbox(city, min_lon, min_lat, max_lon, max_lat):
    """Print one city's bounding box coordinates."""
    print(f"{city} Bounding Box Coordinates:")
    print(f"Min Longitude: {min_lon}")
    print(f"Min Latitude: {min_lat}")
    print(f"Max Longitude: {max_lon}")
    print(f"Max Latitude: {max_lat}")


# Get location data for London
location = _geocode_or_fail("London")

# The raw bounding box is read as [min_lat, max_lat, min_lon, max_lon] (strings)
bounding_box_london = location.raw['boundingbox']

min_lat_london, max_lat_london = float(bounding_box_london[0]), float(bounding_box_london[1])
min_lon_london, max_lon_london = float(bounding_box_london[2]), float(bounding_box_london[3])

_print_bbox("London", min_lon_london, min_lat_london, max_lon_london, max_lat_london)
print()

# Get location data for Birmingham
location_birmingham = _geocode_or_fail("Birmingham")

# Same [min_lat, max_lat, min_lon, max_lon] layout as above
bounding_box_birmingham = location_birmingham.raw['boundingbox']

# Convert bounding box to coordinates for Birmingham
min_lat_birmingham, max_lat_birmingham = float(bounding_box_birmingham[0]), float(bounding_box_birmingham[1])
min_lon_birmingham, max_lon_birmingham = float(bounding_box_birmingham[2]), float(bounding_box_birmingham[3])

_print_bbox("Birmingham", min_lon_birmingham, min_lat_birmingham, max_lon_birmingham, max_lat_birmingham)


London Bounding Box Coordinates:
Min Longitude: -0.5103751
Min Latitude: 51.2867601
Max Longitude: 0.3340155
Max Latitude: 51.6918741

Birmingham Bounding Box Coordinates:
Min Longitude: -2.0336486
Min Latitude: 52.381053
Max Longitude: -1.7288417
Max Latitude: 52.6087058


## Download POIs - Overture

### London

In [36]:
%%sql

COPY (
    -- One row per Overture place: primary name, main category, confidence
    -- rounded to 2 decimal places, and the WKB geometry decoded to a geometry value
    SELECT
        names.primary AS name,
        categories.main as category,
        ROUND(confidence,2) as confidence,
        ST_GeomFromWKB(geometry) as geometry
FROM read_parquet('s3://overturemaps-us-west-2/release/2024-05-16-beta.0/theme=places/*/*')
WHERE
    -- London bounding box as printed in the Get Bounding Boxes section.
    -- Only the minimum corner (xmin, ymin) of each feature bbox is tested.
    bbox.xmin BETWEEN -0.5103751 AND 0.3340155 AND
    bbox.ymin BETWEEN 51.2867601 AND 51.6918741
) TO 'data/overture_data/london_places.geojson' WITH (FORMAT GDAL, DRIVER 'GeoJSON', SRS 'EPSG:4326');

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Unnamed: 0,Success


### Birmingham

In [37]:
%%sql

COPY (
    -- One row per Overture place: primary name, main category, confidence
    -- rounded to 2 decimal places, and the WKB geometry decoded to a geometry value
    SELECT
        names.primary AS name,
        categories.main as category,
        ROUND(confidence,2) as confidence,
        ST_GeomFromWKB(geometry) as geometry
FROM read_parquet('s3://overturemaps-us-west-2/release/2024-05-16-beta.0/theme=places/*/*')
WHERE
    -- Birmingham bounding box as printed in the Get Bounding Boxes section.
    -- Only the minimum corner (xmin, ymin) of each feature bbox is tested.
    bbox.xmin BETWEEN -2.0336486 AND -1.7288417 AND
    bbox.ymin BETWEEN 52.381053 AND 52.6087058
) TO 'data/overture_data/bham_places.geojson' WITH (FORMAT GDAL, DRIVER 'GeoJSON', SRS 'EPSG:4326');

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Unnamed: 0,Success


### Overture Download Stats

In [None]:
#Basic overview stats of the London and Birmingham data - read back from the locally saved files
london_places = gpd.read_file('data/overture_data/london_places.geojson')
bham_places = gpd.read_file('data/overture_data/bham_places.geojson')

#Count of Features (one row per POI)
for city_label, places in (("London", london_places), ("Birmingham", bham_places)):
    print(f"{city_label} POI Count: {len(places)}")

## Download Buildings - OSM

### London

In [None]:
#Download Data for 32 Boroughs (Exc. City of London)
place_name = 'London, United Kingdom'

# Cache responses and log progress to the console. Set through ox.settings
# because ox.config() is deprecated and removed in OSMnx 2.0.
ox.settings.use_cache = True
ox.settings.log_console = True

# Every OSM feature carrying a building tag within the geocoded place
buildings = ox.features_from_place(place_name, tags={'building': True})
buildings = buildings[buildings.geometry.notnull()]

# Keep polygonal geometries only, then narrow to the two columns that are
# saved. The copy makes the column assignment below act on an independent frame.
is_footprint = buildings.geom_type.isin(['Polygon', 'MultiPolygon'])
building_footprints = buildings[is_footprint][['name', 'geometry']].copy()

# Stringify list-valued cells (presumably so to_file can serialise them).
# The test must run per column: DataFrame.apply on the whole frame passes the
# lambda entire columns (Series), so a frame-wide isinstance check is never True.
for col in building_footprints.columns.drop('geometry'):
    if building_footprints[col].apply(lambda x: isinstance(x, list)).any():
        building_footprints[col] = building_footprints[col].apply(lambda x: str(x) if isinstance(x, list) else x)

building_footprints = building_footprints.reset_index()
print(building_footprints)

# Save the borough footprints to a geojson file
building_footprints.to_file("data/osm_data/boroughs_buildings.geojson", driver="GeoJSON")

### City of London

In [None]:
#Add City of London
# NOTE: relies on `building_footprints` from the boroughs cell directly above.
# The Birmingham cell reuses that variable name, so run this cell before it.
place_name = 'City of London, England, United Kingdom'

buildings_city = ox.features_from_place(place_name, tags={'building': True})
buildings_city = buildings_city[buildings_city.geometry.notnull()]

# Keep polygonal geometries only, then narrow to the two columns that are
# saved. The copy makes the column assignment below act on an independent frame.
is_footprint_city = buildings_city.geom_type.isin(['Polygon', 'MultiPolygon'])
building_footprints_city = buildings_city[is_footprint_city][['name', 'geometry']].copy()

# Stringify list-valued cells (presumably so to_file can serialise them).
# The test must run per column: DataFrame.apply on the whole frame passes the
# lambda entire columns (Series), so a frame-wide isinstance check is never True.
for col in building_footprints_city.columns.drop('geometry'):
    if building_footprints_city[col].apply(lambda x: isinstance(x, list)).any():
        building_footprints_city[col] = building_footprints_city[col].apply(lambda x: str(x) if isinstance(x, list) else x)

building_footprints_city = building_footprints_city.reset_index()

# Boroughs + City of London in one frame with a fresh 0..n-1 index
building_footprints_combined = pd.concat([building_footprints, building_footprints_city], ignore_index=True)

# Save the combined GeoDataFrame to a geojson file
building_footprints_combined.to_file("data/osm_data/all_london_buildings.geojson", driver="GeoJSON")

### Birmingham

In [None]:
#Download Data for Birmingham
place_name = 'Birmingham, United Kingdom'

# Cache responses and log progress to the console. Set through ox.settings
# because ox.config() is deprecated and removed in OSMnx 2.0.
ox.settings.use_cache = True
ox.settings.log_console = True

# Every OSM feature carrying a building tag within the geocoded place
buildings = ox.features_from_place(place_name, tags={'building': True})
buildings = buildings[buildings.geometry.notnull()]

# Keep polygonal geometries only, then narrow to the two columns that are
# saved. The copy makes the column assignment below act on an independent frame.
is_footprint = buildings.geom_type.isin(['Polygon', 'MultiPolygon'])
building_footprints = buildings[is_footprint][['name', 'geometry']].copy()

# Stringify list-valued cells (presumably so to_file can serialise them).
# The test must run per column: DataFrame.apply on the whole frame passes the
# lambda entire columns (Series), so a frame-wide isinstance check is never True.
for col in building_footprints.columns.drop('geometry'):
    if building_footprints[col].apply(lambda x: isinstance(x, list)).any():
        building_footprints[col] = building_footprints[col].apply(lambda x: str(x) if isinstance(x, list) else x)

building_footprints = building_footprints.reset_index()
print(building_footprints)

# Save the Birmingham footprints to a geojson file
building_footprints.to_file("data/osm_data/bham_buildings.geojson", driver="GeoJSON")

### Extract Building Type Info from OSM - Commercial


In [None]:
#Download Data for 32 Boroughs (Exc. City of London)
place_name = 'London, United Kingdom'

# OSM features tagged building=commercial within the geocoded place
commercial_buildings = ox.features_from_place(place_name, tags={'building': ['commercial']})

commercial_buildings = commercial_buildings[commercial_buildings.geometry.notnull()]

# Keep polygonal geometries only, then narrow to the two columns that are
# saved. The copy makes the column assignment below act on an independent frame.
is_commercial_footprint = commercial_buildings.geom_type.isin(['Polygon', 'MultiPolygon'])
commercial_building_footprints = commercial_buildings[is_commercial_footprint][['name', 'geometry']].copy()

# Stringify list-valued cells (presumably so to_file can serialise them).
# The test must run per column: DataFrame.apply on the whole frame passes the
# lambda entire columns (Series), so a frame-wide isinstance check is never True.
for col in commercial_building_footprints.columns.drop('geometry'):
    if commercial_building_footprints[col].apply(lambda x: isinstance(x, list)).any():
        commercial_building_footprints[col] = commercial_building_footprints[col].apply(lambda x: str(x) if isinstance(x, list) else x)

commercial_building_footprints = commercial_building_footprints.reset_index()
print(commercial_building_footprints)

# Save the borough commercial footprints to a geojson file
commercial_building_footprints.to_file("data/osm_data/boroughs_commercial_buildings.geojson", driver="GeoJSON")

In [None]:
#Add City of London
# NOTE: relies on `commercial_building_footprints` from the boroughs cell directly
# above. The Birmingham cell reuses that variable name, so run this cell before it.
place_name = 'City of London, England, United Kingdom'

commercial_buildings_city = ox.features_from_place(place_name, tags={'building': ['commercial']})
commercial_buildings_city = commercial_buildings_city[commercial_buildings_city.geometry.notnull()]

# Keep polygonal geometries only, then narrow to the two columns that are
# saved. The copy makes the column assignment below act on an independent frame.
is_commercial_footprint_city = commercial_buildings_city.geom_type.isin(['Polygon', 'MultiPolygon'])
commercial_building_footprints_city = commercial_buildings_city[is_commercial_footprint_city][['name', 'geometry']].copy()

# Stringify list-valued cells (presumably so to_file can serialise them).
# The test must run per column: DataFrame.apply on the whole frame passes the
# lambda entire columns (Series), so a frame-wide isinstance check is never True.
for col in commercial_building_footprints_city.columns.drop('geometry'):
    if commercial_building_footprints_city[col].apply(lambda x: isinstance(x, list)).any():
        commercial_building_footprints_city[col] = commercial_building_footprints_city[col].apply(lambda x: str(x) if isinstance(x, list) else x)

commercial_building_footprints_city = commercial_building_footprints_city.reset_index()

# Boroughs + City of London in one frame with a fresh 0..n-1 index
commercial_building_footprints_combined = pd.concat([commercial_building_footprints, commercial_building_footprints_city], ignore_index=True)

# Save the combined GeoDataFrame to a geojson file
commercial_building_footprints_combined.to_file("data/osm_data/all_london_commercial_buildings.geojson", driver="GeoJSON")

In [None]:
#Download Data for Birmingham
place_name = 'Birmingham, United Kingdom'

# OSM features tagged building=commercial within the geocoded place
commercial_buildings = ox.features_from_place(place_name, tags={'building': ['commercial']})

commercial_buildings = commercial_buildings[commercial_buildings.geometry.notnull()]

# Keep polygonal geometries only, then narrow to the two columns that are
# saved. The copy makes the column assignment below act on an independent frame.
is_commercial_footprint = commercial_buildings.geom_type.isin(['Polygon', 'MultiPolygon'])
commercial_building_footprints = commercial_buildings[is_commercial_footprint][['name', 'geometry']].copy()

# Stringify list-valued cells (presumably so to_file can serialise them).
# The test must run per column: DataFrame.apply on the whole frame passes the
# lambda entire columns (Series), so a frame-wide isinstance check is never True.
for col in commercial_building_footprints.columns.drop('geometry'):
    if commercial_building_footprints[col].apply(lambda x: isinstance(x, list)).any():
        commercial_building_footprints[col] = commercial_building_footprints[col].apply(lambda x: str(x) if isinstance(x, list) else x)

commercial_building_footprints = commercial_building_footprints.reset_index()
print(commercial_building_footprints)

# Save the Birmingham commercial footprints to a geojson file
commercial_building_footprints.to_file("data/osm_data/bham_commercial_buildings.geojson", driver="GeoJSON")

### Extract Building Type Info from OSM - Office

In [None]:
# Download Data for 32 Boroughs (Exc. City of London)
place_name = 'London, United Kingdom'

# Cache responses and log progress to the console. Set through ox.settings
# because ox.config() is deprecated and removed in OSMnx 2.0.
ox.settings.use_cache = True
ox.settings.log_console = True

# OSM features tagged building=office within the geocoded place
office_buildings = ox.features_from_place(place_name, tags={'building': ['office']})

office_buildings = office_buildings[office_buildings.geometry.notnull()]

# Keep polygonal geometries only, then narrow to the two columns that are
# saved. The copy makes the column assignment below act on an independent frame.
is_office_footprint = office_buildings.geom_type.isin(['Polygon', 'MultiPolygon'])
office_building_footprints = office_buildings[is_office_footprint][['name', 'geometry']].copy()

# Stringify list-valued cells (presumably so to_file can serialise them).
# The test must run per column: DataFrame.apply on the whole frame passes the
# lambda entire columns (Series), so a frame-wide isinstance check is never True.
for col in office_building_footprints.columns.drop('geometry'):
    if office_building_footprints[col].apply(lambda x: isinstance(x, list)).any():
        office_building_footprints[col] = office_building_footprints[col].apply(lambda x: str(x) if isinstance(x, list) else x)

office_building_footprints = office_building_footprints.reset_index()
print(office_building_footprints)

# Save the borough office footprints to a geojson file
office_building_footprints.to_file("data/osm_data/boroughs_office_buildings.geojson", driver="GeoJSON")


In [None]:
#Add City of London
# NOTE: relies on `office_building_footprints` from the boroughs cell directly
# above. The Birmingham cell reuses that variable name, so run this cell before it.
place_name = 'City of London, England, United Kingdom'

office_buildings_city = ox.features_from_place(place_name, tags={'building': ['office']})
office_buildings_city = office_buildings_city[office_buildings_city.geometry.notnull()]

# Keep polygonal geometries only, then narrow to the two columns that are
# saved. The copy makes the column assignment below act on an independent frame.
is_office_footprint_city = office_buildings_city.geom_type.isin(['Polygon', 'MultiPolygon'])
office_building_footprints_city = office_buildings_city[is_office_footprint_city][['name', 'geometry']].copy()

# Stringify list-valued cells (presumably so to_file can serialise them).
# The test must run per column: DataFrame.apply on the whole frame passes the
# lambda entire columns (Series), so a frame-wide isinstance check is never True.
for col in office_building_footprints_city.columns.drop('geometry'):
    if office_building_footprints_city[col].apply(lambda x: isinstance(x, list)).any():
        office_building_footprints_city[col] = office_building_footprints_city[col].apply(lambda x: str(x) if isinstance(x, list) else x)

office_building_footprints_city = office_building_footprints_city.reset_index()

# Boroughs + City of London in one frame with a fresh 0..n-1 index
office_building_footprints_combined = pd.concat([office_building_footprints, office_building_footprints_city], ignore_index=True)

# Save the combined GeoDataFrame to a geojson file
office_building_footprints_combined.to_file("data/osm_data/all_london_office_buildings.geojson", driver="GeoJSON")

In [None]:
#Download Data for Birmingham
place_name = 'Birmingham, United Kingdom'

# Cache responses and log progress to the console. Set through ox.settings
# because ox.config() is deprecated and removed in OSMnx 2.0.
ox.settings.use_cache = True
ox.settings.log_console = True

# OSM features tagged building=office within the geocoded place
office_buildings = ox.features_from_place(place_name, tags={'building': ['office']})

office_buildings = office_buildings[office_buildings.geometry.notnull()]

# Keep polygonal geometries only, then narrow to the two columns that are
# saved. The copy makes the column assignment below act on an independent frame.
is_office_footprint = office_buildings.geom_type.isin(['Polygon', 'MultiPolygon'])
office_building_footprints = office_buildings[is_office_footprint][['name', 'geometry']].copy()

# Stringify list-valued cells (presumably so to_file can serialise them).
# The test must run per column: DataFrame.apply on the whole frame passes the
# lambda entire columns (Series), so a frame-wide isinstance check is never True.
for col in office_building_footprints.columns.drop('geometry'):
    if office_building_footprints[col].apply(lambda x: isinstance(x, list)).any():
        office_building_footprints[col] = office_building_footprints[col].apply(lambda x: str(x) if isinstance(x, list) else x)

office_building_footprints = office_building_footprints.reset_index()
print(office_building_footprints)

# Save the Birmingham office footprints to a geojson file
office_building_footprints.to_file("data/osm_data/bham_office_buildings.geojson", driver="GeoJSON")


### Extract Building Type Info from OSM - Residential

In [None]:
# Download Data for 32 Boroughs (Exc. City of London)
place_name = 'London, United Kingdom'

# Cache responses and log progress to the console. Set through ox.settings
# because ox.config() is deprecated and removed in OSMnx 2.0.
ox.settings.use_cache = True
ox.settings.log_console = True

# OSM features tagged building=residential within the geocoded place
residential_buildings = ox.features_from_place(place_name, tags={'building': ['residential']})

residential_buildings = residential_buildings[residential_buildings.geometry.notnull()]

# Keep polygonal geometries only, then narrow to the two columns that are
# saved. The copy makes the column assignment below act on an independent frame.
is_residential_footprint = residential_buildings.geom_type.isin(['Polygon', 'MultiPolygon'])
residential_building_footprints = residential_buildings[is_residential_footprint][['name', 'geometry']].copy()

# Stringify list-valued cells (presumably so to_file can serialise them).
# The test must run per column: DataFrame.apply on the whole frame passes the
# lambda entire columns (Series), so a frame-wide isinstance check is never True.
for col in residential_building_footprints.columns.drop('geometry'):
    if residential_building_footprints[col].apply(lambda x: isinstance(x, list)).any():
        residential_building_footprints[col] = residential_building_footprints[col].apply(lambda x: str(x) if isinstance(x, list) else x)

residential_building_footprints = residential_building_footprints.reset_index()
print(residential_building_footprints)

# Save the borough residential footprints to a geojson file
residential_building_footprints.to_file("data/osm_data/boroughs_residential_buildings.geojson", driver="GeoJSON")


In [None]:
#Add City of London
# NOTE: relies on `residential_building_footprints` from the boroughs cell directly
# above. The Birmingham cell reuses that variable name, so run this cell before it.
place_name = 'City of London, England, United Kingdom'

residential_buildings_city = ox.features_from_place(place_name, tags={'building': ['residential']})
residential_buildings_city = residential_buildings_city[residential_buildings_city.geometry.notnull()]

# Keep polygonal geometries only, then narrow to the two columns that are
# saved. The copy makes the column assignment below act on an independent frame.
is_residential_footprint_city = residential_buildings_city.geom_type.isin(['Polygon', 'MultiPolygon'])
residential_building_footprints_city = residential_buildings_city[is_residential_footprint_city][['name', 'geometry']].copy()

# Stringify list-valued cells (presumably so to_file can serialise them).
# The test must run per column: DataFrame.apply on the whole frame passes the
# lambda entire columns (Series), so a frame-wide isinstance check is never True.
for col in residential_building_footprints_city.columns.drop('geometry'):
    if residential_building_footprints_city[col].apply(lambda x: isinstance(x, list)).any():
        residential_building_footprints_city[col] = residential_building_footprints_city[col].apply(lambda x: str(x) if isinstance(x, list) else x)

residential_building_footprints_city = residential_building_footprints_city.reset_index()

# Boroughs + City of London in one frame with a fresh 0..n-1 index
residential_building_footprints_combined = pd.concat([residential_building_footprints, residential_building_footprints_city], ignore_index=True)

# Save the combined GeoDataFrame to a geojson file
residential_building_footprints_combined.to_file("data/osm_data/all_london_residential_buildings.geojson", driver="GeoJSON")

In [None]:
#Download Data for Birmingham
place_name = 'Birmingham, United Kingdom'

# Cache responses and log progress to the console. Set through ox.settings
# because ox.config() is deprecated and removed in OSMnx 2.0.
ox.settings.use_cache = True
ox.settings.log_console = True

# OSM features tagged building=residential within the geocoded place
residential_buildings = ox.features_from_place(place_name, tags={'building': ['residential']})

residential_buildings = residential_buildings[residential_buildings.geometry.notnull()]

# Keep polygonal geometries only, then narrow to the two columns that are
# saved. The copy makes the column assignment below act on an independent frame.
is_residential_footprint = residential_buildings.geom_type.isin(['Polygon', 'MultiPolygon'])
residential_building_footprints = residential_buildings[is_residential_footprint][['name', 'geometry']].copy()

# Stringify list-valued cells (presumably so to_file can serialise them).
# The test must run per column: DataFrame.apply on the whole frame passes the
# lambda entire columns (Series), so a frame-wide isinstance check is never True.
for col in residential_building_footprints.columns.drop('geometry'):
    if residential_building_footprints[col].apply(lambda x: isinstance(x, list)).any():
        residential_building_footprints[col] = residential_building_footprints[col].apply(lambda x: str(x) if isinstance(x, list) else x)

residential_building_footprints = residential_building_footprints.reset_index()
print(residential_building_footprints)

# Save the Birmingham residential footprints to a geojson file
residential_building_footprints.to_file("data/osm_data/bham_residential_buildings.geojson", driver="GeoJSON")


### Extract Building Type Info from OSM - Retail

In [None]:
# Download Data for 32 Boroughs (Exc. City of London)
place_name = 'London, United Kingdom'

# Cache responses and log progress to the console. Set through ox.settings
# because ox.config() is deprecated and removed in OSMnx 2.0.
ox.settings.use_cache = True
ox.settings.log_console = True

# OSM features tagged building=retail within the geocoded place
retail_buildings = ox.features_from_place(place_name, tags={'building': ['retail']})

retail_buildings = retail_buildings[retail_buildings.geometry.notnull()]

# Keep polygonal geometries only, then narrow to the two columns that are
# saved. The copy makes the column assignment below act on an independent frame.
is_retail_footprint = retail_buildings.geom_type.isin(['Polygon', 'MultiPolygon'])
retail_building_footprints = retail_buildings[is_retail_footprint][['name', 'geometry']].copy()

# Stringify list-valued cells (presumably so to_file can serialise them).
# The test must run per column: DataFrame.apply on the whole frame passes the
# lambda entire columns (Series), so a frame-wide isinstance check is never True.
for col in retail_building_footprints.columns.drop('geometry'):
    if retail_building_footprints[col].apply(lambda x: isinstance(x, list)).any():
        retail_building_footprints[col] = retail_building_footprints[col].apply(lambda x: str(x) if isinstance(x, list) else x)

retail_building_footprints = retail_building_footprints.reset_index()
print(retail_building_footprints)

# Save the borough retail footprints to a geojson file
retail_building_footprints.to_file("data/osm_data/boroughs_retail_buildings.geojson", driver="GeoJSON")


In [None]:
#Add City of London
# NOTE: relies on `retail_building_footprints` from the boroughs cell directly
# above. The Birmingham cell reuses that variable name, so run this cell before it.
place_name = 'City of London, England, United Kingdom'

retail_buildings_city = ox.features_from_place(place_name, tags={'building': ['retail']})
retail_buildings_city = retail_buildings_city[retail_buildings_city.geometry.notnull()]

# Keep polygonal geometries only, then narrow to the two columns that are
# saved. The copy makes the column assignment below act on an independent frame.
is_retail_footprint_city = retail_buildings_city.geom_type.isin(['Polygon', 'MultiPolygon'])
retail_building_footprints_city = retail_buildings_city[is_retail_footprint_city][['name', 'geometry']].copy()

# Stringify list-valued cells (presumably so to_file can serialise them).
# The test must run per column: DataFrame.apply on the whole frame passes the
# lambda entire columns (Series), so a frame-wide isinstance check is never True.
for col in retail_building_footprints_city.columns.drop('geometry'):
    if retail_building_footprints_city[col].apply(lambda x: isinstance(x, list)).any():
        retail_building_footprints_city[col] = retail_building_footprints_city[col].apply(lambda x: str(x) if isinstance(x, list) else x)

retail_building_footprints_city = retail_building_footprints_city.reset_index()

# Boroughs + City of London in one frame with a fresh 0..n-1 index
retail_building_footprints_combined = pd.concat([retail_building_footprints, retail_building_footprints_city], ignore_index=True)

# Save the combined GeoDataFrame to a geojson file
retail_building_footprints_combined.to_file("data/osm_data/all_london_retail_buildings.geojson", driver="GeoJSON")

In [None]:
#Download Data for Birmingham
place_name = 'Birmingham, United Kingdom'

# Cache responses and log progress to the console. Set through ox.settings
# because ox.config() is deprecated and removed in OSMnx 2.0.
ox.settings.use_cache = True
ox.settings.log_console = True

# OSM features tagged building=retail within the geocoded place
retail_buildings = ox.features_from_place(place_name, tags={'building': ['retail']})

retail_buildings = retail_buildings[retail_buildings.geometry.notnull()]

# Keep polygonal geometries only, then narrow to the two columns that are
# saved. The copy makes the column assignment below act on an independent frame.
is_retail_footprint = retail_buildings.geom_type.isin(['Polygon', 'MultiPolygon'])
retail_building_footprints = retail_buildings[is_retail_footprint][['name', 'geometry']].copy()

# Stringify list-valued cells (presumably so to_file can serialise them).
# The test must run per column: DataFrame.apply on the whole frame passes the
# lambda entire columns (Series), so a frame-wide isinstance check is never True.
for col in retail_building_footprints.columns.drop('geometry'):
    if retail_building_footprints[col].apply(lambda x: isinstance(x, list)).any():
        retail_building_footprints[col] = retail_building_footprints[col].apply(lambda x: str(x) if isinstance(x, list) else x)

retail_building_footprints = retail_building_footprints.reset_index()
print(retail_building_footprints)

# Save the Birmingham retail footprints to a geojson file
retail_building_footprints.to_file("data/osm_data/bham_retail_buildings.geojson", driver="GeoJSON")
