## Dissertation Data Collection
### Gavin Rolls

Migrated from OvertureTest.ipynb, these are the queries I used to save OSM and Overture data locally for Greater London. Because it's easier to grab the data I need from my desktop as opposed to running these queries every time I need data, I'm leaving them here

### Imports + Database Configuration

In [7]:
#Library Imports - using DuckDB for Overture Import

#Database
import duckdb

#Basics
import pandas as pd
import geopandas as gpd

#OpenStreetMap
import osmnx as ox

In [10]:
#Config SQL
%pip install ipython-sql duckdb duckdb-engine jupysql --quiet
%pip install --upgrade grpcio --quiet
%load_ext sql

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


In [11]:
%config SqlMagic.autopandas = True
%config SqlMagic.feedback = False
%config SqlMagic.displaycon = False
%sql duckdb:///:memory:

In [12]:
%%sql      
INSTALL httpfs;

LOAD httpfs;

INSTALL spatial;

LOAD spatial;

SET s3_region='us-west-2';

Unnamed: 0,Success


### Download Buildings - Overture

Downloaded as 'data/london_buildings_overture.geojson'

In [16]:
#Get London Bounding Box

# Initialize the geolocator
geolocator = Nominatim(user_agent="geoapi")

# Get location data
location = geolocator.geocode("London")

# Get the bounding box
bounding_box = location.raw['boundingbox']

# Convert bounding box to coordinates
min_lat, max_lat = float(bounding_box[0]), float(bounding_box[1])
min_lon, max_lon = float(bounding_box[2]), float(bounding_box[3])

print(min_lon)
print(min_lat)
print(max_lon)
print(max_lat)

-0.5103751
51.2867601
0.3340155
51.6918741


In [None]:
%%sql

LOAD azure;

SET azure_storage_connection_string = 'DefaultEndpointsProtocol=https;AccountName=overturemapswestus2;AccountKey=;EndpointSuffix=core.windows.net';
COPY (
SELECT
    names.primary as primary_name,
    height,
    level,
    ST_GeomFromWKB(geometry) as geometry
FROM read_parquet('azure://release/2024-05-16-beta.0/theme=buildings/type=building/*', filename=true, hive_partitioning=1)
WHERE primary_name IS NOT NULL
AND bbox.xmin > -0.5103751
AND bbox.xmax < 0.3340155
AND bbox.ymin > 51.2867601
AND bbox.ymax < 51.6918741
) TO 'data/overture_data/london_buildings_overture.geojson'
WITH (FORMAT GDAL, DRIVER 'GeoJSON', SRS 'EPSG:4326');


### Download POIs - Overture

Downloaded as 'data/london_places_overture.geojson''

In [None]:
%%sql

COPY (
    SELECT
        names.primary AS name,
        categories.main as category,
        ROUND(confidence,2) as confidence,
        ST_GeomFromWKB(geometry) as geometry
FROM read_parquet('s3://overturemaps-us-west-2/release/2024-05-16-beta.0/theme=places/*/*')
WHERE
    bbox.xmin BETWEEN -0.5103751 AND 0.3340155 AND
    bbox.ymin BETWEEN 51.2867601 AND 51.6918741
) TO 'data/overture_data/london_places_overture.geojson' WITH (FORMAT GDAL, DRIVER 'GeoJSON', SRS 'EPSG:4326');

### Overture Download Stats

In [18]:
#Basic overview stats of London Data - Using the locally saved files here
buildings = gpd.read_file('data/overture_data/london_buildings_overture.geojson')
places = gpd.read_file('data/overture_data/london_places_overture.geojson')

#Count of Features
print("London Building Count: " + str(buildings.shape[0]))
print("London POI count: " + str(places.shape[0]))

London Building Count: 61364
London POI count: 343712


### Download Buildings - OSM

Downloaded as 'data/osmbuildings/building_footprints.geojson'

In [None]:
#Add City of London
place_name = 'City of London, England, United Kingdom'

buildings_city = ox.features_from_place(place_name, tags={'building': True})
buildings_city = buildings_city[buildings_city.geometry.notnull()]
building_footprints_city = buildings_city[buildings_city.geom_type.isin(['Polygon', 'MultiPolygon'])]

for col in building_footprints_city.columns:
    if building_footprints_city.apply(lambda x: isinstance(x, list)).any():
        building_footprints_city = building_footprints_city[col].apply(lambda x: str(x) if isinstance(x, list) else x)

building_footprints_city = building_footprints_city[['name', 'geometry']].reset_index()

building_footprints_combined = pd.concat([building_footprints, building_footprints_city], ignore_index=True)
        
# Save the combined GeoDataFrame to a geojson file
building_footprints_combined.to_file("data/osm_data/all_london_buildings.geojson", driver="GeoJSON")

### Extract Building Type Info from OSM - Commercial

In [14]:
#Download Data for 32 Boroughs (Exc. City of London)
place_name = 'London, United Kingdom'

commercial_buildings = ox.features_from_place(place_name, tags={'building': ['commercial']})

commercial_buildings = commercial_buildings[commercial_buildings.geometry.notnull()]
commercial_building_footprints = commercial_buildings[commercial_buildings.geom_type.isin(['Polygon', 'MultiPolygon'])]

for col in commercial_building_footprints.columns:
    if commercial_building_footprints.apply(lambda x: isinstance(x, list)).any():
        commercial_building_footprints = commercial_building_footprints[col].apply(lambda x: str(x) if isinstance(x, list) else x)

commercial_building_footprints = commercial_building_footprints[['name', 'geometry']].reset_index()
print(commercial_building_footprints)
        
# Save the combined GeoDataFrame to a geojson file
commercial_building_footprints.to_file("data/osm_data/boroughs_commercial_buildings.geojson", driver="GeoJSON")

  ox.config(use_cache=True, log_console=True)


     element_type     osmid                      name  \
0             way   4076571  Bromley Road Retail Park   
1             way   4076587   Catford Delivery Office   
2             way   5790535              Refuge House   
3             way   5986754         One Canada Square   
4             way   5987126                       NaN   
...           ...       ...                       ...   
5187     relation  13472609                Hyde House   
5188     relation  13535999           Old Guy's House   
5189     relation  15685271                       NaN   
5190     relation  15761586               China Works   
5191     relation  16030676           Camelford House   

                                               geometry  
0     POLYGON ((-0.02063 51.43669, -0.02047 51.43631...  
1     POLYGON ((-0.02001 51.43859, -0.02004 51.43865...  
2     POLYGON ((-0.07800 51.65301, -0.07777 51.65303...  
3     POLYGON ((-0.02002 51.50477, -0.01996 51.50500...  
4     POLYGON ((-0.02588 

In [20]:
#Add City of London
place_name = 'City of London, England, United Kingdom'

commercial_buildings_city = ox.features_from_place(place_name, tags={'building': True})
commercial_buildings_city = commercial_buildings_city[commercial_buildings_city.geometry.notnull()]
commercial_building_footprints_city = commercial_buildings_city[commercial_buildings_city.geom_type.isin(['Polygon', 'MultiPolygon'])]

for col in commercial_building_footprints_city.columns:
    if commercial_building_footprints_city.apply(lambda x: isinstance(x, list)).any():
        commercial_building_footprints_city = commercial_building_footprints_city[col].apply(lambda x: str(x) if isinstance(x, list) else x)

commercial_building_footprints_city = commercial_building_footprints_city[['name', 'geometry']].reset_index()

commercial_building_footprints_combined = pd.concat([commercial_building_footprints, commercial_building_footprints_city], ignore_index=True)
        
# Save the combined GeoDataFrame to a geojson file
commercial_building_footprints_combined.to_file("data/osm_data/all_london_commercial_buildings.geojson", driver="GeoJSON")

### Extract Building Type Info from OSM - Office

In [22]:
# Download Data for 32 Boroughs (Exc. City of London)
place_name = 'London, United Kingdom'

ox.config(use_cache=True, log_console=True)

office_buildings = ox.features_from_place(place_name, tags={'building': ['office']})

office_buildings = office_buildings[office_buildings.geometry.notnull()]
office_building_footprints = office_buildings[office_buildings.geom_type.isin(['Polygon', 'MultiPolygon'])]

for col in office_building_footprints.columns:
    if office_building_footprints.apply(lambda x: isinstance(x, list)).any():
        office_building_footprints[col] = office_building_footprints[col].apply(lambda x: str(x) if isinstance(x, list) else x)

office_building_footprints = office_building_footprints[['name', 'geometry']].reset_index()
print(office_building_footprints)
        
# Save the combined GeoDataFrame to a geojson file
office_building_footprints.to_file("data/osm_data/boroughs_office_buildings.geojson", driver="GeoJSON")


  ox.config(use_cache=True, log_console=True)


     element_type     osmid                       name  \
0             way   4237383                        ITN   
1             way   5251705                        NaN   
2             way   5986805                    HSBC UK   
3             way   5986920                       KPMG   
4             way   5986929                   Barclays   
...           ...       ...                        ...   
1714     relation  12566775                        NaN   
1715     relation  12581563                BBC Studios   
1716     relation  14168517  The Featherstone Building   
1717     relation  15690226                 The Hudson   
1718     relation  16030675             Tintagel House   

                                               geometry  
0     POLYGON ((-0.11445 51.52304, -0.11478 51.52344...  
1     POLYGON ((-0.23366 51.40761, -0.23281 51.40822...  
2     POLYGON ((-0.01792 51.50522, -0.01794 51.50525...  
3     POLYGON ((-0.01680 51.50508, -0.01663 51.50569...  
4     POLYGON

In [24]:
#Add City of London
place_name = 'City of London, England, United Kingdom'

office_buildings_city = ox.features_from_place(place_name, tags={'building': True})
office_buildings_city = office_buildings_city[office_buildings_city.geometry.notnull()]
office_building_footprints_city = office_buildings_city[office_buildings_city.geom_type.isin(['Polygon', 'MultiPolygon'])]

for col in office_building_footprints_city.columns:
    if office_building_footprints_city.apply(lambda x: isinstance(x, list)).any():
        office_building_footprints_city = office_building_footprints_city[col].apply(lambda x: str(x) if isinstance(x, list) else x)

office_building_footprints_city = office_building_footprints_city[['name', 'geometry']].reset_index()

office_building_footprints_combined = pd.concat([office_building_footprints, office_building_footprints_city], ignore_index=True)
        
# Save the combined GeoDataFrame to a geojson file
office_building_footprints_combined.to_file("data/osm_data/all_london_office_buildings.geojson", driver="GeoJSON")

### Extract Building Type Info from OSM - Residential

In [27]:
# Download Data for 32 Boroughs (Exc. City of London)
place_name = 'London, United Kingdom'

ox.config(use_cache=True, log_console=True)

residential_buildings = ox.features_from_place(place_name, tags={'building': ['residential']})

residential_buildings = residential_buildings[residential_buildings.geometry.notnull()]
residential_building_footprints = residential_buildings[residential_buildings.geom_type.isin(['Polygon', 'MultiPolygon'])]

for col in residential_building_footprints.columns:
    if residential_building_footprints.apply(lambda x: isinstance(x, list)).any():
        residential_building_footprints[col] = residential_building_footprints[col].apply(lambda x: str(x) if isinstance(x, list) else x)

residential_building_footprints = residential_building_footprints[['name', 'geometry']].reset_index()
print(residential_building_footprints)
        
# Save the combined GeoDataFrame to a geojson file
residential_building_footprints.to_file("data/osm_data/boroughs_residential_buildings.geojson", driver="GeoJSON")


  ox.config(use_cache=True, log_console=True)


      element_type     osmid            name  \
0              way   4948114    Kemble House   
1              way   4948116  Kettleby House   
2              way   4948118   Woolley House   
3              way   5131193             NaN   
4              way   5131194             NaN   
...            ...       ...             ...   
83761     relation  16389021     Birch House   
83762     relation  16389022  Lansdell House   
83763     relation  16847821             NaN   
83764     relation  17193995             NaN   
83765     relation  17729875             NaN   

                                                geometry  
0      POLYGON ((-0.10656 51.46576, -0.10645 51.46582...  
1      POLYGON ((-0.10571 51.46618, -0.10559 51.46624...  
2      POLYGON ((-0.10486 51.46659, -0.10473 51.46665...  
3      POLYGON ((-0.19992 51.46981, -0.19992 51.46987...  
4      POLYGON ((-0.19955 51.46940, -0.19955 51.46980...  
...                                                  ...  
83761  POL

In [29]:
#Add City of London
place_name = 'City of London, England, United Kingdom'

residential_buildings_city = ox.features_from_place(place_name, tags={'building': True})
residential_buildings_city = residential_buildings_city[residential_buildings_city.geometry.notnull()]
residential_building_footprints_city = residential_buildings_city[residential_buildings_city.geom_type.isin(['Polygon', 'MultiPolygon'])]

for col in residential_building_footprints_city.columns:
    if residential_building_footprints_city.apply(lambda x: isinstance(x, list)).any():
        residential_building_footprints_city = residential_building_footprints_city[col].apply(lambda x: str(x) if isinstance(x, list) else x)

residential_building_footprints_city = residential_building_footprints_city[['name', 'geometry']].reset_index()

residential_building_footprints_combined = pd.concat([residential_building_footprints, residential_building_footprints_city], ignore_index=True)
        
# Save the combined GeoDataFrame to a geojson file
residential_building_footprints_combined.to_file("data/osm_data/all_london_residential_buildings.geojson", driver="GeoJSON")

### Extract Building Type Info from OSM - Retail

In [None]:
# Download Data for 32 Boroughs (Exc. City of London)
place_name = 'London, United Kingdom'

ox.config(use_cache=True, log_console=True)

retail_buildings = ox.features_from_place(place_name, tags={'building': ['retail']})

retail_buildings = retail_buildings[retail_buildings.geometry.notnull()]
retail_building_footprints = retail_buildings[retail_buildings.geom_type.isin(['Polygon', 'MultiPolygon'])]

for col in retail_building_footprints.columns:
    if retail_building_footprints.apply(lambda x: isinstance(x, list)).any():
        retail_building_footprints[col] = retail_building_footprints[col].apply(lambda x: str(x) if isinstance(x, list) else x)

retail_building_footprints = retail_building_footprints[['name', 'geometry']].reset_index()
print(retail_building_footprints)
        
# Save the combined GeoDataFrame to a geojson file
retail_building_footprints.to_file("data/osm_data/boroughs_retail_buildings.geojson", driver="GeoJSON")


In [None]:
#Add City of London
place_name = 'City of London, England, United Kingdom'

retail_buildings_city = ox.features_from_place(place_name, tags={'building': True})
retail_buildings_city = retail_buildings_city[retail_buildings_city.geometry.notnull()]
retail_building_footprints_city = retail_buildings_city[retail_buildings_city.geom_type.isin(['Polygon', 'MultiPolygon'])]

for col in retail_building_footprints_city.columns:
    if retail_building_footprints_city.apply(lambda x: isinstance(x, list)).any():
        retail_building_footprints_city = retail_building_footprints_city[col].apply(lambda x: str(x) if isinstance(x, list) else x)

retail_building_footprints_city = retail_building_footprints_city[['name', 'geometry']].reset_index()

retail_building_footprints_combined = pd.concat([retail_building_footprints, retail_building_footprints_city], ignore_index=True)
        
# Save the combined GeoDataFrame to a geojson file
retail_building_footprints_combined.to_file("data/osm_data/all_london_retail_buildings.geojson", driver="GeoJSON")