## Dissertation Data Collection
### Gavin Rolls

Migrated from OvertureTest.ipynb, these are the queries I used to save OSM and Overture data locally for Greater London. Because it is easier to load the saved data from my desktop than to re-run these queries every time I need it, I'm keeping them here for reference.

### Imports + Database Configuration

In [11]:
#Library Imports - using DuckDB for Overture Import

#Database
import duckdb

#Basics
import pandas as pd
import geopandas as gpd

#OpenStreetMap
import osmnx as ox

In [10]:
#Config SQL
# Install the packages that back the %sql / %%sql magics into the kernel's
# environment (%pip targets the running kernel). Versions are not pinned here.
# NOTE(review): the import cell above already does `import duckdb`, so on a
# brand-new environment this install has to run before that cell can succeed.
%pip install ipython-sql duckdb duckdb-engine jupysql --quiet
# Upgrade grpcio as well; the reason is not recorded in this notebook.
%pip install --upgrade grpcio --quiet
# Register the `sql` extension so the %sql line magic and %%sql cell magic exist.
%load_ext sql

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


In [11]:
# SqlMagic options for the rest of the notebook.
# autopandas: hand query results back as pandas DataFrames.
%config SqlMagic.autopandas = True
# feedback / displaycon: switched off to keep cell output quiet
# (presumably the status messages and the connection string -- confirm against the SqlMagic docs).
%config SqlMagic.feedback = False
%config SqlMagic.displaycon = False
# Open the connection used by every later %%sql cell: an in-memory DuckDB database.
%sql duckdb:///:memory:

In [12]:
%%sql      
-- One-off DuckDB session setup for the Overture downloads below.
-- httpfs is installed and loaded first (the places query reads an s3 path).
INSTALL httpfs;

LOAD httpfs;

-- spatial is installed and loaded next (the queries use ST_GeomFromWKB and FORMAT GDAL).
INSTALL spatial;

LOAD spatial;

-- Region used for s3 reads. The bucket name in the places query ends in us-west-2.
SET s3_region='us-west-2';

Unnamed: 0,Success


### Download Buildings - Overture

Downloaded as 'data/overture_data/london_buildings_overture.geojson'

In [16]:
#Get London Bounding Box

# Nominatim is not imported in the library-import cell, so bring it into scope
# here; without this the cell raises NameError on a fresh kernel.
from geopy.geocoders import Nominatim

# Initialize the geolocator
geolocator = Nominatim(user_agent="geoapi")

# Get location data
location = geolocator.geocode("London")
if location is None:
    # geocode() yields None when nothing matches; fail with a clear message
    # instead of an AttributeError on `location.raw` below.
    raise ValueError("Nominatim returned no result for 'London'")

# Get the bounding box
# Read below as [min_lat, max_lat, min_lon, max_lon]; each entry is passed
# through float() before use.
bounding_box = location.raw['boundingbox']

# Convert bounding box to coordinates
min_lat, max_lat = float(bounding_box[0]), float(bounding_box[1])
min_lon, max_lon = float(bounding_box[2]), float(bounding_box[3])

# Printed in the order min_lon, min_lat, max_lon, max_lat. These are the
# values hard-coded into the Overture SQL filters further down.
print(min_lon)
print(min_lat)
print(max_lon)
print(max_lat)

-0.5103751
51.2867601
0.3340155
51.6918741


In [None]:
%%sql

-- Export named Overture buildings that lie entirely inside the London
-- bounding box to a local GeoJSON file.

-- Install before loading, matching the httpfs and spatial setup earlier in the
-- notebook. INSTALL does nothing if the extension is already present, whereas
-- LOAD on its own fails when azure has never been installed on this machine.
INSTALL azure;
LOAD azure;

-- Connection string for the Overture storage account. AccountKey is left blank.
SET azure_storage_connection_string = 'DefaultEndpointsProtocol=https;AccountName=overturemapswestus2;AccountKey=;EndpointSuffix=core.windows.net';
COPY (
SELECT
    names.primary as primary_name,
    height,
    level,
    ST_GeomFromWKB(geometry) as geometry
FROM read_parquet('azure://release/2024-05-16-beta.0/theme=buildings/type=building/*', filename=true, hive_partitioning=1)
-- Keep only buildings that have a primary name.
WHERE primary_name IS NOT NULL
-- Strict containment: every edge of the feature bbox must fall inside the
-- London bounding box printed by the Nominatim cell above.
AND bbox.xmin > -0.5103751
AND bbox.xmax < 0.3340155
AND bbox.ymin > 51.2867601
AND bbox.ymax < 51.6918741
) TO 'data/overture_data/london_buildings_overture.geojson'
WITH (FORMAT GDAL, DRIVER 'GeoJSON', SRS 'EPSG:4326');


### Download POIs - Overture

Downloaded as 'data/overture_data/london_places_overture.geojson'

In [None]:
%%sql

-- Export Overture places whose bbox minimum corner falls inside the London
-- bounding box (bounds inclusive) to a local GeoJSON file.
COPY (
    SELECT
        names.primary            AS name,
        categories.main          AS category,
        ROUND(confidence, 2)     AS confidence,
        ST_GeomFromWKB(geometry) AS geometry
    FROM read_parquet('s3://overturemaps-us-west-2/release/2024-05-16-beta.0/theme=places/*/*')
    WHERE bbox.xmin >= -0.5103751
      AND bbox.xmin <= 0.3340155
      AND bbox.ymin >= 51.2867601
      AND bbox.ymin <= 51.6918741
)
TO 'data/overture_data/london_places_overture.geojson'
WITH (FORMAT GDAL, DRIVER 'GeoJSON', SRS 'EPSG:4326');

### Overture Download Stats

In [18]:
# Quick sanity check on the two saved Overture extracts for London
buildings = gpd.read_file('data/overture_data/london_buildings_overture.geojson')
places = gpd.read_file('data/overture_data/london_places_overture.geojson')

# Number of features (rows) in each extract
print(f"London Building Count: {len(buildings)}")
print(f"London POI count: {len(places)}")

London Building Count: 61364
London POI count: 343712


### Download Buildings - OSM

Downloaded as 'data/osmbuildings/building_footprints.geojson'

In [None]:
#Add City of London
# Fetch OSM building footprints for the City of London, append them to the
# existing `building_footprints` frame and save the combined set as GeoJSON.
place_name = 'City of London, England, United Kingdom'

buildings_city = ox.features_from_place(place_name, tags={'building': True})
buildings_city = buildings_city[buildings_city.geometry.notnull()]

# Keep polygonal features only. `.copy()` gives an independent frame so the
# column assignments below do not write into a slice of `buildings_city`.
building_footprints_city = buildings_city[
    buildings_city.geom_type.isin(['Polygon', 'MultiPolygon'])
].copy()

# Stringify list-valued cells so they can be written to GeoJSON.
# The check must be made per column: DataFrame.apply hands the lambda a whole
# column, so the previous frame-level test was always False and nothing was
# ever converted. The result is also assigned back to the column, not to the
# frame, which would have replaced the whole table with one Series.
for col in building_footprints_city.columns:
    column_values = building_footprints_city[col]
    if column_values.apply(lambda x: isinstance(x, list)).any():
        building_footprints_city[col] = column_values.apply(
            lambda x: str(x) if isinstance(x, list) else x
        )

# Keep only name + geometry; reset_index() moves the index into ordinary columns.
building_footprints_city = building_footprints_city[['name', 'geometry']].reset_index()

# NOTE(review): `building_footprints` is not defined in any cell visible in this
# notebook -- it presumably comes from an earlier OSM download cell. Confirm it
# exists before running this cell on a fresh kernel.
building_footprints_combined = pd.concat([building_footprints, building_footprints_city], ignore_index=True)

# Save the combined GeoDataFrame to a geojson file
building_footprints_combined.to_file("data/osm_data/all_london_buildings.geojson", driver="GeoJSON")