In [1]:
import osmnx as ox
import pandas as pd
import geopandas as gpd
import os

In [2]:
columns_to_keep = ['geometry', 'name', 'id', 'description', 'area_type', 'category']

## Adapt the boundaries and file name to the desired environment to retrieve data from

Make sure you created a folder

In [3]:
boundaries = [
    'Alphen aan den Rijn, Zuid-Holland, Netherlands',
    'Waddinxveen, Zuid-Holland, Netherlands',
    'Boskoop, Zuid-Holland, Netherlands'
]

# boundaries = ['Breda, Noord-Brabant, Netherlands']

# boundaries = ['Borsele, Zeeland, Netherlands']

In [4]:
city = 'alphen-waddinxveen'
# city = 'breda'
# city = 'borsele'

In [5]:
file_name = f'{city}.geojson'

In [6]:
output_path = "output/" + city

os.makedirs(output_path, exist_ok=True)

In [7]:
save_separate_files = False

# Mapping

In [8]:
def determine_area_type_from_row(row):
    # Transportation
    if "highway" in row:
        if row["highway"] in ["motorway", "trunk", "primary"]:
            return "Motorways and major roads"
        elif row["highway"] in ["secondary", "tertiary", "residential"]:
            return "Regional roads"
        elif row["highway"] in ["track", "unclassified", "service"]:
            return "Tracks and rural access roads"
        elif row["highway"] in ["footway", "cycleway", "path", "bridleway" ,"pedestrian"]:
            return "Pedestrian and cycling paths"
        elif row["highway"] in ["living_street"]:
            return "Living and residential streets"

    if "railway" in row and row["railway"] in ["rail", "light_rail"]:
        return "Railways"

    # Power-related
    if "power" in row and row["power"] in ["line", "tower"]:
        return "Power lines"
    if "power" in row and row["power"] == "plant":
        return "Power plants"

    # Man-made infrastructure
    if "man_made" in row:
        val = row["man_made"]
        if val == "waste_water_plant":
            return "Power plants"
        elif val in ["communications_tower", "mast"]:
            return "Communication towers"
        elif val in ["water_tower", "silo", "chimney", "tank", "windmill", "wind_turbine"]:
            return "High infrastructures"

    # Land use types
    if "landuse" in row:
        val = row["landuse"]
        if val == "industrial":
            return "Industrial zones"
        elif val == "commercial":
            return "Commercial zones"
        elif val == "retail":
            return "Retail zones"
        elif val == "residential":
            return "Residential areas"
        elif val == "recreation_ground":
            return "Recreational zones"
        elif val in ["farmland", "orchard"]:
            return "Agricultural lands"

    # Nature areas
    if "landuse" in row:
        val = row["landuse"]
        if val in ["forest", "wood"]:
            return "Forests and woodlands"
        elif val in ["meadow", "scrub", "grassland", "grass"]:
            return "Meadows and open grass"

    if "natural" in row and row["natural"] == "wetland":
        return "Wetlands"

    if "wetland" in row and row["wetland"] in ["marsh", "bog", "fen"]:
        return "Wetlands"

    # Water areas
    if "waterway" in row and row["waterway"] in ["river", "stream", "canal"]:
        return "Rivers, canals and streams"

    if "water" in row and row["water"] in ["lake", "reservoir", "pond"]:
        return "Lakes and ponds"

    if "water" in row and row["water"] in ["reservoir"]:
        return "Reservoirs"

    # Public facilities
    if "amenity" in row:
        if row["amenity"] in ["school", "kindergarten", "university"]:
            return "Schools and universities"
        if row["amenity"] == "hospital":
            return "Hospitals"
        if row["amenity"] == "prison":
            return "Prisons"
        if row["amenity"] == "place_of_worship":
            return "Religious sites"
        if "historic" in row:
            return "Cultural sites"
        if "tourism" in row and row["tourism"] == "attraction":
            return "Cultural sites"
        if "landuse" in row and row["landuse"] == "cemetery":
            return "Cemeteries"
        if "leisure" in row and row["leisure"] == "park":
            return "Parks"

    # Airports and aviation
    if "aeroway" in row:
        val = row["aeroway"]
        if val in ["aerodrome", "airstrip", "heliport", 'apron']:
            return "Airports and airfields"
        elif val == "helipad":
            return "Helipads"
        elif val in ["runway", "aprons"]:
            return "Runways and aprons"
        elif val in ["terminal", "gate"]:
            return "Passenger terminals"
        elif val in ["hangar"]:
            return "Hangars and support buildings"

    if "man_made" in row and row["man_made"] == "control_tower":
        return "Hangars and support buildings"


    return "Unknown"



## Transportation infrastructure

In [9]:
transportation_tags = {
    "highway": [
        "motorway", "trunk", "primary",         # Motorways and major roads
        "secondary", "tertiary", "residential", # Regional/local roads
        "living_street",                        # Living streets    
        "track", "unclassified", "service",     # Tracks and rural access roads
        "footway", "cycleway", "path", "bridleway", "pedestrian"  # Pedestrian/cycling paths
    ],
    "railway": [
        "rail", "light_rail"                    # Railways
    ]
}

In [10]:
# Get data from OSM
transportation_data = ox.features_from_place(boundaries, tags=transportation_tags)

In [11]:
transportation_data.reset_index(inplace=True)

In [12]:
transportation_data["area_type"] = transportation_data.apply(determine_area_type_from_row, axis=1)


In [13]:
transportation_data['category'] = "Transportation"

In [14]:
for column in columns_to_keep:
    if column not in transportation_data.columns:
        transportation_data[column] = None

transportation_data = transportation_data[columns_to_keep]

In [15]:
transportation_data['area_type'].unique()

array(['Pedestrian and cycling paths', 'Tracks and rural access roads',
       'Regional roads', 'Living and residential streets',
       'Motorways and major roads', 'Railways'], dtype=object)

In [16]:
if save_separate_files:
    transportation_data.to_file(f'/Users/cmartens/Documents/Thesis/model/OSM_data/output/{folder_name}/transportation_{file_name}')

## Energy/Industrial

In [17]:
energy_industrial_tags = {
    "power": ["line", "plant", "tower"],
    "man_made": [
        "waste_water_plant", 
        "communications_tower", 
        "mast", 
        "windmill", 
        "wind_turbine", 
        "water_tower", 
        "silo", 
        "chimney", 
        "tank"
    ]
}

In [18]:
industrial_data = ox.features_from_place(boundaries, tags=energy_industrial_tags)

In [19]:
industrial_data.reset_index(inplace=True)

In [20]:
industrial_data['category'] = "Energy/Industrial"

In [21]:
industrial_data["area_type"] = industrial_data.apply(determine_area_type_from_row, axis=1)


In [22]:
industrial_data = industrial_data[columns_to_keep]

In [23]:
if save_separate_files:
    industrial_data.to_file(f'/Users/cmartens/Documents/Thesis/model/OSM_data/output/{folder_name}/energy_industrial_{file_name}')

## Land use

In [24]:
land_use_tags = {
    "landuse": [
        "industrial",
        "commercial",
        "retail",
        "residential",
        "recreation_ground",
        "farmland",
        "orchard"
    ]
}

In [25]:
land_use_data = ox.features_from_place(boundaries, tags=land_use_tags)

In [26]:
land_use_data.reset_index(inplace=True)

In [27]:
land_use_data['category'] = "Land use"

In [28]:
land_use_data["area_type"] = land_use_data.apply(determine_area_type_from_row, axis=1)

In [29]:
for column in columns_to_keep:
    if column not in land_use_data.columns:
        land_use_data[column] = None

land_use_data = land_use_data[columns_to_keep]

In [30]:
land_use_data['area_type'].unique()

array(['Agricultural lands', 'Residential areas', 'Industrial zones',
       'Power plants', 'Recreational zones', 'Retail zones',
       'Commercial zones'], dtype=object)

In [31]:
if save_separate_files:
    land_use_data.to_file(f'/Users/cmartens/Documents/Thesis/model/OSM_data/output/{folder_name}/land_use_{file_name}')

## Nature

In [32]:
nature_tags = {
    "landuse": ["forest", "wood", "scrub", "meadow", "grass", "grassland"],
    "natural": ["wetland"],
    "wetland": ["marsh", "bog", "fen"]
}

In [33]:
nature_data = ox.features_from_place(boundaries, tags=nature_tags)


In [34]:
nature_data.reset_index(inplace=True)

In [35]:
nature_data["area_type"] = nature_data.apply(determine_area_type_from_row, axis=1)

In [36]:
nature_data['category'] = "Nature"

In [37]:
for column in columns_to_keep:
    if column not in nature_data.columns:
        nature_data[column] = None

nature_data = nature_data[columns_to_keep]

In [38]:
if save_separate_files:
    nature_data.to_file(f'/Users/cmartens/Documents/Thesis/model/OSM_data/output/{folder_name}/nature_{file_name}')

## Water

In [39]:
water_tags = {
    "waterway": ["river", "stream", "canal"],
    "water": ["lake", "reservoir", "pond", "bog", "fen"] 
}

In [40]:
water_data = ox.features_from_place(boundaries, tags=water_tags)

In [41]:
water_data.reset_index(inplace=True)

In [42]:
water_data["area_type"] = water_data.apply(determine_area_type_from_row, axis=1)


In [43]:
water_data['category'] = "Water"

In [44]:
for column in columns_to_keep:
    if column not in water_data.columns:
        water_data[column] = None

water_data = water_data[columns_to_keep]

In [45]:
if save_separate_files:
    water_data.to_file(f'/Users/cmartens/Documents/Thesis/model/OSM_data/output/{folder_name}/water_{file_name}')

## Public facility

In [46]:
public_facility_tags = {
    "amenity": [
        "school",
        "kindergarten",
        "university",
        "hospital",
        "prison",
        "place_of_worship"],
    "historic": ["*"], 
    "tourism": ["attraction"],
    "landuse": ["cemetery"],
    "leisure": ["park"]
}

In [47]:
public_data = ox.features_from_place(boundaries, tags=public_facility_tags)

In [48]:
public_data.reset_index(inplace=True)

In [49]:
public_data['category'] = 'Public facility'

In [50]:
public_data["area_type"] = public_data.apply(determine_area_type_from_row, axis=1)

In [51]:
for column in columns_to_keep:
    if column not in public_data.columns:
        public_data[column] = None

public_data = public_data[columns_to_keep]

In [52]:
if save_separate_files:
    public_data.to_file(f'/Users/cmartens/Documents/Thesis/model/OSM_data/output/{folder_name}/public_facilities_{file_name}')

## Airspace/Aviation

In [53]:
aviation_tags = {
    "aeroway": [
        "aerodrome",       # Airports and airfields
        "airstrip",        # Basic landing strips
        "heliport",        # Dedicated helicopter facilities
        "helipad",         # Individual helicopter landing pads
        "runway",          # Aircraft runways
        "taxiway",         # Taxi paths for aircraft
        "apron",           # Aircraft parking areas
        "terminal",        # Passenger terminals
        "gate",            # Boarding gates
        "hangar",          # Aircraft storage buildings
        "control_tower",   # Air traffic control towers
        "navigationaid",   # Navigation aids like beacons
        "launchpad",       # Rocket launch facilities
        "spaceport",       # Spaceflight launch sites
        "highway_strip"    # Roadways used as emergency runways
    ],
    "man_made": [
        "control_tower"    # Air traffic control towers
    ]
}


In [54]:
aviation_data = ox.features_from_place(boundaries, tags=aviation_tags)

InsufficientResponseError: No matching features. Check query location, tags, and log.

In [55]:
aviation_data.reset_index(inplace=True)

NameError: name 'aviation_data' is not defined

In [56]:
aviation_data

NameError: name 'aviation_data' is not defined

In [57]:
aviation_data["area_type"] = aviation_data.apply(determine_area_type_from_row, axis=1)

NameError: name 'aviation_data' is not defined

In [58]:
aviation_data

NameError: name 'aviation_data' is not defined

In [59]:
aviation_data['category'] = "Airports/Aviation"

NameError: name 'aviation_data' is not defined

In [60]:
for column in columns_to_keep:
    if column not in aviation_data.columns:
        aviation_data[column] = None

aviation_data = aviation_data[columns_to_keep]


NameError: name 'aviation_data' is not defined

In [61]:
aviation_data

NameError: name 'aviation_data' is not defined

In [62]:
if save_separate_files:
    aviation_data.to_file(f'/Users/cmartens/Documents/Thesis/model/OSM_data/output/{folder_name}/aviation_{file_name}')

# Postnl

In [63]:
post_tags = {"amenity": ["post_office", "post_box", "post_depot"]}

In [64]:
post_data = ox.features_from_place(boundaries, tags=post_tags)

In [65]:
post_data.reset_index(inplace=True)

In [66]:
for column in columns_to_keep:
    if column not in post_data.columns:
        post_data[column] = None

post_data = post_data[columns_to_keep]

In [67]:
post_data['area_type'] = 'postnl point'
post_data['category'] = 'PostNL'

In [68]:
post_data

Unnamed: 0,geometry,name,id,description,area_type,category
0,POINT (4.64633 52.04288),,302330426,,postnl point,PostNL
1,POINT (4.63595 52.03715),PostNL,302330621,,postnl point,PostNL
2,POINT (4.66225 52.04385),,303648200,,postnl point,PostNL
3,POINT (4.65306 52.03063),,311524912,,postnl point,PostNL
4,POINT (4.6482 52.06488),PostNL Pakketpunt,348127872,,postnl point,PostNL
...,...,...,...,...,...,...
76,POINT (4.65087 52.12642),,7105101668,,postnl point,PostNL
77,POINT (4.64873 52.13501),,7220656837,,postnl point,PostNL
78,POINT (4.65486 52.0234),,8901678928,,postnl point,PostNL
79,POINT (4.5786 52.09552),,11333642753,,postnl point,PostNL


In [69]:
if save_separate_files:
    post_data.to_file(f'/Users/cmartens/Documents/Thesis/model/OSM_data/output/{folder_name}/post_{file_name}')

# Concat

In [70]:
categories = [
    transportation_data,
    industrial_data,
    land_use_data,
    nature_data,
    water_data,
    public_data,
    #aviation_data,
    post_data
]

# Filter out None
categories = [cat for cat in categories if cat is not None]

# Concat if there's anything to concat
if categories:
    df = pd.concat(categories, ignore_index=True)
else:
    print("No data to combine.")


In [71]:
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    print(df['area_type'].value_counts())

area_type
Pedestrian and cycling paths      10112
Tracks and rural access roads      6581
Meadows and open grass             6356
Regional roads                     4069
Forests and woodlands              1832
Living and residential streets      930
Motorways and major roads           728
Agricultural lands                  456
Rivers, canals and streams          291
Communication towers                208
Railways                            152
Residential areas                    98
Schools and universities             97
postnl point                         81
Cultural sites                       73
Religious sites                      63
Industrial zones                     54
Wetlands                             51
Power lines                          40
High infrastructures                 33
Retail zones                         13
Lakes and ponds                      12
Recreational zones                    6
Power plants                          5
Commercial zones              

In [72]:
gdf = gpd.GeoDataFrame(df, crs="EPSG:4326")

In [73]:
# if the whole row is dubplicate, drop it, print how many were dropped
print(f"Number of duplicates before dropping: {len(gdf)}")

gdf = gdf.drop_duplicates(subset=['geometry', 'name', 'id', 'description'])
print(f"Number of duplicates after dropping: {len(gdf)}")


Number of duplicates before dropping: 32348
Number of duplicates after dropping: 32346


In [75]:
gdf.to_file(f'/Users/cmartens/Documents/thesis_cf_martens/1.get_osm_data/{output_path}/osm_data_{city}.geojson', driver='GeoJSON')