In [2]:
# %pip install OSMPythonTools

In [1]:
# %pip install shapely

In [16]:
%pip install osmnx

Collecting osmnx
  Downloading osmnx-2.0.2-py3-none-any.whl.metadata (4.9 kB)
Downloading osmnx-2.0.2-py3-none-any.whl (99 kB)
Installing collected packages: osmnx
Successfully installed osmnx-2.0.2
Note: you may need to restart the kernel to use updated packages.


In [2]:
import pandas as pd
import geopandas as gpd

import matplotlib.pyplot as plt

import requests
from shapely.geometry import shape
from shapely.geometry import Polygon, LineString, MultiLineString
from shapely.ops import polygonize, unary_union

import osmnx as ox

# from OSMPythonTools.api import Api
# from OSMPythonTools.overpass import Overpass
# from OSMPythonTools.nominatim import Nominatim

# import random
# from shapely.geometry import Point, LineString
# from shapely.ops import unary_union
# from shapely import offset_curve
# from dotenv import load_dotenv
# import requests

%matplotlib inline
pd.set_option('display.max_columns', None)  # Show all columns

In [59]:
uk_boundaries = gpd.read_file("natural_assets_data_raw/uk_boundaries.gpkg")

## Loading protected areas

In [63]:
protected_areas = ox.features_from_place("United Kingdom", tags = {"boundary": "protected_area"})
protected_areas = protected_areas.to_crs(epsg=27700)
protected_areas = protected_areas[protected_areas.geometry.geom_type.isin(['Polygon', 'MultiPolygon'])]
protected_areas = protected_areas[~protected_areas.geometry.duplicated()].reset_index(drop=True)

In [72]:
protected_areas.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 1145 entries, 0 to 1144
Columns: 251 entries, geometry to short_name:en
dtypes: geometry(1), object(250)
memory usage: 2.2+ MB


In [86]:
protected_areas_over100 = protected_areas
protected_areas_over100 = protected_areas_over100[protected_areas_over100["access"] != "no"]
# threshold = 0.95  # 90% NaNs
protected_areas_over100 = protected_areas_over100.loc[:, protected_areas_over100.isna().mean() < threshold]
# protected_areas_over100.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Index: 1131 entries, 0 to 1144
Data columns (total 18 columns):
 #   Column            Non-Null Count  Dtype   
---  ------            --------------  -----   
 0   geometry          1131 non-null   geometry
 1   boundary          1131 non-null   object  
 2   protect_class     369 non-null    object  
 3   name              984 non-null    object  
 4   protection_title  322 non-null    object  
 5   wikidata          323 non-null    object  
 6   wikipedia         265 non-null    object  
 7   operator          209 non-null    object  
 8   alt_name          72 non-null     object  
 9   source            339 non-null    object  
 10  leisure           475 non-null    object  
 11  start_date        72 non-null     object  
 12  website           238 non-null    object  
 13  note              57 non-null     object  
 14  designation       466 non-null    object  
 15  natural           131 non-null    object  
 16  name:gd           74 

In [89]:
protected_areas_over100.head(1)

Unnamed: 0,geometry,boundary,protect_class,name,protection_title,wikidata,wikipedia,operator,alt_name,source,leisure,start_date,website,note,designation,natural,name:gd,type
0,"POLYGON ((257432.785 147408.06, 257422.647 147...",protected_area,5,Exmoor National Park,national_park,Q593627,en:Exmoor,,,,,,https://www.exmoor-nationalpark.gov.uk/,Contains public sector information licensed un...,national_park,,,boundary


In [90]:
protected_areas_over100 = protected_areas_over100[protected_areas_over100.geometry.intersects(uk_boundaries.union_all())]

In [92]:
# fig, ax = plt.subplots(figsize=(20, 20))

# protected_areas_over100.plot(ax=ax, cmap='tab20', alpha=0.6)

# plt.title("Combined Features by Group")
# plt.legend()
# plt.axis('off')
# plt.show()

In [95]:
protected_areas_over100.to_file("natural_assets_data_raw/protected_areas.gpkg", layer='protected_areas', driver="GPKG")

## Loading parks

In [3]:
parks = ox.features_from_place("United Kingdom", tags = {"leisure": "park", "access": "public"})
parks = parks.to_crs(epsg=27700)

parks = parks[parks.geometry.geom_type.isin(['Polygon', 'MultiPolygon'])]
parks = parks[~parks.geometry.duplicated()].reset_index(drop=True)

  multi_poly_proj = utils_geo._consolidate_subdivide_geometry(poly_proj)


In [38]:
parks_over100 = parks[parks.geometry.area > 1_000_000]
parks_over100 = parks_over100[parks_over100["access"] != "private"]
threshold = 0.95  # 90% NaNs
parks_over100 = parks_over100.loc[:, parks_over100.isna().mean() < threshold]
parks_over100.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Index: 218 entries, 9 to 33798
Data columns (total 21 columns):
 #   Column                            Non-Null Count  Dtype   
---  ------                            --------------  -----   
 0   geometry                          218 non-null    geometry
 1   leisure                           218 non-null    object  
 2   name                              213 non-null    object  
 3   access                            21 non-null     object  
 4   addr:city                         15 non-null     object  
 5   addr:postcode                     15 non-null     object  
 6   alt_name                          13 non-null     object  
 7   website                           43 non-null     object  
 8   tourism                           11 non-null     object  
 9   wikidata                          98 non-null     object  
 10  wikipedia                         71 non-null     object  
 11  operator                          67 non-null     obj

In [39]:
parks_over100.head(1) #see useful comments

Unnamed: 0,geometry,leisure,name,access,addr:city,addr:postcode,alt_name,website,tourism,wikidata,wikipedia,operator,operator:type,wheelchair,note,operator:wikidata,source,listed_status,type,boundary,communication:amateur_radio:pota
9,"POLYGON ((455659.735 205492.151, 455662.648 20...",park,Shotover Country Park,,,,,,,Q24677877,,,,,,,,,multipolygon,,


In [41]:
parks_over100.to_file("natural_assets_data_raw/parks_over100ha.gpkg", layer='parks', driver="GPKG")

## Loading nature reserves

In [42]:
nature_reserves = ox.features_from_place("United Kingdom", tags = {"leisure": "nature_reserve"})
nature_reserves = nature_reserves.to_crs(epsg=27700)

nature_reserves = nature_reserves[nature_reserves.geometry.geom_type.isin(['Polygon', 'MultiPolygon'])]
nature_reserves = nature_reserves[~nature_reserves.geometry.duplicated()].reset_index(drop=True)

  multi_poly_proj = utils_geo._consolidate_subdivide_geometry(poly_proj)


In [57]:
nature_reserves_100 = nature_reserves[parks.geometry.area > 1_000_000]
nature_reserves_100 = nature_reserves_100[~nature_reserves_100["access"].isin(["private", "no"])]
threshold = 0.95  # 90% NaNs
nature_reserves_100 = nature_reserves_100.loc[:, nature_reserves_100.isna().mean() < threshold]
nature_reserves_100.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Index: 76 entries, 9 to 3397
Data columns (total 18 columns):
 #   Column            Non-Null Count  Dtype   
---  ------            --------------  -----   
 0   geometry          76 non-null     geometry
 1   leisure           76 non-null     object  
 2   name              69 non-null     object  
 3   operator          26 non-null     object  
 4   operator:type     4 non-null      object  
 5   source            13 non-null     object  
 6   website           19 non-null     object  
 7   access            4 non-null      object  
 8   wikidata          17 non-null     object  
 9   wikipedia         13 non-null     object  
 10  fixme             4 non-null      object  
 11  designation       7 non-null      object  
 12  natural           14 non-null     object  
 13  boundary          20 non-null     object  
 14  protection_title  4 non-null      object  
 15  name:gd           4 non-null      object  
 16  protect_class     6 non

  result = super().__getitem__(key)


In [58]:
nature_reserves_100["access"].value_counts()

access
permissive    3
permit        1
Name: count, dtype: int64

nature_reserves_100 = nature_reserves_100[nature_reserves_100.geometry.intersects(uk_boundaries.union_all())]

In [61]:
nature_reserves_100.to_file("natural_assets_data_raw/nature_reserves_over100ha.gpkg", layer='nature_reserves', driver="GPKG")

In [94]:
# fig, ax = plt.subplots(figsize=(20, 20))

# nature_reserves_100.plot(ax=ax, cmap='tab20', alpha=0.6)

# plt.title("Combined Features by Group")
# plt.legend()
# plt.axis('off')
# plt.show()

## Loading gardens

In [96]:
gardens = ox.features_from_place("United Kingdom", tags = {"leisure": "garden"})
gardens = gardens.to_crs(epsg=27700)
gardens = gardens[gardens.geometry.geom_type.isin(['Polygon', 'MultiPolygon'])]
gardens = gardens[~gardens.geometry.duplicated()].reset_index(drop=True)

  multi_poly_proj = utils_geo._consolidate_subdivide_geometry(poly_proj)


In [99]:
gardens.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 284045 entries, 0 to 284044
Columns: 384 entries, geometry to power
dtypes: geometry(1), object(383)
memory usage: 832.2+ MB


In [101]:
gardens_100 = gardens[gardens.geometry.area > 100_000]
gardens_100 = gardens_100[~gardens_100["access"].isin(["private", "no"])]
threshold = 0.95  # 90% NaNs
gardens_100 = gardens_100.loc[:, gardens_100.isna().mean() < threshold]
# gardens_100.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Index: 119 entries, 1 to 264325
Data columns (total 21 columns):
 #   Column         Non-Null Count  Dtype   
---  ------         --------------  -----   
 0   geometry       119 non-null    geometry
 1   leisure        119 non-null    object  
 2   name           78 non-null     object  
 3   wikidata       43 non-null     object  
 4   wikipedia      26 non-null     object  
 5   opening_hours  9 non-null      object  
 6   garden:type    31 non-null     object  
 7   website        30 non-null     object  
 8   tourism        24 non-null     object  
 9   access         21 non-null     object  
 10  source         17 non-null     object  
 11  email          10 non-null     object  
 12  fee            18 non-null     object  
 13  operator       23 non-null     object  
 14  operator:type  8 non-null      object  
 15  addr:city      13 non-null     object  
 16  addr:postcode  14 non-null     object  
 17  phone          13 non-null   

In [103]:
# fig, ax = plt.subplots(figsize=(20, 20))

# gardens_100.plot(ax=ax, cmap='tab20', alpha=0.6)

# plt.title("Combined Features by Group")
# plt.legend()
# plt.axis('off')
# plt.show()

In [105]:
gardens_100.to_file("natural_assets_data_raw/gardens_over10ha.gpkg", layer='gardens', driver="GPKG")

## Loading beaches

In [None]:
beaches = ox.features_from_place("United Kingdom", tags = {"natural": "beach"})
beaches = beaches.to_crs(epsg=27700)
beaches = beaches[beaches.geometry.geom_type.isin(['Polygon', 'MultiPolygon'])]
beaches = beaches[~beaches.geometry.duplicated()].reset_index(drop=True)

In [3]:
natural_values = [
    "wood"
    # , "tree", "tree_row", "scrub", "heath", "grassland", "fell", "bare_rock",
    # "scree", "shingle", "sand", "mud", "beach", "dune", "cliff", "rock", "stone",
    # "water", "wetland", "glacier", "reef", "cave_entrance"
]

features_2 = {}

for val in natural_values:
    tag_2 = {"natural": val}
    gdf = fetch_osm_features("United Kingdom", tag_2, element_types=("way", "relation"))
    if gdf is not None and not gdf.empty:
        features_2[val] = gdf
        print(f"{val}: {len(gdf)} features")
    else:
        print(f"{val}: No features")

NameError: name 'requests' is not defined

In [None]:
tags = {
  "type": "railwayStation"
}

stations = fetch_osm_features("England", tags, element_types=("nodes"))

print(stations.head())
print(f"Total features: {len(stations)}")

# Save or plot
# gdf.to_file("filtered_features.geojson", driver="GeoJSON")
stations.plot()