In [2]:
# %pip install OSMPythonTools

In [1]:
# %pip install shapely

In [16]:
%pip install osmnx

Collecting osmnx
  Downloading osmnx-2.0.2-py3-none-any.whl.metadata (4.9 kB)
Downloading osmnx-2.0.2-py3-none-any.whl (99 kB)
Installing collected packages: osmnx
Successfully installed osmnx-2.0.2
Note: you may need to restart the kernel to use updated packages.


In [2]:
import pandas as pd
import geopandas as gpd

import matplotlib.pyplot as plt

import requests
from shapely.geometry import shape
from shapely.geometry import Polygon, LineString, MultiLineString
from shapely.ops import polygonize, unary_union

import osmnx as ox

# from OSMPythonTools.api import Api
# from OSMPythonTools.overpass import Overpass
# from OSMPythonTools.nominatim import Nominatim

# import random
# from shapely.geometry import Point, LineString
# from shapely.ops import unary_union
# from shapely import offset_curve
# from dotenv import load_dotenv
# import requests

%matplotlib inline
pd.set_option('display.max_columns', None)  # Show all columns

In [59]:
uk_boundaries = gpd.read_file("natural_assets_data_raw/uk_boundaries.gpkg")

## Loading protected areas

In [63]:
protected_areas = ox.features_from_place("United Kingdom", tags = {"boundary": "protected_area"})
protected_areas = protected_areas.to_crs(epsg=27700)
protected_areas = protected_areas[protected_areas.geometry.geom_type.isin(['Polygon', 'MultiPolygon'])]
protected_areas = protected_areas[~protected_areas.geometry.duplicated()].reset_index(drop=True)

In [72]:
protected_areas.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 1145 entries, 0 to 1144
Columns: 251 entries, geometry to short_name:en
dtypes: geometry(1), object(250)
memory usage: 2.2+ MB


In [86]:
protected_areas_over100 = protected_areas
protected_areas_over100 = protected_areas_over100[protected_areas_over100["access"] != "no"]
# threshold = 0.95  # 90% NaNs
protected_areas_over100 = protected_areas_over100.loc[:, protected_areas_over100.isna().mean() < threshold]
# protected_areas_over100.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Index: 1131 entries, 0 to 1144
Data columns (total 18 columns):
 #   Column            Non-Null Count  Dtype   
---  ------            --------------  -----   
 0   geometry          1131 non-null   geometry
 1   boundary          1131 non-null   object  
 2   protect_class     369 non-null    object  
 3   name              984 non-null    object  
 4   protection_title  322 non-null    object  
 5   wikidata          323 non-null    object  
 6   wikipedia         265 non-null    object  
 7   operator          209 non-null    object  
 8   alt_name          72 non-null     object  
 9   source            339 non-null    object  
 10  leisure           475 non-null    object  
 11  start_date        72 non-null     object  
 12  website           238 non-null    object  
 13  note              57 non-null     object  
 14  designation       466 non-null    object  
 15  natural           131 non-null    object  
 16  name:gd           74 

In [89]:
protected_areas_over100.head(1)

Unnamed: 0,geometry,boundary,protect_class,name,protection_title,wikidata,wikipedia,operator,alt_name,source,leisure,start_date,website,note,designation,natural,name:gd,type
0,"POLYGON ((257432.785 147408.06, 257422.647 147...",protected_area,5,Exmoor National Park,national_park,Q593627,en:Exmoor,,,,,,https://www.exmoor-nationalpark.gov.uk/,Contains public sector information licensed un...,national_park,,,boundary


In [90]:
protected_areas_over100 = protected_areas_over100[protected_areas_over100.geometry.intersects(uk_boundaries.union_all())]

In [92]:
# fig, ax = plt.subplots(figsize=(20, 20))

# protected_areas_over100.plot(ax=ax, cmap='tab20', alpha=0.6)

# plt.title("Combined Features by Group")
# plt.legend()
# plt.axis('off')
# plt.show()

In [95]:
protected_areas_over100.to_file("natural_assets_data_raw/protected_areas.gpkg", layer='protected_areas', driver="GPKG")

## Loading parks

In [3]:
parks = ox.features_from_place("United Kingdom", tags = {"leisure": "park", "access": "public"})
parks = parks.to_crs(epsg=27700)

parks = parks[parks.geometry.geom_type.isin(['Polygon', 'MultiPolygon'])]
parks = parks[~parks.geometry.duplicated()].reset_index(drop=True)

  multi_poly_proj = utils_geo._consolidate_subdivide_geometry(poly_proj)


In [38]:
parks_over100 = parks[parks.geometry.area > 1_000_000]
parks_over100 = parks_over100[parks_over100["access"] != "private"]
threshold = 0.95  # 90% NaNs
parks_over100 = parks_over100.loc[:, parks_over100.isna().mean() < threshold]
parks_over100.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Index: 218 entries, 9 to 33798
Data columns (total 21 columns):
 #   Column                            Non-Null Count  Dtype   
---  ------                            --------------  -----   
 0   geometry                          218 non-null    geometry
 1   leisure                           218 non-null    object  
 2   name                              213 non-null    object  
 3   access                            21 non-null     object  
 4   addr:city                         15 non-null     object  
 5   addr:postcode                     15 non-null     object  
 6   alt_name                          13 non-null     object  
 7   website                           43 non-null     object  
 8   tourism                           11 non-null     object  
 9   wikidata                          98 non-null     object  
 10  wikipedia                         71 non-null     object  
 11  operator                          67 non-null     obj

In [39]:
parks_over100.head(1) #see useful comments

Unnamed: 0,geometry,leisure,name,access,addr:city,addr:postcode,alt_name,website,tourism,wikidata,wikipedia,operator,operator:type,wheelchair,note,operator:wikidata,source,listed_status,type,boundary,communication:amateur_radio:pota
9,"POLYGON ((455659.735 205492.151, 455662.648 20...",park,Shotover Country Park,,,,,,,Q24677877,,,,,,,,,multipolygon,,


In [41]:
parks_over100.to_file("natural_assets_data_raw/parks_over100ha.gpkg", layer='parks', driver="GPKG")

## Loading nature reserves

In [42]:
nature_reserves = ox.features_from_place("United Kingdom", tags = {"leisure": "nature_reserve"})
nature_reserves = nature_reserves.to_crs(epsg=27700)

nature_reserves = nature_reserves[nature_reserves.geometry.geom_type.isin(['Polygon', 'MultiPolygon'])]
nature_reserves = nature_reserves[~nature_reserves.geometry.duplicated()].reset_index(drop=True)

  multi_poly_proj = utils_geo._consolidate_subdivide_geometry(poly_proj)


In [57]:
nature_reserves_100 = nature_reserves[parks.geometry.area > 1_000_000]
nature_reserves_100 = nature_reserves_100[~nature_reserves_100["access"].isin(["private", "no"])]
threshold = 0.95  # 90% NaNs
nature_reserves_100 = nature_reserves_100.loc[:, nature_reserves_100.isna().mean() < threshold]
nature_reserves_100.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Index: 76 entries, 9 to 3397
Data columns (total 18 columns):
 #   Column            Non-Null Count  Dtype   
---  ------            --------------  -----   
 0   geometry          76 non-null     geometry
 1   leisure           76 non-null     object  
 2   name              69 non-null     object  
 3   operator          26 non-null     object  
 4   operator:type     4 non-null      object  
 5   source            13 non-null     object  
 6   website           19 non-null     object  
 7   access            4 non-null      object  
 8   wikidata          17 non-null     object  
 9   wikipedia         13 non-null     object  
 10  fixme             4 non-null      object  
 11  designation       7 non-null      object  
 12  natural           14 non-null     object  
 13  boundary          20 non-null     object  
 14  protection_title  4 non-null      object  
 15  name:gd           4 non-null      object  
 16  protect_class     6 non

  result = super().__getitem__(key)


In [58]:
nature_reserves_100["access"].value_counts()

access
permissive    3
permit        1
Name: count, dtype: int64

nature_reserves_100 = nature_reserves_100[nature_reserves_100.geometry.intersects(uk_boundaries.union_all())]

In [61]:
nature_reserves_100.to_file("natural_assets_data_raw/nature_reserves_over100ha.gpkg", layer='nature_reserves', driver="GPKG")

In [94]:
# fig, ax = plt.subplots(figsize=(20, 20))

# nature_reserves_100.plot(ax=ax, cmap='tab20', alpha=0.6)

# plt.title("Combined Features by Group")
# plt.legend()
# plt.axis('off')
# plt.show()

## Loading gardens

In [96]:
gardens = ox.features_from_place("United Kingdom", tags = {"leisure": "garden"})
gardens = gardens.to_crs(epsg=27700)
gardens = gardens[gardens.geometry.geom_type.isin(['Polygon', 'MultiPolygon'])]
gardens = gardens[~gardens.geometry.duplicated()].reset_index(drop=True)

  multi_poly_proj = utils_geo._consolidate_subdivide_geometry(poly_proj)


In [99]:
gardens.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 284045 entries, 0 to 284044
Columns: 384 entries, geometry to power
dtypes: geometry(1), object(383)
memory usage: 832.2+ MB


In [101]:
gardens_100 = gardens[gardens.geometry.area > 100_000]
gardens_100 = gardens_100[~gardens_100["access"].isin(["private", "no"])]
threshold = 0.95  # 90% NaNs
gardens_100 = gardens_100.loc[:, gardens_100.isna().mean() < threshold]
# gardens_100.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Index: 119 entries, 1 to 264325
Data columns (total 21 columns):
 #   Column         Non-Null Count  Dtype   
---  ------         --------------  -----   
 0   geometry       119 non-null    geometry
 1   leisure        119 non-null    object  
 2   name           78 non-null     object  
 3   wikidata       43 non-null     object  
 4   wikipedia      26 non-null     object  
 5   opening_hours  9 non-null      object  
 6   garden:type    31 non-null     object  
 7   website        30 non-null     object  
 8   tourism        24 non-null     object  
 9   access         21 non-null     object  
 10  source         17 non-null     object  
 11  email          10 non-null     object  
 12  fee            18 non-null     object  
 13  operator       23 non-null     object  
 14  operator:type  8 non-null      object  
 15  addr:city      13 non-null     object  
 16  addr:postcode  14 non-null     object  
 17  phone          13 non-null   

In [103]:
# fig, ax = plt.subplots(figsize=(20, 20))

# gardens_100.plot(ax=ax, cmap='tab20', alpha=0.6)

# plt.title("Combined Features by Group")
# plt.legend()
# plt.axis('off')
# plt.show()

In [105]:
gardens_100.to_file("natural_assets_data_raw/gardens_over10ha.gpkg", layer='gardens', driver="GPKG")

## Loading beaches

see also: https://www.data.gov.uk/dataset/748b475b-e534-4298-90bf-cca7b244a374/beaches-ccgbc/datafile/ce627212-5fa4-41a7-abf5-747579ca0d15/preview
https://data-housinggovie.opendata.arcgis.com/datasets/housinggovie::blue-flag-beaches/explore?location=52.337208%2C-5.298342%2C6.58
https://www.keepbritaintidy.org/2024-blue-flag-and-seaside-award-winners

In [106]:
beaches = ox.features_from_place("United Kingdom", tags = {"natural": "beach"})
beaches = beaches.to_crs(epsg=27700)
beaches = beaches[beaches.geometry.geom_type.isin(['Polygon', 'MultiPolygon'])]
beaches = beaches[~beaches.geometry.duplicated()].reset_index(drop=True)

  multi_poly_proj = utils_geo._consolidate_subdivide_geometry(poly_proj)


In [111]:
beaches_100 = beaches[beaches.geometry.area > 100_000]
beaches_100 = beaches_100[~beaches_100["access"].isin(["private", "no"])]
threshold = 0.98  # 90% NaNs
beaches_100 = beaches_100.loc[:, beaches_100.isna().mean() < threshold]
# gardens_100.info()

In [120]:
beaches_100.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Index: 679 entries, 0 to 6256
Data columns (total 14 columns):
 #   Column      Non-Null Count  Dtype   
---  ------      --------------  -----   
 0   geometry    679 non-null    geometry
 1   name        315 non-null    object  
 2   natural     679 non-null    object  
 3   surface     450 non-null    object  
 4   lifeguard   30 non-null     object  
 5   supervised  28 non-null     object  
 6   name:en     20 non-null     object  
 7   source      153 non-null    object  
 8   name:cy     21 non-null     object  
 9   wikidata    50 non-null     object  
 10  access      47 non-null     object  
 11  wikipedia   21 non-null     object  
 12  tidal       111 non-null    object  
 13  type        135 non-null    object  
dtypes: geometry(1), object(13)
memory usage: 79.6+ KB


In [129]:
beaches_all = ox.features_from_place("United Kingdom", tags = {"natural": "beach"})
beaches_all = beaches_all.to_crs(epsg=27700)
# beaches = beaches[beaches.geometry.geom_type.isin(['Polygon', 'MultiPolygon'])]
beaches_all = beaches_all[~beaches_all.geometry.duplicated()].reset_index(drop=True)

  multi_poly_proj = utils_geo._consolidate_subdivide_geometry(poly_proj)


In [122]:
beach_awards_2024 = {
    "Blue Flag Award": [
        "Porthcawl Marina", "Rest Bay", "Trecco Bay", "Cefn Sidan", "Aberystwyth South", "Borth",
        "Llangrannog", "Tresaith", "Prestatyn Central", "Broadhaven Central", "Coppet Hall", "Dale",
        "Newgale", "Poppit Sands", "Saundersfoot", "Tenby Castle Beach", "Tenby South", "Whitesands",
        "Tenby North", "Caswell Bay", "Langland Bay", "Port Eynon Bay", "Swansea Marina", "Penarth Marina",
        "Central Beach, Mablethorpe", "Central Beach, Skegness", "Central Beach, Sutton on Sea",
        "Three Shells Beach", "Westcliff Bay", "East Beach, Shoeburyness", "Shoebury Common", "Cromer",
        "Sheringham", "West Runton", "East Runton", "Frinton on Sea", "Brightlingsea", "Dovercourt Bay",
        "Felixstowe", "Southwold", "Whitley Bay South", "Tynemouth Longsands South", "King Edwards Bay",
        "Seaburn Beach", "Roker Beach", "Whitby", "Hornsea", "Withernsea", "Hayling Island Beachlands Central",
        "Saltdean Beach", "Hove Lawns", "Tankerton", "Sheerness", "St Mildred's Bay", "Minnis Bay",
        "Joss Bay", "West Wittering Beach", "Botany Bay", "Blackpool Sands", "Shore Road (Poole)",
        "Sandbanks Peninsula", "Avon Beach", "Highcliffe Beach", "Friars Cliff Beach", "Canford Cliffs",
        "Branksome Chine", "Alum Chine", "Manor Steps", "Durley Chine", "Fisherman's Walk", "Southbourne",
        "Carbis Bay", "Crooklets", "Gyllyngvase", "Porthmeor", "Polzeath", "Porthtowan", "Trevone Bay",
        "Widemouth Bay", "Summerleaze", "Beer", "Exmouth", "Seaton", "Sidmouth Town", "Swanage Central",
        "Dawlish Warren", "Breakwater Beach", "Broadsands", "Meadfoot Beach", "Preston Sands", "Oddicombe",
        "Torre Abbey Sands", "Westward Ho!", "Weymouth Central", "Croyde Bay",
        "Royal Albert Dock and Salthouse Dock Marina", "Rutland Water"
    ],
    "Green Coast Award": [
        "Abereiddy", "Bracelet Bay", "Caerfai Bay", "Cilborth", "Druidston Haven", "Freshwater East",
        "Llanrhystud", "Manorbier", "Mwnt", "Penally", "Penbryn", "Silver Bay", "West Angle Bay",
        "Arklow South", "Ballyhiernan Bay", "Ballymoney", "Cahore South", "Donabate", "Dooey Beach",
        "Falcarragh Beach", "Garnish Beach", "Portmarnock", "Red Strand", "Renvyle", "Seapoint",
        "Silver Strand", "Thallabawn", "The Burrow", "Tyrella Beach (Clough)", "Abereiddy Bay",
        "Manorbier Bay"
    ],
    "Seaside Award": [
        "Aberavon", "Aberystwyth North", "Clarach", "Cold Knap", "Jackson's Bay", "New Quay Harbour",
        "Rhyl Central", "Traeth y Dolau", "Whitmore Bay", "Llantwit Major", "Penarth", "Aberporth",
        "New Quay Traeth Gwyn", "Aberdeen Ballroom Beach", "Balmedie Beach", "Collieston", "Cruden Bay",
        "Fraserburgh Tigerhill", "Fraserburgh Waters of Philorth", "Inverboyndie Beach", "Peterhead Lido",
        "Stonehaven Beach", "Montrose Seafront", "Carnoustie", "Arbroath", "Lunan Bay", "Monifieth",
        "East Haven", "Broughty Ferry", "Belhaven Bay", "Dunbar East", "Longniddry Bents, Gosford",
        "Yellowcraig", "North Berwick Milsey Bay", "North Berwick West Beach", "Longniddry Bents",
        "Gullane Bents", "Aberdour Silver Sands", "Anstruther Billowness", "Burntisland Beach",
        "Crail Roome Bay", "Elie Harbour", "Elie Ruby Bay", "Kingsbarns Beach", "Kirkcaldy Seafield",
        "Leven East", "St Andrews East Sands", "St Andrews West Sands", "Aberdour Black Sands",
        "Kirkcaldy Pathhead Sands", "Kinghorn and Pettycur Bay", "Brora Beach", "Dornoch Beach",
        "Sango Sands", "Nairn Central", "Loch Morlich", "Irvine", "Coldingham", "West Sandwick",
        "Sands of Breckon", "Ayr South", "Troon", "Maidens", "Prestwick", "Girvan", "Barassie Shore"
    ]
}

In [130]:
beaches_all.head()

Unnamed: 0,geometry,name,natural,surface,description,lifeguard,supervised,alt_name,alt_name:en,name:en,name:ga,name:gd,created_by,wheelchair,website,fixme,source,name:cy,wikidata,access,ele,fee,image,nudism,wikipedia,geological,url,tourism,note,operator,loc_name,tidal,dog,highway,place,area,alt_name:gd,designation,name:de,source:name,wetland,alt_name_1,layer,name:kw,water,scuba_diving:entry,sport,was:natural,ref,dog:conditional,opening_hours,operator:wikidata,addr:city,addr:housename,addr:postcode,addr:street,surfboard:rental,foot,access:dog,source:opening_hours,fishing,wikimedia_commons,not:name,alt_name:cy,vehicle,type,horse:conditional,jetski,seamark:type,complete,motor_vehicle,swimming,source:nudist,landuse,is_in,start_date,survey:date,leisure,beach,lifeguard:operator,opening_hours:lifeguard,comment,email,man_made,horse,barrier,ref:GB:uprn,material,source:geometry,CLASSIFICA,seasonal,amenity,ref:EU:bwid,addr:country,owner,waterway
0,POINT (241817.844 335797.788),Morfa Abererch,beach,,,,,,,,,,POI Editor,,,,npe,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,POINT (255809.17 333630.357),Morfa Harlech,beach,,,,,,,,,,POI Editor,,,,npe,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,POINT (587336.336 345862.378),Holkham Naturist Beach,beach,,,,,,,,,,,,,,,,,,,,,yes,,,,attraction,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,POINT (204129.732 84148.465),Tregardock,beach,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,POINT (184624.989 891000.224),Salmon Beach,beach,,,,,,,,,,JOSM,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [131]:
# Add new columns for each award category, default to False
beaches_all["blue_flag"] = beaches_all["name"].isin(beach_awards_2024["Blue Flag Award"])
beaches_all["green_coast"] = beaches_all["name"].isin(beach_awards_2024["Green Coast Award"])
beaches_all["seaside_award"] = beaches_all["name"].isin(beach_awards_2024["Seaside Award"])

In [132]:
beaches_all["blue_flag"].value_counts()

blue_flag
False    6510
True       20
Name: count, dtype: int64

In [133]:
beaches_all["green_coast"].value_counts()

green_coast
False    6528
True        2
Name: count, dtype: int64

In [134]:
beaches_all["seaside_award"].value_counts()

seaside_award
False    6524
True        6
Name: count, dtype: int64

In [None]:
gardens_100.to_file("natural_assets_data_raw/gardens_over10ha.gpkg", layer='gardens', driver="GPKG")

In [116]:
# fig, ax = plt.subplots(figsize=(20, 20))

# beaches_100.plot(ax=ax, facecolor="black", alpha=0.6)

# plt.title("Combined Features by Group")
# plt.legend()
# plt.axis('off')
# plt.show()

In [3]:
natural_values = [
    "wood"
    # , "tree", "tree_row", "scrub", "heath", "grassland", "fell", "bare_rock",
    # "scree", "shingle", "sand", "mud", "beach", "dune", "cliff", "rock", "stone",
    # "water", "wetland", "glacier", "reef", "cave_entrance"
]

features_2 = {}

for val in natural_values:
    tag_2 = {"natural": val}
    gdf = fetch_osm_features("United Kingdom", tag_2, element_types=("way", "relation"))
    if gdf is not None and not gdf.empty:
        features_2[val] = gdf
        print(f"{val}: {len(gdf)} features")
    else:
        print(f"{val}: No features")

NameError: name 'requests' is not defined

## Loading forests

In [None]:
woods = ox.features_from_place("United Kingdom", tags = {"natural": "wood"})
woods = woods.to_crs(epsg=27700)
woods = woods[beaches.geometry.geom_type.isin(['Polygon', 'MultiPolygon'])]
woods = woods[~woods.geometry.duplicated()].reset_index(drop=True)

  multi_poly_proj = utils_geo._consolidate_subdivide_geometry(poly_proj)


In [None]:
tags = {
  "type": "railwayStation"
}

stations = fetch_osm_features("England", tags, element_types=("nodes"))

print(stations.head())
print(f"Total features: {len(stations)}")

# Save or plot
# gdf.to_file("filtered_features.geojson", driver="GeoJSON")
stations.plot()

In [None]:
https://hub.arcgis.com/datasets/RSPB::ibas-uk/about