## Geo Boundaries Loader for Regions and ICBs

This script loads geospatial boundary data for NHS Regions and Integrated Care Boards (ICBs) into the `geo_boundaries` table in the PostGIS-enabled PostgreSQL database.

### How It Works:
- **User Input**: Specify the paths to both the GeoJSON files and the corresponding CSV mapping files for regions and ICBs.
- **Preprocessing**:
  - Reads each GeoJSON and left-joins it to its respective NHS-to-ONS code mapping CSV on the ONS code, attaching the NHS code to every boundary.
  - Normalizes and renames columns to match the table schema (`ons_code`, `nhs_code`, `name`, `geo_level`, `geometry`).
- **Combining**: Concatenates region and ICB GeoDataFrames into one unified structure.
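- **Database Load**: Appends all geometries to the `geo_boundaries` table using `to_postgis()` with `if_exists="append"`, ensuring geometry type and metadata are preserved. Because rows are appended, re-running the script without clearing the table first will add the same boundaries again (or fail if the table enforces uniqueness).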

This script ensures geographic consistency and enables accurate visual mapping and spatial analysis across NHS organisational boundaries.


In [None]:
import geopandas as gpd
import geopandas as gpd
import pandas as pd
from sqlalchemy import create_engine
from sqlalchemy.exc import SQLAlchemyError

In [None]:
# === DB CONNECTION ===
# NOTE(review): <password> is a placeholder to fill in locally; avoid
# committing real credentials to the notebook.
engine = create_engine("postgresql://postgres:<password>@localhost:5432/nhs_dashboard")

# === FILE PATHS ===
REGION_GEOJSON = "../data/geoboundaries/NHS_England_Regions.geojson"
REGION_MAPPING_CSV = "../data/geoboundaries/NHS_England_(Region)_(2024)_Names_and_Codes_in_England.csv"
ICB_GEOJSON = "../data/geoboundaries/Integrated_Care_Boards.geojson"
ICB_MAPPING_CSV = "../data/geoboundaries/Integrated_Care_Boards_(April_2023)_Names_and_Codes_in_England.csv"

# Columns (and their order) written to the geo_boundaries table.
_BOUNDARY_COLUMNS = ['ons_code', 'nhs_code', 'name', 'geo_level', 'geometry']


def _load_boundaries(geojson_path, mapping_csv, ons_col, nhs_col, name_col, geo_level):
    """Read one boundary layer and attach its NHS codes.

    Args:
        geojson_path: Path to the GeoJSON file holding the geometries.
        mapping_csv: Path to the CSV that maps ONS codes to NHS codes.
        ons_col: Source column holding the ONS code (present in both files).
        nhs_col: Source column in the CSV holding the NHS code.
        name_col: Source column in the GeoJSON holding the boundary name.
        geo_level: Label stored in the ``geo_level`` column (e.g. 'Region').

    Returns:
        A GeoDataFrame restricted to ``_BOUNDARY_COLUMNS``.

    Raises:
        pandas.errors.MergeError: If an ONS code appears more than once in
            the mapping CSV, which would otherwise duplicate boundary rows.
    """
    boundaries = gpd.read_file(geojson_path).rename(columns={
        ons_col: 'ons_code',
        name_col: 'name',
    })
    code_map = pd.read_csv(mapping_csv).rename(columns={
        ons_col: 'ons_code',
        nhs_col: 'nhs_code',
    })

    # Left join keeps every geometry; validate guards against a mapping file
    # with repeated ONS codes silently multiplying boundary rows.
    boundaries = boundaries.merge(
        code_map[['ons_code', 'nhs_code']],
        on='ons_code',
        how='left',
        validate='many_to_one',
    )

    # Surface boundaries that found no partner in the mapping instead of
    # loading null NHS codes without any notice.
    unmatched = int(boundaries['nhs_code'].isna().sum())
    if unmatched:
        print(f"Warning: {unmatched} {geo_level} boundaries have no NHS code in {mapping_csv}")

    boundaries['geo_level'] = geo_level
    return boundaries[_BOUNDARY_COLUMNS]


try:
    # === REGIONS ===
    gdf_region = _load_boundaries(
        REGION_GEOJSON,
        REGION_MAPPING_CSV,
        ons_col='NHSER24CD',
        nhs_col='NHSER24CDH',
        name_col='NHSER24NM',
        geo_level='Region',
    )

    # === ICBs ===
    gdf_icb = _load_boundaries(
        ICB_GEOJSON,
        ICB_MAPPING_CSV,
        ons_col='ICB23CD',
        nhs_col='ICB23CDH',
        name_col='ICB23NM',
        geo_level='ICB',
    )

    # === COMBINE AND INSERT ===
    final_gdf = pd.concat([gdf_region, gdf_icb], ignore_index=True)
    # Rows are appended: re-running this cell adds the same boundaries again
    # unless the table is cleared first.
    final_gdf.to_postgis("geo_boundaries", con=engine, if_exists="append", index=False)

    print(f"Loaded {len(final_gdf)} rows into geo_boundaries")

# SQLAlchemyError is a subclass of Exception, so one handler covers both
# file/merge failures and database failures.
except Exception as e:
    print("Error occurred during geo boundaries processing:", str(e))
