# In [None]:
"""
London Borough Boundaries Processing Script

This script processes the London borough boundary shapefile (available from the London gov site, see the data source below),
extracts the central London borough boundaries, creates visualisations showing which boundaries have been processed,
and calculates area statistics. It prepares the data for further green space analysis using satellite imagery.

Data source:
London borough boundaries: https://data.london.gov.uk/dataset/statistical-gis-boundary-files-london

"""

import os
import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker

# --- Configuration ---------------------------------------------------------
# All paths start with "../", so they resolve relative to the working directory.

# Folders that receive the figures/tables and the processed boundary files
results_folder = "../Results/London_Borough"
output_folder = "../Data/Processed/London_Borough"

# Input shapefile holding the London borough boundaries
shapefile_path = (
    "../Data/Raw/London_Borough/"
    "statistical-gis-boundaries-london/ESRI/"
    "London_Borough_Excluding_MHW.shp"
)

# Names of the boroughs to keep for the analysis
central_boroughs = [
    "City of London",
    "Westminster",
    "Camden",
    "Islington",
    "Southwark",
    "Kensington and Chelsea",
    "Lambeth",
    "Tower Hamlets",
    "Hackney",
    "Wandsworth",
]

# Make sure both destination folders exist; exist_ok avoids an error on re-runs
for _folder in (output_folder, results_folder):
    os.makedirs(_folder, exist_ok=True)

# Load every borough boundary from the shapefile
boroughs = gpd.read_file(shapefile_path)

# Keep only the rows whose name appears in the central-borough list
borough_col = 'NAME'  # attribute column expected to hold the borough names
is_central = boroughs[borough_col].isin(central_boroughs)
central_boroughs_df = boroughs.loc[is_central].copy()

# Report which boroughs were matched
found_boroughs = central_boroughs_df[borough_col].to_list()
print(f"Found {len(found_boroughs)} central London boroughs")
print(f"Boroughs included: {found_boroughs}")

# Warn (without stopping) about any requested borough that has no matching row
missing_boroughs = set(central_boroughs) - set(found_boroughs)
if missing_boroughs:
    print(f"WARNING: Could not find these boroughs: {missing_boroughs}")

# Check for invalid geometries and fix them.
# The check and the repair are vectorised over the geometry column instead of
# writing single shapely objects through .loc row by row: a scalar multi-part
# geometry can be mistaken for a sequence by pandas on older Shapely versions.
# Missing geometries are skipped, since there is nothing to repair.
geoms = central_boroughs_df.geometry
invalid_mask = geoms.notna() & ~geoms.is_valid

# Names of the boroughs whose geometry needs repairing (empty list if none)
invalid_geoms = central_boroughs_df.loc[invalid_mask, borough_col].tolist()

if invalid_geoms:
    # A zero-width buffer rebuilds the polygon, the usual repair for
    # self-intersecting or self-touching rings
    central_boroughs_df.loc[invalid_mask, 'geometry'] = geoms[invalid_mask].buffer(0)
    print(f"Fixed invalid geometries for: {invalid_geoms}")

# Export the individual (undissolved) borough boundaries as GeoJSON

# Destination files: one in the source CRS, one reprojected
individual_boroughs_path = os.path.join(output_folder, "central_london_boroughs.geojson")
individual_boroughs_utm_path = os.path.join(output_folder, "central_london_boroughs_utm.geojson")

# Version in the CRS the data was loaded with
central_boroughs_df.to_file(individual_boroughs_path, driver="GeoJSON")
print(f"Saved individual borough boundaries to {individual_boroughs_path}")

# Version in EPSG:32630 (UTM zone 30N), meant for direct use with Sentinel data
central_boroughs_utm = central_boroughs_df.to_crs("EPSG:32630")
central_boroughs_utm.to_file(individual_boroughs_utm_path, driver="GeoJSON")
print(f"Saved UTM version of individual borough boundaries to {individual_boroughs_utm_path}")

# Build and export one merged outline covering all selected boroughs

# dissolve() without a grouping key merges every row into a single geometry
dissolved_boundary = central_boroughs_df.dissolve()

# Destination files: one in the source CRS, one reprojected
dissolved_path = os.path.join(output_folder, "central_london_dissolved.geojson")
dissolved_utm_path = os.path.join(output_folder, "central_london_utm.geojson")

# Version in the CRS the data was loaded with
dissolved_boundary.to_file(dissolved_path, driver="GeoJSON")
print(f"Saved dissolved boundary to {dissolved_path}")

# Version in EPSG:32630 (UTM zone 30N)
dissolved_boundary_utm = dissolved_boundary.to_crs("EPSG:32630")
dissolved_boundary_utm.to_file(dissolved_utm_path, driver="GeoJSON")
print(f"Saved UTM version of dissolved boundary to {dissolved_utm_path}")

# Sanity check: report whether every borough geometry and the merged outline are valid
boroughs_all_valid = central_boroughs_df.geometry.is_valid.all()
dissolved_all_valid = dissolved_boundary.geometry.is_valid.all()
print(f"All individual borough geometries valid: {boroughs_all_valid}")
print(f"Dissolved boundary geometry valid: {dissolved_all_valid}")

# Map of the selected boroughs, coloured by name, drawn over the outline of all boroughs
fig, ax = plt.subplots(figsize=(12, 10))
boroughs.boundary.plot(ax=ax, linewidth=0.5, color='gray')
central_boroughs_df.plot(ax=ax, column=borough_col, alpha=0.7, edgecolor='black', legend=True)

# Title and axis labels
ax.set_title('Central London Boroughs', fontsize=14, pad=10)
ax.set_xlabel('Easting (m)', fontsize=12, labelpad=15)
ax.set_ylabel('Northing (m)', fontsize=12, labelpad=10)

# Tick labels with thousands separators and no decimals; one formatter per axis
for axis in (ax.xaxis, ax.yaxis):
    axis.set_major_formatter(ticker.StrMethodFormatter('{x:,.0f}'))
ax.tick_params(axis='both', which='major', labelsize=10)

# Write the figure to the results folder
boroughs_png_path = os.path.join(results_folder, "central_london_boroughs.png")
fig.savefig(boroughs_png_path, dpi=300, bbox_inches='tight')
print(f"Saved borough visualisation to {results_folder}/central_london_boroughs.png")

# Map of the merged central London outline, drawn over the outline of all boroughs
fig, ax = plt.subplots(figsize=(12, 10))
boroughs.boundary.plot(ax=ax, linewidth=0.5, color='gray')
dissolved_boundary.plot(ax=ax, color='red', edgecolor='black')

# Title and axis labels
ax.set_title('Dissolved Central London Boundary', fontsize=14, pad=10)
ax.set_xlabel('Easting (m)', fontsize=12, labelpad=15)
ax.set_ylabel('Northing (m)', fontsize=12, labelpad=10)

# Tick labels with thousands separators and no decimals; one formatter per axis
for axis in (ax.xaxis, ax.yaxis):
    axis.set_major_formatter(ticker.StrMethodFormatter('{x:,.0f}'))
ax.tick_params(axis='both', which='major', labelsize=10)

# Write the figure to the results folder
dissolved_png_path = os.path.join(results_folder, "dissolved_boundary.png")
fig.savefig(dissolved_png_path, dpi=300, bbox_inches='tight')
print(f"Saved dissolved boundary visualisation to {results_folder}/dissolved_boundary.png")

# Calculate the area of each borough and save the result as a table

# Reproject to EPSG:27700 (British National Grid); the areas are then
# converted from sq m to sq km. The copy keeps the new column out of
# central_boroughs_df.
borough_areas = central_boroughs_df.to_crs('EPSG:27700').copy()
borough_areas['area_sqkm'] = borough_areas.area / 1_000_000  # sq m -> sq km

# Take the total from the unrounded areas so that per-row rounding errors
# do not accumulate in the TOTAL line; it is rounded once, below.
total_area = borough_areas['area_sqkm'].sum()

# Two-column table, largest borough first, areas rounded to 2 decimals
area_table = borough_areas[[borough_col, 'area_sqkm']].copy()
area_table.columns = ['Borough', 'Area (sq km)']
area_table = area_table.sort_values('Area (sq km)', ascending=False)
area_table['Area (sq km)'] = area_table['Area (sq km)'].round(2)

# Append the total as the last row
total_row = pd.DataFrame({'Borough': ['TOTAL'], 'Area (sq km)': [round(total_area, 2)]})
area_table = pd.concat([area_table, total_row], ignore_index=True)

# Display the table
print("\nAreas of Central London Boroughs:")
print(area_table.to_string(index=False))

# Save the table to a CSV file
table_path = os.path.join(results_folder, "central_london_areas.csv")
area_table.to_csv(table_path, index=False)
print(f"Saved borough areas table to: {table_path}")

# Closing summary: the two UTM files produced for the NDVI analysis
summary_lines = (
    "\nProcessing complete",
    "Files generated for use in NDVI analysis:",
    f"1. Individual borough boundaries (UTM): {individual_boroughs_utm_path}",
    f"2. Dissolved London boundary (UTM): {dissolved_utm_path}",
)
print(*summary_lines, sep="\n")
