In [None]:
import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt
from pytorch_lightning import seed_everything
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
import h3
from shapely.ops import transform
from functools import partial
import pyproj

from srai.embedders import Highway2VecEmbedder, Hex2VecEmbedder, GTFS2VecEmbedder, GeoVexEmbedder
from srai.joiners import IntersectionJoiner
from srai.loaders import OSMNetworkType, OSMWayLoader, OSMOnlineLoader, OSMPbfLoader, GTFSLoader
from srai.loaders.osm_loaders.filters import GEOFABRIK_LAYERS, HEX2VEC_FILTER
from srai.neighbourhoods import H3Neighbourhood
from srai.plotting import plot_regions, plot_numeric_data
from srai.regionalizers import H3Regionalizer, geocode_to_region_gdf
from srai.h3 import ring_buffer_h3_regions_gdf

from pathlib import Path
from tqdm import tqdm
import torch
from torchvision import transforms
from torchvision.models import resnet50, ResNet50_Weights
from PIL import Image
import numpy as np
from torch.utils.data import Dataset, DataLoader

In [None]:
import warnings
# Silence every Python warning for the rest of the kernel session to keep cell output compact.
# NOTE(review): this is a blanket filter, so warnings that point at real problems are hidden
# as well; consider narrowing it to specific categories or modules.
warnings.filterwarnings('ignore')

In [None]:
def load_and_prepare_data(
    area_name="South Holland, Netherlands",
    h3_resolution=10,
    shape_path="D:\\tu delft\\Afstuderen\\geometrie-leefbaarometer-3-0\\grid 2020.shp",
    values_path="D:\\tu delft\\Afstuderen\\open-data-leefbaarometer-meting-2022_2023-11-21_1035\\Leefbaarometer-scores grids 2002-2022.csv",
    year=2022,
):
    """
    Loads the study-area hexagons and the Leefbaarometer grid geometry and scores.

    All arguments are optional; calling the function without arguments reproduces
    the original hard-coded behaviour.

    Parameters:
        area_name: Place name passed to `geocode_to_region_gdf`.
        h3_resolution: H3 resolution passed to `H3Regionalizer` (default 10).
        shape_path: Path of the Leefbaarometer grid shapefile.
        values_path: Path of the Leefbaarometer scores CSV.
        year: Only rows whose 'jaar' column equals this value are kept.

    Returns:
        Tuple `(regions_gdf, leefbaarometer_shape_gdf, leefbaarometer_values_df)`:
        the H3 regions and the grid shapes, both in EPSG:4326, and the score rows
        for the requested year.
    """
    # Geocode the study area and tessellate it into H3 hexagons.
    area_gdf = geocode_to_region_gdf(area_name)
    regions_gdf = H3Regionalizer(h3_resolution).transform(area_gdf)

    # Load the Leefbaarometer grid geometry and keep only the requested year's scores.
    leefbaarometer_shape_gdf = gpd.read_file(shape_path)
    leefbaarometer_values_df = pd.read_csv(values_path)
    leefbaarometer_values_df = leefbaarometer_values_df[leefbaarometer_values_df['jaar'] == year]

    # Bring both GeoDataFrames to the same projection (EPSG:4326).
    regions_gdf = regions_gdf.to_crs(epsg=4326)
    leefbaarometer_shape_gdf = leefbaarometer_shape_gdf.to_crs(epsg=4326)

    return regions_gdf, leefbaarometer_shape_gdf, leefbaarometer_values_df

def calculate_intersection_areas(regions_gdf, leefbaarometer_shape_gdf, leefbaarometer_values_df):
    """
    Computes area-weighted Leefbaarometer scores per region.

    Regions and Leefbaarometer grid shapes are projected to EPSG:28992, overlaid,
    and each intersection piece is given its area. The grid scores are attached
    via 'grid_id' and averaged per 'region_id', weighted by intersection area.

    The weighted average of a score only uses the area of pieces that actually
    have a value for that score. Pieces without a score row (NaN after the left
    merge) are skipped by the sum of weighted values, so counting their area in
    the denominator would bias the average toward 0.

    Parameters:
        regions_gdf: GeoDataFrame of regions; after `reset_index()` it must expose
            a 'region_id' column.
        leefbaarometer_shape_gdf: GeoDataFrame of grid shapes with a 'grid_id' column.
        leefbaarometer_values_df: DataFrame with 'grid_id' and the score columns
            'lbm', 'afw', 'fys', 'onv', 'soc', 'vrz', 'won'.

    Returns:
        DataFrame indexed by 'region_id' with 'IntersectionArea' (total overlap
        with the grid) followed by one column per score. Scores that cannot be
        computed (no scored overlap) are 0.
    """
    # Project to EPSG:28992 so that geometry areas are expressed in square meters.
    regions_projected = regions_gdf.to_crs(epsg=28992)
    grid_projected = leefbaarometer_shape_gdf.to_crs(epsg=28992)

    # Cut every region by every grid shape it overlaps.
    intersections = gpd.overlay(regions_projected.reset_index(), grid_projected, how='intersection')
    intersections['IntersectionArea'] = intersections.geometry.area

    # Attach the scores; pieces whose grid cell has no score row keep NaN scores.
    intersections = intersections.merge(leefbaarometer_values_df, on='grid_id', how='left')

    score_columns = ['lbm', 'afw', 'fys', 'onv', 'soc', 'vrz', 'won']
    region_ids = intersections['region_id']
    piece_area = intersections['IntersectionArea']
    piece_scores = intersections[score_columns]

    # Numerator: score * area per piece. Denominator: area of scored pieces only.
    weighted_scores = piece_scores.mul(piece_area, axis=0)
    scored_area = piece_scores.notna().mul(piece_area, axis=0)

    weighted_sum = weighted_scores.groupby(region_ids).sum()
    scored_area_sum = scored_area.groupby(region_ids).sum()

    # 0 / 0 (region without any scored overlap) yields NaN and is filled with 0 below.
    averages = weighted_sum / scored_area_sum

    # Total overlap per region, kept for later filtering on minimum intersection area.
    grouped = intersections.groupby('region_id')[['IntersectionArea']].sum()
    grouped = grouped.join(averages)

    # Fill NaN values with 0
    return grouped.fillna(0)

def merge_data(regions_gdf, weighted_averages):
    """
    Merges the DataFrame with weighted averages into the regions GeoDataFrame.

    The input `regions_gdf` is never modified: when 'region_id' is its index, the
    index is turned into a column on a copy, so the caller's frame keeps its index
    and the cell calling this function can be re-run safely.

    Parameters:
        regions_gdf: Regions with 'region_id' either as the index name or as a column.
        weighted_averages: DataFrame that exposes 'region_id' (as index level or
            column) plus the score columns to attach.

    Returns:
        A new frame indexed by 'region_id' holding the region columns and the
        merged score columns; values missing after the left merge are 0.
    """
    # Expose 'region_id' as a column for merging without touching the caller's frame.
    if regions_gdf.index.name == 'region_id':
        regions_with_id = regions_gdf.reset_index()
    else:
        regions_with_id = regions_gdf

    # Left merge keeps every region, including those without weighted averages.
    merged = regions_with_id.merge(weighted_averages, on='region_id', how='left')

    # Restore 'region_id' as the index and fill NaN values with 0.
    merged = merged.set_index('region_id')
    return merged.fillna(0)

def select_regions_based_on_intersection_area(leefbaarometer_gdf, minimum_intersection_area):
    """
    Keeps the rows whose 'IntersectionArea' is greater than or equal to the threshold.

    Parameters:
        leefbaarometer_gdf: Frame with an 'IntersectionArea' column.
        minimum_intersection_area: Inclusive lower bound on 'IntersectionArea'.

    Returns:
        The subset of `leefbaarometer_gdf` that meets the threshold.
    """
    has_enough_overlap = leefbaarometer_gdf['IntersectionArea'] >= minimum_intersection_area
    return leefbaarometer_gdf[has_enough_overlap]

In [None]:
# Main execution flow
# 1. Load the H3 regions, the Leefbaarometer grid shapes and the filtered score rows.
regions_gdf, leefbaarometer_shape_gdf, leefbaarometer_values_df = load_and_prepare_data()
# 2. Compute, per region, the total intersection area and the area-weighted scores.
weighted_averages = calculate_intersection_areas(regions_gdf, leefbaarometer_shape_gdf, leefbaarometer_values_df)
# 3. Attach those values to every region; regions without any match get 0.
leefbaarometer_gdf = merge_data(regions_gdf, weighted_averages)

In [None]:
# Plot the distribution of the intersection area per hexagon
# (the 'IntersectionArea' column, i.e. how much of each hexagon overlaps Leefbaarometer squares)
plt.figure(figsize=(10, 6))
plt.hist(leefbaarometer_gdf['IntersectionArea'], bins=30, alpha=0.75, edgecolor='black')
plt.title('Histogram of intersection between leefbaarometer squares and H3 hexagons')
plt.xlabel('Intersecting area square meters')
plt.ylabel('Number of Hexagons')
plt.grid(True)
plt.show()

In [None]:
# Select regions with at least 10000 square meters of intersection area
# (the threshold is inclusive; see select_regions_based_on_intersection_area)
selected_regions_gdf = select_regions_based_on_intersection_area(leefbaarometer_gdf, 10000)
# Report (rows, columns) of the selection as a quick sanity check.
print(selected_regions_gdf.shape)

In [None]:
# Buffer selected regions and merge with weighted averages
# NOTE(review): the second argument (15) is presumably the buffer distance in H3 rings
# around the selected regions -- confirm against the srai documentation.
selected_regions_buffered_gdf_nomerge = ring_buffer_h3_regions_gdf(selected_regions_gdf, 15)
# The buffered frame is merged with the weighted averages again so that it carries the
# score columns; regions without a match in weighted_averages are filled with 0 by merge_data.
selected_regions_buffered_gdf = merge_data(selected_regions_buffered_gdf_nomerge, weighted_averages)

In [None]:
# Map the 'afw' score over the buffered selection (selected regions plus their ring buffer).
plot_numeric_data(selected_regions_buffered_gdf, 'afw', colormap='coolwarm')

In [None]:
# Map the 'afw' score over the selected regions only (no buffer), for comparison.
plot_numeric_data(selected_regions_gdf, 'afw', colormap='coolwarm')

In [None]:
# Export the buffered and the unbuffered selections to GeoJSON files in the working directory
selected_regions_buffered_gdf.to_file("selected_regions_buffered_10.geojson", driver='GeoJSON')
selected_regions_gdf.to_file("selected_regions_10.geojson", driver='GeoJSON')
# NOTE(review): the two disabled lines below use older variable names (selected_hexagons*)
# and absolute local paths; they look superseded by the exports above -- confirm and remove.
# # selected_hexagons.to_file(r"D:\tu delft\Afstuderen\flat contrastive loss\selected_hexagons.geojson", driver='GeoJSON')
# #selected_hexagons_buffered.to_file(r"D:\tu delft\Afstuderen\flat contrastive loss\selected_hexagons_buffered.geojson", driver='GeoJSON')