In [None]:
import geopandas as gpd
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import numpy as np
import os
from tqdm import tqdm
from networkx import Graph
from shapely.geometry import Point
from collections import deque
from sklearn.decomposition import PCA
from srai.neighbourhoods import H3Neighbourhood
import random
import concurrent.futures
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
import osmnx as ox
import networkx as nx
from scipy.spatial import KDTree

# Learning strategy 2: Step 3 - Training Region Embeddings with Circle Loss and distance-based adjacency graph

This script performs the third step of learning strategy 2, focusing on training region embeddings using a neural network (nn.Embedding) and Circle Loss. The process incorporates the following key steps:

1. **Data Loading and Preprocessing**:
    - Loads geospatial data for specified regions using GeoPandas.
    - Utilizes H3 hexagonal indices for spatial context with the SRAI library.

2. **Adjacency Graph Creation**:
    - Creates an adjacency graph considering neighbors up to a specified distance.
    - Computes the edges region by region; a `NUM_WORKERS` setting is reserved for parallel processing, but the current implementation runs sequentially.
    - Supports Euclidean distance and location-based accessibility for adjacency calculations.

3. **Training Neural Network**:
    - Initializes region embeddings using pre-trained embeddings from aerial images and POIs.
    - Utilizes nn.Embedding for the neural network.
    - Employs Circle Loss for training, focusing on minimizing the distances between anchor-positive pairs while maximizing the distances between anchor-negative pairs.
    - Samples triplets using a weighted graph-based random walk strategy.

4. **Embedding and Graph Export**:
    - Saves the final trained embeddings as a CSV file and the adjacency matrix as a Parquet file in the specified checkpoint directory.

This approach ensures scalable and efficient processing for urban region embedding tasks, leveraging both spatial context and parallel computation for enhanced performance.


In [None]:
# Constants
# Everything a reader might tune lives here; later cells only read these names.
RESOLUTION = 10  # region-grid resolution, also used in input/checkpoint file names (presumably the H3 resolution — confirm)
EMBEDDING_DIM = 200  # width of each region embedding vector
NUM_EPOCHS = 100  # full passes over the dataset during training
BATCH_SIZE = 256  # triplets per optimisation step
LEARNING_RATE = 5e-5  # Adam learning rate
ADJACENCY_DISTANCE = 5  # neighbourhood radius passed to get_neighbours_up_to_distance
NUM_WORKERS = 16  # only forwarded by the Euclidean branch of the script cell
DISTANCE_MEASURE = "Location_based_accessibility"  # or "Euclidean"
WALK_LENGTH = 10  # maximum steps per random walk when sampling positives
NUM_ANTS = 1  # number of random walks started from each anchor region
CHECKPOINT_DIR = f'checkpoints_urban2vec_res{RESOLUTION}_dim{EMBEDDING_DIM}'  # output/cache directory
PRETRAINED_EMBEDDING_FILE = os.path.join(CHECKPOINT_DIR, "step2_region_embeddings.csv")  # embeddings used to initialise the model
CUTOFF_TIME = 60 * 60  # 60 minutes in seconds; Dijkstra search cutoff on edge 'travel_time'

In [None]:
# CircleLoss definition
class CircleLoss(nn.Module):
    """Triplet-style Circle Loss computed on row-wise dot products.

    Args:
        m: relaxation margin; it sets both the optima (``1 + m`` / ``-m``) and
            the decision margins (``1 - m`` / ``m``).
        gamma: scale factor applied to both logits.

    ``forward`` takes three ``(batch, dim)`` tensors and returns the scalar
    mean loss over the batch.

    NOTE(review): the similarities are raw dot products; nothing here
    normalises the inputs. Circle Loss is usually formulated on cosine
    similarity — confirm whether the embeddings should be L2-normalised.
    """

    def __init__(self, m=0.25, gamma=256):
        super().__init__()
        self.m = m
        self.gamma = gamma
        self.soft_plus = nn.Softplus()

    def forward(self, anchor, positive, negative):
        # Row-wise similarity of the anchor with its positive / negative.
        sim_pos = (anchor * positive).sum(dim=1)
        sim_neg = (anchor * negative).sum(dim=1)

        # Adaptive weights are detached so they scale the gradient without
        # receiving one themselves.
        weight_pos = (-sim_pos.detach() + 1 + self.m).clamp(min=0.)
        weight_neg = (sim_neg.detach() + self.m).clamp(min=0.)

        margin_pos = 1 - self.m
        margin_neg = self.m

        pos_logit = -weight_pos * (sim_pos - margin_pos) * self.gamma
        neg_logit = weight_neg * (sim_neg - margin_neg) * self.gamma

        per_triplet = self.soft_plus(neg_logit + pos_logit)
        return per_triplet.mean()

In [None]:
# Dataset definition
class Urban2VecDataset(Dataset):
    """Triplet dataset that samples positives by weighted random walks.

    Each item is ``(anchor_idx, positive_idx, negative_idx)``, three integer
    positions into ``self.regions``:

    * the anchor is the region at position ``idx``;
    * the positive is drawn uniformly from the regions visited by
      ``num_ants`` random walks of at most ``walk_length`` steps that start at
      the anchor (the anchor itself when no other region was visited);
    * the negative is drawn uniformly from all regions that are neither the
      anchor nor one of the visited regions.

    Args:
        regions_gdf: frame whose index holds the region identifiers.
        adjacency_graph: graph-like object supporting ``node in graph``,
            ``graph.neighbors(node)`` and ``graph[u][v]['weight']``.
        num_ants: number of walks started per anchor.
        walk_length: maximum number of steps per walk.

    Raises:
        IndexError: from ``__getitem__`` when no region is left to serve as a
            negative.
    """

    # Cheap uniform draws tried before building the explicit candidate list.
    _MAX_NEGATIVE_DRAWS = 32

    def __init__(self, regions_gdf, adjacency_graph, num_ants=NUM_ANTS, walk_length=WALK_LENGTH):
        self.regions = list(regions_gdf.index)
        self.region_to_idx = {r: i for i, r in enumerate(self.regions)}
        self.adjacency_graph = adjacency_graph
        self.num_ants = num_ants
        self.walk_length = walk_length

    def __len__(self):
        return len(self.regions)

    def _walk_positive_regions(self, anchor_region):
        """Return the known regions (other than the anchor) visited by the walks."""
        graph = self.adjacency_graph
        visited = set()
        for _ in range(self.num_ants):
            current_region = anchor_region
            for _ in range(self.walk_length):
                if current_region not in graph:
                    break
                neighbors = list(graph.neighbors(current_region))
                if not neighbors:
                    break
                weights = np.array([graph[current_region][n]['weight'] for n in neighbors], dtype=float)
                # NOTE(review): weights are inverted, so low-weight edges are
                # the most likely steps. That suits distance weights; for
                # accessibility scores (higher = closer) it favours the least
                # accessible neighbour — confirm the intent.
                weights = 1 / weights
                weights /= weights.sum()
                # Draw a position rather than a label so the region keeps its
                # original Python type instead of becoming a numpy scalar.
                next_region = neighbors[np.random.choice(len(neighbors), p=weights)]
                # A walk may return to the anchor or pass through a graph node
                # that is not one of this dataset's regions; neither is a
                # usable positive.
                if next_region != anchor_region and next_region in self.region_to_idx:
                    visited.add(next_region)
                current_region = next_region
        return visited

    def _sample_negative_region(self, anchor_region, positive_regions):
        """Draw uniformly from the regions that are neither anchor nor positive."""
        # Rejection sampling is uniform over the valid candidates and avoids
        # materialising a list of every region for each sample.
        for _ in range(self._MAX_NEGATIVE_DRAWS):
            candidate = random.choice(self.regions)
            if candidate != anchor_region and candidate not in positive_regions:
                return candidate

        # Almost everything is excluded: enumerate what is left.
        candidates = [r for r in self.regions if r != anchor_region and r not in positive_regions]
        if not candidates:
            raise IndexError(f"No negative region available for anchor {anchor_region}")
        return random.choice(candidates)

    def __getitem__(self, idx):
        anchor_region = self.regions[idx]

        positive_regions = self._walk_positive_regions(anchor_region)
        if positive_regions:
            positive_region = random.choice(list(positive_regions))
        else:
            # Isolated anchor: fall back to the anchor itself.
            positive_region = anchor_region

        negative_region = self._sample_negative_region(anchor_region, positive_regions)

        anchor_idx = self.region_to_idx[anchor_region]
        positive_idx = self.region_to_idx[positive_region]
        negative_idx = self.region_to_idx[negative_region]

        return anchor_idx, positive_idx, negative_idx

In [None]:
# Custom collate function
def custom_collate(batch):
    """Turn a list of ``(anchor, positive, negative)`` index triplets into
    three ``torch.long`` tensors, one per role."""
    anchors, positives, negatives = zip(*batch)
    return tuple(
        torch.tensor(indices, dtype=torch.long)
        for indices in (anchors, positives, negatives)
    )

In [None]:
# Urban2VecModel definition
class Urban2VecModel(nn.Module):
    """Lookup table holding one trainable vector per region.

    Args:
        num_regions: number of rows in the embedding table.
        embedding_dim: length of each embedding vector.
    """

    def __init__(self, num_regions, embedding_dim):
        super().__init__()
        self.region_embedding = nn.Embedding(num_regions, embedding_dim)

    def forward(self, region_indices):
        """Return the embedding rows selected by ``region_indices``."""
        return self.region_embedding(region_indices)

In [None]:
# Load data
def load_data():
    """Read ``selected_regions_{RESOLUTION}.geojson`` from the working
    directory and return it indexed by its ``region_id`` column."""
    print("Loading data...")
    regions_path = f"selected_regions_{RESOLUTION}.geojson"
    regions = gpd.read_file(regions_path)
    return regions.set_index("region_id")

def load_and_prepare_driving_graph(place='south holland, Netherlands', default_speed_kph=50):
    """Download the drivable street network and add a ``travel_time`` edge attribute.

    Args:
        place: place name passed to ``ox.graph_from_place``.
        default_speed_kph: speed used for edges without a usable speed.

    Returns:
        The projected driving graph; every edge carries ``travel_time`` in
        seconds (``length`` divided by the speed in metres per second).
    """
    print("Loading and preparing driving graph...")
    G_drive = ox.graph_from_place(place, network_type='drive')
    G_drive = ox.project_graph(G_drive)

    # Nothing above attaches `speed_kph`, so without this call every edge
    # would silently use the default speed. add_edge_speeds derives it from
    # the OSM `maxspeed` tags and imputes missing values.
    # NOTE(review): confirm the call signature against the installed OSMnx version.
    G_drive = ox.add_edge_speeds(G_drive, fallback=default_speed_kph)

    for _, _, _, data in G_drive.edges(data=True, keys=True):
        speed_kph = data.get('speed_kph', default_speed_kph)
        # Written as `not (x > 0)` so NaN is rejected as well as zero/negative.
        if not (speed_kph > 0):
            speed_kph = default_speed_kph
        speed_mps = speed_kph / 3.6
        data['travel_time'] = data['length'] / speed_mps

    return G_drive

def create_nodes_kdtree(nodes_gdf):
    """Build a KDTree over the ``(x, y)`` coordinates of ``nodes_gdf['geometry']``."""
    geometry = nodes_gdf['geometry']
    coordinate_pairs = [(px, py) for px, py in zip(geometry.x, geometry.y)]
    return KDTree(np.array(coordinate_pairs))

def map_centroids_to_network_nodes(regions_gdf, G):
    """Map every region to the graph node nearest to its centroid.

    Args:
        regions_gdf: GeoDataFrame of regions, indexed by region id.
        G: street network graph whose nodes carry ``x``/``y`` coordinates.

    Returns:
        ``(centroid_node_mapping, unmapped_regions)``: a dict from region id
        to node id, and a list of the region ids that could not be mapped.
    """
    print("Mapping centroids to network nodes...")
    nodes, _ = ox.graph_to_gdfs(G)

    # The node coordinates are in the graph's CRS (the driving graph is
    # projected), so the centroids must be expressed in that CRS too;
    # otherwise the nearest-neighbour search compares unrelated coordinates.
    region_geometries = regions_gdf.geometry
    graph_crs = G.graph.get('crs')
    if graph_crs is not None and regions_gdf.crs is not None:
        region_geometries = region_geometries.to_crs(graph_crs)
    centroids = region_geometries.centroid

    # Up to 4 nearest candidates, but never more than the graph has nodes.
    n_candidates = min(4, len(nodes))
    if n_candidates and len(centroids):
        nodes_tree = KDTree(nodes[['x', 'y']])
        centroid_xy = np.column_stack((centroids.x, centroids.y))
        _, candidate_indices = nodes_tree.query(centroid_xy, k=n_candidates)
        # query() drops the last axis when k == 1; restore (n_regions, k).
        candidate_indices = np.asarray(candidate_indices).reshape(len(centroid_xy), -1)
    else:
        candidate_indices = np.empty((len(centroids), 0), dtype=int)

    centroid_node_mapping = {}
    unmapped_regions = []
    node_ids = nodes.index

    for idx, region_candidates in zip(regions_gdf.index, candidate_indices):
        for index in region_candidates:
            # KDTree reports a missing neighbour with an out-of-range index.
            if index >= len(node_ids):
                continue
            nearest_node = node_ids[index]
            if nearest_node in G.nodes:
                centroid_node_mapping[idx] = nearest_node
                break
        else:
            unmapped_regions.append(idx)
            print(f"Warning: Could not map region {idx} to any nearby node in the graph.")

    print(f"Successfully mapped {len(centroid_node_mapping)} out of {len(regions_gdf)} regions.")
    print(f"Number of unmapped regions: {len(unmapped_regions)}")

    return centroid_node_mapping, unmapped_regions

def calculate_region_density(
    regions_gdf,
    building_density_path=r"D:\tu delft\Afstuderen\Phase 4 online triplet loss\Rudifun_PV28_Zuid_Holland\Rudifun_Bruto_Buurt_PV28.shp",
    density_column='FSI_22',
):
    """Compute an area-weighted average building density per region.

    Args:
        regions_gdf: GeoDataFrame of regions with an index named ``region_id``.
        building_density_path: vector file holding the density polygons. The
            default is the original author's local path; pass your own.
        density_column: column of the density file that holds the value to
            average.

    Returns:
        Series named ``average_density`` indexed by ``region_id``. It holds
        the density of the intersecting polygons, weighted by intersection
        area. Regions without any intersection are absent, and undefined
        averages are 0.
    """
    print("Calculating region densities...")

    # Load building density data
    building_density_gdf = gpd.read_file(building_density_path)

    # Project both GeoDataFrames to EPSG:28992 for accurate area calculation
    # in square meters (directly, without an intermediate reprojection).
    regions_gdf_projected = regions_gdf.to_crs(epsg=28992)
    building_density_gdf_projected = building_density_gdf.to_crs(epsg=28992)

    # Perform spatial intersection on projected GeoDataFrames
    intersections = gpd.overlay(regions_gdf_projected.reset_index(), building_density_gdf_projected, how='intersection')

    # Intersection area in square meters and the density weighted by it
    intersections['IntersectionArea'] = intersections.geometry.area
    intersections['WeightedDensity'] = intersections[density_column] * intersections['IntersectionArea']

    # Aggregate weighted density and total intersection area by region
    grouped = intersections.groupby('region_id').agg({'WeightedDensity': 'sum', 'IntersectionArea': 'sum'})

    # Weighted average; an undefined ratio (0 / 0) becomes 0
    average_density = (grouped['WeightedDensity'] / grouped['IntersectionArea']).fillna(0)
    return average_density.rename('average_density')

def calculate_euclidean_distance(origin, destination, regions_gdf):
    """Return the distance between the centroids of two regions.

    Args:
        origin: index label of the first region in ``regions_gdf``.
        destination: index label of the second region.
        regions_gdf: frame whose rows expose ``.geometry.centroid``.

    The value is in the units of the coordinate system ``regions_gdf`` uses.
    """
    # `.centroid` is already a point geometry, so it is used directly, the
    # same way calculate_euclidean_edges does.
    origin_centroid = regions_gdf.loc[origin].geometry.centroid
    destination_centroid = regions_gdf.loc[destination].geometry.centroid
    return origin_centroid.distance(destination_centroid)

def calculate_location_based_accessibility(origin, destination, travel_time, density, decay_parameter=0.001):
    """Return ``exp(-decay_parameter * travel_time) * density[destination]``.

    Args:
        origin: origin region id (not used in the calculation).
        destination: region id whose density is looked up.
        travel_time: travel time from origin to destination.
        density: mapping from region id to density.
        decay_parameter: exponential decay rate applied to ``travel_time``.

    Returns:
        The accessibility value, or 0 when ``destination`` has no density.

    NOTE(review): a second function with this name is defined further down in
    the same cell and replaces this one once the cell has run. The
    missing-density behaviour here is kept in line with it; keep only one.
    """
    try:
        destination_density = density[destination]
    except KeyError:
        print(f"Warning: Density data not found for region {destination}")
        return 0
    return np.exp(-decay_parameter * travel_time) * destination_density

def calculate_adjacency_for_region(region, regions_gdf, h3_neighbourhood, distance, distance_measure, G_drive=None, centroid_node_mapping=None, density=None):
    """Compute the weighted edges from one region to its H3 neighbours.

    Args:
        region: id of the origin region.
        regions_gdf: GeoDataFrame of regions, indexed by region id.
        h3_neighbourhood: object providing ``get_neighbours_up_to_distance``.
        distance: neighbourhood radius passed to ``h3_neighbourhood``.
        distance_measure: ``"Euclidean"`` or ``"Location_based_accessibility"``;
            any other value yields no edges.
        G_drive: driving graph with a ``travel_time`` edge attribute
            (accessibility only).
        centroid_node_mapping: dict from region id to graph node id
            (accessibility only).
        density: per-region density lookup (accessibility only).

    Returns:
        List of ``(region, neighbor, weight)`` tuples.

    Raises:
        ValueError: if the accessibility measure is requested without
            ``G_drive``, ``centroid_node_mapping`` or ``density``.
    """
    edges = []
    neighbors = h3_neighbourhood.get_neighbours_up_to_distance(region, distance)

    if distance_measure == "Euclidean":
        for neighbor in neighbors:
            if neighbor in regions_gdf.index:
                dist = calculate_euclidean_distance(region, neighbor, regions_gdf)
                edges.append((region, neighbor, dist))
    elif distance_measure == "Location_based_accessibility":
        if G_drive is None or centroid_node_mapping is None or density is None:
            raise ValueError(
                "G_drive, centroid_node_mapping and density are required for "
                "the 'Location_based_accessibility' measure"
            )

        # The mapping stores one node id per region (see
        # map_centroids_to_network_nodes), so the value is used as-is;
        # subscripting it with [0] fails on a scalar node id.
        if region not in centroid_node_mapping or centroid_node_mapping[region] not in G_drive:
            print(f"Warning: Node for region {region} not found in graph. Skipping.")
            return edges

        origin_node = centroid_node_mapping[region]
        try:
            travel_times = nx.single_source_dijkstra_path_length(G_drive, origin_node, weight='travel_time', cutoff=CUTOFF_TIME)
        except nx.NodeNotFound:
            print(f"Warning: Node {origin_node} for region {region} not found in graph. Skipping.")
            return edges

        for neighbor in neighbors:
            if neighbor in regions_gdf.index and neighbor in centroid_node_mapping:
                destination_node = centroid_node_mapping[neighbor]
                if destination_node in travel_times:
                    travel_time = travel_times[destination_node]
                    accessibility = calculate_location_based_accessibility(region, neighbor, travel_time, density)
                    edges.append((region, neighbor, accessibility))
                else:
                    print(f"Warning: No path found from {region} to {neighbor}. Skipping.")

    return edges

def calculate_accessibility_edges(regions_gdf, h3_neighbourhood, distance, G_drive, centroid_node_mapping):
    """Build ``(region, neighbor, accessibility)`` edges from driving times.

    For every region, travel times over ``G_drive`` (edge weight
    ``travel_time``, search cut off at ``CUTOFF_TIME``) are computed from the
    region's mapped node. Each H3 neighbour within ``distance`` that is in
    ``regions_gdf`` and reachable gets the weight ``1 / (1 + travel_time)``.
    """
    accessibility_edges = []
    known_regions = regions_gdf.index

    for region in tqdm(known_regions, desc="Calculating accessibility"):
        source_node = centroid_node_mapping[region]
        nearby = [
            candidate
            for candidate in h3_neighbourhood.get_neighbours_up_to_distance(region, distance)
            if candidate in known_regions
        ]

        reachable = nx.single_source_dijkstra_path_length(G_drive, source_node, weight='travel_time', cutoff=CUTOFF_TIME)

        for candidate in nearby:
            target_node = centroid_node_mapping[candidate]
            if target_node not in reachable:
                continue
            # Simple accessibility measure
            accessibility_edges.append((region, candidate, 1 / (1 + reachable[target_node])))

    return accessibility_edges

def calculate_euclidean_edges(regions_gdf, h3_neighbourhood, distance):
    """Build ``(region, neighbor, distance)`` edges between region centroids.

    Only H3 neighbours within ``distance`` that are present in
    ``regions_gdf`` are linked; the weight is the centroid-to-centroid
    distance.
    """
    euclidean_edges = []
    known_regions = regions_gdf.index

    for region in tqdm(known_regions, desc="Calculating Euclidean distances"):
        candidates = h3_neighbourhood.get_neighbours_up_to_distance(region, distance)
        euclidean_edges.extend(
            (
                region,
                candidate,
                regions_gdf.loc[region].geometry.centroid.distance(regions_gdf.loc[candidate].geometry.centroid),
            )
            for candidate in candidates
            if candidate in known_regions
        )

    return euclidean_edges

def create_adjacency_graph(regions_gdf, distance=ADJACENCY_DISTANCE, distance_measure=DISTANCE_MEASURE, G_drive=None):
    """Build the region-by-region adjacency matrix.

    Despite the name, the result is a DataFrame, not a graph object.

    Args:
        regions_gdf: GeoDataFrame of regions, indexed by region id.
        distance: H3 neighbourhood radius; only regions within it are linked.
        distance_measure: ``"Location_based_accessibility"`` (weights from
            driving time and destination density) or ``"Euclidean"``
            (weights are centroid distances).
        G_drive: driving graph with a ``travel_time`` edge attribute;
            required for the accessibility measure.

    Returns:
        DataFrame with the region ids as both index and columns. Cell
        ``[origin, destination]`` holds the edge weight; unlinked pairs stay NaN.

    Raises:
        ValueError: for an unknown ``distance_measure``, or when the
            accessibility measure is requested without ``G_drive``.
    """
    print("Creating adjacency graph...")
    if distance_measure not in ("Euclidean", "Location_based_accessibility"):
        raise ValueError(f"Invalid DISTANCE_MEASURE: {distance_measure}")

    adjacency_matrix = pd.DataFrame(index=regions_gdf.index, columns=regions_gdf.index)
    h3_neighbourhood = H3Neighbourhood(regions_gdf=regions_gdf, include_center=False)

    if distance_measure == "Euclidean":
        # No street network or density data is needed for this measure.
        for source, target, dist in calculate_euclidean_edges(regions_gdf, h3_neighbourhood, distance):
            adjacency_matrix.at[source, target] = dist
        return adjacency_matrix

    if G_drive is None:
        raise ValueError("G_drive is required when distance_measure is 'Location_based_accessibility'")

    centroid_node_mapping, unmapped_regions = map_centroids_to_network_nodes(regions_gdf, G_drive)

    # Calculate region densities
    densities = calculate_region_density(regions_gdf)

    # Check if all regions have density data
    missing_density = set(regions_gdf.index) - set(densities.index)
    if missing_density:
        print(f"Warning: {len(missing_density)} regions are missing density data")

    # Regions usable as origin or destination: mapped to a node and with a
    # density value. A set keeps the membership tests below O(1).
    usable_regions = set(regions_gdf.index) - set(unmapped_regions) - missing_density

    for region in tqdm(regions_gdf.index, desc="Calculating accessibility"):
        if region not in usable_regions:
            continue

        origin_node = centroid_node_mapping[region]
        try:
            travel_times = nx.single_source_dijkstra_path_length(G_drive, origin_node, weight='travel_time', cutoff=CUTOFF_TIME)
        except nx.NodeNotFound:
            print(f"Error: Node {origin_node} not found in graph for region {region}.")
            continue

        for neighbor in h3_neighbourhood.get_neighbours_up_to_distance(region, distance):
            if neighbor not in usable_regions:
                continue
            dest_node = centroid_node_mapping[neighbor]
            if dest_node in travel_times:
                travel_time = travel_times[dest_node]
                accessibility = calculate_location_based_accessibility(region, neighbor, travel_time, densities)
                adjacency_matrix.at[region, neighbor] = accessibility

    return adjacency_matrix

def calculate_location_based_accessibility(origin, destination, travel_time, densities, decay_parameter=0.001):
    """Return ``exp(-decay_parameter * travel_time)`` times the destination's density.

    ``densities`` is looked up with ``.loc[destination]``. When the
    destination is missing, a warning is printed and 0 is returned.
    ``origin`` does not enter the calculation.
    """
    try:
        destination_density = densities.loc[destination]
    except KeyError:
        print(f"Warning: Density data not found for region {destination}")
        return 0  # or some other default value
    else:
        time_decay = np.exp(-decay_parameter * travel_time)
        return time_decay * destination_density

In [None]:
# Export adjacency graph
def export_adjacency_graph(graph, filename):
    """Write the edges of ``graph`` to a Parquet file at ``filename``.

    Each edge becomes one row with the columns ``node1``, ``node2`` and
    ``weight``.
    """
    print("Exporting adjacency graph...")
    edge_records = [
        {'node1': source, 'node2': target, 'weight': attributes['weight']}
        for source, target, attributes in graph.edges(data=True)
    ]
    pd.DataFrame(edge_records).to_parquet(filename, index=False)

In [None]:
# Train Urban2Vec model
def train_urban2vec(model, dataloader, optimizer, criterion, device, num_epochs):
    """Optimise ``model`` on index triplets for ``num_epochs`` epochs.

    Args:
        model: module mapping an index tensor to embeddings.
        dataloader: yields ``(anchor_idx, positive_idx, negative_idx)`` batches.
        optimizer: optimiser over the model parameters.
        criterion: loss called as ``criterion(anchor, positive, negative)``.
        device: device the index tensors are moved to.
        num_epochs: number of passes over ``dataloader``.

    Returns:
        ``(model, loss_values)`` where ``loss_values`` has one entry per batch.
    """
    print("Starting training...")
    model.train()
    loss_values = []
    # Mean losses of the most recent epochs; feeds the progress-bar readout.
    recent_epoch_losses = deque(maxlen=10)
    batches_per_epoch = len(dataloader)

    with tqdm(total=batches_per_epoch * num_epochs, desc="Training", unit="batch") as pbar:
        for _ in range(num_epochs):
            epoch_loss_sum = 0
            for anchor_idx, positive_idx, negative_idx in dataloader:
                anchor_idx = anchor_idx.to(device)
                positive_idx = positive_idx.to(device)
                negative_idx = negative_idx.to(device)

                optimizer.zero_grad()
                loss = criterion(model(anchor_idx), model(positive_idx), model(negative_idx))
                loss.backward()
                optimizer.step()

                batch_loss = loss.item()
                epoch_loss_sum += batch_loss
                loss_values.append(batch_loss)

                pbar.update(1)

            recent_epoch_losses.append(epoch_loss_sum / batches_per_epoch)
            pbar.set_postfix(loss=sum(recent_epoch_losses) / len(recent_epoch_losses))

    return model, loss_values

In [None]:
# Apply PCA
def apply_pca(embeddings, n_components):
    """Fit a PCA with ``n_components`` components on ``embeddings`` and
    return the transformed data."""
    return PCA(n_components=n_components).fit_transform(embeddings)

In [None]:
# Load pretrained embeddings
def load_pretrained_embeddings(regions, embedding_file, n_components=EMBEDDING_DIM):
    """Load pre-trained region embeddings and reduce them with PCA when possible.

    Args:
        regions: region ids; the result has one row per id, in this order.
        embedding_file: CSV file whose first column holds the region ids.
        n_components: target embedding width.

    Returns:
        ``float32`` tensor with one row per region. Regions absent from the
        file get an all-zero row before the reduction. The width is
        ``n_components`` when PCA could be applied; otherwise the embeddings
        are returned at their original width, so callers must not assume
        ``n_components`` columns.

    Raises:
        FileNotFoundError: if ``embedding_file`` does not exist.
    """
    print(f"Loading pretrained embeddings from: {embedding_file}")
    pretrained_embeddings = (
        pd.read_csv(embedding_file, index_col=0)
        .reindex(regions)
        .fillna(0)
    )

    # PCA cannot produce more components than min(n_samples, n_features), so
    # both dimensions are checked, not only the feature count.
    n_samples, n_features = pretrained_embeddings.shape
    if min(n_samples, n_features) >= n_components:
        reduced_embeddings = apply_pca(pretrained_embeddings.values, n_components=n_components)
    else:
        print("Not enough pre-trained embeddings for PCA. Using as is.")
        reduced_embeddings = pretrained_embeddings.values

    return torch.tensor(reduced_embeddings, dtype=torch.float32)

In [None]:
# # Load data and prepare graph
# regions_gdf = load_data()
# G_drive = load_and_prepare_driving_graph()
# 
# # Print some information about the graph
# print(f"Number of nodes in G_drive: {len(G_drive.nodes)}")
# print(f"Number of edges in G_drive: {len(G_drive.edges)}")
# 
# # Create adjacency matrix
# adjacency_matrix = create_adjacency_graph(regions_gdf, distance=ADJACENCY_DISTANCE, distance_measure=DISTANCE_MEASURE, G_drive=G_drive)
# 
# # Print some information about the adjacency matrix
# print(f"Shape of adjacency matrix: {adjacency_matrix.shape}")
# print(f"Number of non-null values: {adjacency_matrix.notna().sum().sum()}")
# 
# # save as parquet
# adjacency_matrix.to_parquet(os.path.join(CHECKPOINT_DIR, f'adjacency_matrix_res{RESOLUTION}_{DISTANCE_MEASURE.lower()}.parquet'))
# print("Adjacency matrix created and saved.")

In [None]:
# Script execution
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
regions_gdf = load_data()

# The adjacency matrix is cached inside CHECKPOINT_DIR, so the directory has
# to exist before anything is written to it.
os.makedirs(CHECKPOINT_DIR, exist_ok=True)
adjacency_file_path = os.path.join(CHECKPOINT_DIR, f'adjacency_matrix_res{RESOLUTION}_{DISTANCE_MEASURE.lower()}.parquet')

# Try to import the adjacency matrix, otherwise create it and export it
if os.path.exists(adjacency_file_path):
    print("Loading existing adjacency graph...")
    adjacency_matrix = pd.read_parquet(adjacency_file_path)
else:
    print("Creating new adjacency graph...")
    if DISTANCE_MEASURE == "Location_based_accessibility":
        G_drive = load_and_prepare_driving_graph()
        adjacency_matrix = create_adjacency_graph(regions_gdf, distance=ADJACENCY_DISTANCE, distance_measure=DISTANCE_MEASURE, G_drive=G_drive)
    elif DISTANCE_MEASURE == "Euclidean":
        # Euclidean weights need neither the street network nor density data,
        # so the edges are computed directly and pivoted into the same
        # origin-by-destination layout as the accessibility matrix.
        h3_neighbourhood = H3Neighbourhood(regions_gdf=regions_gdf, include_center=False)
        euclidean_edges = calculate_euclidean_edges(regions_gdf, h3_neighbourhood, ADJACENCY_DISTANCE)
        adjacency_matrix = (
            pd.DataFrame(euclidean_edges, columns=['source', 'target', 'weight'])
            .pivot(index='source', columns='target', values='weight')
        )
    else:
        raise ValueError(f"Invalid DISTANCE_MEASURE: {DISTANCE_MEASURE}")

    # Export adjacency matrix (as floats, so the Parquet schema is unambiguous)
    adjacency_matrix = adjacency_matrix.astype(float)
    adjacency_matrix.to_parquet(adjacency_file_path)

# Convert the adjacency matrix to an edge list using melt. The index is
# renamed first so the conversion does not depend on what the index is called.
edge_list = (
    adjacency_matrix.rename_axis(index='source')
    .reset_index()
    .melt(id_vars='source', var_name='target', value_name='weight')
    .dropna(subset=['weight'])
)
edge_list['weight'] = edge_list['weight'].astype(float)

# Remove edges with zero weights (NaN weights were dropped above)
edge_list = edge_list[edge_list['weight'] != 0]

# Create a new directed graph; the dataset walks this graph, not the matrix
adjacency_graph = nx.DiGraph()
adjacency_graph.add_weighted_edges_from(edge_list.itertuples(index=False, name=None))

print(f"Adjacency graph has {adjacency_graph.number_of_nodes()} nodes and {adjacency_graph.number_of_edges()} edges.")

dataset = Urban2VecDataset(regions_gdf, adjacency_graph, num_ants=NUM_ANTS, walk_length=WALK_LENGTH)
dataloader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True, collate_fn=custom_collate)

num_regions = len(dataset)
model = Urban2VecModel(num_regions, EMBEDDING_DIM).to(device)

try:
    pretrained_embeddings = load_pretrained_embeddings(dataset.regions, PRETRAINED_EMBEDDING_FILE)
except FileNotFoundError:
    print("Pre-trained embeddings not found. Initializing randomly.")
else:
    # The loader may return a different width than EMBEDDING_DIM when PCA was
    # skipped; copy the overlapping columns and leave any remaining columns
    # at their random initialisation.
    shared_dims = min(pretrained_embeddings.shape[1], EMBEDDING_DIM)
    model.region_embedding.weight.data[:, :shared_dims].copy_(pretrained_embeddings[:, :shared_dims])
    print("Loaded and PCA-reduced/used as is pre-trained region embeddings.")

optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
criterion = CircleLoss()

try:
    model, loss_values = train_urban2vec(model, dataloader, optimizer, criterion, device, NUM_EPOCHS)
except RuntimeError as e:
    print(f"RuntimeError during training: {e}")
    print("Consider setting CUDA_LAUNCH_BLOCKING=1 for more detailed error messages.")
    # Re-raise: continuing would save untrained embeddings and leave
    # `loss_values` undefined for the plotting cell.
    raise

print("Saving final embeddings...")
region_embeddings = model.region_embedding.weight.detach().cpu().numpy()
pd.DataFrame(region_embeddings, index=dataset.regions).to_csv(os.path.join(CHECKPOINT_DIR, f'step3_region_embeddings_{DISTANCE_MEASURE.lower()}.csv'))

print("Training completed and embeddings saved.")

In [None]:
# plot losses
# Per-batch training loss with a 100-batch rolling mean on top.
fig, ax = plt.subplots()
ax.plot(loss_values, label="Batch loss")
ax.plot(pd.Series(loss_values).rolling(100).mean(), label="Rolling mean (100 batches)")
ax.set_xlabel("Iterations")
ax.set_ylabel("Loss")
ax.set_title("Circle Loss")
ax.legend()
plt.show()

In [None]:


# Ensure regions_gdf, region_embeddings and dataset from the cells above are available.
# Assuming regions_gdf['afw', 'fys', 'onv', 'soc', 'vrz', 'won'] contains the scores

# Get the Leefbaarometer scores
scores_df = regions_gdf[['afw', 'fys', 'onv', 'soc', 'vrz', 'won']]

# Initialize lists to store R-squared values
r2_scores_step1 = []
r2_scores_step2 = []
r2_scores_step3 = []
score_names = scores_df.columns

# Get the embeddings as DataFrames with region_id as index. The first CSV
# column is read as the index by position, so this does not depend on the
# auto-generated 'Unnamed: 0' header.
embeddings_step1 = pd.read_csv(os.path.join(CHECKPOINT_DIR, 'step1_region_embeddings.csv'), index_col=0).rename_axis('region_id')
embeddings_step2 = pd.read_csv(os.path.join(CHECKPOINT_DIR, 'step2_region_embeddings.csv'), index_col=0).rename_axis('region_id')
embeddings_step3 = pd.DataFrame(region_embeddings, index=dataset.regions).rename_axis('region_id')

In [None]:
# Preview the first rows of the step-3 embeddings (indexed by region_id).
embeddings_step3.head()

In [None]:
# ensure same dimensionality with PCA
N_EVAL_COMPONENTS = 30


def _reduce_for_scoring(embeddings, region_index, n_components=N_EVAL_COMPONENTS):
    """Align ``embeddings`` to ``region_index`` and reduce them with PCA.

    The regression below pairs rows with scores by position, so the rows are
    first put in the order of the score table. A region without an embedding
    triggers an assertion instead of silently shifting the pairing.
    """
    aligned = embeddings.reindex(region_index)
    assert not aligned.isna().any().any(), "embeddings are missing (or NaN) for some regions in regions_gdf"
    return PCA(n_components=n_components).fit_transform(aligned.to_numpy())


embeddings_reduced_step1 = _reduce_for_scoring(embeddings_step1, scores_df.index)
embeddings_reduced_step2 = _reduce_for_scoring(embeddings_step2, scores_df.index)
embeddings_reduced_step3 = _reduce_for_scoring(embeddings_step3, scores_df.index)

reduced_by_step = {
    'Step 1': embeddings_reduced_step1,
    'Step 2': embeddings_reduced_step2,
    'Step 3': embeddings_reduced_step3,
}

# Start from empty lists on every run so that re-running the cell does not
# append to results from a previous run.
r2_by_step = {label: [] for label in reduced_by_step}

# Loop over each score; the R-squared is measured on the same data the
# regression was fitted on (in-sample).
for score_name in score_names:
    scores = scores_df[score_name]
    for label, features in reduced_by_step.items():
        regression = LinearRegression().fit(features, scores)
        r2_by_step[label].append(r2_score(scores, regression.predict(features)))

r2_scores_step1 = r2_by_step['Step 1']
r2_scores_step2 = r2_by_step['Step 2']
r2_scores_step3 = r2_by_step['Step 3']

# Plotting the R-squared values
x = np.arange(len(score_names))
width = 0.25  # Reduced width for better spacing

fig, ax = plt.subplots(figsize=(10, 6))
for offset, (label, r2_values) in zip((-width, 0, width), r2_by_step.items()):
    ax.bar(x + offset, r2_values, width, label=label)

# Adding labels and titles
ax.set_xlabel('Leefbaarometer Scores')
ax.set_ylabel('R-squared Value')
ax.set_title('R-squared Values of Embeddings Predicting Leefbaarometer Scores')
ax.set_xticks(x)
ax.set_xticklabels(score_names)
ax.legend()

# Display the bar chart
plt.tight_layout()
plt.show()

In [None]:
# from Plotting import pca_plot, cluster_agglomerative_plot, cluster_kmeans_plot
# import warnings
# warnings.filterwarnings("ignore")
# cluster_agglomerative_plot(embeddings_step3, regions_gdf, 10)

In [None]:
# visualise location based accessibility
# Read the cached adjacency matrix back from the checkpoint directory.
adjacency_df = pd.read_parquet(
    os.path.join(
        CHECKPOINT_DIR,
        f'adjacency_matrix_res{RESOLUTION}_{DISTANCE_MEASURE.lower()}.parquet',
    )
)
