In [None]:
# If you do not have these datafiles yet, use the adaptive_zoning_preprocess_EW_data
# notebook to download and preprocess the data.

import geopandas as gpd
import pandas as pd

# Zone table with one point geometry per zone. Later cells read its geometry, the
# 'Residential population' and 'Workplace population' columns and the 'msoa11cd' zone code.
centroid_data = gpd.read_parquet('data/centroid_data.parquet')
# Origin-destination table. Later cells read 'Area of residence', 'Area of workplace'
# and 'Commuters' from it.
commuting_data = pd.read_parquet('data/commuting_data.parquet')

In [None]:
# Note: on my laptop this takes up to 4 minutes
# See the next cells to dump / load a pickle of the adaptive_zone_system
from time import time
from adaptive_zoning import AdaptiveZoneSystem

# Start the timer; the elapsed wall-clock time is printed at the end of this cell.
t = time()

# One (x, y) tuple per zone, in the row order of centroid_data.
centroids = [(pt.x,pt.y) for pt in centroid_data.geometry]
# Per-zone residential and workplace populations, in the same row order as centroids.
pop =  centroid_data['Residential population'].to_list()
emp =  centroid_data['Workplace population'].to_list() 

# Using population as weights. This is a deviation from the paper, which used area-weighted centroids.
weight = pop

# For the estimation of beta, see the separate notebook on doubly-constrained model estimation.
# NOTE(review): the division by 1000 presumably rescales a per-km value to per-metre — confirm.
beta = 0.11253833770751953 / 1000 # taken from doubly constrained calibration

# NOTE(review): nbh_size is presumably the neighbourhood size of the adaptive zone system;
# per the author's note, 72 reduces the number of OD pairs to 1% of the original.
nbh_size = 72 # reduce to 1% of original OD pairs

zone_system = AdaptiveZoneSystem(pop, emp, weight, centroids, beta, nbh_size)

print("Calculation took: ", time()-t, " seconds")

In [None]:
import pickle
# Pickle the adaptive zone system for England and Wales, so that a later session can
# load it instead of repeating the slow construction above.
filename = "data/zone_system_EW.pkl"
with open(filename, 'wb') as file:
    pickle.dump(zone_system, file)

In [None]:
import pickle
# Unpickling the adaptive zone system for England and Wales.
# This overwrites any zone_system already held in the kernel.
# SECURITY: pickle.load can execute arbitrary code. Only load a file that this
# notebook wrote itself or that comes from a trusted source.
filename = "data/zone_system_EW.pkl"
with open(filename, 'rb') as file:
    zone_system = pickle.load(file)

In [None]:
import matplotlib.pyplot as plt
# Two side-by-side panels drawn by the zone system's own Voronoi plotting helpers.
fig, ax = plt.subplots(1, 2, figsize=(18, 9))
# Left panel: the 1018-cluster system. The same cluster count is used for the
# "traditional" aggregation in the travel-time comparison further down.
zone_system.plot_n_clusters_voronoi(1018, ax[0])
# Right panel. NOTE(review): 800 is presumably the index of the zone whose
# neighbourhood is drawn — confirm against AdaptiveZoneSystem.
zone_system.plot_neighbourhood_voronoi(800, ax[1])
plt.show()

In [None]:
from config_helper import get_key
# Read the OpenRouteService API key from config.ini instead of hard-coding it here.
# NOTE(review): the arguments are presumably (section, option, file) — confirm against config_helper.get_key.
openroute_api_key = get_key("OpenRouteService","API key","config.ini")

In [None]:
# Initialize results separately, so the next cell can be run multiple times to collect more data.
# Re-running THIS cell discards everything collected so far.
results = []

In [None]:
from math import sqrt
import random

from openroute_distance import bng_to_wgs84
from openroute_distance import get_full_distance
from openroute_distance import get_halfway_distance

def straightline_distance(a,b):
    """Return the Euclidean distance between two planar points.

    Args:
        a: First point; element 0 is read as x and element 1 as y.
        b: Second point, indexed the same way as ``a``.

    Returns:
        The straight-line distance, in the units of the input coordinates.
    """
    squared_offsets = [(a[axis] - b[axis]) ** 2 for axis in (0, 1)]
    return sqrt(squared_offsets[0] + squared_offsets[1])
 
# "Traditional" aggregation: map every leaf zone to its cluster in the 1018-cluster system.
cluster = zone_system.map_leaf_zones_to_n_clusters(1018, False)
centroids = zone_system.get_centroids()

# The routing helpers are fed the converted coordinates; the unconverted ones are
# kept for the straight-line distances.
centroids_wgs84 = bng_to_wgs84(centroids)

# Sample commuters: drawing OD pairs weighted by the number of commuters gives a good
# distribution of MSOAs, representative of commuting patterns.
# No seed is set on purpose: this cell is meant to be re-run to collect additional samples.
sample_size = 10
zones = list(zip(commuting_data['Area of residence'], commuting_data['Area of workplace']))
weights = list(commuting_data['Commuters'])
codes = random.choices(zones, weights, k = sample_size)


def get_index(zone):
    """Return the index label of the first centroid_data row whose 'msoa11cd' equals ``zone``.

    Raises:
        IndexError: if no row carries that code.
    """
    # NOTE(review): a sampled code that is missing from centroid_data aborts the whole
    # cell here — confirm that every code in commuting_data has a centroid row.
    return centroid_data.loc[centroid_data['msoa11cd'] == zone].index[0]


sample = [(get_index(a),get_index(b)) for a,b in codes]
for a,b in sample:
    # Both trip ends in the traditional (fixed cluster) system.
    a_trad = cluster[a]
    b_trad = cluster[b]

    # Both trip ends in the adaptive system.
    # NOTE(review): find_aggregated_neighbour(x, y) presumably returns the aggregate that
    # stands in for y as seen from x — confirm against AdaptiveZoneSystem.
    a_adapt = zone_system.find_aggregated_neighbour(b, a)
    b_adapt = zone_system.find_aggregated_neighbour(a, b)

    # Converted coordinates, used for the routing requests.
    centroids_wgs84_a = centroids_wgs84[a]
    centroids_wgs84_b = centroids_wgs84[b]

    centroids_wgs84_a_trad = centroids_wgs84[a_trad]
    centroids_wgs84_b_trad = centroids_wgs84[b_trad]

    centroids_wgs84_a_adapt = centroids_wgs84[a_adapt]
    centroids_wgs84_b_adapt = centroids_wgs84[b_adapt]

    # Unconverted coordinates, used for the straight-line distances.
    centroids_bng_a = centroids[a]
    centroids_bng_b = centroids[b]

    centroids_bng_a_trad = centroids[a_trad]
    centroids_bng_b_trad = centroids[b_trad]

    centroids_bng_a_adapt = centroids[a_adapt]
    centroids_bng_b_adapt = centroids[b_adapt]

    straight_full    = straightline_distance(centroids_bng_a       ,centroids_bng_b)
    straight_trad    = straightline_distance(centroids_bng_a_trad  ,centroids_bng_b_trad)
    straight_adapt_1 = straightline_distance(centroids_bng_a       ,centroids_bng_b_adapt)
    straight_adapt_2 = straightline_distance(centroids_bng_a_adapt ,centroids_bng_b)

    # Four routing requests per sampled pair; only the 'duration' field is used.
    # NOTE(review): the trailing True/False of get_halfway_distance presumably selects
    # which half of the route is measured — confirm in openroute_distance.
    network_full     = get_full_distance(   centroids_wgs84_a       , centroids_wgs84_b        ,"driving-car", openroute_api_key)['duration']
    network_trad     = get_full_distance(   centroids_wgs84_a_trad  , centroids_wgs84_b_trad   ,"driving-car", openroute_api_key)['duration']
    network_adapt_1  = get_halfway_distance(centroids_wgs84_a       , centroids_wgs84_b_adapt ,"driving-car",openroute_api_key,True)['duration']
    network_adapt_2  = get_halfway_distance(centroids_wgs84_a_adapt , centroids_wgs84_b       ,"driving-car",openroute_api_key,False)['duration']

    # Keep the sample only when every request produced a duration.
    # Identity test (`is not None`) instead of `!= None`, per PEP 8.
    # NOTE(review): if a helper can return None instead of a dict, the ['duration']
    # subscripts above raise before this check is reached.
    durations = (network_full, network_trad, network_adapt_1, network_adapt_2)
    if all(duration is not None for duration in durations):

        # The straight-line legs are averaged while the two halfway durations are summed.
        straight_adapt = (straight_adapt_1 + straight_adapt_2)/2
        network_adapt = network_adapt_1 + network_adapt_2

        # Avoid division by zero (e.g. both trip ends falling in the same aggregate).
        if straight_trad == 0:
            straight_trad = 1
        if straight_adapt == 0:
            straight_adapt = 1

        # Scale each aggregated duration by the ratio of the true straight-line distance
        # to the aggregated straight-line distance.
        network_trad_best_guess  = straight_full * network_trad / straight_trad
        network_adapt_best_guess = straight_full * network_adapt/ straight_adapt

        results.append((network_full, network_trad_best_guess, network_adapt_best_guess))

# Total number of samples collected so far, across all runs of this cell.
print(len(results))

    
    

In [None]:
import matplotlib.pyplot as plt
#import numpy as np
from sklearn.metrics import r2_score

def plot_aggregation_comparison(data):
    """
    Plots a scatter plot comparing traditional and adaptive aggregation against accurate values,
    when the data is in a list of tuples.

    The figure also shows the R² of each aggregation against the accurate values and an
    improvement factor, (1 - R² traditional) / (1 - R² adaptive).

    Args:
        data: List of tuples, where each tuple contains (accurate_value, traditional_value, adaptive_value).

    Returns:
        None. The figure is shown with plt.show(). When ``data`` is empty a message is
        printed and nothing is plotted.
    """
    # An empty result list (e.g. every routing request failed) would otherwise crash
    # in r2_score / min(); report it instead.
    if len(data) == 0:
        print("No results to plot.")
        return

    accurate_values = [item[0] for item in data]
    traditional_values = [item[1] for item in data]
    adaptive_values = [item[2] for item in data]

    fig, ax = plt.subplots(figsize=(10, 8))

    # Scatter plot for traditional aggregation
    ax.scatter(accurate_values, traditional_values, label='Traditional Aggregation', marker='o')

    # Scatter plot for adaptive aggregation
    ax.scatter(accurate_values, adaptive_values, label='Adaptive Aggregation', marker='x')

    # Calculate R-squared for traditional aggregation
    r2_traditional = r2_score(accurate_values, traditional_values)
    ax.text(0.05, 0.95, f'R² (Traditional): {r2_traditional:.3f}', transform=ax.transAxes, verticalalignment='top')

    # Calculate R-squared for adaptive aggregation
    r2_adaptive = r2_score(accurate_values, adaptive_values)
    ax.text(0.05, 0.90, f'R² (Adaptive): {r2_adaptive:.3f}', transform=ax.transAxes, verticalalignment='top')

    # Ratio of unexplained variance. A perfect adaptive fit has none, so guard the division.
    unexplained_traditional = 1 - r2_traditional
    unexplained_adaptive = 1 - r2_adaptive
    if unexplained_adaptive != 0:
        improvement_factor = unexplained_traditional / unexplained_adaptive
    elif unexplained_traditional == 0:
        improvement_factor = float('nan')  # both fits perfect: the ratio is undefined
    else:
        improvement_factor = float('inf')
    ax.text(0.05, 0.85, f'Improvement factor: {improvement_factor:.3f}', transform=ax.transAxes, verticalalignment='top')

    # Add the y=x line (accurate prediction line), spanning the full range of all three series
    min_val = min(min(accurate_values), min(traditional_values), min(adaptive_values))
    max_val = max(max(accurate_values), max(traditional_values), max(adaptive_values))

    ax.plot([min_val, max_val], [min_val, max_val], color='red', linestyle='--', label='y=x (Accurate)')

    # Add labels and title
    ax.set_xlabel('Accurate Values')
    ax.set_ylabel('Aggregated Values')
    ax.set_title('Aggregation Comparison - Travel time (s)')
    ax.legend()
    ax.grid(True)

    # Show the plot
    plt.show()

# Compare both aggregation schemes against the full-resolution travel times collected in
# `results`: tuples of (network_full, traditional best guess, adaptive best guess).
plot_aggregation_comparison(results)