In [11]:
import numpy as np
import pandas as pd
from tqdm import tqdm
from geopandas import GeoDataFrame
from shapely.geometry import shape, mapping, Point
import copy
import json
import math

# Import data

In [12]:
# Import Borough json
with open('../data/raw/misc/london_boroughs.json') as json_file:
    borough_coordinates = json.load(json_file)

In [13]:
# Collect borough names
boroughs = []
boroughs_no_spaces = []

for f in tqdm(borough_coordinates['features']):
    boroughs.append(f['properties']['name'])
    boroughs_no_spaces.append(f['properties']['name'].replace(" ", ""))

100%|██████████| 33/33 [00:00<00:00, 7484.56it/s]


# Construct geopandas dataframe

In [14]:
# Convert dictionary to dataframe
boroughs_df = pd.DataFrame.from_dict(borough_coordinates['features'])
# Remove unnecessary columns to avoid overlap with unpacked columns (see next line)
boroughs_df = boroughs_df.drop(columns=['type','id'])
# Expand dictionary-like columns
boroughs_df = boroughs_df.join(boroughs_df['properties'].apply(pd.Series))
# Drop unpacked column and unnecessary columns
boroughs_df = boroughs_df.drop(columns=['properties','id','code','inner_statistical'])

In [15]:
# Convert geometry to shapely multipolygon
boroughs_df['geometry'] = boroughs_df['geometry'].apply(lambda x: shape(x))

In [16]:
# Convert dataframe to geopandas
crs = {'init': 'epsg:4326'}
boroughs_gdf = GeoDataFrame(boroughs_df, crs=crs, geometry=boroughs_df['geometry'])

  return _prepare_from_string(" ".join(pjargs))


# Extract centroid of Boroughs

In [17]:
boroughs_gdf['lon'] = boroughs_gdf['geometry'].apply(lambda p: p.centroid.x)
boroughs_gdf['lat'] = boroughs_gdf['geometry'].apply(lambda p: p.centroid.y)

In [18]:
# Remove spaces from Borough names
boroughs_gdf['name'] = boroughs_gdf['name'].map(dict(zip(np.sort(boroughs), np.sort(boroughs_no_spaces))))

# Export data to file

In [19]:
# boroughs_gdf = boroughs_gdf.sort_values(by='name')
# boroughs_gdf.to_file("../data/validation/commuter/borough_coordinates.json", driver='GeoJSON')

# Extract conceptual radii of Boroughs

In [21]:
# Loop over Multipolygons
for i,row in tqdm(boroughs_gdf.iterrows(),total=boroughs_gdf.shape[0]):
    # Initialise maximum,minimum distance
    max_dist = 0
    min_dist = 0
    # Loop over polygons in Multipolygon
    for polygon in row.geometry:
        # Loop over points in polygon (-1 prevents duplicating the first vertex)
        for point in polygon.exterior.coords[:-1]:
            # Compute distance between centroid and point in polygon
            dist = math.sqrt(row.geometry.centroid.distance(Point(point)))
            # Update maximum distance if current distance is larger
            if dist > max_dist:
                max_dist = dist
            if dist < max_dist:
                min_dist = dist
    # Append maximum distance between centroid and polygon and divide by sqrt(2) 
    # to approximate radius of cirle surrounding polygon
    boroughs_gdf.at[i,'max_radius'] = max_dist/math.sqrt(2)
    boroughs_gdf.at[i,'min_radius'] = min_dist/math.sqrt(2)
    boroughs_gdf.at[i,'avg_radius'] = min_dist #(max_dist+min_dist)/2

100%|██████████| 33/33 [00:12<00:00,  2.69it/s]


# Construct cost matrix

In [22]:
# Initialise empty cost dataframe
cost_matrix = pd.DataFrame(0, index=boroughs_no_spaces, columns=boroughs_no_spaces)
# Sort dataframe by index
cost_matrix = cost_matrix.sort_index(axis=1).sort_index(axis=0)

In [23]:
# Loop over cost matrix rows
for origin,row in tqdm(cost_matrix.iterrows(),total=cost_matrix.shape[0]):
    # Compute distance between centroids for entries off the diagonal
    for destination in boroughs_no_spaces:
        # Computations in the case of origin = destination have been completed - so avoid them
        if destination != origin:
            # Store origin and destination geometry centroids
            origin_centroid = boroughs_gdf[boroughs_gdf['name']==origin].geometry.centroid.values[0]
            destination_centroid = boroughs_gdf[boroughs_gdf['name']==destination].geometry.centroid.values[0]
            # Store distance between origin and destination centroids
            cost_matrix.loc[origin,destination] = math.sqrt(origin_centroid.distance(destination_centroid))
        else:
            # Fill in diagonal with radii values
#             cost_matrix.loc[origin,destination] = boroughs_gdf[boroughs_gdf['name']==origin]['max_radius'].values[0]
#             cost_matrix.loc[origin,destination] = boroughs_gdf[boroughs_gdf['name']==origin]['min_radius'].values[0]
            cost_matrix.loc[origin,destination] = boroughs_gdf[boroughs_gdf['name']==origin]['avg_radius'].values[0]
    
            

100%|██████████| 33/33 [00:03<00:00,  8.28it/s]


# Export cost matrix and Borough names as dataframe and numpy array

In [26]:
# Export to csv
# cost_matrix.to_csv('../data/validation/commuter/cost_matrix.csv')
# Export to txt
np.savetxt('../data/input/commuter_borough/cost_matrix.txt',cost_matrix.to_numpy())