In [1]:
import numpy as np
import pandas as pd
from tqdm import tqdm
from geopandas import GeoDataFrame
from shapely.geometry import shape, mapping, Point
import copy
import json
import math

# Import data

In [2]:
# Import Borough json
with open('../data/raw/misc/london_boroughs.json') as json_file:
    borough_coordinates = json.load(json_file)

In [3]:
# Collect borough names
boroughs = []
boroughs_no_spaces = []

for f in tqdm(borough_coordinates['features']):
    boroughs.append(f['properties']['name'])
    boroughs_no_spaces.append(f['properties']['name'].replace(" ", ""))

100%|██████████| 33/33 [00:00<00:00, 25684.18it/s]


# Construct geopandas dataframe

In [4]:
# Convert dictionary to dataframe
boroughs_df = pd.DataFrame.from_dict(borough_coordinates['features'])
# Remove unnecessary columns to avoid overlap with unpacked columns (see next line)
boroughs_df = boroughs_df.drop(columns=['type','id'])
# Expand dictionary-like columns
boroughs_df = boroughs_df.join(boroughs_df['properties'].apply(pd.Series))
# Drop unpacked column and unnecessary columns
boroughs_df = boroughs_df.drop(columns=['properties','id','code','inner_statistical'])

In [5]:
# Convert geometry to shapely multipolygon
boroughs_df['geometry'] = boroughs_df['geometry'].apply(lambda x: shape(x))

In [6]:
# Convert dataframe to geopandas
crs = {'init': 'epsg:4326'}
boroughs_gdf = GeoDataFrame(boroughs_df, crs=crs, geometry=boroughs_df['geometry'])

  return _prepare_from_string(" ".join(pjargs))


# Extract centroid of Boroughs

In [7]:
boroughs_gdf['lon'] = boroughs_gdf['geometry'].apply(lambda p: p.centroid.x)
boroughs_gdf['lat'] = boroughs_gdf['geometry'].apply(lambda p: p.centroid.y)

In [8]:
# Remove spaces from Borough names
boroughs_gdf['name'] = boroughs_gdf['name'].map(dict(zip(np.sort(boroughs), np.sort(boroughs_no_spaces))))

In [9]:
boroughs_gdf = boroughs_gdf.sort_values(by='name')
boroughs_gdf.to_file("../data/input/commuter/borough_coordinates.json", driver='GeoJSON')

# Extract conceptual radii of Boroughs

In [26]:
# Loop over Multipolygons
for i,row in tqdm(boroughs_gdf.iterrows(),total=boroughs_gdf.shape[0]):
    # Initialise maximum,minimum distance
    max_dist = 0
    min_dist = 0
    # Loop over polygons in Multipolygon
    for polygon in row.geometry:
        # Loop over points in polygon (-1 prevents duplicating the first vertex)
        for point in polygon.exterior.coords[:-1]:
            # Compute distance between centroid and point in polygon
            dist = math.sqrt(row.geometry.centroid.distance(Point(point)))
            # Update maximum distance if current distance is larger
            if dist > max_dist:
                max_dist = dist
            if dist < max_dist:
                min_dist = dist
    # Append maximum distance between centroid and polygon and divide by sqrt(2) 
    # to approximate radius of cirle surrounding polygon
    boroughs_gdf.at[i,'max_radius'] = max_dist/math.sqrt(2)
    boroughs_gdf.at[i,'min_radius'] = min_dist/math.sqrt(2)
    boroughs_gdf.at[i,'avg_radius'] = (max_dist+min_dist)/2

100%|██████████| 33/33 [00:12<00:00,  2.55it/s]


# Construct cost matrix

In [27]:
# Initialise empty cost dataframe
cost_matrix = pd.DataFrame(0, index=boroughs_no_spaces, columns=boroughs_no_spaces)
# Sort dataframe by index
cost_matrix = cost_matrix.sort_index(axis=1).sort_index(axis=0)

In [28]:
# Loop over cost matrix rows
for origin,row in tqdm(cost_matrix.iterrows(),total=cost_matrix.shape[0]):
    # Compute distance between centroids for entries off the diagonal
    for destination in boroughs_no_spaces:
        # Computations in the case of origin = destination have been completed - so avoid them
        if destination != origin:
            # Store origin and destination geometry centroids
            origin_centroid = boroughs_gdf[boroughs_gdf['name']==origin].geometry.centroid.values[0]
            destination_centroid = boroughs_gdf[boroughs_gdf['name']==destination].geometry.centroid.values[0]
            # Store distance between origin and destination centroids
            cost_matrix.loc[origin,destination] = math.sqrt(origin_centroid.distance(destination_centroid))
        else:
            # Fill in diagonal with radii values
#             cost_matrix.loc[origin,destination] = boroughs_gdf[boroughs_gdf['name']==origin]['max_radius'].values[0]
#             cost_matrix.loc[origin,destination] = boroughs_gdf[boroughs_gdf['name']==origin]['min_radius'].values[0]
            cost_matrix.loc[origin,destination] = boroughs_gdf[boroughs_gdf['name']==origin]['avg_radius'].values[0]
    
            

100%|██████████| 33/33 [00:03<00:00,  8.59it/s]


# Export cost matrix and Borough names as dataframe and numpy array

In [29]:
cost_matrix

Unnamed: 0,BarkingandDagenham,Barnet,Bexley,Brent,Bromley,Camden,CityofLondon,Croydon,Ealing,Enfield,...,Merton,Newham,Redbridge,RichmonduponThames,Southwark,Sutton,TowerHamlets,WalthamForest,Wandsworth,Westminster
BarkingandDagenham,0.255782,0.592245,0.294494,0.633694,0.437838,0.539401,0.477223,0.539601,0.681992,0.494776,...,0.597822,0.314024,0.265336,0.676939,0.469334,0.600856,0.414047,0.392537,0.577469,0.544515
Barnet,0.592245,0.273222,0.619697,0.285503,0.598085,0.295403,0.394112,0.536836,0.391095,0.357252,...,0.454392,0.511404,0.536182,0.449499,0.443684,0.505965,0.447766,0.445644,0.407863,0.336379
Bexley,0.294494,0.619697,0.259798,0.648216,0.352436,0.557126,0.489061,0.499941,0.689681,0.545804,...,0.584059,0.353663,0.377232,0.67352,0.464148,0.576469,0.430542,0.451865,0.571737,0.553756
Brent,0.633694,0.285503,0.648216,0.280448,0.608148,0.333264,0.425449,0.521456,0.269776,0.450375,...,0.405599,0.5529,0.587159,0.353316,0.459624,0.464976,0.485687,0.507577,0.366866,0.339852
Bromley,0.437838,0.598085,0.352436,0.608148,0.330974,0.521693,0.450129,0.373736,0.641156,0.558224,...,0.501651,0.39633,0.46374,0.609251,0.402087,0.478865,0.411464,0.480748,0.500849,0.505762
Camden,0.539401,0.295403,0.557126,0.333264,0.521693,0.204222,0.26922,0.451189,0.41862,0.35489,...,0.377013,0.441184,0.486388,0.432704,0.332514,0.430584,0.353966,0.390409,0.315234,0.180903
CityofLondon,0.477223,0.394112,0.489061,0.425449,0.450129,0.26922,0.143892,0.399485,0.488853,0.369106,...,0.385325,0.359526,0.42703,0.48213,0.212717,0.418338,0.237974,0.33501,0.337133,0.262943
Croydon,0.539601,0.536836,0.499941,0.521456,0.373736,0.451189,0.399485,0.25703,0.543736,0.543747,...,0.350567,0.461074,0.531217,0.491868,0.344207,0.301112,0.412133,0.500059,0.371647,0.418383
Ealing,0.681992,0.391095,0.689681,0.269776,0.641156,0.41862,0.488853,0.543736,0.292442,0.524939,...,0.418099,0.60618,0.641694,0.28689,0.511005,0.471138,0.543587,0.571252,0.401409,0.412236
Enfield,0.494776,0.357252,0.545804,0.450375,0.558224,0.35489,0.369106,0.543747,0.524939,0.263661,...,0.514733,0.417329,0.419202,0.554511,0.422255,0.550172,0.378758,0.306408,0.47214,0.394725


In [30]:
# Export to csv
cost_matrix.to_csv('../data/input/commuter/cost_matrix.csv')
# Export to txt
np.savetxt('../data/input/commuter/cost_matrix.txt',cost_matrix.to_numpy())

In [31]:
# Export to txt
np.savetxt('../data/input/commuter/origins-destinations.txt',np.array(boroughs_no_spaces),fmt="%s")