In [None]:
import numpy as np
import pandas as pd
import geopandas as gpd
from tqdm import tqdm
import copy
import json

# Import data

In [13]:
# Load the borough coordinate file, then key the table by its 'name' column
borough_coordinates = gpd.read_file("../data/input/misc/borough_coordinates.json")
borough_coordinates = borough_coordinates.set_index('name')

In [14]:
# Parse the London boroughs JSON file into a Python object
with open('../data/raw/misc/london_boroughs.json') as boroughs_fh:
    boroughs_coords = json.loads(boroughs_fh.read())

In [15]:
# Read the raw borough-to-borough commuting table
borough_commuters = pd.read_csv("../data/raw/commuter/commuting-patterns-borough.csv")
# Work on an independent deep copy so the raw frame stays untouched
bc = borough_commuters.copy(deep=True)

# Clean Borough names and Origin/Destination dataframe

In [11]:
# Borough names in the order they appear in the JSON features (progress shown via tqdm)
boroughs = [feature['properties']['name'] for feature in tqdm(boroughs_coords['features'])]
# Same names, same order, with every space stripped out
boroughs_no_spaces = [name.replace(" ", "") for name in boroughs]

100%|██████████| 33/33 [00:00<00:00, 51416.06it/s]


In [12]:
# Keep only the origin label, one column per borough, and the two aggregate flow columns
wanted_columns = ['Origin Area', *boroughs, 'Into area (A)', 'Out of area (B)']
bc = bc[wanted_columns]

# Keep only the rows whose origin is one of the boroughs
borough_rows = bc['Origin Area'].isin(boroughs)
bc = bc[borough_rows]

In [None]:
# Remove spaces from origin names.
# assign() builds a new frame instead of writing through .loc into the
# row-filtered frame, which avoids pandas' SettingWithCopyWarning.
bc = bc.assign(**{'Origin Area': bc['Origin Area'].str.replace(" ", "", regex=False)})
# Remove spaces from destination names.
# Pair every borough name with its own space-free form. `boroughs` and
# `boroughs_no_spaces` are built element-by-element in the same order, so a
# direct zip is aligned by construction. Sorting the two lists independently
# is fragile: spaces take part in the sort order, so the two sorted lists can
# end up ordered differently and a column would silently be renamed to
# another borough's name.
bc = bc.rename(columns=dict(zip(boroughs, boroughs_no_spaces)))

In [None]:
# The two aggregate columns that are split off from the borough-to-borough matrix
flow_columns = ['Into area (A)', 'Out of area (B)']

# Inflow/outflow table: independent copy, renamed, indexed by origin, rows sorted
in_out_flows = (
    bc[['Origin Area', *flow_columns]]
    .copy(deep=True)
    .rename(columns={'Origin Area':'Origin','Into area (A)':'Inflows','Out of area (B)':'Outflows'})
    .set_index(keys='Origin')
    .sort_index(axis=0)
)

# O/D matrix: everything except the aggregate columns, indexed by origin,
# with both the destination columns and the origin rows sorted by label
od_matrix = (
    bc.drop(columns=flow_columns)
    .copy(deep=True)
    .rename(columns={'Origin Area':'Origin'})
    .set_index(keys='Origin')
    .sort_index(axis=1)
    .sort_index(axis=0)
)

In [None]:
# Origin supply: row sums of the O/D matrix (one total per origin), sorted by origin label
origin_supply = (
    od_matrix.sum(axis=1)
    .rename_axis('Origin')
    .to_frame(name='Supply')
    .sort_index()
)
# Attach coordinates.
# NOTE(review): lon/lat are assigned by position, not matched on borough name. This
# assumes borough_coordinates rows are already in the same order as the sorted
# index above — confirm against the coordinates file.
for coord in ('lon', 'lat'):
    origin_supply[coord] = borough_coordinates[coord].values

In [None]:
# Destination demand: column sums of the O/D matrix (one total per destination), sorted by label
destination_demand = (
    od_matrix.sum(axis=0)
    .rename_axis('Destination')
    .to_frame(name='Demand')
    .sort_index()
)
# Attach coordinates.
# NOTE(review): lon/lat are assigned by position, not matched on borough name. This
# assumes borough_coordinates rows are already in the same order as the sorted
# index above — confirm against the coordinates file.
for coord in ('lon', 'lat'):
    destination_demand[coord] = borough_coordinates[coord].values

In [None]:
# Swap every zero entry of the O/D matrix for a one.
# As the cells are ordered, the supply and demand totals are computed before
# this replacement, so they are sums of the unmodified matrix.
od_matrix = od_matrix.replace(to_replace=0, value=1)

# Export data as dataframe and numpy array

In [38]:
# Labelled copy of the O/D matrix (index and header included)
od_matrix.to_csv('../data/validation/commuter/borough_od_matrix.csv')
# Bare numeric copy of the same matrix, written with numpy
od_values = od_matrix.to_numpy()
np.savetxt('../data/validation/commuter/borough_od_matrix.txt', od_values)

In [39]:
# Labelled copy of the origin supply table (supply plus lon/lat)
origin_supply.to_csv('../data/validation/commuter/borough_origin_supply.csv')
# Supply totals and origin coordinates as separate plain-text arrays
supply_values = origin_supply['Supply'].to_numpy()
origin_lonlat = origin_supply[['lon','lat']].to_numpy()
np.savetxt('../data/validation/commuter/borough_origin_supply.txt', supply_values)
np.savetxt('../data/validation/commuter/borough_origin_locations.txt', origin_lonlat)

In [41]:
# Labelled copy of the destination demand table (demand plus lon/lat)
destination_demand.to_csv('../data/validation/commuter/borough_destination_demand.csv')
# Demand totals and destination coordinates as separate plain-text arrays
demand_values = destination_demand['Demand'].to_numpy()
destination_lonlat = destination_demand[['lon','lat']].to_numpy()
np.savetxt('../data/validation/commuter/borough_destination_demand.txt', demand_values)
np.savetxt('../data/validation/commuter/borough_locations.txt', destination_lonlat)

In [37]:
# Also write the destination lon/lat pairs into the input data folder
input_destination_lonlat = destination_demand[['lon','lat']].to_numpy()
np.savetxt('../data/input/commuter/destination_locations.txt', input_destination_lonlat)