In [44]:
import numpy as np
import pandas as pd
import geopandas as gpd
from tqdm import tqdm
import copy
import json

# Import data

In [65]:
# Load per-borough coordinates, then key the table by borough name
borough_coordinates = gpd.read_file("../data/input/commuter/borough_coordinates.json")
borough_coordinates = borough_coordinates.set_index('name')

In [66]:
# Load the London borough feature collection (its 'features' list is read further down)
boroughs_json_path = '../data/input/london_boroughs.json'
with open(boroughs_json_path) as json_file:
    boroughs_coords = json.load(json_file)

In [67]:
# Read the raw borough-to-borough commuting table
borough_commuters = pd.read_csv("../data/raw/commuter/commuting-patterns-borough.csv")
# Work on an independent deep copy so the raw table stays untouched
bc = borough_commuters.copy(deep=True)

# Clean Borough names and Origin/Destination dataframe

In [68]:
# Borough names as written in the feature collection (progress bar kept)
boroughs = [
    feature['properties']['name']
    for feature in tqdm(boroughs_coords['features'])
]
# Same names, in the same order, with every space removed
boroughs_no_spaces = [name.replace(" ", "") for name in boroughs]

100%|██████████| 33/33 [00:00<00:00, 35453.90it/s]


In [69]:
# Keep the origin label, one column per borough, and the two aggregate flow columns
aggregate_flow_columns = ['Into area (A)', 'Out of area (B)']
selected_columns = ['Origin Area'] + boroughs + aggregate_flow_columns
bc = bc[selected_columns]

# Keep only the rows whose origin is one of the boroughs
is_borough_origin = bc['Origin Area'].isin(boroughs)
bc = bc[is_borough_origin]

In [70]:
# Remove spaces from origin names.
# `assign` builds a new frame instead of writing into the row-filtered `bc`,
# which avoids pandas' SettingWithCopyWarning on the filtered frame.
bc = bc.assign(**{'Origin Area': bc['Origin Area'].str.replace(" ", "", regex=False)})

# Remove spaces from destination (column) names.
# `boroughs` and `boroughs_no_spaces` are built in the same loop, so they are
# already pairwise aligned. Sorting each list independently before zipping can
# mispair names, because a space sorts before any letter and removing it can
# change the relative order of two names.
bc = bc.rename(columns=dict(zip(boroughs, boroughs_no_spaces)))

In [71]:
# Split the cleaned table into (a) aggregate in/out flows and (b) the O/D matrix.
# Each chain renames the origin column, indexes by it, and sorts the labels.
aggregate_columns = ['Into area (A)', 'Out of area (B)']

# Aggregate inflows/outflows per origin, sorted by origin name
in_out_flows = (
    bc[['Origin Area'] + aggregate_columns]
    .rename(columns={'Origin Area':'Origin','Into area (A)':'Inflows','Out of area (B)':'Outflows'})
    .set_index(keys='Origin')
    .sort_index(axis=0)
)

# Origin x destination matrix, with both columns and rows sorted by name
od_matrix = (
    bc.drop(columns=aggregate_columns)
    .rename(columns={'Origin Area':'Origin'})
    .set_index(keys='Origin')
    .sort_index(axis=1)
    .sort_index(axis=0)
)

In [73]:
# Get origin supply: row sums of the O/D matrix, one row per origin
origin_supply = od_matrix.sum(axis=1).rename('Supply').rename_axis('Origin').to_frame()

# Attach coordinates by borough NAME rather than by row position, so a different
# row order in the coordinate file cannot silently mislabel boroughs.
# Spaces are stripped from the coordinate index to match the space-free origin names.
coords_by_name = borough_coordinates[['lon', 'lat']].copy()
coords_by_name.index = coords_by_name.index.astype(str).str.replace(" ", "", regex=False)
coords_by_name = coords_by_name.reindex(origin_supply.index)

# Fail loudly if any origin has no matching coordinates
missing_coords = coords_by_name.index[coords_by_name.isna().any(axis=1)]
if len(missing_coords) > 0:
    raise KeyError(f"No coordinates found for origins: {list(missing_coords)}")

origin_supply['lon'] = coords_by_name['lon'].values
origin_supply['lat'] = coords_by_name['lat'].values

In [77]:
# Get destination demand: column sums of the O/D matrix, one row per destination
destination_demand = od_matrix.sum(axis=0).rename('Demand').rename_axis('Destination').to_frame()

# Attach coordinates by borough NAME rather than by row position, so a different
# row order in the coordinate file cannot silently mislabel boroughs.
# Spaces are stripped from the coordinate index to match the space-free destination names.
coords_by_name = borough_coordinates[['lon', 'lat']].copy()
coords_by_name.index = coords_by_name.index.astype(str).str.replace(" ", "", regex=False)
coords_by_name = coords_by_name.reindex(destination_demand.index)

# Fail loudly if any destination has no matching coordinates
missing_coords = coords_by_name.index[coords_by_name.isna().any(axis=1)]
if len(missing_coords) > 0:
    raise KeyError(f"No coordinates found for destinations: {list(missing_coords)}")

destination_demand['lon'] = coords_by_name['lon'].values
destination_demand['lat'] = coords_by_name['lat'].values

In [78]:
# Swap every zero entry of the O/D matrix for a one
# NOTE(review): this runs after the supply/demand sums were taken, so the exported
# matrix margins can differ from those totals wherever a zero was replaced — confirm intended.
od_matrix = od_matrix.replace(to_replace=0, value=1)

# Export data as CSV files and plain-text numeric arrays

In [None]:
# Labelled table (index and header included) as CSV
in_out_flows.to_csv(path_or_buf='../data/input/commuter/inflows_outflows.csv')
# Bare numeric values (no labels) as a text matrix
inflows_outflows_values = in_out_flows.to_numpy()
np.savetxt(fname='../data/input/commuter/inflows_outflows.txt', X=inflows_outflows_values)

In [None]:
# Labelled O/D matrix (index and header included) as CSV
od_matrix.to_csv(path_or_buf='../data/input/commuter/od_matrix.csv')
# Bare numeric values (no labels) as a text matrix
od_matrix_values = od_matrix.to_numpy()
np.savetxt(fname='../data/input/commuter/od_matrix.txt', X=od_matrix_values)

In [None]:
# Labelled origin supply table (index and header included) as CSV
origin_supply.to_csv(path_or_buf='../data/input/commuter/origin_supply.csv')
# Bare numeric values (no labels) as a text matrix
origin_supply_values = origin_supply.to_numpy()
np.savetxt(fname='../data/input/commuter/origin_supply.txt', X=origin_supply_values)

In [80]:
# Display the destination demand table (Demand, lon, lat per destination) for a visual check
destination_demand

Unnamed: 0_level_0,Demand,lon,lat
Destination,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
BarkingandDagenham,38747,0.133528,51.545277
Barnet,74789,-0.210017,51.616027
Bexley,45421,0.140355,51.458819
Brent,68322,-0.267821,51.558556
Bromley,76223,0.051524,51.371999
Camden,174520,-0.157424,51.546394
CityofLondon,211588,-0.092171,51.514845
Croydon,96120,-0.087157,51.355335
Ealing,83997,-0.331026,51.522475
Enfield,62826,-0.087272,51.650995


In [None]:
# Labelled destination demand table (index and header included) as CSV
destination_demand.to_csv(path_or_buf='../data/input/commuter/destination_demand.csv')
# Bare numeric values (no labels) as a text matrix
destination_demand_values = destination_demand.to_numpy()
np.savetxt(fname='../data/input/commuter/destination_demand.txt', X=destination_demand_values)