In [17]:
import numpy as np
import pandas as pd
from tqdm import tqdm
import copy
import json

# Import data

In [18]:
# Load the London borough file. The encoding is stated explicitly because
# open() otherwise falls back to the platform's locale default, which differs
# between operating systems, whereas JSON interchange text is UTF-8.
with open('../data/raw/misc/london_boroughs.json', encoding='utf-8') as json_file:
    # The parsed object is read further down via ['features'] and, per feature,
    # ['properties']['name'].
    borough_coordinates = json.load(json_file)

In [19]:
# Read the raw commuting-patterns CSV from disk
borough_commuters = pd.read_csv(
    filepath_or_buffer="../data/raw/commuter/commuting-patterns-borough.csv"
)
# Work on an independent deep copy so the raw frame is left untouched
bc = borough_commuters.copy(deep=True)

# Clean Borough names and Origin/Destination dataframe

In [20]:
# Borough names, in the order the features appear in the loaded file
boroughs = [
    feature['properties']['name']
    for feature in tqdm(borough_coordinates['features'])
]
# The same names with every space removed, kept in matching order
boroughs_no_spaces = [name.replace(" ", "") for name in boroughs]

100%|██████████| 33/33 [00:00<00:00, 18823.89it/s]


In [27]:
# List the borough names alphabetically, one per line
for borough_name in sorted(boroughs):
    print(borough_name)

Barking and Dagenham
Barnet
Bexley
Brent
Bromley
Camden
City of London
Croydon
Ealing
Enfield
Greenwich
Hackney
Hammersmith and Fulham
Haringey
Harrow
Havering
Hillingdon
Hounslow
Islington
Kensington and Chelsea
Kingston upon Thames
Lambeth
Lewisham
Merton
Newham
Redbridge
Richmond upon Thames
Southwark
Sutton
Tower Hamlets
Waltham Forest
Wandsworth
Westminster


In [21]:
# Keep only the rows whose origin is one of the boroughs, and only the origin
# label, the per-borough destination columns and the two aggregate flow columns.
# Rows and columns are selected in a single .loc call and the result is copied
# explicitly, so `bc` is an independent frame: later in-place edits then do not
# act on a slice of the previous frame (which makes pandas emit
# SettingWithCopyWarning).
bc = bc.loc[
    bc['Origin Area'].isin(boroughs),
    ['Origin Area'] + boroughs + ['Into area (A)', 'Out of area (B)'],
].copy()

In [6]:
# Remove spaces from origin names. assign() returns a new frame instead of
# writing into `bc` in place, and .str.replace is the vectorised equivalent of
# applying str.replace element by element (regex=False: literal space).
bc = bc.assign(**{'Origin Area': bc['Origin Area'].str.replace(" ", "", regex=False)})
# Remove spaces from destination (column) names.
# `boroughs` and `boroughs_no_spaces` are filled in the same loop, so they are
# already aligned element by element. Sorting each list separately before
# zipping is unsafe: a space sorts before every letter, so stripping spaces can
# change the relative order of two names and pair a column with the wrong
# replacement.
bc = bc.rename(columns=dict(zip(boroughs, boroughs_no_spaces)))

In [7]:
# Aggregate inflow/outflow totals per origin: take the three relevant columns,
# give them short names, index by origin and order the rows by that index.
in_out_flows = (
    bc[['Origin Area', 'Into area (A)', 'Out of area (B)']]
    .copy(deep=True)
    .rename(columns={'Origin Area': 'Origin',
                     'Into area (A)': 'Inflows',
                     'Out of area (B)': 'Outflows'})
    .set_index(keys='Origin')
    .sort_index(axis=0)
)

# Origin/destination matrix: everything except the two aggregate columns,
# indexed by origin, with columns and then rows put in sorted order.
od_matrix = (
    bc.drop(columns=['Into area (A)', 'Out of area (B)'])
    .copy(deep=True)
    .rename(columns={'Origin Area': 'Origin'})
    .set_index(keys='Origin')
    .sort_index(axis=1)
    .sort_index(axis=0)
)

In [8]:
# Origin supply: row totals of the O/D matrix, as a one-column frame
# ('Supply') whose index is labelled 'Origin'.
origin_supply = (
    od_matrix.sum(axis=1)
    .rename_axis('Origin')
    .to_frame(name='Supply')
)

In [9]:
# Destination demand: column totals of the O/D matrix, as a one-column frame
# ('Demand') whose index is labelled 'Destination'.
destination_demand = (
    od_matrix.sum(axis=0)
    .rename_axis('Destination')
    .to_frame(name='Demand')
)

In [None]:
# Swap every zero entry of the O/D matrix for one. This comes after the
# supply/demand totals above, so those totals are taken from the matrix as it
# was before this replacement.
# NOTE(review): presumably done to avoid zero flows downstream - confirm.
od_matrix = od_matrix.replace(to_replace=0, value=1)

# Export data as CSV files (labelled dataframes) and plain-text files (numpy arrays)

In [None]:
# Write the inflow/outflow totals twice: a CSV that keeps the index and column
# labels, and a text file holding only the numeric values.
in_out_flows.to_csv(path_or_buf='../data/input/commuter/inflows_outflows.csv')
np.savetxt(
    fname='../data/input/commuter/inflows_outflows.txt',
    X=in_out_flows.to_numpy(),
)

In [None]:
# Write the O/D matrix twice: a CSV that keeps the index and column labels,
# and a text file holding only the numeric values.
od_matrix.to_csv(path_or_buf='../data/input/commuter/od_matrix.csv')
np.savetxt(
    fname='../data/input/commuter/od_matrix.txt',
    X=od_matrix.to_numpy(),
)

In [None]:
# Write the origin supply twice: a CSV that keeps the index and column labels,
# and a text file holding only the numeric values.
origin_supply.to_csv(path_or_buf='../data/input/commuter/origin_supply.csv')
np.savetxt(
    fname='../data/input/commuter/origin_supply.txt',
    X=origin_supply.to_numpy(),
)

In [None]:
# Write the destination demand twice: a CSV that keeps the index and column
# labels, and a text file holding only the numeric values.
destination_demand.to_csv(path_or_buf='../data/input/commuter/destination_demand.csv')
np.savetxt(
    fname='../data/input/commuter/destination_demand.txt',
    X=destination_demand.to_numpy(),
)