In [1]:
import csv
from pprint import pprint
import numpy as np
import pandas as pd

In [2]:
# Pick a few master-postcode-pairs (which have been augmented with time_sec in Notebook #4).
MPC_PAIRS_FILES = ["./output/master-postcode-pairs-20210607-162126-perlis.csv"]

# One list of row dicts per input file, in the same order as MPC_PAIRS_FILES.
mpc_pairs_all = []

# Import the pairs into dicts
for path in MPC_PAIRS_FILES:
    # newline="" is the mode the csv module asks for, so that the reader (not the
    # file object) decides how line endings inside quoted fields are handled.
    with open(path, newline="") as fp:
        mpc_pairs_all.append(list(csv.DictReader(fp, delimiter="|")))
# Note: DictReader yields every value as a string. The lat and lon are therefore strings
# instead of floats, but that's ok as we need strings for the API anyways.


# Time Matrix

Using the same idea as the Distance Matrix, we now build a matrix of the travel time (as reported by the Google Maps API) between each pair of master postcodes.

In [3]:
# Reuse the distance matrix from an earlier notebook, but only for its row/column labels.
# NOTE(review): unpickling can execute arbitrary code, so this file must only ever be
# one that this project wrote itself.
df_mat = pd.read_pickle("./output/distance-matrix.pkl")

# Start from an all-NaN float matrix with the same index and columns; the cells are
# filled in afterwards. Built directly instead of mapping a lambda over every cell,
# and spelled np.nan because the np.NaN alias no longer exists in NumPy 2.
df_mat = pd.DataFrame(np.nan, index=df_mat.index, columns=df_mat.columns, dtype=float)


In [4]:
# Fill the time matrix from the imported pairs.
for mpc_pairs in mpc_pairs_all:
    for item in mpc_pairs:
        # Matrix labels are (region, postcode) tuples; both ends of a pair use the pair's region.
        mpc1 = (item["ppv_region"], item["master_postcode_1"])
        mpc2 = (item["ppv_region"], item["master_postcode_2"])
        # float() understands "inf" natively and also accepts fractional values such as
        # "877.0", which int() would reject. The matrix is float-typed either way.
        duration = float(item["time_sec"])
        df_mat.loc[mpc1, mpc2] = duration
        df_mat.loc[mpc2, mpc1] = duration # also add across the diagonal


# Keep only strictly positive entries (zero or negative values become NaN, inf is kept),
# then drop all rows and columns which are completely NaN.
df_mat = df_mat[df_mat>0].dropna(how="all", axis=0).dropna(how="all", axis=1)

# Set the diagonals to zero.
# NOTE(review): this is positional, so it assumes the remaining rows and columns are the
# same postcodes in the same order -- confirm if the inputs can ever be asymmetric.
for i in range(len(df_mat)):
    df_mat.iloc[i, i] = 0 # Set diagonals to zero

display(df_mat)

Unnamed: 0_level_0,Unnamed: 1_level_0,Perlis,Perlis,Perlis,Perlis,Perlis,Perlis
Unnamed: 0_level_1,Unnamed: 1_level_1,01000,02000,02100,02200,02600,02700
Perlis,1000,0.0,877.0,2417.0,1745.0,973.0,1009.0
Perlis,2000,877.0,0.0,2752.0,2283.0,890.0,720.0
Perlis,2100,2417.0,2752.0,0.0,1010.0,2071.0,2799.0
Perlis,2200,1745.0,2283.0,1010.0,0.0,2267.0,2454.0
Perlis,2600,973.0,890.0,2071.0,2267.0,0.0,1035.0
Perlis,2700,1009.0,720.0,2799.0,2454.0,1035.0,0.0


In [5]:
# Also, generate the matrix in whole minutes (instead of seconds), for easy viewing.
# Divide first and floor afterwards: floor-dividing an infinite duration can produce NaN
# (plain Python's float('inf') // 60 does), which would hide the "inf" entries that mark
# a pair as having no finite travel time. floor(inf / 60) stays inf, and finite values
# come out the same as with floor division.
df_mat_minfloored = np.floor(df_mat / 60)
display(df_mat_minfloored)

Unnamed: 0_level_0,Unnamed: 1_level_0,Perlis,Perlis,Perlis,Perlis,Perlis,Perlis
Unnamed: 0_level_1,Unnamed: 1_level_1,01000,02000,02100,02200,02600,02700
Perlis,1000,0.0,14.0,40.0,29.0,16.0,16.0
Perlis,2000,14.0,0.0,45.0,38.0,14.0,12.0
Perlis,2100,40.0,45.0,0.0,16.0,34.0,46.0
Perlis,2200,29.0,38.0,16.0,0.0,37.0,40.0
Perlis,2600,16.0,14.0,34.0,37.0,0.0,17.0
Perlis,2700,16.0,12.0,46.0,40.0,17.0,0.0


# Export files

In [6]:
# Every matrix is written twice: once as Excel and once as pipe-delimited CSV.
# Each entry is (frame, Excel destination, CSV destination).
time_matrix_exports = [
    (df_mat, "./output/time-matrix-sec.xlsx", "./output/time-matrix-sec.csv"),
    (df_mat_minfloored, "./output/time-matrix-minfloored.xlsx", "./output/time-matrix-minfloored.csv"),
]

# Excel - written first, while the frames still carry their full multi-level labels.
for frame, xlsx_path, csv_path in time_matrix_exports:
    frame.to_excel(xlsx_path)

# Csv - drop one level of the MultiIndex on both axes before writing.
# Note we must keep the index in the csv (unlike other csv exports so far).
# The labels are replaced on the frames themselves, so df_mat and df_mat_minfloored
# keep the flattened labels after this cell has run.
for frame, xlsx_path, csv_path in time_matrix_exports:
    frame.index = frame.index.droplevel()
    frame.columns = frame.columns.droplevel()
    frame.to_csv(csv_path, sep="|")


In [7]:
# Show both matrices as they now stand, i.e. with the single-level labels left behind
# by the CSV export step.
display(df_mat, df_mat_minfloored)

Unnamed: 0,01000,02000,02100,02200,02600,02700
1000,0.0,877.0,2417.0,1745.0,973.0,1009.0
2000,877.0,0.0,2752.0,2283.0,890.0,720.0
2100,2417.0,2752.0,0.0,1010.0,2071.0,2799.0
2200,1745.0,2283.0,1010.0,0.0,2267.0,2454.0
2600,973.0,890.0,2071.0,2267.0,0.0,1035.0
2700,1009.0,720.0,2799.0,2454.0,1035.0,0.0


Unnamed: 0,01000,02000,02100,02200,02600,02700
1000,0.0,14.0,40.0,29.0,16.0,16.0
2000,14.0,0.0,45.0,38.0,14.0,12.0
2100,40.0,45.0,0.0,16.0,34.0,46.0
2200,29.0,38.0,16.0,0.0,37.0,40.0
2600,16.0,14.0,34.0,37.0,0.0,17.0
2700,16.0,12.0,46.0,40.0,17.0,0.0
