In [8]:
import googlemaps
import pandas as pd
import numpy as np
import csv
from src.utils import get_api_key, now
from pprint import pprint
import json
import pickle

In [9]:
# Read every postcode pair from the CSV; each row becomes a dict keyed by the
# header names, with all values kept as strings.
with open("./output/mpc_pairs.csv") as pairs_file:
    mpc_pairs = [row for row in csv.DictReader(pairs_file)]

# Peek at the first five records.
pprint(mpc_pairs[:5])

[{'latitude_1': '1.42513621',
  'latitude_2': '1.456123',
  'longitude_1': '103.61443042',
  'longitude_2': '103.761701',
  'master_postcode_1': '79000',
  'master_postcode_2': '80000',
  'ppv_region': 'Johor'},
 {'latitude_1': '1.42513621',
  'latitude_2': '1.662964',
  'longitude_1': '103.61443042',
  'longitude_2': '103.600178',
  'master_postcode_1': '79000',
  'master_postcode_2': '81000',
  'ppv_region': 'Johor'},
 {'latitude_1': '1.42513621',
  'latitude_2': '1.606506',
  'longitude_1': '103.61443042',
  'longitude_2': '103.647617',
  'master_postcode_1': '79000',
  'master_postcode_2': '81400',
  'ppv_region': 'Johor'},
 {'latitude_1': '1.42513621',
  'latitude_2': '1.876001',
  'longitude_1': '103.61443042',
  'longitude_2': '103.614046',
  'master_postcode_1': '79000',
  'master_postcode_2': '81440',
  'ppv_region': 'Johor'},
 {'latitude_1': '1.42513621',
  'latitude_2': '1.75050278',
  'longitude_1': '103.61443042',
  'longitude_2': '103.672815',
  'master_postcode_1': '7900

In [10]:
# Bucket the pairs by their ppv_region value (region name -> list of pair dicts).
mpc_pairs_by_region = {}
for pair in mpc_pairs:
    mpc_pairs_by_region.setdefault(pair["ppv_region"], []).append(pair)

# Sample: Kedah
pprint(mpc_pairs_by_region["Kedah"][:5])

# Number of pairs by state:
pprint({region: len(pairs) for region, pairs in mpc_pairs_by_region.items()})


[{'latitude_1': '6.131583',
  'latitude_2': '6.269412',
  'longitude_1': '100.373412',
  'longitude_2': '100.42052',
  'master_postcode_1': '05000',
  'master_postcode_2': '06000',
  'ppv_region': 'Kedah'},
 {'latitude_1': '6.131583',
  'latitude_2': '6.437103',
  'longitude_1': '100.373412',
  'longitude_2': '100.428782',
  'master_postcode_1': '05000',
  'master_postcode_2': '06010',
  'ppv_region': 'Kedah'},
 {'latitude_1': '6.131583',
  'latitude_2': '6.51576667',
  'longitude_1': '100.373412',
  'longitude_2': '100.421848',
  'master_postcode_1': '05000',
  'master_postcode_2': '06050',
  'ppv_region': 'Kedah'},
 {'latitude_1': '6.131583',
  'latitude_2': '6.392752',
  'longitude_1': '100.373412',
  'longitude_2': '100.305771',
  'master_postcode_1': '05000',
  'master_postcode_2': '06100',
  'ppv_region': 'Kedah'},
 {'latitude_1': '6.131583',
  'latitude_2': '5.610519',
  'longitude_1': '100.373412',
  'longitude_2': '100.444822',
  'master_postcode_1': '05000',
  'master_postcod

In [12]:
# List the region names available as keys of the grouped pairs.
mpc_pairs_by_region.keys()

dict_keys(['Johor', 'Kedah', 'Kelantan', 'Melaka', 'Negeri Sembilan', 'Pahang', 'Perak', 'Perlis', 'Pulau Pinang', 'Sabah', 'Sarawak', 'Selangor/KL/Putrajaya', 'Terengganu'])

In [33]:
# Pairs (one region at a time) whose travel durations are to be looked up.
mpc_pairs_api = mpc_pairs_by_region["Sarawak"]

# Leave as True to skip the Google Maps calls and rely on a previously saved
# response file instead. Set to False to query the Distance Matrix API; the
# cell that reads the responses back must then open the newly written file
# rather than the saved sample.
SKIP = True
if not SKIP:
    gmaps = googlemaps.Client(key=get_api_key())
    responses = []
    ts = now()
    for item in mpc_pairs_api:
        # coordinates separated by comma, no spaces
        origin = f"{item['latitude_1']},{item['longitude_1']}"
        destination = f"{item['latitude_2']},{item['longitude_2']}"
        try:
            response = gmaps.distance_matrix(origin, destination)
        except Exception as exc:
            # A failed request must not reuse the previous pair's response (or
            # hit an unbound name on the first pair). Record an explicit
            # placeholder so `responses` stays aligned one-to-one with
            # `mpc_pairs_api` and the failure is visible in the saved file.
            print(f"Request failed for {origin} -> {destination}: {exc}")
            response = {
                "rows": [
                    {"elements": [{"status": "REQUEST_FAILED", "error": str(exc)}]}
                ]
            }
        responses.append(response)
    # Write once, after all requests, in "w" mode: appending a second JSON
    # document to an existing file would make it unreadable by json.load.
    with open(f"./responses/distance-matrix-{ts}.json", mode="w") as fp:
        json.dump(responses, fp)

In [37]:
# Pick ONE of the two sources below:
# with open(f"./responses/distance-matrix-{ts}.json") as fp: # Use this line for MAPS API
with open("./responses/distance-matrix-20210606-221535-sarawak.json") as fp: # Use this line for sample
    responses = json.load(fp)

# Every pair must have exactly one saved response, in the same order.
assert len(mpc_pairs_api) == len(responses)

# Copy each response's duration value onto its pair dict under "duration".
# Pairs the API reports as ZERO_RESULTS get the string "inf" instead.
try:
    for pair, api_response in zip(mpc_pairs_api, responses):
        first_element = api_response["rows"][0]["elements"][0]
        no_route = first_element["status"] == "ZERO_RESULTS"
        pair["duration"] = "inf" if no_route else first_element["duration"]["value"]
except KeyError as exc:
    # Show the response that lacked an expected key before failing.
    print(api_response)
    raise exc

In [38]:
# Time matrix: start from the existing distance matrix and reuse its layout.

# The matrix is loaded from a pickle for now, because importing a csv/Excel
# causes certain postcodes to become integers instead of strings.
# The pickle file is generated at the end of Notebook #1.
# NOTE(review): pickle.load executes arbitrary code from the file; only use it
# on files produced by this project.
with open("./output/distance_matrix.pickle", mode="rb") as pickle_file:
    df_mat = pickle.load(pickle_file)

# Change all numbers to 0, leaving missing cells untouched.
# NOTE(review): applymap appears to be deprecated in newer pandas releases in
# favour of DataFrame.map; confirm against the pandas version in use.
df_mat = df_mat.applymap(lambda _cell: 0, na_action="ignore")


In [39]:
# [[[0, 605], [535, 0]], [[0, 2558], [2597, 0]]]
# Write each pair's duration into the matrix at (postcode_1, postcode_2) and at
# the mirrored position; rows and columns are addressed by (region, postcode).
for pair in mpc_pairs_api:
    region = pair["ppv_region"]
    first_key = (region, pair["master_postcode_1"])
    second_key = (region, pair["master_postcode_2"])
    if pair["duration"] != "inf":
        travel_time = pair["duration"]
    else:
        # The "inf" string marker becomes a real floating-point infinity.
        travel_time = np.inf
    df_mat.loc[first_key, second_key] = travel_time
    df_mat.loc[second_key, first_key] = travel_time  # also add across the diagonal



In [40]:
# Keep only the positive entries, then discard rows and columns that are left
# completely empty.  # View the data
result = (
    df_mat.where(df_mat > 0)
    .dropna(axis=0, how="all")
    .dropna(axis=1, how="all")
)
# Set diagonals to zero (position-based: assumes rows and columns are in the
# same order).
for pos, _ in enumerate(result.index):
    result.iloc[pos, pos] = 0
result

Unnamed: 0_level_0,Unnamed: 1_level_0,Sarawak,Sarawak,Sarawak,Sarawak,Sarawak,Sarawak,Sarawak,Sarawak,Sarawak,Sarawak,Sarawak,Sarawak,Sarawak,Sarawak,Sarawak,Sarawak,Sarawak,Sarawak,Sarawak,Sarawak,Sarawak
Unnamed: 0_level_1,Unnamed: 1_level_1,09130,93000,94000,94200,94300,94500,94600,94650,94700,94800,...,97100,97200,98000,98050,98100,98150,98200,98300,98700,98800
Sarawak,9130,0.0,30675.0,31979.0,29448.0,29069.0,36103.0,27302.0,16686.0,28048.0,27104.0,...,7481.0,5005.0,15292.0,21942.0,15621.0,12608.0,11359.0,17332.0,25373.0,40038.0
Sarawak,93000,30675.0,0.0,2853.0,2549.0,1952.0,6528.0,3559.0,18306.0,4095.0,6214.0,...,37971.0,32406.0,45782.0,52432.0,46110.0,43098.0,41849.0,47821.0,55863.0,70528.0
Sarawak,94000,31979.0,2853.0,0.0,2996.0,3412.0,4407.0,5019.0,19766.0,4542.0,7674.0,...,39430.0,33866.0,47242.0,53891.0,47570.0,44558.0,43308.0,49281.0,57322.0,71987.0
Sarawak,94200,29448.0,2549.0,2996.0,0.0,1495.0,7278.0,3101.0,17849.0,1706.0,5756.0,...,37314.0,31750.0,45125.0,51775.0,45454.0,42441.0,41192.0,47165.0,55206.0,69871.0
Sarawak,94300,29069.0,1952.0,3412.0,1495.0,0.0,7412.0,1971.0,16718.0,2912.0,4626.0,...,36383.0,30818.0,44194.0,50844.0,44522.0,41510.0,40260.0,46233.0,54275.0,68940.0
Sarawak,94500,36103.0,6528.0,4407.0,7278.0,7412.0,0.0,9055.0,23803.0,8737.0,11711.0,...,43467.0,37903.0,51278.0,57928.0,51607.0,48595.0,47345.0,53318.0,61359.0,76024.0
Sarawak,94600,27302.0,3559.0,5019.0,3101.0,1971.0,9055.0,0.0,14966.0,4033.0,2874.0,...,34630.0,29066.0,42441.0,49091.0,42770.0,39757.0,38508.0,44481.0,52522.0,67187.0
Sarawak,94650,16686.0,18306.0,19766.0,17849.0,16718.0,23803.0,14966.0,0.0,16514.0,14846.0,...,24088.0,18524.0,31899.0,38549.0,32228.0,29215.0,27966.0,33938.0,41980.0,56645.0
Sarawak,94700,28048.0,4095.0,4542.0,1706.0,2912.0,8737.0,4033.0,16514.0,0.0,2881.0,...,35685.0,30121.0,43497.0,50146.0,43825.0,40813.0,39563.0,45536.0,53577.0,68243.0
Sarawak,94800,27104.0,6214.0,7674.0,5756.0,4626.0,11711.0,2874.0,14846.0,2881.0,0.0,...,34487.0,28923.0,42298.0,48948.0,42627.0,39614.0,38365.0,44338.0,52379.0,67044.0


In [41]:
# Strip the outermost level from both axes so that only the inner labels remain.
# Running this cell twice on the same frame is not expected to work, since the
# level is already gone after the first run.
result.columns = result.columns.droplevel(0)
result.index = result.index.droplevel(0)


In [42]:
# Display the matrix again after the outer index/column level was dropped.
result

Unnamed: 0,09130,93000,94000,94200,94300,94500,94600,94650,94700,94800,...,97100,97200,98000,98050,98100,98150,98200,98300,98700,98800
9130,0.0,30675.0,31979.0,29448.0,29069.0,36103.0,27302.0,16686.0,28048.0,27104.0,...,7481.0,5005.0,15292.0,21942.0,15621.0,12608.0,11359.0,17332.0,25373.0,40038.0
93000,30675.0,0.0,2853.0,2549.0,1952.0,6528.0,3559.0,18306.0,4095.0,6214.0,...,37971.0,32406.0,45782.0,52432.0,46110.0,43098.0,41849.0,47821.0,55863.0,70528.0
94000,31979.0,2853.0,0.0,2996.0,3412.0,4407.0,5019.0,19766.0,4542.0,7674.0,...,39430.0,33866.0,47242.0,53891.0,47570.0,44558.0,43308.0,49281.0,57322.0,71987.0
94200,29448.0,2549.0,2996.0,0.0,1495.0,7278.0,3101.0,17849.0,1706.0,5756.0,...,37314.0,31750.0,45125.0,51775.0,45454.0,42441.0,41192.0,47165.0,55206.0,69871.0
94300,29069.0,1952.0,3412.0,1495.0,0.0,7412.0,1971.0,16718.0,2912.0,4626.0,...,36383.0,30818.0,44194.0,50844.0,44522.0,41510.0,40260.0,46233.0,54275.0,68940.0
94500,36103.0,6528.0,4407.0,7278.0,7412.0,0.0,9055.0,23803.0,8737.0,11711.0,...,43467.0,37903.0,51278.0,57928.0,51607.0,48595.0,47345.0,53318.0,61359.0,76024.0
94600,27302.0,3559.0,5019.0,3101.0,1971.0,9055.0,0.0,14966.0,4033.0,2874.0,...,34630.0,29066.0,42441.0,49091.0,42770.0,39757.0,38508.0,44481.0,52522.0,67187.0
94650,16686.0,18306.0,19766.0,17849.0,16718.0,23803.0,14966.0,0.0,16514.0,14846.0,...,24088.0,18524.0,31899.0,38549.0,32228.0,29215.0,27966.0,33938.0,41980.0,56645.0
94700,28048.0,4095.0,4542.0,1706.0,2912.0,8737.0,4033.0,16514.0,0.0,2881.0,...,35685.0,30121.0,43497.0,50146.0,43825.0,40813.0,39563.0,45536.0,53577.0,68243.0
94800,27104.0,6214.0,7674.0,5756.0,4626.0,11711.0,2874.0,14846.0,2881.0,0.0,...,34487.0,28923.0,42298.0,48948.0,42627.0,39614.0,38365.0,44338.0,52379.0,67044.0


In [43]:
# Suffix used in the output file names.
name = "sarawak"

# Whole minutes, rounded down. Plain floor division (`result // 60`) is avoided
# because floor-dividing np.inf (the marker used for pairs with no route)
# yields NaN; dividing first and flooring afterwards keeps inf as inf while
# giving the same value for every finite entry.
result_min = np.floor(result / 60)

# Save both the seconds matrix and the floored-minutes matrix.
result.to_csv(f"./output/time-matrix-{name}-sec.csv")
result_min.to_csv(f"./output/time-matrix-{name}-minfloored.csv")
