In [1]:
import csv
import json
from pprint import pprint

import googlemaps
import numpy as np
import pandas as pd

import src.utils

# Google Maps Distance Matrix API

We now have every valid pairing of master postcodes, and their coordinates. As a reminder:

- Master postcodes in one ppv_region cannot reach another ppv_region (due to state travel restrictions).
- Selangor, KL and Putrajaya are considered the same ppv_region.
- Each other state is its own ppv_region.

We will use the Google Maps Distance Matrix API to obtain the time required to travel from one valid master postcode to another.

As the API returns different results at different times of day, it is best to perform all the requests in one go, preferably at night when the roads are empty. This way, we avoid traffic jams distorting the travel times between postcodes.

In [2]:
# Import the list of all master postcode pairings from the previous notebook.
# The file is pipe-delimited; each row becomes a dict keyed by the header names.
# newline="" lets the csv module handle line endings itself, as the csv docs
# recommend, and matches how the export cell at the end of this notebook opens its file.
with open("./output/master-postcode-pairs.csv", newline="") as fp:
    mpc_pairs = list(csv.DictReader(fp, delimiter="|"))
# Note: lat and lon are read as strings rather than floats. That is fine here,
# because the API request below builds its coordinates as strings anyway.

pprint(mpc_pairs[:3])

[{'lat_1': '1.42513621',
  'lat_2': '1.456123',
  'lon_1': '103.61443042',
  'lon_2': '103.761701',
  'master_postcode_1': '79000',
  'master_postcode_2': '80000',
  'ppv_region': 'Johor'},
 {'lat_1': '1.42513621',
  'lat_2': '1.662964',
  'lon_1': '103.61443042',
  'lon_2': '103.600178',
  'master_postcode_1': '79000',
  'master_postcode_2': '81000',
  'ppv_region': 'Johor'},
 {'lat_1': '1.42513621',
  'lat_2': '1.4971995',
  'lon_1': '103.61443042',
  'lon_2': '103.711685',
  'master_postcode_1': '79000',
  'master_postcode_2': '81300',
  'ppv_region': 'Johor'}]


In [3]:
# When testing, it is easier to hit the API one ppv_region at a time,
# so bucket the pairings by their ppv_region first.
mpc_pairs_by_region = {}
for mpc_pair in mpc_pairs:
    ppv = mpc_pair["ppv_region"]
    # setdefault creates the region's list the first time the region is seen.
    mpc_pairs_by_region.setdefault(ppv, []).append(mpc_pair)

print("Example: Kedah (first 3)")
pprint(mpc_pairs_by_region["Kedah"][:3])
print("-----")
print("Number of valid pairings by state:")
pprint(
    {
        region: len(region_pairs)
        for region, region_pairs in mpc_pairs_by_region.items()
    }
)


Example: Kedah (first 3)
[{'lat_1': '6.131583',
  'lat_2': '6.269412',
  'lon_1': '100.373412',
  'lon_2': '100.42052',
  'master_postcode_1': '05000',
  'master_postcode_2': '06000',
  'ppv_region': 'Kedah'},
 {'lat_1': '6.131583',
  'lat_2': '6.443931344999999',
  'lon_1': '100.373412',
  'lon_2': '100.46085003499999',
  'master_postcode_1': '05000',
  'master_postcode_2': '06010',
  'ppv_region': 'Kedah'},
 {'lat_1': '6.131583',
  'lat_2': '6.51576667',
  'lon_1': '100.373412',
  'lon_2': '100.421848',
  'master_postcode_1': '05000',
  'master_postcode_2': '06050',
  'ppv_region': 'Kedah'}]
-----
Number of valid pairings by state:
{'Johor': 1378,
 'Kedah': 528,
 'Kelantan': 210,
 'Melaka': 136,
 'Negeri Sembilan': 351,
 'Pahang': 666,
 'Perak': 2145,
 'Perlis': 15,
 'Pulau Pinang': 231,
 'Sabah': 496,
 'Sarawak': 990,
 'Selangor/KL/Putrajaya': 1225,
 'Terengganu': 231}


In [4]:
# List the ppv_regions that can be picked for API testing in the next cell.
available_regions = mpc_pairs_by_region.keys()
print(available_regions)

dict_keys(['Johor', 'Kedah', 'Kelantan', 'Melaka', 'Negeri Sembilan', 'Pahang', 'Perak', 'Perlis', 'Pulau Pinang', 'Sabah', 'Sarawak', 'Selangor/KL/Putrajaya', 'Terengganu'])


In [12]:
# Query the Google Maps Distance Matrix API once per pairing of the chosen
# ppv_region and save all raw responses to a JSON file in ./responses.
state = "Terengganu"
mpc_pairs_api = mpc_pairs_by_region[state] # <-- This is what is fed into the API. Change as you please.

SKIP = True # Change this to False to use the Maps API.
if not SKIP:
    gmaps = googlemaps.Client(key=src.utils.get_api_key())
    # One entry per pairing, in the same order as mpc_pairs_api. The later
    # cells pair responses with pairings by position, so order and length matter.
    responses = []
    ts = src.utils.now()
    for item in mpc_pairs_api:
        # coordinates separated by comma, no spaces
        origin = f"{item['lat_1']},{item['lon_1']}"
        destination = f"{item['lat_2']},{item['lon_2']}"
        try:
            response = gmaps.distance_matrix(origin, destination)
            # pprint(response)  # for debugging
        except Exception as exc:
            pprint(exc)
            # Record a placeholder for the failed request. Without this, the
            # previous pairing's response would be appended a second time
            # (or NameError raised if the very first request fails).
            # "REQUEST_FAILED" is a local marker, not a Google status code;
            # the missing "rows" key makes the failure visible downstream.
            response = {"status": "REQUEST_FAILED", "error_message": repr(exc)}
        responses.append(response)
    # Store the responses in a json file, inside ./responses folder.
    # The file is only opened once all requests are done, so an interrupted
    # run leaves no empty file behind. Mode "w" guarantees the file holds a
    # single JSON array (appending a second array would make it unparseable).
    with open(f"./responses/distance-matrix-{ts}-{state}.json", mode="w") as fp:
        json.dump(responses, fp)
    print("First response:")
    pprint(responses[0])
    print("-----")
    print(f"Total responses: {len(responses)}")
    ok_resp = sum(1 for r in responses if r["status"] == "OK")
    print(f"OK responses: {ok_resp}")


First response:
{'destination_addresses': ['E8, 21060 Kuala Terengganu, Terengganu, Malaysia'],
 'origin_addresses': ['Terminal Penumpang Kuala Terengganu, Jalan Sultan '
                      'Zainal Abidin, 20200 Kuala Terengganu, Terengganu, '
                      'Malaysia'],
 'rows': [{'elements': [{'distance': {'text': '17.8 km', 'value': 17768},
                         'duration': {'text': '25 mins', 'value': 1505},
                         'status': 'OK'}]}],
 'status': 'OK'}
-----
Total responses: 231
OK responses: 231


In [38]:
# Augment the mpc_pairs_api dictionaries with the duration returned by API.
# Each pairing dict gains a "time_sec" key: the travel duration value from the
# response, or the string "inf" when the API found no route (ZERO_RESULTS).
state = "Terengganu"
NAME = "20210611-001855-Terengganu"
mpc_pairs_api = mpc_pairs_by_region[state] # <-- This is what is fed into the API. Change as you please.

with open(f"./responses/final/distance-matrix-{NAME}.json") as fp: # Use this line for sample
    responses = json.load(fp)

# Responses are matched to pairings purely by position, and zip() silently
# stops at the shorter input. Check the lengths BEFORE mutating anything so a
# mismatched file cannot leave the pairings only partially augmented.
if len(mpc_pairs_api) != len(responses):
    raise ValueError(
        f"Expected {len(mpc_pairs_api)} responses for {state}, got {len(responses)}"
    )

# Add the corresponding duration into the mpc_pairs dictionaries
for item, resp in zip(mpc_pairs_api, responses):
    try:
        element = resp["rows"][0]["elements"][0]
        if element["status"] == "ZERO_RESULTS":
            item["time_sec"] = "inf"
        else:
            item["time_sec"] = element["duration"]["value"]
    except (KeyError, IndexError):
        # Show the offending response before failing. IndexError covers a
        # response whose "rows" or "elements" list is empty.
        print(resp)
        raise

pprint(mpc_pairs_api[:3])


[{'lat_1': '5.337306',
  'lat_2': '5.331277666666666',
  'lon_1': '103.13771',
  'lon_2': '103.02555933333333',
  'master_postcode_1': '20000',
  'master_postcode_2': '21400',
  'ppv_region': 'Terengganu',
  'time_sec': 1505},
 {'lat_1': '5.337306',
  'lat_2': '5.424328',
  'lon_1': '103.13771',
  'lon_2': '102.837014',
  'master_postcode_1': '20000',
  'master_postcode_2': '21450',
  'ppv_region': 'Terengganu',
  'time_sec': 3092},
 {'lat_1': '5.337306',
  'lat_2': '5.38459347',
  'lon_1': '103.13771',
  'lon_2': '102.84120621',
  'master_postcode_1': '20000',
  'master_postcode_2': '21500',
  'ppv_region': 'Terengganu',
  'time_sec': 3179}]


# Export files

In [39]:
# Every pairing must have a matching API response before anything is written.
assert len(mpc_pairs_api) == len(responses)
# Pipe-delimited export, one row per pairing; the column order follows the
# keys of the first pairing dict.
export_path = f"./output/master-postcode-pairs-{NAME}.csv"
with open(export_path, mode="w", newline="") as fp:
    writer = csv.DictWriter(fp, fieldnames=mpc_pairs_api[0].keys(), delimiter="|")
    writer.writeheader()
    writer.writerows(mpc_pairs_api)