In [1]:
import googlemaps
import pandas as pd
import numpy as np
import csv
from src.utils import get_api_key, now
from pprint import pprint
import json
import pickle

In [2]:
# Load every master-postcode pair as a list of dicts keyed by the CSV header.
# csv.DictReader does no type conversion, so each value (postcodes and
# coordinates alike) stays a string exactly as written in the file.
# newline="" hands line-ending handling to the csv module, which is what it
# expects from file objects; without it, quoted fields that contain newlines
# are not parsed correctly.
with open("./output/mpc_pairs.csv", newline="") as fp:
    mpc_pairs = list(csv.DictReader(fp))

# Preview only the first few records rather than dumping the whole list.
pprint(mpc_pairs[:5])

[{'latitude_1': '1.42513621',
  'latitude_2': '1.456123',
  'longitude_1': '103.61443042',
  'longitude_2': '103.761701',
  'master_postcode_1': '79000',
  'master_postcode_2': '80000',
  'ppv_region': 'Johor'},
 {'latitude_1': '1.42513621',
  'latitude_2': '1.662964',
  'longitude_1': '103.61443042',
  'longitude_2': '103.600178',
  'master_postcode_1': '79000',
  'master_postcode_2': '81000',
  'ppv_region': 'Johor'},
 {'latitude_1': '1.42513621',
  'latitude_2': '1.606506',
  'longitude_1': '103.61443042',
  'longitude_2': '103.647617',
  'master_postcode_1': '79000',
  'master_postcode_2': '81400',
  'ppv_region': 'Johor'},
 {'latitude_1': '1.42513621',
  'latitude_2': '1.876001',
  'longitude_1': '103.61443042',
  'longitude_2': '103.614046',
  'master_postcode_1': '79000',
  'master_postcode_2': '81440',
  'ppv_region': 'Johor'},
 {'latitude_1': '1.42513621',
  'latitude_2': '1.75050278',
  'longitude_1': '103.61443042',
  'longitude_2': '103.672815',
  'master_postcode_1': '7900

In [3]:
# Sample data; comment out if doing for real.
mpc_pairs = [
    {'latitude_1': '4.76804722',
    'latitude_2': '4.426911',
    'longitude_1': '103.18692',
    'longitude_2': '103.452517',
    'master_postcode_1': '23200',
    'master_postcode_2': '24200',
    'ppv_region': 'Terengganu'},
    {'latitude_1': '4.76804722',
    'latitude_2': '4.50032',
    'longitude_1': '103.18692',
    'longitude_2': '103.440871',
    'master_postcode_1': '23200',
    'master_postcode_2': '24300',
    'ppv_region': 'Terengganu'},
    {'latitude_1': '4.573532',
    'latitude_2': '4.619279',
    'longitude_1': '103.313231',
    'longitude_2': '103.209329',
    'master_postcode_1': '23300',
    'master_postcode_2': '23400',
    'ppv_region': 'Terengganu'},
    {'latitude_1': '4.573532',
    'latitude_2': '4.230972',
    'longitude_1': '103.313231',
    'longitude_2': '103.427966',
    'master_postcode_1': '23300',
    'master_postcode_2': '24000',
    'ppv_region': 'Terengganu'},
    {'latitude_1': '4.573532',
    'latitude_2': '4.26868955',
    'longitude_1': '103.313231',
    'longitude_2': '103.2119044',
    'master_postcode_1': '23300',
    'master_postcode_2': '24050',
    'ppv_region': 'Terengganu'},
]


def fetch_distance_matrix_responses(client, pairs):
    """Request one distance-matrix result per postcode pair, in input order.

    Parameters
    ----------
    client
        Any object exposing ``distance_matrix(origin, destination)``
        (in this notebook, a ``googlemaps.Client``).
    pairs
        Iterable of dicts with the keys ``latitude_1``, ``longitude_1``,
        ``latitude_2`` and ``longitude_2``.

    Returns
    -------
    list
        Exactly one entry per pair. A successful request contributes whatever
        ``client.distance_matrix`` returned. A request that raised contributes
        a locally built marker dict with ``status == "REQUEST_FAILED"``, the
        error text under ``error_message`` and an empty ``rows`` list, so the
        list stays aligned with ``pairs`` and a failure can never be mistaken
        for (or replaced by) a neighbouring pair's result.
    """
    results = []
    for pair in pairs:
        # coordinates separated by comma, no spaces
        origin = f"{pair['latitude_1']},{pair['longitude_1']}"
        destination = f"{pair['latitude_2']},{pair['longitude_2']}"
        try:
            result = client.distance_matrix(origin, destination)
        except Exception as exc:
            # Keep going (best effort), but record the failure explicitly.
            # Re-using a previously bound result here would silently assign
            # the previous pair's travel time to this pair.
            print(f"Distance Matrix request failed for {origin} -> {destination}: {exc}")
            result = {
                "status": "REQUEST_FAILED",  # local marker, not an API status
                "error_message": str(exc),
                "rows": [],
            }
        results.append(result)
    return results


SKIP = True # Change this to False (and comment out the sample above) to use the Maps API
if not SKIP:
    gmaps = googlemaps.Client(key=get_api_key())
    ts = now()
    responses = fetch_distance_matrix_responses(gmaps, mpc_pairs)
    # Write once, after all requests have finished, and with mode "w": a JSON
    # file must hold a single document, so appending to an existing file would
    # make it unparseable.
    with open(f"./responses/distance-matrix-{ts}.json", mode="w") as fp:
        json.dump(responses, fp)

In [4]:
# with open(f"./responses/distance-matrix-{ts}.json") as fp: # Use this line for MAPS API
with open("./responses/distance-matrix-20210605-214333.json") as fp: # Use this line for sample
    responses = json.load(fp)

# zip() below stops at the shorter sequence, so a length mismatch would
# silently drop pairs; check it up front.
assert len(mpc_pairs) == len(responses), (
    f"{len(mpc_pairs)} postcode pairs but {len(responses)} stored responses"
)

# Add the corresponding duration into the mpc_pairs dictionaries
for item, resp in zip(mpc_pairs, responses):
    pair_label = f"{item['master_postcode_1']} -> {item['master_postcode_2']}"
    # Each request was for a single origin and a single destination, so the
    # result of interest is the first element of the first row.
    try:
        element = resp["rows"][0]["elements"][0]
    except (KeyError, IndexError, TypeError) as exc:
        raise ValueError(
            f"Unusable distance-matrix response for pair {pair_label}: {resp!r}"
        ) from exc
    # An element only carries a duration when a route was found; otherwise
    # report which pair is affected instead of failing with a bare KeyError.
    if "duration" not in element:
        raise ValueError(
            f"No duration for pair {pair_label} "
            f"(element status: {element.get('status')!r})"
        )
    item["duration"] = element["duration"]["value"]

In [5]:
# Time matrix: start from the distance matrix so the row/column labels match.
#
# The matrix is read from a pickle rather than csv/Excel because a round trip
# through those formats turns certain postcodes into integers instead of
# strings. The pickle file is generated at the end of Notebook #1.
# NOTE: pickle.load can execute arbitrary code; only load files you produced.
with open("./output/distance_matrix.pickle", mode="rb") as fp:
    distance_template = pickle.load(fp)

# Replace every non-missing entry with 0; missing entries are left untouched
# (na_action="ignore").
df_mat = distance_template.applymap(lambda _value: 0, na_action="ignore")


In [6]:
# [[[0, 605], [535, 0]], [[0, 2558], [2597, 0]]]
# Write each pair's travel time into the matrix. Rows and columns are both
# addressed by a (region, postcode) tuple, and every value is stored in both
# directions so the matrix is mirrored across the diagonal.
for pair in mpc_pairs:
    region = pair["ppv_region"]
    first_key = (region, pair["master_postcode_1"])
    second_key = (region, pair["master_postcode_2"])
    for row_key, col_key in ((first_key, second_key), (second_key, first_key)):
        df_mat.loc[row_key, col_key] = pair["duration"]

# TODO: Set zeros


In [7]:
# Blank out everything that is not a positive duration, then drop the rows and
# columns that end up completely empty so only the filled-in pairs are shown.
positive_mask = df_mat > 0
positive_only = df_mat[positive_mask]
positive_only.dropna(how="all", axis=0).dropna(how="all", axis=1)  # View the data

Unnamed: 0_level_0,Unnamed: 1_level_0,Terengganu,Terengganu,Terengganu,Terengganu,Terengganu,Terengganu,Terengganu
Unnamed: 0_level_1,Unnamed: 1_level_1,23200,23300,23400,24000,24050,24200,24300
Terengganu,23200,,,,,,2789.0,2431.0
Terengganu,23300,,,952.0,3057.0,2428.0,,
Terengganu,23400,,952.0,,,,,
Terengganu,24000,,3057.0,,,,,
Terengganu,24050,,2428.0,,,,,
Terengganu,24200,2789.0,,,,,,
Terengganu,24300,2431.0,,,,,,
