In [12]:
import googlemaps
import pandas as pd
import numpy as np
import csv
from src.utils import get_api_key, chunks
from collections import namedtuple
from functools import reduce
from pprint import pprint
import json

In [13]:
MpcData = namedtuple("MpcData", "ppv_region, mpc, lat, lon")

with open("./output/mpc_pairs.csv") as fp:
    mpc_pairs = list(csv.DictReader(fp))
# mpc_batch = list(chunks(mpc_pairs, 20))
# print(f"Number of batches: {len(mpc_batch)}")
# print("Batch sizes:")
# for batch in mpc_batch:
#     print(len(batch), end=", ")
mpc_regions = dict()
for item in mpc_pairs:
    # Use sets to remove duplicates
    region_set = mpc_regions.get(item["ppv_region"], set())
    region_set.add(MpcData(
        ppv_region=item["ppv_region"], 
        mpc=item["master_postcode_1"],
        lat=item["latitude_1"],
        lon=item["longitude_1"])
    )
    region_set.add(MpcData(
        ppv_region=item["ppv_region"], 
        mpc=item["master_postcode_2"],
        lat=item["latitude_2"],
        lon=item["longitude_2"])
    )
    mpc_regions[item["ppv_region"]] = region_set


print("Number of postcodes in each mpc_region:")
for k, v in mpc_regions.items():
    mpc_regions[k] = sorted(v) # each region is now a sorted list of MpcDatas
    print(k, len(v))
print("-----")
print(f"Total postcodes: {sum(map(len, mpc_regions.values()))}")

# Batch into sets of 10 for each state.
for k, v in mpc_regions.items():
    mpc_regions[k] = list(chunks(v, 10))
pprint(mpc_regions["Kelantan"])

Number of postcodes in each mpc_region:
Johor 52
Kedah 33
Kelantan 21
Melaka 15
Negeri Sembilan 25
Pahang 37
Perak 65
Perlis 6
Pulau Pinang 22
Sabah 32
Sarawak 45
Selangor/KL/Putrajaya 48
Terengganu 21
-----
Total postcodes: 422
[[MpcData(ppv_region='Kelantan', mpc='15000', lat='6.121666', lon='102.239932'),
  MpcData(ppv_region='Kelantan', mpc='16040', lat='6.121897', lon='102.202192'),
  MpcData(ppv_region='Kelantan', mpc='16200', lat='6.197963', lon='102.169468'),
  MpcData(ppv_region='Kelantan', mpc='16300', lat='6.057671', lon='102.399878'),
  MpcData(ppv_region='Kelantan', mpc='16400', lat='5.967122', lon='102.295417'),
  MpcData(ppv_region='Kelantan', mpc='16450', lat='5.95956', lon='102.250549'),
  MpcData(ppv_region='Kelantan', mpc='16500', lat='5.89941944', lon='102.232883'),
  MpcData(ppv_region='Kelantan', mpc='16600', lat='5.87232927', lon='102.23266766'),
  MpcData(ppv_region='Kelantan', mpc='16700', lat='5.864271', lon='102.489078'),
  MpcData(ppv_region='Kelantan', mpc=

In [14]:
# Sample data; comment out if doing for real.
mpc_regions = [
    [
        MpcData(ppv_region='Kelantan', mpc='15000', lat='6.121666', lon='102.239932'),
        MpcData(ppv_region='Kelantan', mpc='16040', lat='6.121897', lon='102.202192'),
    ],
    [
        MpcData(ppv_region='Kelantan', mpc='16810', lat='5.895012', lon='102.334539'),
        MpcData(ppv_region='Kelantan', mpc='17000', lat='6.042556', lon='102.144889'),
    ],
]

SKIP = True
if not SKIP:
    gmaps = googlemaps.Client(key=get_api_key())
    responses = []
    ts = now()
    with open(f"./responses/distance-matrix-{ts}.json", mode="a") as fp:
        for batch in mpc_regions:
            coords = [f"{lat},{lon}" for mpc_data in batch] # coordinates separated by comma, no spaces
            try:
                response = gmaps.distance_matrix(coords, coords)
                print(response)
            except Exception as exc:
                print(exc)
            responses.append(response)
        json.dump(responses, fp)

In [15]:
# with open(f"./responses/distance-matrix-{ts}.json") as fp:
with open("./responses/distance-matrix-20210604-234351.json") as fp:
    responses = json.load(fp)

# Flatten the response - we only want rows.elements.duration.value (in seconds).
# duration will be a list of lists; the inner list correspond to a batch.
data = []
for batch in responses:
    data2 = []
    rows = batch["rows"]
    for item in rows:
        data3 = []
        elements = item["elements"]
        for ele in elements:
            data3.append(ele["duration"]["value"])
        data2.append(data3)
    data.append(data2)
pprint(data)

[[[0, 605], [535, 0]], [[0, 2558], [2597, 0]]]


In [30]:
# time matrix
# lets reuse the distance matrix
df_mat = pd.read_excel(
    "./output/postcode_output.xlsx", 
    sheet_name="Master Postcode Distance Matrix",
    header=[0, 1],
    index_col=[0, 1])
df_mat = df_mat.applymap(lambda x: 0, na_action="ignore")
display(df_mat)


ValueError: Cannot convert non-finite values (NA or inf) to integer

In [23]:
# [[[0, 605], [535, 0]], [[0, 2558], [2597, 0]]]
for mpc_singles, batch in mpc_regions:


Unnamed: 0_level_0,Unnamed: 1_level_0,Johor,Johor,Johor,Johor,Johor,Johor,Johor,Johor,Johor,Johor,...,Terengganu,Terengganu,Terengganu,Terengganu,Terengganu,Terengganu,Terengganu,Terengganu,Terengganu,WP Labuan
Unnamed: 0_level_1,Unnamed: 1_level_1,79000,80000,81000,81400,81440,81450,81500,81550,81600,81700,...,23100,23200,23300,23400,24000,24050,24100,24200,24300,87000
Johor,79000,0.000000,16.718734,26.47602,20.489126,50.102419,36.733454,14.816449,3.701622,55.620452,32.491557,...,,,,,,,,,,
Johor,80000,16.718734,0.000000,29.15918,20.973199,49.457668,34.170561,28.579943,19.022374,40.170806,15.837006,...,,,,,,,,,,
Johor,81000,26.476020,29.159180,0.00000,8.193270,23.723820,12.638248,19.979265,23.860660,65.792808,40.099813,...,,,,,,,,,,
Johor,81400,20.489126,20.973199,8.19327,0.000000,30.178914,16.244601,18.859817,18.668948,58.141493,32.365212,...,,,,,,,,,,
Johor,81440,50.102419,49.457668,23.72382,30.178914,0.000000,15.398003,42.547997,47.581970,79.307618,55.622567,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Terengganu,24050,,,,,,,,,,,...,48.006446,55.560159,35.687422,38.960337,24.307933,0.000000,30.600460,31.936762,36.140766,
Terengganu,24100,,,,,,,,,,,...,34.056266,58.007021,32.268003,43.515205,12.945771,30.600460,0.000000,10.614830,18.833153,
Terengganu,24200,,,,,,,,,,,...,23.602662,47.985241,22.440356,34.391023,21.943034,31.936762,10.614830,0.000000,8.258988,
Terengganu,24300,,,,,,,,,,,...,15.381903,40.942914,16.313085,28.854912,29.965463,36.140766,18.833153,8.258988,0.000000,
