In [1]:
import numpy as np
import pandas as pd
import src.utils

In [2]:
# Postcode table (presumably written by an earlier notebook into ./output -- confirm).
df = pd.read_pickle("./output/posm-postcodes-full.pkl")

# One set of reader options is applied to both PPV workbooks: read the
# "static" sheet, keep only the listed columns, and use the first of them
# as the index.
config = dict(
    sheet_name="static",
    usecols=["code", "state", "district", "lat", "lon", "ppv"],
    index_col=0,
)
ppv_public = pd.read_excel("./data/sensitive/dep_ppv.xlsx", **config)
ppv_private = pd.read_excel("./data/sensitive/dep_ppv_phcorp.xlsx", **config)

display(ppv_public, ppv_private)

Unnamed: 0_level_0,state,district,lat,lon,ppv
code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
21-01040034,Johor,Kota Tinggi,1.553324,104.233728,"KK Bandar Penawar, Kota Tinggi"
21-01050110,Johor,Mersing,2.084023,103.946362,"KK Tenggaroh 2, Mersing"
A01-01-01,Johor,Batu Pahat,1.849400,102.932600,"Dewan Jubli Intan, Batu Pahat"
A01-01-02,Johor,Batu Pahat,1.867000,103.113056,"Dewan Orang Ramai Parit Raja, Batu Pahat"
A01-02-01,Johor,Johor Bahru,1.550500,103.743900,"Dewan Muafakat Johor Taman Adda, Johor Bahru"
...,...,...,...,...,...
A14-01-04,W.P. Kuala Lumpur,Kuala Lumpur,3.071500,101.714000,"Kompleks Sukan Desa Tasik, Kuala Lumpur"
A15-01-01,W.P. Labuan,Labuan,5.310400,115.233200,"Dewan Serbaguna Perbadanan Labuan, Labuan"
A15-01-02,W.P. Labuan,Labuan,5.289190,115.264651,"Dewan Kompleks Darul Kifayah MAIWP, Labuan"
A15-01-03,W.P. Labuan,Labuan,5.287617,115.249989,Dewan Serbaguna (Seri Labuan) JKR Labuan


Unnamed: 0_level_0,state,district,lat,lon,ppv
code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
13-01020040,Johor,Johor Bahru,1.498065,103.872609,Regency Specialist Hospital
13-01020045,Johor,Johor Bahru,1.481109,103.891789,Pasir Gudang Specialist Hospital Sdn Bhd
13-01010011,Johor,Batu Pahat,1.861841,102.951174,Pantai Hospital Batu Pahat
13-01010028,Johor,Batu Pahat,1.857952,102.922379,Putra Specialist Hospital
23-01010073,Johor,Batu Pahat,1.926446,102.849549,Klinik Sri Sulong
...,...,...,...,...,...
23-14010853,W.P. Kuala Lumpur,Kuala Lumpur,3.060065,101.668068,Klinik Pearl City
23-14010818,W.P. Kuala Lumpur,Kuala Lumpur,3.052620,101.672844,Qualitas Health Klinik Famili Medivista
23-14010626,W.P. Kuala Lumpur,Kuala Lumpur,3.129632,101.731670,Klinik Ng Dan Lee
23-14010320,W.P. Kuala Lumpur,Kuala Lumpur,3.153267,101.665602,Klinik Segara


In [3]:
# Only master postcodes matter for this analysis: drop the per-postcode
# column, remove the rows that become exact duplicates as a result, and
# index the table by master postcode.
# NOTE: this overwrites `df`, so re-running the cell without reloading `df`
# fails (the "postcode" column no longer exists).
df = (
    df
    .drop(columns="postcode")
    .drop_duplicates()
    .set_index("master_postcode")
)

In [4]:
# Lookup from each master postcode (the index of `df`) to its (lat, lon) pair.
mpc_coords = {
    row.Index: (row.lat, row.lon)
    for row in df[["lat", "lon"]].itertuples()
}

def closest_mpc(
    lat: float,
    lon: float,
    coords: dict[str, tuple[float, float]],
) -> tuple[str, float, float, float]:
    """Find the master postcode whose coordinates are nearest to a point.

    Distances are computed with ``src.utils.haversine_km`` (presumably the
    great-circle distance in kilometres -- confirm against that helper).

    Parameters
    ----------
    lat, lon : float
        Coordinates of the query point.
    coords : dict
        Maps each master postcode to its ``(lat, lon)`` pair.

    Returns
    -------
    tuple
        ``(nearest, nearest_lat, nearest_lon, distance)`` where ``nearest`` is
        the key of ``coords`` with the smallest distance to the query point,
        followed by that key's coordinates and the distance itself. When
        several keys tie, the first one in ``coords`` iteration order wins.

    Raises
    ------
    ValueError
        If ``coords`` is empty.
    """
    if not coords:
        # min() would raise ValueError anyway; raise it with a message that
        # names the actual problem.
        raise ValueError("coords must contain at least one master postcode")

    # Stream (postcode, distance) pairs straight into min() instead of
    # materialising a dict of every distance first.
    nearest, distance_km = min(
        (
            (mpc, src.utils.haversine_km(lat, lon, mpc_lat, mpc_lon))
            for mpc, (mpc_lat, mpc_lon) in coords.items()
        ),
        key=lambda candidate: candidate[1],
    )
    nearest_lat, nearest_lon = coords[nearest]
    return (nearest, nearest_lat, nearest_lon, distance_km)

# Add four columns to each PPV table in place: the nearest master postcode,
# that postcode's coordinates, and the distance from the PPV to it.
nearest_columns = ["nearest_master_postcode", "master_lat", "master_lon", "ppv_to_master_km"]
for df_ppv in (ppv_public, ppv_private):
    nearest = df_ppv.apply(
        lambda ppv: closest_mpc(ppv["lat"], ppv["lon"], mpc_coords),
        axis="columns",
        result_type="expand",
    )
    df_ppv[nearest_columns] = nearest

In [5]:
# Summary statistics of the PPV-to-master-postcode distance, one section per
# PPV table, with a divider line between sections.
sections = [
    ("Public PPV distance from master postcode:", ppv_public),
    ("Private PPV distance from master postcode:", ppv_private),
]
for position, (heading, frame) in enumerate(sections):
    if position:
        print("-----")
    print(heading)
    display(frame["ppv_to_master_km"].describe())


Public PPV distance from master postcode:


count    325.000000
mean       5.253267
std        8.253693
min        0.022155
25%        0.552188
50%        2.358539
75%        6.179767
max       64.173812
Name: ppv_to_master_km, dtype: float64

-----
Private PPV distance from master postcode:


count    329.000000
mean       3.425588
std        2.928411
min        0.010556
25%        1.468784
50%        2.886429
75%        4.686323
max       26.317929
Name: ppv_to_master_km, dtype: float64

In [6]:
def _ppv_count_by_master_postcode(df_ppv: pd.DataFrame) -> dict:
    """Return ``{master postcode: number of PPV rows whose nearest master postcode it is}``."""
    # size() counts the rows of each group directly. The previous
    # count().apply(max, axis="columns") counted non-null cells per column
    # and took the row-wise maximum, which only equals the group size while
    # at least one column is fully populated.
    return df_ppv.groupby("nearest_master_postcode").size().to_dict()


gb_public = _ppv_count_by_master_postcode(ppv_public)
gb_private = _ppv_count_by_master_postcode(ppv_private)

# Master postcodes that no PPV mapped to are absent from the dicts, hence the
# default of 0. Looking up by index label avoids a row-wise DataFrame.apply.
df["num_ppv_public"] = [gb_public.get(mpc, 0) for mpc in df.index]
df["num_ppv_private"] = [gb_private.get(mpc, 0) for mpc in df.index]
df["total_num_ppv"] = df[["num_ppv_public", "num_ppv_private"]].sum(axis="columns")
display(df)

Unnamed: 0_level_0,state,ppv_region,lat,lon,num_ppv_public,num_ppv_private,total_num_ppv
master_postcode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
79000,Johor,Johor,1.425136,103.614430,0,3,3
80000,Johor,Johor,1.456123,103.761701,0,7,7
81000,Johor,Johor,1.662964,103.600178,1,3,4
81300,Johor,Johor,1.497200,103.711685,1,9,10
81400,Johor,Johor,1.606506,103.647617,1,0,1
...,...,...,...,...,...,...,...
24300,Terengganu,Terengganu,4.500320,103.440871,2,0,2
50000,WP Kuala Lumpur,Selangor/KL/Putrajaya,3.143717,101.694244,1,32,33
53100,WP Kuala Lumpur,Selangor/KL/Putrajaya,3.260238,101.733399,0,2,2
87000,WP Labuan,WP Labuan,5.276694,115.245045,3,0,3


In [7]:
# Master postcodes that have no PPV at all (total count of zero).
has_no_ppv = df["total_num_ppv"].eq(0)
mpc_no_ppv = df.loc[has_no_ppv]
display(mpc_no_ppv)

Unnamed: 0_level_0,state,ppv_region,lat,lon,num_ppv_public,num_ppv_private,total_num_ppv
master_postcode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
81450,Johor,Johor,1.750503,103.672815,0,0,0
81600,Johor,Johor,1.363912,104.111340,0,0,0
81850,Johor,Johor,1.817035,103.474730,0,0,0
81920,Johor,Johor,1.676373,104.020516,0,0,0
82100,Johor,Johor,1.596936,103.331408,0,0,0
...,...,...,...,...,...,...,...
23300,Terengganu,Terengganu,4.573532,103.313231,0,0,0
24000,Terengganu,Terengganu,4.249831,103.319935,0,0,0
24050,Terengganu,Terengganu,4.268690,103.211904,0,0,0
24100,Terengganu,Terengganu,4.335356,103.479837,0,0,0


# Export file

In [8]:
# Write the master-postcode table, including the PPV count columns, to Excel.
df.to_excel(excel_writer="./output/master-postcodes-ppv-count.xlsx")