In [1]:
import numpy as np
import pandas as pd
import src.utils

In [2]:
# Postcode table used throughout the notebook.
# NOTE(review): read_pickle can execute arbitrary code while loading; only
# point this at pickles produced by a trusted step.
df = pd.read_pickle("./output/posm-postcodes-full.pkl")

# Shared read_excel options: both PPV workbooks are read the same way.
config = dict(
    sheet_name="static",
    usecols=["code", "state", "district", "lat", "lon", "ppv"],
    index_col=0,  # first of the selected columns becomes the index
    # Strip dots from state names: W.P. Kuala Lumpur -> WP Kuala Lumpur
    converters={"state": lambda s: s.replace(".", "")},
)

ppv_public = pd.read_excel("./data/sensitive/dep_ppv.xlsx", **config)
ppv_private = pd.read_excel("./data/sensitive/dep_ppv_phcorp.xlsx", **config)



# Data Cleaning

In [3]:
# States that are collapsed into a single combined PPV region.
merged_states = ["Selangor", "WP Kuala Lumpur", "WP Putrajaya"]

for ppv in (ppv_public, ppv_private):
    # Rows in one of the merged states get the combined label; every other
    # row keeps its own state name as its region.
    in_merged_region = ppv["state"].isin(merged_states)
    ppv["ppv_region"] = ppv["state"].mask(in_merged_region, "Selangor/KL/Putrajaya")

display(ppv_public)
display(ppv_private)

Unnamed: 0_level_0,state,district,lat,lon,ppv,ppv_region
code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
21-01040034,Johor,Kota Tinggi,1.553324,104.233728,"KK Bandar Penawar, Kota Tinggi",Johor
21-01050110,Johor,Mersing,2.084023,103.946362,"KK Tenggaroh 2, Mersing",Johor
A01-01-01,Johor,Batu Pahat,1.849400,102.932600,"Dewan Jubli Intan, Batu Pahat",Johor
A01-01-02,Johor,Batu Pahat,1.867000,103.113056,"Dewan Orang Ramai Parit Raja, Batu Pahat",Johor
A01-02-01,Johor,Johor Bahru,1.550500,103.743900,"Dewan Muafakat Johor Taman Adda, Johor Bahru",Johor
...,...,...,...,...,...,...
A14-01-04,WP Kuala Lumpur,Kuala Lumpur,3.071500,101.714000,"Kompleks Sukan Desa Tasik, Kuala Lumpur",Selangor/KL/Putrajaya
A15-01-01,WP Labuan,Labuan,5.310400,115.233200,"Dewan Serbaguna Perbadanan Labuan, Labuan",WP Labuan
A15-01-02,WP Labuan,Labuan,5.289190,115.264651,"Dewan Kompleks Darul Kifayah MAIWP, Labuan",WP Labuan
A15-01-03,WP Labuan,Labuan,5.287617,115.249989,Dewan Serbaguna (Seri Labuan) JKR Labuan,WP Labuan


Unnamed: 0_level_0,state,district,lat,lon,ppv,ppv_region
code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
13-01020040,Johor,Johor Bahru,1.498065,103.872609,Regency Specialist Hospital,Johor
13-01020045,Johor,Johor Bahru,1.481109,103.891789,Pasir Gudang Specialist Hospital Sdn Bhd,Johor
13-01010011,Johor,Batu Pahat,1.861841,102.951174,Pantai Hospital Batu Pahat,Johor
13-01010028,Johor,Batu Pahat,1.857952,102.922379,Putra Specialist Hospital,Johor
23-01010073,Johor,Batu Pahat,1.926446,102.849549,Klinik Sri Sulong,Johor
...,...,...,...,...,...,...
23-14010853,WP Kuala Lumpur,Kuala Lumpur,3.060065,101.668068,Klinik Pearl City,Selangor/KL/Putrajaya
23-14010818,WP Kuala Lumpur,Kuala Lumpur,3.052620,101.672844,Qualitas Health Klinik Famili Medivista,Selangor/KL/Putrajaya
23-14010626,WP Kuala Lumpur,Kuala Lumpur,3.129632,101.731670,Klinik Ng Dan Lee,Selangor/KL/Putrajaya
23-14010320,WP Kuala Lumpur,Kuala Lumpur,3.153267,101.665602,Klinik Segara,Selangor/KL/Putrajaya


In [4]:
# We only want the master postcodes for this analysis: drop the per-postcode
# column, collapse the rows that become identical, and key the table by
# master postcode.
# NOTE: this rebinds `df`, so the cell cannot be re-run without reloading `df`
# first (the "postcode" column is already gone on a second run).
df = (
    df.drop(columns="postcode")
    .drop_duplicates()
    .set_index("master_postcode")
)

In [5]:
def closest_mpc(x: pd.Series):
    """Find the row of the master-postcode table ``df`` nearest to ``x``.

    Reads the module-level ``df`` (indexed by master postcode, with ``lat``
    and ``lon`` columns) and measures the distance from ``x`` to every row
    with ``src.utils.haversine_km`` (presumably kilometres, going by the
    helper's name).

    Parameters
    ----------
    x : pd.Series
        A row exposing ``"lat"`` and ``"lon"`` entries.

    Returns
    -------
    tuple
        ``(index label, lat, lon, dist)`` of the nearest row of ``df``. If
        several rows are equally close, the first one in ``df`` order wins.
    """
    coords = df.loc[:, ["lat", "lon"]]
    dist = coords.apply(
        lambda y: src.utils.haversine_km(x["lat"], x["lon"], y["lat"], y["lon"]),
        axis="columns",
    )
    # A single linear scan replaces sorting the whole table just to read its
    # first row; argmin skips NaN distances and is positional, so it stays
    # unambiguous even if the index holds repeated labels.
    pos = dist.argmin()
    return (
        coords.index[pos],
        coords["lat"].iloc[pos],
        coords["lon"].iloc[pos],
        dist.iloc[pos],
    )

# Attach the nearest master postcode (label, coordinates, distance) to every PPV.
nearest_cols = ["nearest_master", "master_lat", "master_lon", "dist"]
for df_ppv in (ppv_public, ppv_private):
    nearest_mpc = df_ppv.apply(closest_mpc, axis="columns", result_type="expand")
    df_ppv[nearest_cols] = nearest_mpc
    display(df_ppv)

Unnamed: 0_level_0,state,district,lat,lon,ppv,ppv_region,nearest_master,master_lat,master_lon,dist
code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
21-01040034,Johor,Kota Tinggi,1.553324,104.233728,"KK Bandar Penawar, Kota Tinggi",Johor,81930,1.555441,104.230902,0.392308
21-01050110,Johor,Mersing,2.084023,103.946362,"KK Tenggaroh 2, Mersing",Johor,81900,1.734735,103.899799,39.157680
A01-01-01,Johor,Batu Pahat,1.849400,102.932600,"Dewan Jubli Intan, Batu Pahat",Johor,83000,1.868410,102.943824,2.452886
A01-01-02,Johor,Batu Pahat,1.867000,103.113056,"Dewan Orang Ramai Parit Raja, Batu Pahat",Johor,86400,1.865015,103.110069,0.398383
A01-02-01,Johor,Johor Bahru,1.550500,103.743900,"Dewan Muafakat Johor Taman Adda, Johor Bahru",Johor,81300,1.497200,103.711685,6.920174
...,...,...,...,...,...,...,...,...,...,...
A14-01-04,WP Kuala Lumpur,Kuala Lumpur,3.071500,101.714000,"Kompleks Sukan Desa Tasik, Kuala Lumpur",Selangor/KL/Putrajaya,43300,3.022100,101.705470,5.570589
A15-01-01,WP Labuan,Labuan,5.310400,115.233200,"Dewan Serbaguna Perbadanan Labuan, Labuan",WP Labuan,87000,5.276694,115.245045,3.968295
A15-01-02,WP Labuan,Labuan,5.289190,115.264651,"Dewan Kompleks Darul Kifayah MAIWP, Labuan",WP Labuan,87000,5.276694,115.245045,2.575865
A15-01-03,WP Labuan,Labuan,5.287617,115.249989,Dewan Serbaguna (Seri Labuan) JKR Labuan,WP Labuan,87000,5.276694,115.245045,1.331407


Unnamed: 0_level_0,state,district,lat,lon,ppv,ppv_region,nearest_master,master_lat,master_lon,dist
code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
13-01020040,Johor,Johor Bahru,1.498065,103.872609,Regency Specialist Hospital,Johor,81750,1.487056,103.882415,1.638102
13-01020045,Johor,Johor Bahru,1.481109,103.891789,Pasir Gudang Specialist Hospital Sdn Bhd,Johor,81750,1.487056,103.882415,1.233365
13-01010011,Johor,Batu Pahat,1.861841,102.951174,Pantai Hospital Batu Pahat,Johor,83000,1.868410,102.943824,1.095084
13-01010028,Johor,Batu Pahat,1.857952,102.922379,Putra Specialist Hospital,Johor,83000,1.868410,102.943824,2.650215
23-01010073,Johor,Batu Pahat,1.926446,102.849549,Klinik Sri Sulong,Johor,83500,1.975481,102.884558,6.693977
...,...,...,...,...,...,...,...,...,...,...
23-14010853,WP Kuala Lumpur,Kuala Lumpur,3.060065,101.668068,Klinik Pearl City,Selangor/KL/Putrajaya,46000,3.086439,101.649091,3.608880
23-14010818,WP Kuala Lumpur,Kuala Lumpur,3.052620,101.672844,Qualitas Health Klinik Famili Medivista,Selangor/KL/Putrajaya,46000,3.086439,101.649091,4.590304
23-14010626,WP Kuala Lumpur,Kuala Lumpur,3.129632,101.731670,Klinik Ng Dan Lee,Selangor/KL/Putrajaya,68000,3.146870,101.762690,3.939076
23-14010320,WP Kuala Lumpur,Kuala Lumpur,3.153267,101.665602,Klinik Segara,Selangor/KL/Putrajaya,50000,3.143717,101.694244,3.350550


In [6]:
# Summary statistics of the PPV -> nearest master postcode distance, per sector.
public_dist_summary = ppv_public["dist"].describe()
private_dist_summary = ppv_private["dist"].describe()

print("Public PPV distance from master postcode:")
display(public_dist_summary)
print("-----")
print("Private PPV distance from master postcode:")
display(private_dist_summary)


Public PPV distance from master postcode:


count    324.000000
mean       5.269004
std        8.261575
min        0.022155
25%        0.558797
50%        2.363921
75%        6.226179
max       64.173812
Name: dist, dtype: float64

-----
Private PPV distance from master postcode:


count    329.000000
mean       3.425588
std        2.928411
min        0.010556
25%        1.468784
50%        2.886429
75%        4.686323
max       26.317929
Name: dist, dtype: float64

In [7]:
# Number of PPVs whose nearest master postcode is each master postcode,
# as {master postcode: count}. size() counts the rows in each group directly;
# the previous count()-then-row-max form counted non-null cells per column,
# which under-reports a group whenever every column has a missing value in it.
gb_public = ppv_public.groupby("nearest_master").size().to_dict()
gb_private = ppv_private.groupby("nearest_master").size().to_dict()

# Master postcodes that no PPV maps to are absent from the dicts, hence the 0
# default. Looking up by index label avoids a row-wise DataFrame.apply.
df["num_ppv_public"] = [gb_public.get(code, 0) for code in df.index]
df["num_ppv_private"] = [gb_private.get(code, 0) for code in df.index]
df["total_num_ppv"] = df["num_ppv_public"] + df["num_ppv_private"]
display(df)

Unnamed: 0_level_0,state,ppv_region,lat,lon,num_ppv_public,num_ppv_private,total_num_ppv
master_postcode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
79000,Johor,Johor,1.425136,103.614430,0,3,3
80000,Johor,Johor,1.456123,103.761701,0,7,7
81000,Johor,Johor,1.662964,103.600178,1,3,4
81300,Johor,Johor,1.497200,103.711685,1,9,10
81400,Johor,Johor,1.606506,103.647617,1,0,1
...,...,...,...,...,...,...,...
24300,Terengganu,Terengganu,4.500320,103.440871,2,0,2
50000,WP Kuala Lumpur,Selangor/KL/Putrajaya,3.143717,101.694244,1,32,33
53100,WP Kuala Lumpur,Selangor/KL/Putrajaya,3.260238,101.733399,0,2,2
87000,WP Labuan,WP Labuan,5.276694,115.245045,3,0,3


In [8]:
# Which master postcode has no PPV?
# i.e. rows whose combined public + private PPV count is zero.
has_no_ppv = df["total_num_ppv"].eq(0)
mpc_no_ppv = df.loc[has_no_ppv]
display(mpc_no_ppv)

Unnamed: 0_level_0,state,ppv_region,lat,lon,num_ppv_public,num_ppv_private,total_num_ppv
master_postcode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
81450,Johor,Johor,1.750503,103.672815,0,0,0
81600,Johor,Johor,1.363912,104.111340,0,0,0
81850,Johor,Johor,1.817035,103.474730,0,0,0
81920,Johor,Johor,1.676373,104.020516,0,0,0
82100,Johor,Johor,1.596936,103.331408,0,0,0
...,...,...,...,...,...,...,...
23300,Terengganu,Terengganu,4.573532,103.313231,0,0,0
24000,Terengganu,Terengganu,4.249831,103.319935,0,0,0
24050,Terengganu,Terengganu,4.268690,103.211904,0,0,0
24100,Terengganu,Terengganu,4.335356,103.479837,0,0,0


In [9]:
# All PPVs, public and private, stacked into one table.
ppv = pd.concat([ppv_public, ppv_private])

def closest_ppv_in_region(x):
    """Given a Series from the master postcode df, what is the closest ppv in the same region?

    Reads the module-level ``ppv`` table and only considers rows whose
    ``ppv_region`` equals ``x["ppv_region"]``. Distances are computed with
    ``src.utils.haversine_km`` (presumably kilometres, going by the helper's
    name).

    Parameters
    ----------
    x : pd.Series
        A row exposing ``"ppv_region"``, ``"lat"`` and ``"lon"`` entries.

    Returns
    -------
    tuple
        ``(index label, lat, lon, dist)`` of the nearest PPV in the region, or
        ``(None, nan, nan, nan)`` when the region has no PPV at all. If
        several PPVs are equally close, the first one in ``ppv`` order wins.
    """
    candidates = ppv[ppv["ppv_region"] == x["ppv_region"]]  # All ppvs in this region
    if candidates.empty:
        # No PPV to link to; report missing values instead of failing with an
        # IndexError on an empty selection.
        missing = float("nan")
        return (None, missing, missing, missing)

    # Distances are kept in their own Series rather than written onto the
    # filtered slice, which avoids pandas' SettingWithCopyWarning and leaves
    # the existing "dist" column of `ppv` untouched.
    dist = candidates.apply(
        lambda y: src.utils.haversine_km(x["lat"], x["lon"], y["lat"], y["lon"]),
        axis="columns",
    )
    # Positional lookup: public and private tables are concatenated, so index
    # labels are not guaranteed to be unique.
    pos = dist.argmin()
    return (
        candidates.index[pos],
        candidates["lat"].iloc[pos],
        candidates["lon"].iloc[pos],
        dist.iloc[pos],
    )

# Attach the closest same-region PPV (label, coordinates, distance) to every
# master postcode.
closest_ppv_cols = ["closest_ppv", "ppv_lat", "ppv_lon", "ppv_dist"]
df[closest_ppv_cols] = df.apply(
    closest_ppv_in_region, axis="columns", result_type="expand"
)

print("-----")
print("MPCs with no linked PPV only - now they have the closest PPV in the region")
# Re-select the same master postcodes from the updated table so the new
# closest-PPV columns are included.
mpc_no_ppv = df.loc[df.index.isin(mpc_no_ppv.index)]
display(mpc_no_ppv)


-----
MPCs with no linked PPV only - now they have the closest PPV in the region


Unnamed: 0_level_0,state,ppv_region,lat,lon,num_ppv_public,num_ppv_private,total_num_ppv,closest_ppv,ppv_lat,ppv_lon,ppv_dist
master_postcode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
81450,Johor,Johor,1.750503,103.672815,0,0,0,23-01210784,1.660110,103.638652,10.737775
81600,Johor,Johor,1.363912,104.111340,0,0,0,21-01040034,1.553324,104.233728,25.057735
81850,Johor,Johor,1.817035,103.474730,0,0,0,A01-04-02,1.875114,103.615248,16.888872
81920,Johor,Johor,1.676373,104.020516,0,0,0,A01-04-01,1.736180,103.901240,14.822202
82100,Johor,Johor,1.596936,103.331408,0,0,0,A01-07-01,1.486599,103.388073,13.782589
...,...,...,...,...,...,...,...,...,...,...,...
23300,Terengganu,Terengganu,4.573532,103.313231,0,0,0,21-11020019,4.625791,103.207591,13.063234
24000,Terengganu,Terengganu,4.249831,103.319935,0,0,0,A11-03-01,4.229944,103.426048,11.965360
24050,Terengganu,Terengganu,4.268690,103.211904,0,0,0,A11-03-01,4.229944,103.426048,24.118724
24100,Terengganu,Terengganu,4.335356,103.479837,0,0,0,A11-03-01,4.229944,103.426048,13.143232


# Export file

In [12]:
# Write the enriched master-postcode table (PPV counts and closest PPV) to Excel.
df.to_excel(excel_writer="./output/master-postcodes-and-ppvs.xlsx")