### Data Loading

In [3]:
import json
import pydeck
import pickle
import numpy as np
import plotly.express as px
import matplotlib.pyplot as plt
import pandas as pd
import os
import shapefile

In [4]:
# directory holding the input files; the load cells join file names onto this path
data_dir = "./data"

In [5]:
# Index lookups plus the precomputed odt cube.
# NOTE: pickle can execute arbitrary code while loading - only read trusted files.
loc_idx_path = os.path.join(data_dir, "akl_loc_idx.pkl")
t_idx_path = os.path.join(data_dir, "akl_t_idx.pkl")
odt_path = os.path.join(data_dir, "akl_odt.npy")

# location index: datazone -> point index, and the inverse point index -> datazone
with open(loc_idx_path, 'rb') as fh:
    loc_idx = pickle.load(fh)
idx_loc = dict(zip(loc_idx.values(), loc_idx.keys()))
print(f" -- loaded location index with dimension {len(loc_idx)}")

# time index: datetime -> time index, and the inverse time index -> datetime
with open(t_idx_path, 'rb') as fh:
    t_idx = pickle.load(fh)
idx_t = dict(zip(t_idx.values(), t_idx.keys()))
print(f" -- loaded time index with dimension {len(t_idx)}")

# precomputed odt cube
with open(odt_path, 'rb') as fh:
    odt = np.load(fh)
print(f" -- loaded odt cube with dimensions {odt.shape}")

 -- loaded location index with dimension 2011
 -- loaded time index with dimension 72
 -- loaded odt cube with dimensions (2011, 2011, 72)


In [6]:
# show the odt time range (first and last datetime in the time index)
times = list(t_idx)
earliest, latest = min(times), max(times)
print(earliest, "-", latest)

2020-06-01 07:00:00 - 2020-06-01 18:50:00


In [7]:
# read the polygon GeoJSON that the maps below use as geometry
polys_path = os.path.join(data_dir, "akl_polygons_id.geojson")
with open(polys_path) as geojson_file:
    polys = json.load(geojson_file)

In [8]:
# load shapefile of points (data zone population centroids)
# coords maps each record's first field (used below as the location id) to the
# first point of its shape; later cells unpack the value as (lon, lat).
sf_path = os.path.join(data_dir, "akl_points.shp")

# the context manager closes the reader even if reading a record or shape fails
with shapefile.Reader(sf_path) as sf:
    records = sf.records()
    coords = {rec[0]: sf.shape(i).points[0] for i, rec in enumerate(records)}

In [9]:
# IMD table, indexed by its DZ2018 column
imd_path = os.path.join(data_dir, "akl_imd.csv")
imd = pd.read_csv(imd_path, index_col="DZ2018")
imd.head()

Unnamed: 0_level_0,Count_MB18,Census18_P,dhb2015__1,dhb2015__2,ged2020n_1,ged2020n_2,ta2020co_1,ta2020na_1,regc2020_1,regc2020_2,...,Crime,decCrime,Housing,decHousing,Health,decHealth,Education,decEduc,Access,decAccess
DZ2018,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
300025,8,846,1,Northland,32,Northland,3,Kaipara District,1,Northland Region,...,516,1,934,2,2886,5,4085,7,5519,9
300030,10,618,1,Northland,32,Northland,3,Kaipara District,1,Northland Region,...,2987,5,2006,4,662,2,4090,7,5695,10
1200001,16,843,4,Counties Manukau,7,Coromandel,12,Hauraki District,3,Waikato Region,...,4890,8,1884,4,3154,6,4980,9,6004,10
1300001,8,918,4,Counties Manukau,39,Port Waikato,13,Waikato District,3,Waikato Region,...,1944,4,2588,5,5650,10,4020,7,5533,9
1300003,11,744,4,Counties Manukau,39,Port Waikato,13,Waikato District,3,Waikato Region,...,1042,2,2453,4,4623,8,2230,4,5182,9


In [11]:
# load vdr, indexed by lzuid, without rows that have missing values
vdr_path = os.path.join(data_dir, "vdr_values.csv")
vdr = pd.read_csv(vdr_path, index_col="lzuid").dropna()
vdr.index = vdr.index.astype(np.int32)

# 'S' marks a suppressed count: those rows are dropped (rather than replaced
# with 0) so that the remaining counts can be cast to integers
has_valid_count = vdr["count_vdr"] != 'S'
column_types = {
    "mpoMaoriPacific": str,
    "ageband": str,
    "pop": np.int32,
    "count_vdr": np.int32,
}
vdr = vdr[has_valid_count].astype(column_types)
vdr.head()

Unnamed: 0_level_0,mpoMaoriPacific,ageband,pop,count_vdr
lzuid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
100001,MaoriPacific,20-44,186,6
100001,MaoriPacific,45-64,222,30
100001,MaoriPacific,65+,93,36
100001,nMnP,65+,120,18
100002,MaoriPacific,45-64,102,12


In [12]:
# clinics
# each clinic is currently assigned by hand to its nearest data zone location
# (population centroid), which is the DZ2018 value used as the index
clinics_path = os.path.join(data_dir, "akl_clinics.csv")
clinics = pd.read_csv(clinics_path, index_col="DZ2018")
clinics.head()

Unnamed: 0_level_0,name,address,lat,lon
DZ2018,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
7600424,Age Concern,57 Rosebank Road,-36.89445,174.69747
7601160,Oranga Community Center,52-54 Waitangi Road,-36.91129,174.79955
7601911,Waiheke Centre,61 Ostend Road,-36.79633,175.04809
7601911,Waiheke Island Trust,5 Belgium Street,-36.79583,175.04453
7601441,Te Oro Music & Arts Centre,98 Line Road,-36.87884,174.85684


### Some basic plotting with pydeck

Use this map to look up a location ID: hover over a polygon to see its ID.

In [13]:
# deck.gl map of the polygons; the hover tooltip shows each location id

# polygon layer
geojson = pydeck.Layer(
    "GeoJsonLayer",
    polys,  # needs to be wgs84
    opacity=0.2,
    stroked=True,
    line_width_min_pixels=1,
    filled=True,
    pickable=True,
    auto_highlight=True,
    get_fill_color=[128, 128, 128],
    get_line_color=[255, 255, 255],
)

# default view
view_state = pydeck.ViewState(
    longitude=174.7633, latitude=-36.8485,
    zoom=11, max_zoom=16,
    pitch=0, bearing=0,
)

hover_tooltip = {"text": "Location: {id}"}
r = pydeck.Deck(
    layers=[geojson],
    initial_view_state=view_state,
    map_style='mapbox://styles/mapbox/light-v9',
    tooltip=hover_tooltip,
)
# to export instead of showing inline:
#r.to_html("geojson_layer.html", iframe_width="100%")
r.show()



DeckGLWidget(json_input='{"initialViewState": {"bearing": 0, "latitude": -36.8485, "longitude": 174.7633, "max…

### One source to all destinations with one-way journey threshold

In [None]:
# choose an origin and slice its destination-time matrix out of the odt cube
loc = 7600522 # seaview terrace, mt albert
lon, lat = coords[loc]  # reused as the map centre in the plots below
origin = loc_idx[loc]   # location id -> odt index
dt = odt[origin]        # destination-time matrix for this origin
print(dt.shape)

In [None]:
# view dt matrix, transposed so locations run along x and time steps along y
fig, ax = plt.subplots(figsize=(15, 15))
ax.imshow(dt.T)
ax.set_xlabel("location index")
ax.set_ylabel("time index")

In [None]:
# mean and std of travel time along the time axis (NaNs ignored),
# kept as column vectors so they can be concatenated with the ids
mean_tt = np.nanmean(dt, axis=1)[:, np.newaxis]
std_tt = np.nanstd(dt, axis=1)[:, np.newaxis]

In [None]:
# create dataframe
# Row i of mean_tt/std_tt belongs to odt index i, so list the location ids in
# index order explicitly instead of relying on the dict's insertion order.
ids = np.array([idx_loc[i] for i in sorted(idx_loc)]).reshape(-1, 1)
d = np.concatenate((ids, mean_tt, std_tt), axis=1)
df = (
    pd.DataFrame(d, columns=["id", "mean_tt", "std_tt"])
    .astype({'id': 'int32'})
    .dropna()             # drop locations whose mean/std is NaN
    .join(imd, on="id")   # join with imd
)

In [None]:
# keep only the rows whose mean travel time is below the threshold
threshold = 60 # minutes
within_threshold = df["mean_tt"] < threshold
df = df[within_threshold]

In [None]:
# choropleth of mean travel time - note geojson data needs to be wgs84
map_center = {"lat": lat, "lon": lon}
fig = px.choropleth_mapbox(
    df,
    geojson=polys, featureidkey="id", locations="id",
    color="mean_tt", color_continuous_scale="Viridis",
    mapbox_style="carto-positron", center=map_center, zoom=12,
)
fig.update_layout(margin=dict(r=0, t=0, l=0, b=0))
fig.show()

In [None]:
# choropleth of travel time std - note geojson data needs to be wgs84
map_center = {"lat": lat, "lon": lon}
fig = px.choropleth_mapbox(
    df,
    geojson=polys, featureidkey="id", locations="id",
    color="std_tt", color_continuous_scale="Viridis", opacity=1,
    mapbox_style="carto-positron", center=map_center, zoom=12,
)
fig.update_layout(margin=dict(r=0, t=0, l=0, b=0))
fig.show()

### All locations to one destination with one-way journey threshold

In [None]:
# list the clinic names (the values of the "name" column)
print(clinics["name"])

In [None]:
# get clinic location by name
clinic_name = "Rehab Plus"
matches = clinics.index[clinics["name"] == clinic_name].tolist()
if not matches:
    # same exception type as before, but it now says which name was missing
    raise IndexError(f"no clinic named {clinic_name!r} in the clinics table")
clinic_loc = matches[0]  # if several rows share the name, the first one is used
print(clinic_name, clinic_loc)

In [None]:
# choose a destination and slice its origin-time matrix out of the odt cube
loc = clinic_loc # or just set any location id
lon, lat = coords[loc]      # reused as the map centre in the plots below
destination = loc_idx[loc]  # location id -> odt index
ot = odt[:, destination]    # origin-time matrix for this destination
print(ot.shape)

In [None]:
# view ot matrix, transposed so locations run along x and time steps along y
fig, ax = plt.subplots(figsize=(15, 15))
ax.imshow(ot.T)
ax.set_xlabel("location index")
ax.set_ylabel("time index")

In [None]:
# mean and std of travel time along the time axis (NaNs ignored),
# kept as column vectors so they can be concatenated with the ids
mean_tt = np.nanmean(ot, axis=1)[:, np.newaxis]
std_tt = np.nanstd(ot, axis=1)[:, np.newaxis]

In [None]:
# create dataframe
# Row i of mean_tt/std_tt belongs to odt index i, so list the location ids in
# index order explicitly instead of relying on the dict's insertion order.
ids = np.array([idx_loc[i] for i in sorted(idx_loc)]).reshape(-1, 1)
d = np.concatenate((ids, mean_tt, std_tt), axis=1)
df = (
    pd.DataFrame(d, columns=["id", "mean_tt", "std_tt"])
    .astype({'id': 'int32'})
    .dropna()             # drop locations whose mean/std is NaN
    .join(imd, on="id")   # join with imd
)

In [None]:
# keep only the rows whose mean travel time is below the threshold
threshold = 60 # minutes
within_threshold = df["mean_tt"] < threshold
df = df[within_threshold]

In [None]:
# choropleth of mean travel time - note geojson data needs to be wgs84
map_center = {"lat": lat, "lon": lon}
fig = px.choropleth_mapbox(
    df,
    geojson=polys, featureidkey="id", locations="id",
    color="mean_tt", color_continuous_scale="Viridis",
    mapbox_style="carto-positron", center=map_center, zoom=12,
)
fig.update_layout(margin=dict(r=0, t=0, l=0, b=0))
fig.show()

In [None]:
# choropleth of travel time std - note geojson data needs to be wgs84
map_center = {"lat": lat, "lon": lon}
fig = px.choropleth_mapbox(
    df,
    geojson=polys, featureidkey="id", locations="id",
    color="std_tt", color_continuous_scale="Viridis",
    mapbox_style="carto-positron", center=map_center, zoom=12,
)
fig.update_layout(margin=dict(r=0, t=0, l=0, b=0))
fig.show()

### Point to point travel times to investigate variability throughout the day

In [None]:
from_loc = 7600316 # high stdev
to_loc = clinic_loc

origin_idx = loc_idx[from_loc] # get odt index from location id
dest_idx = loc_idx[to_loc]

# Travel time series for this origin/destination pair. Position i on the odt
# time axis is time index i, so take the datetimes in time-index order rather
# than trusting the insertion order of t_idx.
departures = pd.DatetimeIndex([idx_t[i] for i in sorted(idx_t)])
dt = pd.Series(odt[origin_idx, dest_idx, :], index=departures)
print(dt.shape)

In [None]:
# line plot of the dt series against its datetime index
dt.plot(figsize=(15, 5), xlabel="Departure Time", ylabel="ETA (minutes)")

In [None]:
from_loc = 7600870 # low stdev
to_loc = clinic_loc

origin_idx = loc_idx[from_loc] # get odt index from location id
dest_idx = loc_idx[to_loc]

# Travel time series for this origin/destination pair. Position i on the odt
# time axis is time index i, so take the datetimes in time-index order rather
# than trusting the insertion order of t_idx.
departures = pd.DatetimeIndex([idx_t[i] for i in sorted(idx_t)])
dt = pd.Series(odt[origin_idx, dest_idx, :], index=departures)
dt.plot(figsize=(15, 5), xlabel="Departure Time", ylabel="ETA (minutes)")

### Find the travel time from every location to every clinic

In [None]:
# odt index of every clinic's location id
clinic_locs = clinics.index.tolist()
clinic_idxs = [loc_idx[location_id] for location_id in clinic_locs]

# restrict the destination axis of the odt cube to the clinics
odt_clinic = odt[:, clinic_idxs]
print(odt_clinic.shape)

In [None]:
# mean and std travel time over the last (time) axis, NaNs ignored
time_axis = odt_clinic.ndim - 1
mean_tt = np.nanmean(odt_clinic, axis=time_axis)
std_tt = np.nanstd(odt_clinic, axis=time_axis)
print(mean_tt.shape)

In [None]:
# find minimum mean time and its stdev
# For every row of mean_tt pick the column (clinic) with the smallest mean,
# skipping NaN entries; ties go to the first column. A row that is NaN for
# every clinic keeps column 0, so its minimum stays NaN.
# Assumes the non-NaN means are finite.
masked_means = np.where(np.isnan(mean_tt), np.inf, mean_tt)
nearest = np.argmin(masked_means, axis=1)

rows = np.arange(mean_tt.shape[0])
# column vectors of float64, as the later concatenate/log cells expect
min_tt = mean_tt[rows, nearest].astype(np.float64).reshape(-1, 1)
min_tt_std = std_tt[rows, nearest].astype(np.float64).reshape(-1, 1)

In [None]:
# mean against stdev of travel time to the nearest clinic, one point per location
ax = plt.figure(figsize=(10, 10)).gca()
ax.scatter(min_tt, min_tt_std)
ax.set_xlabel("Mean travel time to nearest clinic")
ax.set_ylabel("stdev travel time to nearest clinic")

In [None]:
# use log10 of the travel times so they plot on the map more easily
raw_log_tt = np.log10(min_tt)
raw_log_std = np.log10(min_tt_std)

# a value of 0 gives -inf under log10; show those entries as 0 instead
log_min_tt = np.where(np.isneginf(raw_log_tt), 0, raw_log_tt)
log_min_tt_std = np.where(np.isneginf(raw_log_std), 0, raw_log_std)

In [None]:
# create dataframe
# Row i of the min_tt arrays belongs to odt index i, so list the location ids
# in index order explicitly instead of relying on the dict's insertion order.
ids = np.array([idx_loc[i] for i in sorted(idx_loc)]).reshape(-1, 1)
d = np.concatenate((ids, min_tt, min_tt_std, log_min_tt, log_min_tt_std), axis=1)
df = (
    pd.DataFrame(d, columns=["id", "min_tt", "min_tt_std", "log_min_tt", "log_min_tt_std"])
    .astype({'id': 'int32'})
    .dropna()             # drop locations with a NaN in any travel-time column
    .join(imd, on="id")   # join with imd
)

In [None]:
# choropleth of log10 travel time to the nearest clinic - note geojson data needs to be wgs84
map_center = {"lat": lat, "lon": lon}
fig = px.choropleth_mapbox(
    df,
    geojson=polys, featureidkey="id", locations="id",
    color="log_min_tt", color_continuous_scale="Viridis",
    mapbox_style="carto-positron", center=map_center, zoom=12,
)
fig.update_layout(margin=dict(r=0, t=0, l=0, b=0))
fig.show()

In [None]:
# choropleth of log10 travel time stdev for the nearest clinic - note geojson data needs to be wgs84
map_center = {"lat": lat, "lon": lon}
fig = px.choropleth_mapbox(
    df,
    geojson=polys, featureidkey="id", locations="id",
    color="log_min_tt_std", color_continuous_scale="Viridis",
    mapbox_style="carto-positron", center=map_center, zoom=12,
)
fig.update_layout(margin=dict(r=0, t=0, l=0, b=0))
fig.show()

In [None]:
# spot check: look up the min_tt_std entry for a single location id
loc = 7601177
idx = loc_idx[loc]  # location id -> odt index, which is also the row of min_tt_std
min_tt_std[idx]

### Compared with deprivation

In [None]:
# list the columns of df (travel-time columns plus those joined from imd)
df.columns

In [None]:
# does accessibility to nearest clinic predict IMD Health Index?

x = "log_min_tt"
# the x column holds log10 of the travel time, so the axis label says so
xlabel = "Travel time (log10 minutes) to nearest clinic"
y = "Health"
ylabel = f"{y} Index"

plt.figure(figsize=(10, 10))
plt.scatter(df[x], df[y])
plt.xlabel(xlabel)
plt.ylabel(ylabel)

In [None]:
# one scatter figure per column in Y, each plotted against log travel time
Y = ['Census18_P', 'IMD18', 'Employment', 'Income', 'Crime', 'Housing', 'Health', 'Education', 'Access']
x = "log_min_tt"

for y in Y:
    ax = plt.figure(figsize=(10, 10)).gca()
    ax.scatter(df[x], df[y])
    ax.set_xlabel(x)
    ax.set_ylabel(y)

In [None]:
# save a copy with the id column renamed to DZ2018 (row index is not written)
export = df.rename(columns={"id":"DZ2018"})
export.to_csv("imd-with-travel-time.csv", index=False)

In [None]:
# join with vdr
# vdr is indexed by lzuid and can hold several rows per id (one per
# ethnicity/ageband combination), so a location may appear on multiple rows here
df_vdr = df.join(vdr, on="id")
df_vdr.head()

In [None]:
# select one ethnicity and age band combination
is_maori_pacific = df_vdr["mpoMaoriPacific"] == "MaoriPacific"
is_aged_20_44 = df_vdr["ageband"] == "20-44"
df_vdr[is_maori_pacific & is_aged_20_44].head()

In [None]:
# vdr as a percentage of population against log travel time, one series per age band
x = "log_min_tt"

plt.figure(figsize=(10, 10))
for eth in ["MaoriPacific"]:
    eth_rows = df_vdr[df_vdr["mpoMaoriPacific"] == eth]
    for ageband in ["20-44", "45-64", "65+"]:
        sample = eth_rows[eth_rows["ageband"] == ageband].dropna()
        vdr_percent = 100 * sample["count_vdr"]/sample["pop"]
        plt.scatter(sample[x], vdr_percent, label=f"{eth}, {ageband}", alpha=0.5)

# the axis settings are the same for every series, so apply them once
plt.xlabel("travel time (log10 minutes) to nearest clinic")
plt.ylabel("vdr as % of pop")
plt.ylim(0, 100)
plt.title(f"{eth} VDR vs travel time")
plt.legend()

In [None]:
# vdr as a percentage of population against log travel time, one series per age band
x = "log_min_tt"

plt.figure(figsize=(10, 10))
for eth in ["nMnP"]:
    eth_rows = df_vdr[df_vdr["mpoMaoriPacific"] == eth]
    for ageband in ["20-44", "45-64", "65+"]:
        sample = eth_rows[eth_rows["ageband"] == ageband].dropna()
        vdr_percent = 100 * sample["count_vdr"]/sample["pop"]
        plt.scatter(sample[x], vdr_percent, label=f"{eth}, {ageband}", alpha=0.5)

# the axis settings are the same for every series, so apply them once
plt.xlabel("travel time (log10 minutes) to nearest clinic")
plt.ylabel("vdr as % of pop")
plt.ylim(0, 100)
plt.title(f"{eth} VDR vs travel time")
plt.legend()

In [None]:
# maps: vdr as a percentage of population for one ethnicity / age band

eth = "MaoriPacific"
#eth = "nMnP"
ageband = "45-64"
print(f"VDR as % of pop. for {eth}, aged {ageband}")

# assign() builds the percentage column on a fresh frame; writing the column
# into the filtered selection directly can trigger SettingWithCopyWarning
selected = (df_vdr.mpoMaoriPacific == eth) & (df_vdr.ageband == ageband)
sample = (
    df_vdr[selected]
    .dropna()
    .assign(vdr_perc=lambda rows: 100 * rows["count_vdr"] / rows["pop"])
)

# plot - note geojson data needs to be wgs84
fig = px.choropleth_mapbox(
        sample,
        geojson=polys,
        featureidkey="id",
        locations="id",
        center = {"lat": lat, "lon": lon},
        mapbox_style="carto-positron",
        color="vdr_perc",
        color_continuous_scale="Viridis",
        zoom=12)
fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
fig.show()

In [None]:
# number of distinct location ids in df
df["id"].nunique()

In [None]:
# number of distinct location ids that still have a complete row after the vdr join
df_vdr.dropna()["id"].nunique()