# Calculate distance from NHP

This notebook calculates the distance from a patient's residential area to the National Hospital of Pediatrics (NHP), Hanoi, Vietnam, using the `Azure Maps Route` and `Geopy` libraries.

In [1]:
# Import necessary libraries
import os
from pprint import pprint

import pandas as pd
from azure.core.credentials import AzureKeyCredential
from azure.maps.route import MapsRouteClient
from azure.maps.route.models import LatLon
from geopy.geocoders import AzureMaps

In [None]:
# Create map and route clients
# The subscription key is read from the AZURE_MAPS_KEY environment variable so
# that no secret is ever stored in (or committed with) the notebook.
azure_maps_key = os.environ.get('AZURE_MAPS_KEY')
if not azure_maps_key:
    raise RuntimeError(
        "Set the AZURE_MAPS_KEY environment variable to your Azure Maps "
        "subscription key before running this notebook."
    )

credential = AzureKeyCredential(azure_maps_key)
# geopy's AzureMaps geocoder takes the raw key string, the route client takes
# the credential object.
geolocation_client = AzureMaps(credential.key)
route_client = MapsRouteClient(credential=credential)

In [None]:
# Pin the hospital location to fixed coordinates instead of geocoding its
# address, so every distance is measured to exactly the same point.
NHP_coordinates = LatLon(21.026047, 105.810077)

# Sanity check: reverse-geocode the pinned point and show what it resolves to.
nhp_query = "%f, %f" % (NHP_coordinates.lat, NHP_coordinates.lon)
nhp_location = geolocation_client.reverse(nhp_query)
print('NHP coordinates:', nhp_location)

In [None]:
# Get data from a csv file using pandas
csv_df = pd.read_csv('../test.csv')

# Geocode every patient address, keeping the results in row order so they can
# be attached back to csv_df positionally.
addrs = [
    geolocation_client.geocode(address)
    for address in csv_df['complete_patient_address']
]

# geocode() returns None when an address cannot be resolved. Report those
# addresses now rather than failing later with an AttributeError on
# `.latitude`, after routing requests have already been issued.
failed_addresses = [
    address
    for address, location in zip(csv_df['complete_patient_address'], addrs)
    if location is None
]
if failed_addresses:
    raise ValueError(
        "Could not geocode %d address(es): %s"
        % (len(failed_addresses), failed_addresses)
    )

In [None]:
# Get distance from NHP to each address using Azure Maps Route API
def get_distance(addr):
    """Return the route summary for travelling from ``addr`` to NHP.

    Args:
        addr: A geocoded location exposing ``latitude`` and ``longitude``.

    Returns:
        The ``summary`` entry of the first route in the Azure Maps response,
        as a dict. Other cells read ``length_in_meters`` from it.
    """
    origin = LatLon(addr.latitude, addr.longitude)
    directions = route_client.get_route_directions(
        route_points=[origin, NHP_coordinates]
    )
    first_route = directions.as_dict()['routes'][0]
    return first_route['summary']

In [None]:
# Route summaries, one per geocoded address, in the same order as `addrs`.
# Results are appended one at a time so that whatever was already fetched
# remains in `distances` if a request fails part-way through.
distances = []

for addr in addrs:
    distances.append(get_distance(addr))

print("Finish calculating distances")

In [None]:
# Replace any stale coordinate column with freshly geocoded "lat,lon" strings.
if 'patient_home_coordinate' in csv_df.columns:
    csv_df.drop(columns='patient_home_coordinate', inplace=True)
home_coordinates = ["%f,%f" % (addr.latitude, addr.longitude) for addr in addrs]
csv_df.insert(0, 'patient_home_coordinate', home_coordinates)

# Replace any stale distance column; route lengths are converted from metres
# to kilometres.
if 'distance_to_nhp' in csv_df.columns:
    csv_df.drop(columns='distance_to_nhp', inplace=True)
distances_km = [summary['length_in_meters'] / 1000 for summary in distances]
csv_df.insert(0, 'distance_to_nhp', distances_km)

In [None]:
# Export to csv file
# Writes the augmented frame (coordinates and distance columns included) to
# ../export.csv; index=False keeps the DataFrame index out of the file.
csv_df.to_csv('../export.csv', index=False)

# Histogram

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

df = pd.read_csv('../dataset/patient_location.csv')

# Half-open distance bins [low, high). The first edge is -inf so that anything
# below 10 lands in the first bin, matching its "x < 10" definition.
BIN_EDGES = [-np.inf, 10, 25, 50, 100, 200, np.inf]
BIN_LABELS = [
    "[0, 10)",
    "[10, 25)",
    "[25, 50)",
    "[50, 100)",
    "[100, 200)",
    "[200, +inf)",
]

# Vectorised binning; rows with a missing distance fall into no bin and are
# therefore not counted.
binned = pd.cut(df['distance_to_nhp'], bins=BIN_EDGES, labels=BIN_LABELS, right=False)
bin_counts = binned.value_counts(sort=False)

# Label -> number of patients, in bin order (empty bins are kept as 0).
data = {label: int(bin_counts.get(label, 0)) for label in BIN_LABELS}

fig, ax = plt.subplots()
ax.bar(list(data.keys()), list(data.values()))
# NOTE(review): unit assumed to be km, as produced by the export cell
# (length_in_meters / 1000) — confirm for patient_location.csv.
ax.set_xlabel('Distance to NHP (km)')
ax.set_ylabel('Number of patients')
ax.set_title('Patients by distance to NHP')
plt.show()

# Draw map

In [None]:
import geopandas as gpd
import pandas as pd
import geoplot as gplt
import mapclassify as mc

# Patient counts, read from the 'Patient' column of the statistics file.
df = pd.read_csv('../dataset/province_stat.csv')

# Boundary shapes from the GADM shapefile, ordered by the NAME_1 column.
gdf = gpd.read_file('../dataset/gadm41_VNM_1.shp').sort_values(by=['NAME_1'])

# Counts are attached to the shapes purely by position, and shapes without a
# matching row are padded with 0 at the end.
# NOTE(review): this presumably relies on the CSV rows being in the same
# NAME_1 order as the sorted shapes — confirm against the data.
padding = [0] * (len(gdf) - len(df))
patients = [*df['Patient'], *padding]

gdf.insert(0, 'Patient', patients)
scheme = mc.Quantiles(gdf['Patient'], k=100)

gplt.choropleth(gdf, hue=patients, scheme=scheme, figsize=(50, 50), cmap='Reds')

# print(gdf)


## Map by year

Year: 2017

In [None]:
# Patient counts for 2017, read from the '2017' column.
df = pd.read_csv('../dataset/year_and_province.csv')

# Boundary shapes from the GADM shapefile, ordered by the NAME_1 column.
gdf = gpd.read_file('../dataset/gadm41_VNM_1.shp').sort_values(by=['NAME_1'])

# Counts are attached by position; shapes beyond the CSV rows get 0.
# NOTE(review): presumably the CSV rows follow the same NAME_1 order — confirm.
padding = [0] * (len(gdf) - len(df))
patients = [*df['2017'], *padding]

gdf.insert(0, 'Patient', patients)
scheme = mc.Quantiles(gdf['Patient'], k=100)

gplt.choropleth(gdf, hue=patients, scheme=scheme, figsize=(10, 10), cmap='Reds')

Year: 2018

In [None]:
# Patient counts for 2018, read from the '2018' column.
df = pd.read_csv('../dataset/year_and_province.csv')

# Boundary shapes from the GADM shapefile, ordered by the NAME_1 column.
gdf = gpd.read_file('../dataset/gadm41_VNM_1.shp').sort_values(by=['NAME_1'])

# Counts are attached by position; shapes beyond the CSV rows get 0.
# NOTE(review): presumably the CSV rows follow the same NAME_1 order — confirm.
padding = [0] * (len(gdf) - len(df))
patients = [*df['2018'], *padding]

gdf.insert(0, 'Patient', patients)
scheme = mc.Quantiles(gdf['Patient'], k=100)

gplt.choropleth(gdf, hue=patients, scheme=scheme, figsize=(10, 10), cmap='Reds')

Year: 2019

In [None]:
# Patient counts for 2019, read from the '2019' column.
df = pd.read_csv('../dataset/year_and_province.csv')

# Boundary shapes from the GADM shapefile, ordered by the NAME_1 column.
gdf = gpd.read_file('../dataset/gadm41_VNM_1.shp').sort_values(by=['NAME_1'])

# Counts are attached by position; shapes beyond the CSV rows get 0.
# NOTE(review): presumably the CSV rows follow the same NAME_1 order — confirm.
padding = [0] * (len(gdf) - len(df))
patients = [*df['2019'], *padding]

gdf.insert(0, 'Patient', patients)
scheme = mc.Quantiles(gdf['Patient'], k=100)

gplt.choropleth(gdf, hue=patients, scheme=scheme, figsize=(10, 10), cmap='Reds')