In [None]:
import json, csv
import os
import re
import time
from unicodedata import normalize

import googlemaps
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
import seaborn as sns
import unidecode
from tqdm.notebook import tqdm

In [None]:
from IPython.display import display, HTML

# Inject a small stylesheet that widens the notebook page containers so wide
# DataFrames are easier to read. The element ids appear to target the classic
# Notebook UI -- TODO confirm they still match the front-end in use.
display(HTML(data="""
<style>
    div#notebook-container    { width: 95%; }
    div#menubar-container     { width: 65%; }
    div#maintoolbar-container { width: 99%; }
</style>
"""))

In [None]:
# Load the Cajamarca health-establishment table from the shared data folder.
data_cajamarca = pd.read_excel(
    io=r'../../_data/cajamarca_data_helth_estb.xlsx',
)

In [None]:
# Load the district-centroid table (filtered to Cajamarca in the next cell).
centroids = pd.read_excel(
    io=r'../../_data/peru_districts_centroids.xlsx',
)

In [None]:
# Keep only the centroid rows whose department name is CAJAMARCA.
is_cajamarca = centroids['NOMBDEP'].eq('CAJAMARCA')
cen_caj = centroids[is_cajamarca]

### 4. Keep the health establishment (HE) closest to each district centroid

In [None]:
# Attach each establishment's district centroid. The join is an inner join on
# province + district; validate='m:1' makes pandas raise if a
# (province, district) pair occurs more than once on the centroid side.
HE = data_cajamarca.merge(
    cen_caj,
    how='inner',
    on=['NOMBPROV', 'NOMBDIST'],
    validate='m:1',
)

In [None]:
# Straight-line (Euclidean) distance between each establishment and its
# district centroid, expressed in the same units as the coordinate columns
# (presumably decimal degrees -- TODO confirm). Computed with vectorised column
# arithmetic rather than a row-wise apply: same values, no per-row Python call,
# and it also works when HE has no rows.
HE['DISTANCE'] = np.sqrt(
    (HE['Centroid_Latitude'] - HE['latitud']) ** 2
    + (HE['longitud'] - HE['Centroid_Longitude']) ** 2
)

### 5. Use the Google Directions API to find the driving travel time and distance from each health establishment to its district centroid

In [None]:
def directions(x, lat_origin, lon_origin, lat_destination, lon_destination, key, region, time):
    """Query the Google Directions API for one trip under three traffic models.

    Parameters
    ----------
    x : mapping-like (e.g. a DataFrame row)
        Object indexed with the four column names below to read coordinates.
    lat_origin, lon_origin : str
        Keys of ``x`` holding the origin latitude and longitude.
    lat_destination, lon_destination : str
        Keys of ``x`` holding the destination latitude and longitude.
    key : str
        API key sent as the ``key`` query parameter.
    region : str
        Value sent as the ``region`` query parameter.
    time : str or int
        Value sent as the ``departure_time`` query parameter. Note that this
        parameter name shadows the ``time`` module inside the function.

    Returns
    -------
    tuple[list, list]
        ``(distance, duration)``; each list has one entry per traffic model,
        in the order ``best_guess``, ``pessimistic``, ``optimistic``. An entry
        is the ``value`` field reported for the first leg of the first route
        (metres / seconds according to the API documentation), or the string
        ``'No results'`` when the API answers ``ZERO_RESULTS``. The duration is
        taken from ``duration_in_traffic`` when the response contains it and
        from ``duration`` otherwise.

    Raises
    ------
    RuntimeError
        If the API answers with any status other than ``OK`` or
        ``ZERO_RESULTS`` (for example ``REQUEST_DENIED``).
    """
    endpoint = 'https://maps.googleapis.com/maps/api/directions/json'
    traffic_models = ['best_guess', 'pessimistic', 'optimistic']

    origin = '{},{}'.format(str(x[lat_origin]), str(x[lon_origin]))
    destination = '{},{}'.format(str(x[lat_destination]), str(x[lon_destination]))

    distance = []
    duration = []

    for model in tqdm(traffic_models):
        # Let requests build and URL-encode the query string.
        params = {
            'origin': origin,
            'destination': destination,
            'departure_time': time,
            'traffic_model': model,
            'mode': 'driving',
            'region': region,
            'key': key,
        }
        # A timeout keeps one stalled connection from hanging the whole run.
        response = requests.get(endpoint, params=params, timeout=30)
        response.raise_for_status()
        payload = response.json()

        status = payload.get('status')
        if status == 'ZERO_RESULTS':
            distance.append('No results')
            duration.append('No results')
            continue
        if status != 'OK':
            # Surface quota / key / request problems explicitly instead of
            # failing later with an IndexError on an empty 'routes' list.
            raise RuntimeError(
                'Directions API returned status {!r} for {} -> {}: {}'.format(
                    status, origin, destination, payload.get('error_message', '')
                )
            )

        leg = payload['routes'][0]['legs'][0]
        distance.append(leg['distance']['value'])
        # 'duration_in_traffic' is not always part of the response; fall back
        # to the plain 'duration' rather than raising KeyError.
        if 'duration_in_traffic' in leg:
            duration.append(leg['duration_in_traffic']['value'])
        else:
            duration.append(leg['duration']['value'])

    return distance, duration

In [None]:
# Departure time sent to the Directions API, as seconds since the Unix epoch
# (1643058000 == 2022-01-24 21:00 UTC).
# NOTE(review): the API documentation says departure_time may not be in the
# past, so this value probably has to be moved forward before re-running.
departure_time = '1643058000'

# Read the API key from the environment so it never has to be pasted into the
# notebook; falls back to the empty string when the variable is not set.
key = os.environ.get('GOOGLE_MAPS_API_KEY', '')

In [None]:
# Call the Directions API once per establishment row and keep the raw
# (distance, duration) tuple in a helper column.
# NOTE(review): in the cells visible here the centroid columns are only merged
# into `HE`, not into `data_cajamarca` -- confirm that `data_cajamarca` really
# carries 'Centroid_Latitude' / 'Centroid_Longitude' on a fresh kernel.
data_cajamarca['directions'] = data_cajamarca.apply(
    lambda row: directions(
        row,
        'latitud', 'longitud',
        'Centroid_Latitude', 'Centroid_Longitude',
        key, 'pe', departure_time,
    ),
    axis=1,
)

In [None]:
# Transpose the per-row (distance, duration) tuples into two lists of rows ...
distance, duration = (list(part) for part in zip(*data_cajamarca['directions'].tolist()))

# ... then transpose each of those into one list per traffic model, in the
# order the models were requested: best_guess, pessimistic, optimistic.
travel_time_best_guess, travel_time_pessimistic, travel_time_optimistic = (
    list(per_model) for per_model in zip(*duration)
)
travel_distance_best_guess, travel_distance_pessimistic, travel_distance_optimistic = (
    list(per_model) for per_model in zip(*distance)
)

In [None]:
# Column name -> list of values, in the column order used for the final table.
d = dict(
    travel_time_best_guess=travel_time_best_guess,
    travel_time_pessimistic=travel_time_pessimistic,
    travel_time_optimistic=travel_time_optimistic,
    travel_distance_best_guess=travel_distance_best_guess,
    travel_distance_pessimistic=travel_distance_pessimistic,
    travel_distance_optimistic=travel_distance_optimistic,
)

In [None]:
# Build a frame of the six travel columns, aligned on the establishment index.
df = pd.DataFrame(data=d, index=data_cajamarca.index)

# Copy the six columns onto the main table.
data_cajamarca[
    [
        'travel_time_best_guess',
        'travel_time_pessimistic',
        'travel_time_optimistic',
        'travel_distance_best_guess',
        'travel_distance_pessimistic',
        'travel_distance_optimistic',
    ]
] = df

# The raw tuple column is no longer needed once it has been expanded.
del data_cajamarca['directions']
data_cajamarca

### 6. Graphs