In [1]:
import pandas as pd
import requests
import json

In [2]:
# Load the Google Maps API key from a local credentials file kept outside the notebook.
# The context manager closes the file handle deterministically, and strip() removes
# the trailing newline most editors append, which would otherwise be sent as part of
# the key in every request.
with open('../credentials/google_api_key.txt') as key_file:
    google_API_key = key_file.read().strip()

In [3]:
def osm_distance(lon_start, lat_start, lon_dest, lat_dest, timeout=30):
    """
    Return the distance in meters of the first route OSRM reports between two points.

    Queries the `foot` route endpoint of router.project-osrm.org. Coordinates are
    decimal degrees and are sent in lon,lat order, as the URL template requires.

    Parameters
    ----------
    lon_start, lat_start : start point, decimal degrees
    lon_dest, lat_dest : destination point, decimal degrees
    timeout : seconds to wait for the server before giving up (passed to requests)

    Raises
    ------
    requests.HTTPError : the server answered with a non-success status code
    ValueError : the response was well-formed but contained no route
    """
    # Get routes from OSRM; without a timeout a stalled server would block forever
    response = requests.get(
        f"http://router.project-osrm.org/route/v1/foot/{lon_start},{lat_start};{lon_dest},{lat_dest}?overview=false",
        timeout=timeout,
    )
    response.raise_for_status()

    # Parse routes from JSON output
    routes = response.json().get("routes")
    if not routes:
        raise ValueError(
            f"OSRM returned no route from ({lon_start}, {lat_start}) to ({lon_dest}, {lat_dest})"
        )

    # Distance of the first listed route, in meters
    return routes[0]["distance"]

def goog_distance(lon_start, lat_start, lon_dest, lat_dest, timeout=30):
    """
    Return the walking distance in meters of the first route from the Google Directions API.

    Coordinates are decimal degrees. Note the argument order is lon,lat (matching
    osm_distance) while the request sends them as lat,lon. The API key is read from
    the module-level `google_API_key`.

    Parameters
    ----------
    lon_start, lat_start : start point, decimal degrees
    lon_dest, lat_dest : destination point, decimal degrees
    timeout : seconds to wait for the server before giving up (passed to requests)

    Raises
    ------
    requests.HTTPError : the server answered with a non-success status code
    ValueError : the response contained no route (the API `status` is included)
    """
    # Get routes from Google API. Passing the query via `params` lets requests
    # URL-encode every value, including the key, instead of splicing raw text
    # into the URL.
    response = requests.get(
        "https://maps.googleapis.com/maps/api/directions/json",
        params={
            "origin": f"{lat_start},{lon_start}",
            "destination": f"{lat_dest},{lon_dest}",
            "mode": "walking",
            "key": google_API_key,
        },
        timeout=timeout,
    )
    response.raise_for_status()

    # Parse routes from JSON output
    results = response.json()
    routes = results.get("routes")
    if not routes:
        raise ValueError(
            f"Google Directions returned no route (status={results.get('status')!r})"
        )

    # Distance of the first leg of the first route, in meters
    return routes[0]["legs"][0]["distance"]["value"]

def distance(lon_start, lat_start, lon_dest, lat_dest):
    """
    Routed distance in meters between two lon/lat points (decimal degrees).

    Single switch point for the routing backend: it currently delegates to
    osm_distance; goog_distance accepts the same arguments if a swap is wanted.
    """
    routed_meters = osm_distance(lon_start, lat_start, lon_dest, lat_dest)
    return routed_meters

In [4]:
# Input tables. compute_closest_metros reads the 'lon', 'lat' and 'Name' columns
# of `metros` and the 'lon' and 'lat' columns of each `venues` row.
venues = pd.read_csv(filepath_or_buffer="../data/RHH_Venues.csv")
metros = pd.read_csv(filepath_or_buffer="../data/DC_Metro_Stations.csv")

In [5]:
from math import radians, cos, sin, asin, sqrt
def haversine(lon1, lat1, lon2, lat2):
    """
    Calculate the great circle distance in meters between two points
    on the earth (specified in decimal degrees).

    Uses a spherical earth with a radius of 6371 km.
    """
    # convert decimal degrees to radians
    lon1, lat1, lon2, lat2 = map(radians, [lon1, lat1, lon2, lat2])
    # haversine formula
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    a = sin(dlat / 2) ** 2 + cos(lat1) * cos(lat2) * sin(dlon / 2) ** 2
    # For (near-)antipodal points floating-point rounding can leave sqrt(a) a hair
    # above 1, which is outside asin's domain and raises ValueError; clamp it.
    c = 2 * asin(min(1.0, sqrt(a)))
    # Radius of earth in kilometers is 6371
    km = 6371 * c
    return km * 1000

In [6]:
# Upper bound, in meters, on the routed distance for a station to count as "close"
# in compute_closest_metros; that function doubles it for its second, relaxed pass.
acceptable_walking_distance_in_meters = 500

def _nearest_first(values):
    """Positions of the distinct entries of `values`, ordered by ascending value (first occurrence wins on ties)."""
    first_position = {}
    for position, value in enumerate(values):
        first_position.setdefault(value, position)
    return [first_position[value] for value in sorted(first_position)]


def compute_closest_metros(venue, n_candidates=5, max_walk_m=None):
    """
    Return the names of the Metro stations considered close to `venue`, nearest first.

    Steps:
      1. Rank every row of the module-level `metros` table by great-circle
         (haversine) distance to the venue and keep the `n_candidates` nearest,
         so that only those few need a routing API call.
      2. Get the routed distance to each candidate via `distance`.
      3. Keep candidates under `max_walk_m` AND under 1.75x the shortest routed
         distance. If none qualify, retry with 2x `max_walk_m` and 2x the
         shortest. If still none qualify, return just the nearest candidate.

    Stations whose distance is exactly equal to an earlier station's are treated
    as one entry (the first occurrence is kept).

    Parameters
    ----------
    venue : mapping or Series with 'lon' and 'lat' in decimal degrees
    n_candidates : how many straight-line-nearest stations to route to
    max_walk_m : walking limit in meters; defaults to the module-level
        `acceptable_walking_distance_in_meters`

    Returns
    -------
    list of station names from metros['Name']; empty if `metros` has no rows
    """
    if max_walk_m is None:
        max_walk_m = acceptable_walking_distance_in_meters

    # Cheap pre-filter: great-circle distance to every station
    straight_line = [
        haversine(lon, lat, venue['lon'], venue['lat'])
        for lon, lat in zip(metros['lon'], metros['lat'])
    ]
    candidate_rows = _nearest_first(straight_line)[:n_candidates]
    if not candidate_rows:
        # No stations loaded: nothing can be close
        return []

    # Routed distance only for the pre-filtered candidates (one API call each)
    candidates = metros.iloc[candidate_rows]
    routed = [
        distance(lon, lat, venue['lon'], venue['lat'])
        for lon, lat in zip(candidates['lon'], candidates['lat'])
    ]
    ranked = _nearest_first(routed)
    shortest = routed[ranked[0]]

    # (absolute limit in meters, allowed multiple of the shortest distance):
    # strict pass first, relaxed pass only if the strict one selects nothing.
    passes = ((max_walk_m, 1.75), (max_walk_m * 2, 2))
    chosen = []
    for walk_limit, ratio_limit in passes:
        chosen = [
            pos for pos in ranked
            if routed[pos] < walk_limit and routed[pos] < shortest * ratio_limit
        ]
        if chosen:
            break

    # Nothing within either limit (also the case when the shortest distance is 0,
    # since no distance is < 0): fall back to the single nearest station.
    if not chosen:
        chosen = [ranked[0]]

    return list(candidates['Name'].iloc[chosen])

In [7]:
# One list of nearby station names per venue row (each row triggers routing API calls)
venues['Closest Metros'] = venues.apply(compute_closest_metros, axis=1)

In [8]:
# Write the augmented table back over the venues CSV it was loaded from, without the index column
venues.to_csv(path_or_buf="../data/RHH_Venues.csv", index=False)