# Get location data from the Google Maps Geocoding API

In [1]:
import os

import googlemaps
import numpy as np
import pandas as pd

Functions to extract the latitude and longitude from a geocoding result and to calculate the distance between two coordinates

In [2]:
def getLatLng(geocode_result):
    """Pulls the (latitude, longitude) pair out of a geocoding response

    Arguments:
        geocode_result -- sequence returned by the geocode method of a
                          googlemaps client; only its first entry is read

    Returns:
        tuple (latitude, longitude) taken from that entry's geometry/location
    """
    first_match = geocode_result[0]
    location = first_match['geometry']['location']
    return tuple(location[key] for key in ('lat', 'lng'))

def getDist(coords1, coords2, units=None):
    """Calculates the ellipsoidal distance between 2 coordinates using geopy

    Uses geopy.distance.geodesic when the installed geopy provides it and
    falls back to the older geopy.distance.vincenty otherwise (vincenty was
    removed in geopy 2.0, so relying on it alone breaks on current releases).

    Arguments:
        coords1, coords2 -- tuples of the format (latitude, longitude)
        units -- pass str 'miles' to get distance in miles; any other value
                 (including None and 'km') gives kilometers

    Returns:
        Distance between coords1 and coords2 in miles or kilometers
    """
    import geopy.distance
    # getattr with a default keeps this working on geopy releases that
    # predate geodesic and only ship vincenty.
    measure = getattr(geopy.distance, 'geodesic', None)
    if measure is None:
        measure = geopy.distance.vincenty
    separation = measure(coords1, coords2)
    if units == 'miles':
        return separation.miles
    return separation.km

In [3]:
def populateLatLng(df, gmaps, addr_col_name, ref_coords=(-23.5999515, -46.7150129)):
    """Adds columns latitude, longitude and distance from reference coords to provided pandas dataframe

    Arguments:
        df -- pandas dataframe with formatted institution information; the
              new columns are added to this dataframe itself
        gmaps -- a googlemaps.client object
        addr_col_name -- str with name of the column that should be used to query address
        ref_coords -- tuple (latitude, longitude) that distances are measured
                      from; defaults to HOSPITAL ISRAELITA ALBERT EINSTEIN

    Returns:
        the same dataframe with added columns 'Lat', 'Lng' and 'Dist'
        (kilometers); rows whose address is missing or could not be geocoded
        hold NaN in all three columns
    """
    latLngDist = np.full((df.shape[0], 3), np.nan)
    # Walk the column by position: the dataframe's index labels are not
    # guaranteed to be 0..n-1 (e.g. after filtering), so they must not be used
    # to address rows of the numpy buffer.
    for position, inst in enumerate(df[addr_col_name]):
        # A missing address cannot be geocoded; leave its row as NaN.
        if not isinstance(inst, str) and pd.isna(inst):
            continue
        geocode = gmaps.geocode(inst)
        if not geocode:
            continue
        lat, lng = getLatLng(geocode)
        dist = getDist(ref_coords, (lat, lng), 'km')
        latLngDist[position, :] = [lat, lng, dist]
    df['Lat'] = latLngDist[:, 0]
    df['Lng'] = latLngDist[:, 1]
    df['Dist'] = latLngDist[:, 2]
    return df

## main()
The api_key provided here was generated by sumanthsridhar.009@gmail.com

Initialize:

In [4]:
# Read the key from the environment so the credential is not stored in the
# notebook (and therefore not committed or shared along with it).
api_key = os.environ.get('GOOGLE_MAPS_API_KEY')
if not api_key:
    raise RuntimeError('Set the GOOGLE_MAPS_API_KEY environment variable to a Google Maps API key')
gmaps = googlemaps.Client(key=api_key)
# Only read the CSV when `data` is not already defined in this kernel session.
if 'data' not in locals():
    print('Data not present in workspace... importing from local csv')
    data = pd.read_csv('Example02.csv')

Data not present in workspace... importing from local csv


Execute:  
(NOTE: Takes a couple of minutes to execute. Be patient)

In [5]:
# Geocode every row's 'Complete Address' and add the Lat/Lng/Dist columns.
# populateLatLng adds the columns to `data` itself and returns it, so `df`
# and `data` refer to the same dataframe afterwards.
df = populateLatLng(data, gmaps, 'Complete Address')

In [6]:
# Plain-text dump of the whole geocoded dataframe.
print(df)

    Unnamed: 0                                        Institution  CNES Code  \
0            0                          A C CAMARGO CANCER CENTER    2077531   
1            1     A MAIS MEDICINA DIAGNOSTICA UNID AUGUSTO TOLLE    9227571   
2            2     A MAIS MEDICINA DIAGNOSTICA UNIDADE CAMPO BELO    6441106   
3            3          A MAIS MEDICINA DIAGNOSTICA UNIDADE MOEMA    5167612   
4            4                               AFIP VILA CLEMENTINO    3813517   
5            5  AME AMBULATORIO MEDICO DE ESPEC DRA MARIA CRIS...    6432530   
6            6                             BETA SAUDE SANTO AMARO    9096159   
7            7                                     BFA RADIOLOGIA    7931212   
8            8  CARDIOLOGICA MEDICINA DIAGNOSTICA SERV MED E A...    3156877   
9            9            CENTRO AVANCADO DE MEDICINA DIAGNOSTICA    7469691   
10          10            CENTRO DE PROCEDIMENTO E APOIO ZONA SUL    7823339   
11          11   CENTRO MEDICO LIBERDADE

In [7]:
# Persist the geocoded dataframe; with to_csv's defaults the row index is
# written as the first column of the file.
df.to_csv('with_latlng.csv')

In [8]:
# Import the gmaps plotting package, installing it from conda-forge into the
# running interpreter's environment first if the import fails.
# NOTE: this rebinds the name `gmaps`, which the initialization cell assigned
# to a googlemaps.Client. Re-running the geocoding cell after this one would
# pass this module to populateLatLng instead of the client.
try:
    import gmaps
except ImportError:
    import sys
    !conda install --yes --prefix {sys.prefix} -c conda-forge gmaps
    import gmaps
import gmaps.datasets

In [9]:
def map_competitors(df, ref_row, max_dist, api_key=None):
    """Builds a gmaps figure showing a reference institution and its nearby competitors

    Arguments:
        df -- pandas dataframe with 'Institution', 'Lat', 'Lng' and 'Dist' columns
        ref_row -- dataframe holding the reference institution; its 'Lat'/'Lng'
                   columns place the marker and its first row's 'Institution'
                   is the marker's hover text
        max_dist -- rows of df whose 'Dist' is strictly below this value are
                    drawn as competitors
        api_key -- Google Maps API key passed to gmaps.configure; when None
                   the GOOGLE_MAPS_API_KEY environment variable is used

    Returns:
        gmaps figure with the reference marker layer and the competitors symbol layer

    Raises:
        RuntimeError -- if api_key is None and GOOGLE_MAPS_API_KEY is not set
    """
    # The key comes from the caller or the environment so that no credential
    # is stored in the notebook.
    if api_key is None:
        api_key = os.environ.get('GOOGLE_MAPS_API_KEY')
    if not api_key:
        raise RuntimeError('Set the GOOGLE_MAPS_API_KEY environment variable or pass api_key')
    gmaps.configure(api_key=api_key)

    # Honour the caller's max_dist; rows with a NaN 'Dist' never satisfy the
    # comparison and are therefore left out.
    competitors_df = df[df['Dist'] < max_dist]
    locations_df = competitors_df[['Lat', 'Lng']]
    competitor_names = list(competitors_df['Institution'])
    competitors_layer = gmaps.symbol_layer(locations=locations_df,
                                           hover_text=competitor_names,
                                           fill_color='green',
                                           stroke_color='green',
                                           scale=2)
    ref_coords = ref_row[['Lat', 'Lng']]
    marker = gmaps.marker_layer(locations=ref_coords,
                                hover_text=ref_row.iloc[0]['Institution'])
    fig = gmaps.figure()
    fig.add_layer(marker)
    fig.add_layer(competitors_layer)
    return fig

In [10]:
# Select the reference institution's row(s) by exact name match and build the
# competitor map from the geocoded dataframe.
ref_name = 'HOSPITAL ISRAELITA ALBERT EINSTEIN'
ref_row = df.loc[df['Institution'] == ref_name]
fig = map_competitors(df, ref_row, max_dist=10)
# Bare expression so the notebook renders the figure as the cell's output.
fig