**Download the data here:** [2nd_phase_data](https://github.com/BradyEngelke/msba_elp/tree/master/2nd_phase_data)

**Data needed**: Minnesota_Hospitals.csv, Food_Shelves.csv, Minnesota_Nursing_Facilities.csv

**Introduction** This notebook shows how to use the geopy Python package to generate coordinates automatically. We use these three datasets as examples.


# Packages

In [1]:
import warnings
warnings.filterwarnings("ignore")

In [7]:
import pandas as pd
from geopy.geocoders import Nominatim

In [9]:
# A simple example: geocode a street address to (latitude, longitude).
# Nominatim requires an application-specific user agent; geopy 2.x refuses
# to run with the default one.
geolocator = Nominatim(user_agent="msba_elp_geocoding_example")
location = geolocator.geocode("175 5th Avenue NYC")
# geocode() returns None when nothing matches, so guard before reading
# the coordinate attributes.
if location is None:
    print("No match found")
else:
    print((location.latitude, location.longitude))

(40.741059199999995, -73.98964162240998)


In [10]:
# A simple example: geocode a place name (rather than a street address)
# to (latitude, longitude).
# Nominatim requires an application-specific user agent; geopy 2.x refuses
# to run with the default one.
geolocator = Nominatim(user_agent="msba_elp_geocoding_example")
location = geolocator.geocode("University of Minnesota")
# geocode() returns None when nothing matches, so guard before reading
# the coordinate attributes.
if location is None:
    print("No match found")
else:
    print((location.latitude, location.longitude))

(44.97308605, -93.23708812629405)


In [5]:
# Input: raw CSV files from GIS Open Data; each file uses its own county
#        column name and its own spelling/casing of the county value.
# Output: one DataFrame per file, restricted to Hennepin County rows.
#
# .copy() makes each filtered result an independent DataFrame, so adding
# columns to it later does not trigger pandas' SettingWithCopyWarning.
hospital = pd.read_csv('Minnesota_Hospitals.csv')
hospital = hospital[hospital['COUNTYNAME'] == 'HENNEPIN'].copy()

food_shelves = pd.read_csv('Food_Shelves.csv')
food_shelves = food_shelves[food_shelves['County'] == 'Hennepin'].copy()

nursing = pd.read_csv('Minnesota_Nursing_Facilities.csv')
nursing = nursing[nursing['COUNTY_NAME'] == 'HENNEPIN'].copy()

In [139]:
# This function can use either the street address or the location name to get coordinates. We try both and keep the result that falls inside Hennepin County.

def _geocode_in_bounds(geolocator, query, lat_bounds, lon_bounds):
    """Geocode ``query`` and classify the hit against a bounding box.

    Returns a ``(latitude, longitude)`` tuple when the result lies strictly
    inside the box, the string ``'Out of range'`` when it lies outside, and
    the string ``'Cannot find'`` when the geocoder returns nothing or raises.
    """
    try:
        location = geolocator.geocode(query)
    except Exception:
        # Best effort: one failed request must not abort the whole table.
        # Catching Exception (not a bare ``except``) keeps Ctrl-C working
        # during a long geocoding run.
        location = None
    if location is None:
        return 'Cannot find'

    lat_ok = lat_bounds[0] < location.latitude < lat_bounds[1]
    lon_ok = lon_bounds[0] < location.longitude < lon_bounds[1]
    if lat_ok and lon_ok:
        return location.latitude, location.longitude
    return 'Out of range'


def addr_to_coord(name, table_name, address_col, name_col, *, geolocator=None):
    """Add 'Latitude' and 'Longitude' columns to a table by geocoding each row.

    For every row the location name is geocoded first. If that result is
    missing or falls outside the Hennepin County bounding box, the street
    address is geocoded instead. A row that still has no usable result gets
    the string 'Out of range' (address found, but outside the box) or
    'Cannot find' (address lookup returned nothing or failed) in both columns.

    Results are written row by row, so rows that share a name or an address
    cannot overwrite each other's outcome.

    Args:
        name: Label for the table, used only in the printed summary.
        table_name: DataFrame holding the address and location-name columns.
            It is modified in place and also returned.
        address_col: Name of the column with the street address.
        name_col: Name of the column with the location name.
        geolocator: Optional object exposing ``geocode(query)`` whose result
            has ``latitude`` and ``longitude`` attributes (or is ``None``).
            Defaults to a geopy ``Nominatim`` instance.

    Returns:
        ``table_name`` with the two extra columns 'Latitude' and 'Longitude'.
    """
    if geolocator is None:
        # Built once, not per row. Nominatim requires an application-specific
        # user agent; geopy 2.x refuses to run with the default one.
        geolocator = Nominatim(user_agent='hennepin_addr_to_coord')

    # Bounding box of Hennepin County as (lower, upper); results outside it
    # are reported instead of stored.
    lat_bounds = (44.608197, 45.393850)
    lon_bounds = (-94.080839, -93.016710)

    # Appended to every query to steer the geocoder towards the right region.
    state = 'Minnesota'
    country = 'USA'

    latitudes = []
    longitudes = []
    for address, place in zip(table_name[address_col], table_name[name_col]):
        name_query = ','.join([str(place), state, country])
        result = _geocode_in_bounds(geolocator, name_query, lat_bounds, lon_bounds)
        if isinstance(result, str):
            # Name lookup failed or landed outside the county: fall back to
            # the street address and keep whatever that lookup reports.
            add_query = ','.join([str(address), state, country])
            result = _geocode_in_bounds(geolocator, add_query, lat_bounds, lon_bounds)

        if isinstance(result, str):
            latitudes.append(result)
            longitudes.append(result)
        else:
            latitudes.append(result[0])
            longitudes.append(result[1])

    table_name['Latitude'] = latitudes
    table_name['Longitude'] = longitudes

    num_out_of_range = latitudes.count('Out of range')
    num_cannot_find = latitudes.count('Cannot find')
    total_rows = len(latitudes)
    success_convert = total_rows - num_cannot_find - num_out_of_range

    def percentage(count):
        # An empty table has no meaningful ratio; report 0.0 instead of
        # dividing by zero.
        return round(count / total_rows * 100, 1) if total_rows else 0.0

    print('--------------------' + name + '--------------------')
    print('Successfully get ' + str(success_convert) + ' location coordinates')
    print('There are ' + str(num_out_of_range) + ' address out of range in Hennepin county with ' + str(percentage(num_out_of_range)) + ' percentage')
    print('There are ' + str(num_cannot_find) + ' address cannot find with ' + str(percentage(num_cannot_find)) + ' percentage')

    return table_name

In [140]:
# Here we can see that some results are out of bounds and that the package cannot find a result in some cases.
# Because there are only a few such cases, we can manually look up their coordinates in Google Maps.

# Geocode each Hennepin table, passing that table's own address column and
# location-name column; each call prints a summary and returns the table.
hospital = addr_to_coord(
    name='hospital',
    table_name=hospital,
    address_col='ADDRESS',
    name_col='NAME',
)
food_shelves = addr_to_coord(
    name='food_shelves',
    table_name=food_shelves,
    address_col='AddressLine1',
    name_col='LocationName',
)
nursing = addr_to_coord(
    name='nursing',
    table_name=nursing,
    address_col='Match_addr',
    name_col='NAME',
)

--------------------hospital--------------------
Successfully get 14 location coordinates
There are 0 address out of range in Hennepin county with 0.0 percentage
There are 0 address cannot find with 0.0 percentage
--------------------food_shelves--------------------
Successfully get 44 location coordinates
There are 4 address out of range in Hennepin county with 8.2 percentage
There are 1 address cannot find with 2.0 percentage
--------------------nursing--------------------
Successfully get 42 location coordinates
There are 1 address out of range in Hennepin county with 1.9 percentage
There are 9 address cannot find with 17.3 percentage
