In [1]:
# import necessary modules
import logging
import os
import time

import pandas as pd
import requests

# magic command to display matplotlib plots inline within the ipython notebook
%matplotlib inline

In [2]:
# configure logging for our tool
LOG_PATH = 'logs/reverse_geocoder.log'

# FileHandler does not create missing directories, so make sure the log folder
# exists; otherwise this cell fails on a fresh checkout
os.makedirs(os.path.dirname(LOG_PATH), exist_ok=True)

log = logging.getLogger('reverse_geocoder')
log.setLevel(logging.INFO)

# getLogger returns the same logger object every time this cell is run, so
# drop handlers left over from earlier runs; without this each re-run adds one
# more handler and every record is written multiple times
for stale_handler in list(log.handlers):
    log.removeHandler(stale_handler)
    stale_handler.close()

# mode='w' starts a fresh log file on every run of this cell
lfh = logging.FileHandler(LOG_PATH, mode='w', encoding='utf-8')
lfh.setFormatter(logging.Formatter('%(levelname)s %(asctime)s %(message)s'))
log.addHandler(lfh)
log.info('process started')

In [3]:
# Location of the raw hotel export.
# NOTE(review): this is an absolute path on one machine; point it at your own
# copy of the file before running.
HOTEL_CSV_PATH = r"C:/Users/vva02/Desktop/Hotel-city-Rawdata.csv"

# HOTEL_COUNTRY_ID value to keep. Presumably the United States, judging by the
# US-named output file written at the end of the notebook -- TODO confirm.
COUNTRY_ID = 165

# the only columns needed for reverse geocoding
HOTEL_COLUMNS = ["HOTEL_ID", "HOTEL_CITY_NAME", "HOTEL_DISTRICT_NAME",
                 "HOTEL_ZIP_CODE", "HOTEL_LATITUDE", "HOTEL_LONGITUDE"]

df = pd.read_csv(HOTEL_CSV_PATH, encoding="cp1252")

# select rows and columns in one step and take an explicit copy, so that the
# column assignments below (and in later cells) write to an independent frame
# instead of a slice of the raw data (avoids SettingWithCopyWarning)
df = df.loc[df['HOTEL_COUNTRY_ID'] == COUNTRY_ID, HOTEL_COLUMNS].copy()

# coordinates that cannot be parsed as numbers become NaN instead of raising
df['HOTEL_LATITUDE'] = pd.to_numeric(df['HOTEL_LATITUDE'], errors='coerce')
df['HOTEL_LONGITUDE'] = pd.to_numeric(df['HOTEL_LONGITUDE'], errors='coerce')

  interactivity=interactivity, compiler=compiler, result=result)


In [4]:
# add one empty-string placeholder column per field we will fill in later
for new_column in ('geocode_data', 'neighborhood', 'city', 'state', 'postal_code', 'country'):
    df[new_column] = ''

df.head()

Unnamed: 0,HOTEL_ID,HOTEL_CITY_NAME,HOTEL_DISTRICT_NAME,HOTEL_ZIP_CODE,HOTEL_LATITUDE,HOTEL_LONGITUDE,geocode_data,neighborhood,city,state,postal_code,country
200,720981,Hoboken (New Jersey),New Jersey,07302-7587,40.714676,-74.03574,,,,,,
372,139996,Washington (District of Columbia),District of Columbia,20036-1204,38.91043,-77.042996,,,,,,
461,962387,Portland (North Dakota),North Dakota,97214-1305,45.522766,-122.656247,,,,,,
473,377803,Boston (Massachusetts),Massachusetts,2142,42.362168,-71.087316,,,,,,
528,62832,Reno (Nevada),Nevada,89501,39.529446,-119.814496,,,,,,


In [5]:

# function that handles the geocoding requests
def reverse_geocode(latlng, api_key=None, pause=0.1, timeout=10):
    """Reverse geocode one 'lat,lng' string with the Google Geocoding API.

    Parameters
    ----------
    latlng : str
        Coordinates formatted as 'latitude,longitude'.
    api_key : str, optional
        Google API key. When omitted, the GOOGLE_MAPS_API_KEY environment
        variable is used, so the key never has to be stored in the notebook.
    pause : float, optional
        Seconds to sleep before each request (simple rate limiting).
    timeout : float, optional
        Seconds to wait for the HTTP response before giving up.

    Returns
    -------
    dict or None
        The first entry of the response's 'results' list, or None when the
        coordinates are missing, the request fails, or there are no results.

    Raises
    ------
    ValueError
        If no API key is passed and GOOGLE_MAPS_API_KEY is not set.
    """
    if api_key is None:
        api_key = os.environ.get('GOOGLE_MAPS_API_KEY')
    if not api_key:
        raise ValueError('no Google API key: pass api_key or set the '
                         'GOOGLE_MAPS_API_KEY environment variable')

    # coordinates that failed numeric conversion are formatted as 'nan';
    # there is nothing to look up, so do not spend a request on them
    if 'nan' in str(latlng).lower():
        return None

    time.sleep(pause)
    url = 'https://maps.googleapis.com/maps/api/geocode/json'

    # log only the coordinates so the API key never ends up in the log file
    log.info('reverse geocoding %s', latlng)
    try:
        response = requests.get(url, params={'latlng': latlng, 'key': api_key},
                                timeout=timeout)
        data = response.json()
    except (requests.RequestException, ValueError) as error:
        # a single failed lookup should not abort the whole batch;
        # ValueError covers a response body that is not valid JSON
        log.warning('reverse geocoding %s failed: %s', latlng, error)
        return None

    if not isinstance(data, dict):
        return None

    results = data.get('results')
    if results:
        return results[0]

    # surface API-level problems (quota, denied key, ...) instead of hiding
    # them behind an empty result
    status = data.get('status')
    if status not in (None, 'OK', 'ZERO_RESULTS'):
        log.warning('reverse geocoding %s returned status %s: %s',
                    latlng, status, data.get('error_message', ''))
    return None


In [6]:
# build a "latitude,longitude" string for every hotel ...
coordinate_pairs = df.apply(
    lambda hotel: '{lat},{lng}'.format(lat=hotel['HOTEL_LATITUDE'],
                                       lng=hotel['HOTEL_LONGITUDE']),
    axis=1,
)
df['latlng'] = coordinate_pairs

# ... then send each one through the reverse geocoder and keep the raw result
df['geocode_data'] = coordinate_pairs.map(reverse_geocode)
df.head()

Unnamed: 0,HOTEL_ID,HOTEL_CITY_NAME,HOTEL_DISTRICT_NAME,HOTEL_ZIP_CODE,HOTEL_LATITUDE,HOTEL_LONGITUDE,geocode_data,neighborhood,city,state,postal_code,country,latlng
200,720981,Hoboken (New Jersey),New Jersey,07302-7587,40.714676,-74.03574,"{'address_components': [{'long_name': '70', 's...",,,,,,"40.714676000000004,-74.03574"
372,139996,Washington (District of Columbia),District of Columbia,20036-1204,38.91043,-77.042996,"{'address_components': [{'long_name': '1500', ...",,,,,,"38.91043,-77.042996"
461,962387,Portland (North Dakota),North Dakota,97214-1305,45.522766,-122.656247,"{'address_components': [{'long_name': '910', '...",,,,,,"45.522766,-122.656247"
473,377803,Boston (Massachusetts),Massachusetts,2142,42.362168,-71.087316,"{'address_components': [{'long_name': '350', '...",,,,,,"42.362168,-71.087316"
528,62832,Reno (Nevada),Nevada,89501,39.529446,-119.814496,"{'address_components': [{'long_name': '345', '...",,,,,,"39.529446,-119.81449599999999"


In [7]:
# parse neighborhood data from a google reverse-geocode result
def parse_neighborhood(geocode_data):
    """Return the long name of the first neighborhood-like address component.

    A component qualifies when its 'types' contain 'neighborhood',
    'sublocality_level_1' or 'sublocality_level_2'. Components are scanned in
    the order they appear. Returns None when geocode_data is None, has no
    'address_components' entry, or no component qualifies.
    """
    if geocode_data is None or 'address_components' not in geocode_data:
        return None

    accepted = ('neighborhood', 'sublocality_level_1', 'sublocality_level_2')
    for part in geocode_data['address_components']:
        part_types = part['types']
        if any(kind in part_types for kind in accepted):
            return part['long_name']
    return None

# identify the municipality in the json that google sent back
def parse_city(geocode_data):
    """Return the long name of the first city-like address component.

    A component qualifies when its 'types' contain 'locality', 'postal_town'
    or one of 'administrative_area_level_5' down to
    'administrative_area_level_2'. Components are scanned in the order they
    appear. Returns None when geocode_data is None, has no
    'address_components' entry, or no component qualifies.
    """
    city_like = (
        'locality',
        'postal_town',
        'administrative_area_level_5',
        'administrative_area_level_4',
        'administrative_area_level_3',
        'administrative_area_level_2',
    )
    if geocode_data is not None and 'address_components' in geocode_data:
        matches = (
            part['long_name']
            for part in geocode_data['address_components']
            if any(kind in part['types'] for kind in city_like)
        )
        return next(matches, None)
    return None

def parse_state(geocode_data):
    """Return the state-level name from a google reverse-geocode result.

    The component types are tried in a fixed order of preference:
    'administrative_area_level_1', then 'administrative_area_level_2', then
    'administrative_area_level_3', then 'locality'. The long name of the first
    component carrying the most-preferred available type is returned.

    The preference is applied by type rather than by the position of the
    component in the list, so a locality or county can never be returned in
    place of the state just because it is listed after it.

    Returns None when geocode_data is None, has no 'address_components' entry,
    or no component carries any of the types above.
    """
    if geocode_data is None or 'address_components' not in geocode_data:
        return None

    # most preferred first; the later entries are only fallbacks
    preferred_types = (
        'administrative_area_level_1',
        'administrative_area_level_2',
        'administrative_area_level_3',
        'locality',
    )
    components = geocode_data['address_components']
    for wanted in preferred_types:
        for component in components:
            if wanted in component['types']:
                return component['long_name']
    return None


def parse_country(geocode_data):
    """Return the long name of the first component typed 'country'.

    Returns None when geocode_data is None, has no 'address_components'
    entry, or no component has 'country' among its types.
    """
    usable = geocode_data is not None and 'address_components' in geocode_data
    parts = geocode_data['address_components'] if usable else []
    return next(
        (part['long_name'] for part in parts if 'country' in part['types']),
        None,
    )

def parse_postal_code(geocode_data):
    """Return the long name of the first component typed 'postal_code'.

    Returns None when geocode_data is None, has no 'address_components'
    entry, or no component has 'postal_code' among its types.
    """
    if geocode_data is None:
        return None
    if 'address_components' not in geocode_data:
        return None

    for part in geocode_data['address_components']:
        if 'postal_code' not in part['types']:
            continue
        return part['long_name']
    return None

In [8]:
# fill each address column by running its parser over the stored geocode result
field_parsers = (
    ('neighborhood', parse_neighborhood),
    ('city', parse_city),
    ('state', parse_state),
    ('country', parse_country),
    ('postal_code', parse_postal_code),
)
for field_name, field_parser in field_parsers:
    df[field_name] = df['geocode_data'].map(field_parser)

# report how many hotels were processed, then write the enriched table to disk
print(len(df))
df1 = df
df1.to_csv('US-Hotel-City-MapsAPI.csv')

94225
