# Auto-populate SNAP Map Data with Google Places API results

Intention: reduce volunteer workload. When information is already available on Google Maps, volunteers only need to verify it rather than enter it manually.

In [1]:
import os
import re

import googlemaps
import pandas as pd

# Read the Google Maps API key from the environment so the secret is never
# stored in the notebook. Set it before starting Jupyter, e.g.
#   export GOOGLE_MAPS_API_KEY="..."
# NOTE(review): the key that used to be hardcoded here has been exposed and
# should be revoked in the Google Cloud console.
API_KEY = os.environ.get("GOOGLE_MAPS_API_KEY")
if not API_KEY:
    raise RuntimeError(
        "Set the GOOGLE_MAPS_API_KEY environment variable before running this notebook."
    )
gmaps = googlemaps.Client(key = API_KEY)

# sheet_name = None loads every sheet of the workbook; the sheets are then
# stacked into a single frame with a fresh 0..n-1 index.
df = pd.concat(pd.read_excel('snap_raw_all.xlsx', sheet_name = None), ignore_index = True)
df_sample = df.head(10)
df_sample

Unnamed: 0.1,Unnamed: 0,X,Y,Store_Name,Address,Address_Line__2,City,State,Zip5,Zip4,County,Longitude,Latitude,ObjectId
0,1,-122.11709,37.704102,7-Eleven 14184F,1711 159th Ave,,San Leandro,CA,94578,2076.0,ALAMEDA,-122.11709,37.704102,1033
1,2,-122.28925,37.829037,Target Store T-2767,1555 40th St,,Emeryville,CA,94608,3515.0,ALAMEDA,-122.28925,37.829037,2218
2,3,-121.8969,37.657982,Chevron 2219,1875 Valley Ave,,Pleasanton,CA,94566,3562.0,ALAMEDA,-121.8969,37.657982,2686
3,4,-122.05122,37.695671,Safeway 3010,4015 E Castro Valley Blvd,,Castro Valley,CA,94552,4820.0,ALAMEDA,-122.05122,37.695671,2756
4,5,-122.29823,37.83783,Powell Shell 102,1800 1/2 Powell St,,Emeryville,CA,94608,1849.0,ALAMEDA,-122.29823,37.83783,5045
5,6,-121.9305,37.708691,Sprouts Farmers Market 221,7153 Amador Plaza Rd,,Dublin,CA,94568,2317.0,ALAMEDA,-121.9305,37.708691,5547
6,7,-122.00312,37.591656,7 Hills Food & Liquor,101 Appian Way,,Union City,CA,94587,3703.0,ALAMEDA,-122.00312,37.591656,5713
7,8,-122.06482,37.683228,Middle East Trade Center N/A,1952 B St,,Hayward,CA,94541,3107.0,ALAMEDA,-122.06482,37.683228,5761
8,9,-122.02735,37.540852,HERAT MARKET,5694 Thornton Ave,,Newark,CA,94560,3825.0,ALAMEDA,-122.02735,37.540852,482
9,10,-122.16817,37.756672,Arrwa One Stop Market,8607 Bancroft Ave,,Oakland,CA,94605,3915.0,ALAMEDA,-122.16817,37.756672,502


In [2]:
# Improves search result success by removing words at the end of the site name that contain digits.
# Looking at the data, 7-Eleven produces inconsistencies so we add a special case to handle them.
import regex 

def trim_suffix(site_name):
    """Normalize a store name and drop trailing store-number style words.

    The name is lower-cased, every non-word character is replaced by a space,
    and words are then removed from the end until the last word is purely
    alphabetic or contains "eleven" (the 7-Eleven special case).

    Args:
        site_name: Raw store name, e.g. "Safeway 3010".

    Returns:
        The trimmed, lower-cased name (e.g. "safeway"). Missing input (empty
        string, None, or any non-string such as NaN) is returned unchanged.
        If no word survives the trimming (e.g. the name is just "99"), the
        whole cleaned name is returned so the caller still has something to
        search for.
    """
    if not isinstance(site_name, str) or not site_name:
        return site_name

    # Raw string: "\W" in a plain string literal is an invalid escape sequence.
    clean_name = re.sub(r"\W", " ", site_name.lower())
    bits = clean_name.split(" ")
    # Checking `bits` first stops an all-numeric / all-punctuation name from
    # emptying the list and raising IndexError on bits[-1].
    while bits and not bits[-1].isalpha() and "eleven" not in bits[-1]:
        del bits[-1]
    if not bits:
        return clean_name.strip()
    return " ".join(bits)


In [3]:
# Returns a place id for closest match based on input cols.
# Returns none if there isn't a match.

def get_place_id(site_name, address, city, lon = "", lat = ""):
    """Look up the Google Maps place ID that best matches a store.

    Args:
        site_name: Store name; trailing store numbers are stripped with
            trim_suffix before searching.
        address: Street address of the store.
        city: City of the store.
        lon: Optional longitude used to bias the search towards the store.
        lat: Optional latitude used to bias the search towards the store.

    Returns:
        The place_id of the first candidate returned by Find Place, or None
        if there is nothing to search for or no candidate is returned.
    """
    # Skip missing parts (None, NaN, "") so one empty column cannot break the join.
    name = trim_suffix(site_name) if isinstance(site_name, str) else ""
    search_string = " ".join(
        part for part in (name, address, city) if isinstance(part, str) and part
    )
    if not search_string:
        return None

    # The Places API expects "point:lat,lng" (latitude first). Only send the
    # bias when both coordinates are real numbers: the "" defaults would
    # otherwise produce the invalid value "point:,".
    try:
        has_coords = float(lat) == float(lat) and float(lon) == float(lon)  # NaN != NaN
    except (TypeError, ValueError):
        has_coords = False
    location_bias = "point:{},{}".format(lat, lon) if has_coords else None

    candidates = gmaps.find_place(
        input = search_string,
        input_type = "textquery",
        location_bias = location_bias,
    ).get('candidates')

    if not candidates:
        return None
    return candidates[0].get('place_id')

### Get attributes

All attributes are fetched in a single Place Details request per place. We want:

* Lat and long coords
* Hours
* Zipcode
* Phone number
* Website
* Status

In [4]:
def get_attributes(place_id):
    """Fetch the attributes we care about for one place in a single request.

    Args:
        place_id: Google Maps place ID, e.g. as returned by get_place_id.

    Returns:
        None if the Place Details response has no "result"; otherwise a tuple
        (lat, lng, hours, zipcode, phone, website, status). Any attribute that
        is absent from the response is replaced by a "No ... listed"
        placeholder string.
    """
    place_details = gmaps.place(
        place_id = place_id,
        fields = [
            "geometry",
            "opening_hours",
            # Requested as "address_component"; the response key read below is
            # "address_components".
            "address_component",
            "formatted_phone_number",
            "website",
            "business_status"
        ]
    ).get("result")
    if not place_details:
        return None

    # lat / long coords
    # `or {}` keeps a missing geometry/location from raising on None.get();
    # `is None` (rather than truthiness) keeps a legitimate 0.0 coordinate.
    coords = (place_details.get('geometry') or {}).get('location') or {}
    lat = coords.get("lat")
    lng = coords.get("lng")
    if lat is None:
        lat = "No lat listed"
    if lng is None:
        lng = "No lng listed"

    # hours
    weekday_text = (place_details.get('opening_hours') or {}).get('weekday_text')
    hours = '\n'.join(weekday_text) if weekday_text else "No hours listed"

    # zipcode: the address component whose types include 'postal_code'
    address_components = place_details.get('address_components') or []
    zipcode_details = next(
        (item for item in address_components if 'postal_code' in item.get('types', [])),
        None)
    zipcode = zipcode_details.get('long_name') if zipcode_details else None
    if not zipcode:
        zipcode = "No zipcode listed"

    # phone number
    phone = place_details.get("formatted_phone_number") or "No phone listed"

    # website
    website = place_details.get("website") or "No website listed"

    # status
    status = place_details.get("business_status") or "No status listed"

    return lat, lng, hours, zipcode, phone, website, status

In [21]:
def lookup_attributes(row):
    """Look up the Google Maps attributes for one row of the SNAP data.

    Args:
        row: Mapping-like row (e.g. from DataFrame.apply with axis = 1) with
            the keys "Store_Name", "Address", "City", "Longitude" and
            "Latitude".

    Returns:
        A 7-tuple (lat, lng, hours, zipcode, phone, website, status). Every
        element is "No place found" when the store cannot be matched or no
        details come back for the matched place.
    """
    not_found = ("No place found",) * 7

    place_id = get_place_id(
        row["Store_Name"],
        row["Address"],
        row["City"],
        row["Longitude"],
        row["Latitude"]
    )
    if not place_id:
        return not_found

    # get_attributes returns None when the details response is empty;
    # unpacking that directly would raise TypeError and abort the whole apply.
    attributes = get_attributes(place_id)
    if attributes is None:
        return not_found
    return attributes

## Test on some rows

In [6]:
# Row used to try out the lookup functions by hand before running the whole dataset.
row_index = 431
df.iloc[row_index]

Unnamed: 0                      432
X                          -122.238
Y                           37.7839
Store_Name         12th Street Mart
Address              2200 E 12th St
Address_Line__2                 NaN
City                        Oakland
State                            CA
Zip5                          94606
Zip4                           5010
County                      ALAMEDA
Longitude                  -122.238
Latitude                    37.7839
ObjectId                     121714
Name: 431, dtype: object

In [7]:
# Resolve the place ID for the test row, passing its columns in the order
# get_place_id expects (name, address, city, longitude, latitude).
test_place_id = get_place_id(
    *(
        df.loc[row_index, column]
        for column in ("Store_Name", "Address", "City", "Longitude", "Latitude")
    )
)

In [8]:
# Fetch the raw Place Details for the test place, requesting the same fields
# as get_attributes, so the response can be inspected by hand.
test_place_details = gmaps.place(
            place_id = test_place_id,
            fields = [
                "geometry", 
                "opening_hours", 
                "address_component", 
                "formatted_phone_number", 
                "website", 
                "business_status"
            ]
        ).get("result")

In [9]:
# Inspect the raw address components; the zipcode is the entry whose types include 'postal_code'.
test_place_details.get("address_components")

[{'long_name': '2200', 'short_name': '2200', 'types': ['street_number']},
 {'long_name': 'East 12th Street',
  'short_name': 'E 12th St',
  'types': ['route']},
 {'long_name': 'East Peralta',
  'short_name': 'East Peralta',
  'types': ['neighborhood', 'political']},
 {'long_name': 'Oakland',
  'short_name': 'Oakland',
  'types': ['locality', 'political']},
 {'long_name': 'Alameda County',
  'short_name': 'Alameda County',
  'types': ['administrative_area_level_2', 'political']},
 {'long_name': 'California',
  'short_name': 'CA',
  'types': ['administrative_area_level_1', 'political']},
 {'long_name': 'United States',
  'short_name': 'US',
  'types': ['country', 'political']},
 {'long_name': '94606', 'short_name': '94606', 'types': ['postal_code']},
 {'long_name': '5010', 'short_name': '5010', 'types': ['postal_code_suffix']}]

In [10]:
# Run the full attribute extraction on the test place.
get_attributes(test_place_id)

(37.78376240000001,
 -122.2382127,
 'No hours listed',
 '94606',
 '(510) 535-1672',
 'http://www.valero.com/',
 'OPERATIONAL')

## Apply to the entire dataset

In [23]:
# Run the lookup for every row, then transpose the per-row 7-tuples into
# seven per-attribute tuples and store each one as a new column.
(
    df["lat_gmaps"],
    df["lng_gmaps"],
    df["hours"],
    df["zip_gmaps"],
    df["phone"],
    df["website"],
    df["status"],
) = zip(*df.apply(lookup_attributes, axis = 1))

df.head(5)

Unnamed: 0.1,Unnamed: 0,X,Y,Store_Name,Address,Address_Line__2,City,State,Zip5,Zip4,...,Longitude,Latitude,ObjectId,lat_gmaps,lng_gmaps,hours,zip_gmaps,phone,website,status
0,1,-122.11709,37.704102,7-Eleven 14184F,1711 159th Ave,,San Leandro,CA,94578,2076.0,...,-122.11709,37.704102,1033,37.7046,-122.117,Monday: Open 24 hours\nTuesday: Open 24 hours\...,94578,(510) 317-8741,https://www.7-eleven.com/locations/ca/san-lean...,OPERATIONAL
1,2,-122.28925,37.829037,Target Store T-2767,1555 40th St,,Emeryville,CA,94608,3515.0,...,-122.28925,37.829037,2218,37.8285,-122.29,Monday: 8:00 AM – 9:00 PM\nTuesday: 8:00 AM – ...,94608,(510) 285-0559,https://www.target.com/sl/oakland-emeryville/2767,OPERATIONAL
2,3,-121.8969,37.657982,Chevron 2219,1875 Valley Ave,,Pleasanton,CA,94566,3562.0,...,-121.8969,37.657982,2686,37.6579,-121.896,Monday: Open 24 hours\nTuesday: Open 24 hours\...,94566,(925) 846-6130,https://www.chevronwithtechron.com/station/187...,OPERATIONAL
3,4,-122.05122,37.695671,Safeway 3010,4015 E Castro Valley Blvd,,Castro Valley,CA,94552,4820.0,...,-122.05122,37.695671,2756,37.6948,-122.05,Monday: 5:00 AM – 11:00 PM\nTuesday: 5:00 AM –...,94552,(510) 886-7351,https://local.safeway.com/safeway/ca/castro-va...,OPERATIONAL
4,5,-122.29823,37.83783,Powell Shell 102,1800 1/2 Powell St,,Emeryville,CA,94608,1849.0,...,-122.29823,37.83783,5045,37.8381,-122.298,Monday: Open 24 hours\nTuesday: Open 24 hours\...,94608,(510) 653-1800,https://find.shell.com/us/fuel/10007884-1800-1...,OPERATIONAL


Now let's write the results out to a CSV file to save them!

In [24]:
# Save the enriched table to a CSV file in the working directory.
# NOTE(review): to_csv writes the index as an unnamed first column by default. The input
# already has "Unnamed: 0" / "Unnamed: 0.1" columns, which presumably came from earlier
# round-trips like this one; consider index = False. TODO confirm nothing downstream needs it.
df.to_csv("snap_all_output.csv")