# Auto-populate SNAP Map Data with Google Places API results

Intention: simplify volunteer workload to checking rather than needing to enter information manually if already available on Google Maps.

In [1]:
import googlemaps

In [2]:
API_KEY = 'AIzaSyBsnyJeSZEoK3bvSq9M8jsmEsWJ8z9uV9A'

In [3]:
gmaps = googlemaps.Client(key = API_KEY)

In [99]:
# Improves search result success by removing words at the end of the site name that contain digits.
# Looking at the data, 7-Eleven produces inconsistencies so we add a special case to handle them.
import regex 

def trim_suffix(site_name):
    if not site_name:
        return site_name
    clean_name = regex.sub(pattern = "\W", repl = " ", string = site_name.lower())
    bits = clean_name.split(" ")
    while not str.isalpha(bits[-1]) and "eleven" not in bits[-1]:
        del bits[-1]
    return " ".join(bits)


In [61]:
# Returns a place id for closest match based on input cols.
# Returns none if there isn't a match.

def get_place_id(site_name, address, city, lon, lat):
    search_string = " ".join([trim_suffix(site_name), address, city])
    coords = "".join(["point:", str(lon), ",", str(lat)])

    candidates = gmaps.find_place(
        input = search_string,
        input_type = "textquery",
        location_bias=coords,
    ).get('candidates')

    if not candidates:
        return None
    else: 
        return candidates[0].get('place_id')

In [6]:
# Retrieve hours given a valid place id.

def get_hours(place_id):
    place_details = gmaps.place(
        place_id = place_id,
        fields = ['opening_hours']
    ).get('result')
    if not place_details:
        return None
    hours_list = place_details.get('opening_hours').get('weekday_text')
    return '\n'.join(hours_list)

In [65]:
from pprint import pprint
# Given input, tries to retrieve opening hours listed on Google Maps.
def lookup_hours(row):
    site_name_col = row["site_name"]
    address_col = row["address"]
    city_col = row["city"] 
    long_col = row["long"]
    lat_col = row["lat"]
    place_id = get_place_id(site_name_col, address_col, city_col, long_col, lat_col)
    if not place_id:
        return "No place found"
    hours = get_hours(place_id)
    if not hours:
        return "No hours listed"
    return hours

## Trial Run

I've copied the Santa Clara County stores csv into `snap_input.csv`. Let's now apply our function on all rows to get their hours!

In [50]:
import pandas as pd

In [51]:
df = pd.read_csv("snap_input.csv")

df.head(5)

Unnamed: 0,site_name,address,city,long,lat,county
0,Dollar Tree 5522,2222 Business Cir,San Jose,-121.93281,37.323013,Santa Clara
1,"Joya Supermarket, Llc.",2512 California St,Mountain View,-122.10679,37.403904,Santa Clara
2,Safeway 2887,150 E El Camino Real,Sunnyvale,-122.03108,37.366657,Santa Clara
3,Morgans Hill Market,16935 Monterey St,Morgan Hill,-121.64956,37.124382,Santa Clara
4,Bakery/Panaderia La Mejor,1239 E Julian St,San Jose,-121.87022,37.351727,Santa Clara


In [88]:
df['hours'] = df.apply(func = lookup_hours, axis = 1)
df.head(5)

IndexError: ('list index out of range', 'occurred at index 169')

In [89]:
df.iloc[169]

site_name    CVS/pharmacy 422
address         4110 N 1st St
city                 San Jose
long                 -121.953
lat                   37.4147
county            Santa Clara
Name: 169, dtype: object

In [None]:
df.to_csv("snap_output.csv")

## Tests:

Check all cases when:

1. We find a place and it has listed hours (test: ChIJPwKK9TzLj4ARTHgmrIjpaOs)
2. We find a place and it does not have listed hours (test: ChIJs0PFKPC2j4ARQa9ArwHElcM)
3. We do not find a place

Check that it pulls correctly from Google Sheets
* for now, manually downloading and reuploading

Check that it writes correctly to Google Sheets
* for now, manually downloading and reuploading

In [9]:
# Test variables

LISTED_SITE_NAME = "Dollar Tree 5522"
LISTED_ADDRESS = "2222 Business Cir"
LISTED_CITY = "San Jose"
LISTED_LONG = "-121.93281"
LISTED_LAT = "37.323013"

UNLISTED_SITE_NAME = "Madera Villa"
UNLISTED_ADDRESS = "1052 W Iowa Ave"
UNLISTED_CITY = "Sunnyvale"
UNLISTED_LONG = "-122.0545358"
UNLISTED_LAT = "37.3735964"

# Test spreadsheet



In [10]:
lookup_hours(LISTED_SITE_NAME, LISTED_ADDRESS, LISTED_CITY, LISTED_LONG, LISTED_LAT)

'Monday: 8:00 AM – 8:00 PM\nTuesday: 8:00 AM – 8:00 PM\nWednesday: 8:00 AM – 8:00 PM\nThursday: 8:00 AM – 8:00 PM\nFriday: 8:00 AM – 8:00 PM\nSaturday: 8:00 AM – 8:00 PM\nSunday: 8:00 AM – 8:00 PM'

In [11]:
lookup_hours(UNLISTED_SITE_NAME, UNLISTED_ADDRESS, UNLISTED_CITY, UNLISTED_LONG, UNLISTED_LAT)

'No hours listed'

In [12]:
lookup_hours("", "", "", "", "")

'No place found'