## Importing Libraries & Setting API

In [1]:
import googlemaps
import time
from datetime import datetime
import pandas as pd
import regex as re

In [2]:
## API key input
gmaps = googlemaps.Client(key='KEY HERE')

## Defining Functions to Identify Places

**Identifies places** within a **given radius** of the latititude and longitude of a **given address** that match the name in a **given list of places.**

In [3]:
## Creates dataframe for all places that match the name of an input LIST with a given radius of a given lat/lng
def find_places_list(lat_long_string, radius, places_list, label):
    df = pd.DataFrame()
    for place in places_list:
        print(f"Finding {place} locations...")
        df = find_places(df, place, lat_long_string, radius, label)
    return df

In [4]:
## Finds all the places that match a SINGLE given name within a given radius of a given lat/lng
def find_places(df, place, lat_long_string, radius, label):
    data = pull_json(place, lat_long_string, radius, page_token=None) # Gets JSON data from Google Places API
    
    df = place_df(df, data, place, label) # Adds to dataframe
    page_token = data.get('next_page_token', None) # Sets page_token if there were more than 20 places
    time.sleep(2) # Rate limiting to keep API happy
    
    # Iterates over pages for a given location
    while page_token != None: 
        data = pull_json(place, lat_long_string, radius, page_token)
        df = place_df(df, data, place, label)
        page_token = data.get('next_page_token', None)
        time.sleep(2)
    return df

In [5]:
## Pulls json from Google Places API (limited to 20) for given place, lat/lng, radius
def pull_json(place, lat_long_string, radius, page_token=None):
    data = gmaps.places(query=place,
                     location=lat_long_string,
                     radius=radius,
                     page_token=page_token,
                     type=['restaurant','cafe', 'convenience_store', 'food', 'supermarket'])
    return data

In [6]:
## Adds places data to a dataframe
def place_df(df, data, place, label):
    for r in data['results']:
        index = df.shape[0]
        df.loc[index, 'business_name'] = r['name']
        df.loc[index, 'target_name'] = place
        df.loc[index, 'label'] = label
        df.loc[index, 'business_types'] = ", ".join(r['types'])
        df.loc[index, 'google_place_id'] = r['place_id']
        try:
            df.loc[index, 'rating'] = r['rating']
            df.loc[index, 'user_ratings_total'] = r['user_ratings_total']
        except:
            df.loc[index, 'rating'] = 'no_rating'
            df.loc[index, 'user_ratings_total'] = 'no_rating'
        df.loc[index, 'end_address'] = r['formatted_address']
        df.loc[index, 'latitude'] = r['geometry']['location']['lat']
        df.loc[index, 'longitude'] = r['geometry']['location']['lng']
        df.loc[index, 'pull_date'] = datetime.now().strftime('%Y-%m-%d')
    return df

## Running Fast Food Data Collection
**Test Location:** 410 Delaware St, Denver CO

In [7]:
## User input
address = '49 South St, Natick MA'
state = (re.search('(A[KLRZ]|C[AOT]|D[CE]|FL|GA|HI|I[ADLN]|K[SY]|LA|M[ADEINOST]|N[CDEHJMVY]|O[HKR]'
                                '|P[AR]|RI|S[CD]|T[NX]|UT|V[AIT]|W[AIVY])', address).group(0))

## Google Geocode API to convert address into latitude and longitude coordinates
address_geo = gmaps.geocode(address)
address_lat_lng = [address_geo[0]['geometry']['location']['lat'], address_geo[0]['geometry']['location']['lng']]

## Setting search radius (as the crow flies) for locations
search_radius = 3000

In [8]:
### General fast food search
fast_food_restaurants = ['fast food']

In [9]:
### Specified list of fast food restaurants to look for
fast_food_restaurants = ["McDonald's",'Burger King',"Wendy's","Subway","Starbucks","Dunkin Donuts", 
    "Pizza Hut", "KFC", "Domino's", "Baskin-Robbins", "Hunt Brothers Pizza", "Taco Bell", "Hardee's",
    "Papa John's Pizza", "Dairy Queen", "Little Caesars", "Popeyes Louisiana Kitchen", "Jimmy John's",
    "Jack in the Box", "Chick-fil-A", "Chipotle", "Panda Express", "Denny's", "IHOP", "Carl's Jr.",
    "Five Guys", "Waffle House", "Krispy Kreme" "Long John Silver's", "Jersey Mike's Subs",
    "Good Times Burgers & Frozen Custard", "Culver's"]

In [10]:
## Running the search
fast_food_locations = find_places_list(address_lat_lng, search_radius, fast_food_restaurants, label="fast_food_rest")

Finding McDonald's locations...
Finding Burger King locations...
Finding Wendy's locations...
Finding Subway locations...
Finding Starbucks locations...
Finding Dunkin Donuts locations...
Finding Pizza Hut locations...
Finding KFC locations...
Finding Domino's locations...
Finding Baskin-Robbins locations...
Finding Hunt Brothers Pizza locations...
Finding Taco Bell locations...
Finding Hardee's locations...
Finding Papa John's Pizza locations...
Finding Dairy Queen locations...
Finding Little Caesars locations...
Finding Popeyes Louisiana Kitchen locations...
Finding Jimmy John's locations...
Finding Jack in the Box locations...
Finding Chick-fil-A locations...
Finding Chipotle locations...
Finding Panda Express locations...
Finding Denny's locations...
Finding IHOP locations...
Finding Carl's Jr. locations...
Finding Five Guys locations...
Finding Waffle House locations...
Finding Krispy KremeLong John Silver's locations...
Finding Jersey Mike's Subs locations...
Finding Good Times B

## Running Convenience Store Data Collection

In [11]:
# List of convenience stores to look for
conv_store_list = ["7-eleven", "Kum & Go", "Casey’s General Store", "Cumberland Farms", "Express Mart",
    "Stripes Convenience", "Twice Daily", "Thorntons", "Circle K"]

In [12]:
## Running the search
conv_store_locations = find_places_list(address_lat_lng, search_radius, conv_store_list, label="conv_store")

Finding 7-eleven locations...
Finding Kum & Go locations...
Finding Casey’s General Store locations...
Finding Cumberland Farms locations...
Finding Express Mart locations...
Finding Stripes Convenience locations...
Finding Twice Daily locations...
Finding Thorntons locations...
Finding Circle K locations...


## Running Grocery Store Data Collection

In [13]:
# List of grocery stores to look for
groc_store = ["Trader Joe's", "Safeway", "Natural Grocers", "King Soopers", "Whole Foods", "Hannaford",
    "Stop & Shop", "Sprouts Farmers Market", "Shaw's", "Price Chopper", "Wegmans", "Pete’s Fresh Market",
    "Kroger", "Albertsons", "Publix", "Bojangles' Famous Chicken 'n Biscuit", "Arby's", "Krystal",
    "Mother Earth Natural Foods", "The Fresh Market"]

In [14]:
## Running the search
groc_store_locations = find_places_list(address_lat_lng, search_radius, groc_store, label="groc_store")

Finding Trader Joe's locations...
Finding Safeway locations...
Finding Natural Grocers locations...
Finding King Soopers locations...
Finding Whole Foods locations...
Finding Hannaford locations...
Finding Stop & Shop locations...
Finding Sprouts Farmers Market locations...
Finding Shaw's locations...
Finding Price Chopper locations...
Finding Wegmans locations...
Finding Pete’s Fresh Market locations...
Finding Kroger locations...
Finding Albertsons locations...
Finding Publix locations...
Finding Bojangles' Famous Chicken 'n Biscuit locations...
Finding Arby's locations...
Finding Krystal locations...
Finding Mother Earth Natural Foods locations...
Finding The Fresh Market locations...


## Combining Datasets

In [15]:
df = pd.concat([fast_food_locations, conv_store_locations, groc_store_locations])

## Calculating Distance from Starting Address

In [16]:
def google_distances(start_address, end_addresses, df):
    
    ## Filling in rows using enumerate as the index, so it's important to reset index
    df.reset_index(inplace=True, drop=True)
    
    ## Iterating through each of the addresses
    for i, end_address in enumerate(end_addresses):
        try:
            distance_dict = gmaps.distance_matrix(origins=start_address, destinations = end_address)
            df.loc[i, 'duration'] = distance_dict['rows'][0]['elements'][0]['duration']['value']
            df.loc[i,'distance'] = distance_dict['rows'][0]['elements'][0]['distance']['value']
            df.loc[i,'start_address'] = start_address.replace(",","").replace(" ","-")
        
        ## Redundancy to reduce API errors... probably useless
        except:
            try:
                print(f"Distance retrieval failed for {end_address}. Trying again...")
                distance_dict = gmaps.distance_matrix(origins=start_address, destinations = end_address)
                df.loc[i, 'duration'] = distance_dict['rows'][0]['elements'][0]['duration']['value']
                df.loc[i,'distance'] = distance_dict['rows'][0]['elements'][0]['distance']['value']
                df.loc[i,'start_address'] = start_address.replace(",","").replace(" ","-")
            
            ## Creating alert in dataframe if distance failed
            except:
                print(f"Distance retrieval failed for {end_address}...")
                df.loc[i, 'duration'] = "Failed"
                df.loc[i, 'distance'] = "Failed"
                df.loc[i,'start_address'] = start_address.replace(",","").replace(" ","-")
          
        ## Signposting progress
        if i % 100 == 0:
            print(f"Completed distances for {i} of {len(end_addresses)}")
        time.sleep(0.5)

    return df

In [17]:
df = google_distances(address, df['end_address'], df)

Completed distances for 0 of 1889
Completed distances for 100 of 1889
Completed distances for 200 of 1889
Completed distances for 300 of 1889
Completed distances for 400 of 1889
Completed distances for 500 of 1889
Completed distances for 600 of 1889
Completed distances for 700 of 1889
Completed distances for 800 of 1889
Completed distances for 900 of 1889
Completed distances for 1000 of 1889
Completed distances for 1100 of 1889
Completed distances for 1200 of 1889
Completed distances for 1300 of 1889
Completed distances for 1400 of 1889
Completed distances for 1500 of 1889
Completed distances for 1600 of 1889
Completed distances for 1700 of 1889
Completed distances for 1800 of 1889


## Exporting Data

In [18]:
## Exporting data to CSV
csv_name = f"../data/exports/granular/{datetime.now().strftime('%Y-%m-%d-%H%M%S')}_{state}.csv"
df.to_csv(csv_name, index=False)