In [5]:
import json
import requests
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import time
from datetime import timedelta,datetime,date

In [2]:
from config import G_WEB

In [3]:
#STATE listing
STATES = ["AL", "AK", "AZ", "AR", "CA", "CO", "CT", "DC", "DE", "FL", "GA",
         "HI", "ID", "IL", "IN", "IA", "KS", "KY", "LA", "ME", "MD",
         "MA", "MI", "MN", "MS", "MO", "MT", "NE", "NV", "NH", "NJ",
         "NM", "NY", "NC", "ND", "OH", "OK", "OR", "PA", "RI", "SC",
         "SD", "TN", "TX", "UT", "VT", "VA", "WA", "WV", "WI", "WY"]
REGION_ID = "US"
#group search TERM lists
WINERY_L = ["winery","vineyard","wine+spirits","wine+garden"]
DISTILLERY_L = ["distillery","distill+spirit","distiller"]
BREWERY_L = ["brewery","brew+pub","taphouse","beer+garden"]

#combined lists
TERM_SEARCH = WINERY_L+DISTILLERY_L+BREWERY_L


In [4]:
NAME_DATA = []
LON_DATA = []
LAT_DATA = []
PLACE_ID = []
JSON_URLS = []
STATE_ABR = []
PLACE_SEARCH = []
EST_DATA = []
SEARCHED = []
TIME_SEC = []
# set up a parameters dictionary

# base url
BASE_URL = "https://maps.googleapis.com/maps/api/place/textsearch/json?"
COUNTER = 0
for TERM in TERM_SEARCH:
    START_TIME = time.time()
    print("Query term(s): "+TERM)
    for STATE in STATES:
        QRY = str(TERM + "+in+" + STATE)
        PARAMS = {"key": G_WEB,"query": QRY,"region":REGION_ID}
        RESPONSE = requests.get(BASE_URL, params = PARAMS)
        PLACE_INFO = RESPONSE.json()
        for res in PLACE_INFO["results"]:
            NAME_DATA.append(res["name"])
            LON_DATA.append(res["geometry"]["location"]["lng"])
            LAT_DATA.append(res["geometry"]["location"]["lat"])
            PLACE_ID.append(res["place_id"])
            STATE_ABR.append(STATE)
            PLACE_SEARCH.append(TERM)
            EST_DATA.append(TERM)
            JSON_URLS.append(RESPONSE.url)
        #time.sleep(1.5)
    END_TIME = round(time.time()-START_TIME,3)
    COUNTER = COUNTER + 1
    print("        API DATA RETRIEVAL COMPLETE for search term: %s. (elapsed time: %s seconds)" %((TERM), (END_TIME)))
    print("        Total of %s term(s) completed out of %s" %(COUNTER, len(TERM_SEARCH)))
    buffer="-"*COUNTER
    print("        Percentage complete: {0:.1%}".format(round(COUNTER/len(TERM_SEARCH),3)))
    print("        "+buffer)
    SEARCHED.append(TERM)
    TIME_SEC.append(END_TIME)

Query term(s): winery
        API DATA RETRIEVAL COMPLETE for search term: winery. (elapsed time: 40.672 seconds)
        Total of 1 term(s) completed out of 11
        Percentage complete: 9.1%
        -
Query term(s): vineyard
        API DATA RETRIEVAL COMPLETE for search term: vineyard. (elapsed time: 42.716 seconds)
        Total of 2 term(s) completed out of 11
        Percentage complete: 18.2%
        --
Query term(s): wine+spirits
        API DATA RETRIEVAL COMPLETE for search term: wine+spirits. (elapsed time: 41.092 seconds)
        Total of 3 term(s) completed out of 11
        Percentage complete: 27.3%
        ---
Query term(s): wine+garden
        API DATA RETRIEVAL COMPLETE for search term: wine+garden. (elapsed time: 35.918 seconds)
        Total of 4 term(s) completed out of 11
        Percentage complete: 36.4%
        ----
Query term(s): distillery
        API DATA RETRIEVAL COMPLETE for search term: distillery. (elapsed time: 33.575 seconds)
        Total of 5 term

In [6]:
print("Total number of records retrieved: {:,.0f}".format(len(PLACE_ID)))

Total number of records retrieved: 8,729


In [7]:
DATA_OUTPUT = pd.DataFrame(np.column_stack([PLACE_SEARCH,NAME_DATA,STATE_ABR,LON_DATA,LAT_DATA,PLACE_ID,EST_DATA,JSON_URLS]),
                       columns = ["query","name","state","lon","lat","place_id","est","json_url"])


In [8]:
PERF_DF = pd.DataFrame(np.column_stack([SEARCHED,TIME_SEC]),columns = ["query","time_in_sec"])

In [9]:
DATA_OUTPUT = DATA_OUTPUT.drop_duplicates(subset="place_id", keep = "last", inplace = False)
print("Total number of unique records retrieved: {:,.0f}".format(len(DATA_OUTPUT)))

Total number of unique records retrieved: 5,726


In [10]:
START_TIME = time.time()
for i in range(DATA_OUTPUT.est.count()):
    for WL in WINERY_L:
        DATA_OUTPUT.est.i = DATA_OUTPUT.est.replace(
        to_replace = WL,
        value = "winery",
        inplace = True
        )
    for BL in BREWERY_L: 
        DATA_OUTPUT.est.i = DATA_OUTPUT.est.replace(
        to_replace = BL,
        value = "brewery",
        inplace = True
        )
    for DL in DISTILLERY_L:
        DATA_OUTPUT.est.i = DATA_OUTPUT.est.replace(
        to_replace = DL,
        value = "distillery",
        inplace = True
        )
print("elapsed time: %s seconds)" %(round(time.time()-START_TIME,3)))        

elapsed time: 26.353 seconds)


In [11]:
DATA_OUTPUT.set_index("place_id").to_csv("google_sample.csv")

In [12]:
PERF_DF.to_csv("performance_report.csv")