In [1]:
import json
import requests
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import time
from datetime import timedelta,datetime,date

In [2]:
from config import g_web

In [3]:
#state listing
states= ["AL", "AK", "AZ", "AR", "CA", "CO", "CT", "DC", "DE", "FL", "GA",
         "HI", "ID", "IL", "IN", "IA", "KS", "KY", "LA", "ME", "MD",
         "MA", "MI", "MN", "MS", "MO", "MT", "NE", "NV", "NH", "NJ",
         "NM", "NY", "NC", "ND", "OH", "OK", "OR", "PA", "RI", "SC",
         "SD", "TN", "TX", "UT", "VT", "VA", "WA", "WV", "WI", "WY"]
region_id = "US"
#group search term lists
winery_l=["winery","vineyard","wine+spirits","wine+garden"]
distillery_l=["distillery","distill+spirit","distiller"]
brewery_l=["brewery","brew+pub","taphouse","beer+garden"]

#combined lists
term_search= winery_l+distillery_l+brewery_l


In [4]:
name_data=[]
lon_data=[]
lat_data=[]
place_id=[]
json_urls=[]
rating=[]
state_abr=[]
place_search=[]
est_data=[]
searched=[]
time_sec=[]
# set up a parameters dictionary

# base url
base_url = "https://maps.googleapis.com/maps/api/place/textsearch/json?"
counter=0
for term in term_search:
    start_time=time.time()
    print("Query terms: "+term)
    for state in states:
        qry=str(term+"+in+"+state)
        params = {"key": g_web,"query": qry,"region":region_id}
        response = requests.get(base_url, params=params)
        place_info=response.json()
        for res in place_info["results"]:
            name_data.append(res["name"])
            lon_data.append(res["geometry"]["location"]["lng"])
            lat_data.append(res["geometry"]["location"]["lat"])
            place_id.append(res["place_id"])
            state_abr.append(state)
            place_search.append(term)
            est_data.append(term)
            json_urls.append(response.url)
        #time.sleep(1.5)
    end_time=round(time.time()-start_time,3)
    counter=counter+1
    print("        API DATA RETRIEVAL COMPLETE for search term: %s. (elapsed time: %s seconds)" %((term), (end_time)))
    print("        Total of %s term(s) completed out of %s" %(counter, len(term_search)))
    buffer="-"*counter
    print("        Percentage complete: {0:.1%}".format(round(counter/len(term_search),3)))
    print("        "+buffer)
    searched.append(term)
    time_sec.append(end_time)

Query terms: winery
        API DATA RETRIEVAL COMPLETE for search term: winery. (elapsed time: 41.997 seconds)
        Total of 1 term(s) completed out of 11
        Percentage complete: 9.1%
        -
Query terms: vineyard
        API DATA RETRIEVAL COMPLETE for search term: vineyard. (elapsed time: 40.162 seconds)
        Total of 2 term(s) completed out of 11
        Percentage complete: 18.2%
        --
Query terms: wine+spirits
        API DATA RETRIEVAL COMPLETE for search term: wine+spirits. (elapsed time: 47.543 seconds)
        Total of 3 term(s) completed out of 11
        Percentage complete: 27.3%
        ---
Query terms: wine+garden
        API DATA RETRIEVAL COMPLETE for search term: wine+garden. (elapsed time: 34.595 seconds)
        Total of 4 term(s) completed out of 11
        Percentage complete: 36.4%
        ----
Query terms: distillery
        API DATA RETRIEVAL COMPLETE for search term: distillery. (elapsed time: 34.903 seconds)
        Total of 5 term(s) comple

In [5]:
print("Total number of records retrieved: {:,.0f}".format(len(place_id)))

Total number of records retrieved: 8,636


In [7]:
data_output=pd.DataFrame(np.column_stack([place_search,name_data,state_abr,lon_data,lat_data,place_id,est_data,json_urls]),
                       columns=["query","name","state","lon","lat","place_id","est","json_url"])


In [8]:
performance_df=pd.DataFrame(np.column_stack([searched,time_sec]),columns=["query","time_in_sec"])

In [9]:
data_output=data_output.drop_duplicates(subset="place_id", keep="last", inplace=False)
print("Total number of unique records retrieved: {:,.0f}".format(len(data_output)))

Total number of unique records retrieved: 5,743


In [10]:
start_time=time.time()
for i in range(data_output.est.count()):
    for wl in winery_l:
        data_output.est.i= data_output.est.replace(
        to_replace=wl,
        value="winery",
        inplace=True
        )
    for bl in brewery_l: 
        data_output.est.i= data_output.est.replace(
        to_replace=bl,
        value="brewery",
        inplace=True
        )
    for dl in distillery_l:
        data_output.est.i= data_output.est.replace(
        to_replace=dl,
        value="distillery",
        inplace=True
        )
print("elapsed time: %s seconds)" %(round(time.time()-start_time,3)))        

elapsed time: 40.897 seconds)


In [11]:
data_output.set_index("place_id").to_csv("google_sample.csv")

In [12]:
performance_df.to_csv("performance_report.csv")