In [1]:
# Standard Imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Additional Imports
import os, json, math, time
from yelpapi import YelpAPI
from tqdm.notebook import tqdm_notebook

In [2]:
# Load API Credentials
# The credentials file location is no longer tied to one user's machine:
#   1. if the YELP_API_CREDENTIALS environment variable is set, use that path;
#   2. otherwise fall back to ~/.secret/yelp_api.json in the current user's home.
# The file must be JSON containing an 'api-key' entry.
credentials_path = os.environ.get(
    'YELP_API_CREDENTIALS',
    os.path.join(os.path.expanduser('~'), '.secret', 'yelp_api.json'),
)
with open(credentials_path) as f:
    login = json.load(f)
# Instantiate YelpAPI Variable (timeout_s=5.0 is forwarded to the client)
yelp_api = YelpAPI(login['api-key'], timeout_s=5.0)

In [3]:
# Search parameters shared by every API call in this notebook.
# Define them once here, before the first request is made.
TERM = 'Pizza'                    # what to search for
LOCATION = 'Seattle, WA,98101'    # where to search

In [4]:
# Specifying JSON_FILE filename (can include a folder)
# The search term is interpolated into the filename so that changing TERM
# automatically starts a separate in-progress file instead of appending
# results for a new search to the old one.
JSON_FILE = f"Data/results_in_progress_{TERM}.json"
JSON_FILE

'Data/results_in_progress_Pizza.json'

In [5]:
## Make sure JSON_FILE is on disk before any results are read from it
file_exists = os.path.isfile(JSON_FILE)

if file_exists:
    ## Nothing to create - just tell the user
    print(f"[i] {JSON_FILE} already exists.")
else:
    ## Create the parent folder first (only when JSON_FILE has one)
    folder = os.path.dirname(JSON_FILE)
    if folder:
        os.makedirs(folder, exist_ok=True)

    ## Tell the user, then seed the file with an empty JSON list
    print(f"[i] {JSON_FILE} not found. Saving empty list to file.")
    with open(JSON_FILE, 'w') as f:
        json.dump([], f)

[i] Data/results_in_progress_Pizza.json not found. Saving empty list to file.


In [6]:
## Read back whatever has been saved so far
with open(JSON_FILE) as f:   # default mode is read-only text
    previous_results = json.load(f)

## The number of saved results is used later as the search offset
n_results = len(previous_results)
print(f'- {n_results} previous results found.')

- 0 previous results found.


In [7]:
# use our yelp_api variable's search_query method to perform our API call
# n_results (the count of results already saved in JSON_FILE) is passed as the
# search offset, so the request starts after the results we already have
results = yelp_api.search_query(location=LOCATION,
                                term=TERM,
                               offset=n_results)
# show the top-level keys of the response
results.keys()

dict_keys(['businesses', 'total', 'region'])

In [8]:
## How many results total?
# read the 'total' entry of the search response
total_results = results['total']
total_results

351

In [9]:
## How many did we get the details for?
# length of the 'businesses' list in this one response; it is used below as
# the page size when estimating how many pages are needed
results_per_page = len(results['businesses'])
results_per_page

20

In [10]:
# Number of pages needed to cover the results that were not yet saved when the
# request above was made (math and time are already imported at the top).
# Use math.ceil to round up so a partial last page is still counted.
# Guard the division: the response page is empty (results_per_page == 0) when
# everything was already downloaded on a previous run, and the remaining count
# is clamped at 0 so n_pages can never be negative.
remaining_results = max(results['total'] - n_results, 0)
if results_per_page > 0:
    n_pages = math.ceil(remaining_results / results_per_page)
else:
    n_pages = 0
n_pages

18

In [11]:
# append this page's businesses to the stored list (in place), then write the
# whole list back to the in-progress file
previous_results += results['businesses']
with open(JSON_FILE, mode='w') as out_file:
    json.dump(previous_results, out_file)

In [12]:
import time
from tqdm.notebook import tqdm_notebook

# Progress-bar demo: one tick per page, no API calls are made here
for page_num in tqdm_notebook(range(n_pages)):
    time.sleep(0.2)   # 200 ms pause per iteration

  0%|          | 0/18 [00:00<?, ?it/s]

In [13]:
# Fetch the remaining pages, saving after every page so an interrupted run can
# be resumed from the file.
for i in tqdm_notebook( range(1,n_pages+1)):
    # brief pause between requests
    time.sleep(.2)
    ## Read in results in progress file and check the length
    with open(JSON_FILE, 'r') as f:
        previous_results = json.load(f)
    ## save number of results to use as offset
    n_results = len(previous_results)
    ## use n_results as the OFFSET 
    results = yelp_api.search_query(location=LOCATION,
                                    term=TERM, 
                                    offset=n_results)
    
    ## An empty page means there is nothing further to download. Stop here:
    ## the offset would not advance, so every later iteration would repeat
    ## the same request and get the same empty answer.
    new_businesses = results['businesses']
    if not new_businesses:
        break
    
    ## append new results and save to file
    previous_results.extend(new_businesses)
    with open(JSON_FILE,'w') as f:
        json.dump(previous_results,f)

  0%|          | 0/18 [00:00<?, ?it/s]

In [14]:
# read the completed in-progress file into a DataFrame
final_df = pd.read_json(JSON_FILE)
# preview the first and last five rows
display(final_df.head(5), final_df.tail(5))

Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
0,-FOAQv22SXtSBs7nptI3UA,serious-pie-downtown-seattle-2,Serious Pie Downtown,https://s3-media2.fl.yelpcdn.com/bphoto/dy0pJ5...,False,https://www.yelp.com/biz/serious-pie-downtown-...,4357,"[{'alias': 'pizza', 'title': 'Pizza'}, {'alias...",4.0,"{'latitude': 47.61285776946637, 'longitude': -...","[delivery, pickup]",$$,"{'address1': '2001 4th Ave', 'address2': None,...",12068387388,(206) 838-7388,516.760327
1,1rkgyfwzXG7qSdVZdfRt7g,roccos-seattle,Rocco's,https://s3-media3.fl.yelpcdn.com/bphoto/pf6jTK...,False,https://www.yelp.com/biz/roccos-seattle?adjust...,1669,"[{'alias': 'bars', 'title': 'Bars'}, {'alias':...",4.0,"{'latitude': 47.614470434736106, 'longitude': ...","[delivery, pickup]",$$,"{'address1': '2312 2nd Ave', 'address2': '', '...",12063974210,(206) 397-4210,961.965681
2,6bfJ_M7lQvwwmgE6RtTeog,slices-seattle-4,Slices,https://s3-media1.fl.yelpcdn.com/bphoto/GevkHx...,False,https://www.yelp.com/biz/slices-seattle-4?adju...,28,"[{'alias': 'pizza', 'title': 'Pizza'}, {'alias...",4.5,"{'latitude': 47.61407, 'longitude': -122.33348}","[delivery, pickup]",,"{'address1': '809 Olive Way', 'address2': '', ...",12062236150,(206) 223-6150,304.32539
3,ugTsEtjvwRhteac_6JcuPw,italian-family-pizza-seattle,Italian Family Pizza,https://s3-media2.fl.yelpcdn.com/bphoto/SgiSrJ...,False,https://www.yelp.com/biz/italian-family-pizza-...,967,"[{'alias': 'pizza', 'title': 'Pizza'}, {'alias...",4.0,"{'latitude': 47.60937, 'longitude': -122.32546}","[delivery, pickup]",$$,"{'address1': '1028 Madison St', 'address2': No...",12065380040,(206) 538-0040,680.21772
4,M9xzvwgK58T0w7wvXedvuQ,hot-mamas-pizza-seattle,Hot Mama's Pizza,https://s3-media1.fl.yelpcdn.com/bphoto/pcUxq_...,False,https://www.yelp.com/biz/hot-mamas-pizza-seatt...,920,"[{'alias': 'pizza', 'title': 'Pizza'}]",4.0,"{'latitude': 47.615379179632, 'longitude': -12...",[delivery],$,"{'address1': '700 E Pine St', 'address2': '', ...",12063226444,(206) 322-6444,922.436262


Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
346,hq6gIALemQa6qLANEG1vYQ,racha-thai-and-asian-kitchen-seattle,Racha Thai & Asian Kitchen,https://s3-media3.fl.yelpcdn.com/bphoto/_5ZVIa...,False,https://www.yelp.com/biz/racha-thai-and-asian-...,538,"[{'alias': 'thai', 'title': 'Thai'}, {'alias':...",3.5,"{'latitude': 47.6249992, 'longitude': -122.356...","[delivery, pickup]",$$,"{'address1': '12 Mercer St', 'address2': '', '...",12062818883,(206) 281-8883,2256.82847
347,rBpjilAA2EsFm8XLbJvLrQ,subway-seattle-144,Subway,https://s3-media2.fl.yelpcdn.com/bphoto/hhaKlJ...,False,https://www.yelp.com/biz/subway-seattle-144?ad...,19,"[{'alias': 'sandwiches', 'title': 'Sandwiches'}]",2.5,"{'latitude': 47.62177319441, 'longitude': -122...","[delivery, pickup]",$,"{'address1': '345 15th Ave E', 'address2': 'St...",12063231880,(206) 323-1880,1960.112053
348,paBhY3J17m4k7MqFpslJ4Q,subway-seattle-129,Subway,https://s3-media3.fl.yelpcdn.com/bphoto/ua1Vc0...,False,https://www.yelp.com/biz/subway-seattle-129?ad...,36,"[{'alias': 'sandwiches', 'title': 'Sandwiches'}]",2.5,"{'latitude': 47.6045761942765, 'longitude': -1...","[delivery, pickup]",$,"{'address1': '806 3rd Ave', 'address2': '', 'a...",12066254342,(206) 625-4342,768.918695
349,IySZf6Hi1DqGHf0PhAtZng,7-eleven-seattle-28,7-Eleven,https://s3-media1.fl.yelpcdn.com/bphoto/diW2Xw...,False,https://www.yelp.com/biz/7-eleven-seattle-28?a...,17,"[{'alias': 'convenience', 'title': 'Convenienc...",3.0,"{'latitude': 47.618686570354, 'longitude': -12...","[delivery, pickup]",$,"{'address1': '103 15th Ave E', 'address2': '',...",12063238120,(206) 323-8120,1762.365886
350,nEZ8Xgkqj2OjQWw0qFpj7A,cherry-street-coffee-house-seattle-2,Cherry Street Coffee House,https://s3-media3.fl.yelpcdn.com/bphoto/vdgyWB...,False,https://www.yelp.com/biz/cherry-street-coffee-...,164,"[{'alias': 'coffee', 'title': 'Coffee & Tea'},...",3.5,"{'latitude': 47.6161899, 'longitude': -122.351...",[delivery],$,"{'address1': '2719 1st Ave', 'address2': None,...",12064415489,(206) 441-5489,1418.588219


In [15]:
# count rows whose 'id' repeats an earlier row's 'id'
final_df.duplicated(subset=['id']).sum()

0

In [16]:
# write the final results as a gzip-compressed CSV, leaving out the row index
final_df.to_csv(
    path_or_buf='Data/final_results_seattle_pizza.csv.gz',
    index=False,
    compression='gzip',
)