# Efficient Yelp API Calls

In [1]:
# Standard Imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Additional Imports
import os, json, math, time
from yelpapi import YelpAPI
from tqdm.notebook import tqdm_notebook

In [2]:
# Load API Credentials
# Expand "~" to the current user's home directory so the notebook is not tied
# to one machine's absolute path. Only the 'api-key' entry of the JSON file is
# read below.
credentials_path = os.path.expanduser('~/.secret/yelp_api.json')   #use your path here!
with open(credentials_path) as f:
    login = json.load(f)
# Instantiate YelpAPI Variable
yelp_api = YelpAPI(login['api-key'], timeout_s=5.0)

In [3]:
# set our API call parameters
# LOCATION and TERM are passed as the `location` and `term` arguments of every
# yelp_api.search_query call made later in this notebook.
LOCATION = 'NY,NY'
TERM = 'Chinese'

In [4]:
# Specifying JSON_FILE filename (can include a folder)
# include the search terms in the filename
# This file holds the results in progress: the paging loop re-reads it before
# each request (its length is used as the offset) and rewrites it afterwards.
JSON_FILE = "Data/results_in_progress_NY_chinese.json"
# Bare expression so the cell displays the chosen path.
JSON_FILE

'Data/results_in_progress_NY_chinese.json'

In [5]:
def create_json_file(JSON_FILE,  delete_if_exists=False):
    """Ensure JSON_FILE exists, initialising it with an empty JSON list if needed.

    Args:
        JSON_FILE: Path of the JSON file to create; may include a folder, which
            is created when missing.
        delete_if_exists: When truthy, an existing file is deleted and replaced
            by a fresh file containing an empty list. When falsy, an existing
            file is left untouched.

    Returns:
        None. The function reports what it did via print().
    """
    if os.path.isfile(JSON_FILE):
        if not delete_if_exists:
            print(f"[i] {JSON_FILE} already exists.")
            return

        print(f"[!] {JSON_FILE} already exists. Deleting previous file...")
        os.remove(JSON_FILE)
        # Fall through to the creation steps below (no recursive call needed).

    ## INFORM USER AND SAVE EMPTY LIST
    print(f"[i] {JSON_FILE} not found. Saving empty list to new file.")

    ## CREATE ANY NEEDED FOLDERS
    # os.path.dirname returns '' when JSON_FILE has no folder component
    folder = os.path.dirname(JSON_FILE)
    if folder:
        os.makedirs(folder, exist_ok=True)

    ## Save empty list to start the json file
    with open(JSON_FILE, 'w') as f:
        json.dump([], f)

In [6]:
## Create a new empty json file (deleting the previous one if it exists)
create_json_file(JSON_FILE, delete_if_exists=True)
## Load previous results and use len of results for offset
with open(JSON_FILE,'r') as f:
    previous_results = json.load(f)

## set offset based on previous results
n_results = len(previous_results)
print(f'- {n_results} previous results found.')
# use our yelp_api variable's search_query method to perform our API call
results = yelp_api.search_query(location=LOCATION,
                                term=TERM,
                                offset=n_results)
## How many results total?
total_results = results['total']
## How many did we get the details for?
results_per_page = len(results['businesses'])
# Use math.ceil to round up for the total number of pages of results.
# An empty first page means there is nothing left to fetch; report zero pages
# instead of dividing by zero.
if results_per_page > 0:
    n_pages = math.ceil((total_results - n_results) / results_per_page)
else:
    n_pages = 0
n_pages

[!] Data/results_in_progress_NY_chinese.json already exists. Deleting previous file...
[i] Data/results_in_progress_NY_chinese.json not found. Saving empty list to new file.
- 0 previous results found.


305

In [7]:
for i in tqdm_notebook( range(1,n_pages+1)):

    ## Read in the results-in-progress file; its length is the next offset
    with open(JSON_FILE, 'r') as f:
        previous_results = json.load(f)
    n_results = len(previous_results)

    ## Stop before the next page would push the collected results past 1000
    # NOTE(review): the message says "api calls" but the quantity checked is the
    # number of results; presumably this mirrors the search endpoint's cap on
    # retrievable results -- confirm against the current Yelp documentation.
    if (n_results + results_per_page) > 1000:
        print('Exceeded 1000 api calls. Stopping loop.')
        break

    ## use n_results as the OFFSET
    results = yelp_api.search_query(location=LOCATION,
                                    term=TERM,
                                    offset=n_results)
    new_businesses = results['businesses']

    ## An empty page adds nothing, so the offset would never advance and every
    ## remaining iteration would repeat the same request. Stop instead.
    if not new_businesses:
        print('No more results returned. Stopping loop.')
        break

    ## append new results and save to file
    previous_results.extend(new_businesses)
    with open(JSON_FILE,'w') as f:
        json.dump(previous_results,f)

    ## short pause between requests
    time.sleep(.2)

  0%|          | 0/305 [00:00<?, ?it/s]

Exceeded 1000 api calls. Stopping loop.


In [8]:
# load final results
# JSON_FILE holds the list of business records accumulated by the paging loop;
# pd.read_json loads that list into a DataFrame.
final_df = pd.read_json(JSON_FILE)
# Show the first and last rows together as a quick sanity check of the load.
display(final_df.head(), final_df.tail())

Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
0,lynQoI3w_pzYfHGeuUU-Qg,shu-jiao-fu-zhou-new-york-2,Shu Jiao Fu Zhou,https://s3-media2.fl.yelpcdn.com/bphoto/Nmkgn1...,False,https://www.yelp.com/biz/shu-jiao-fu-zhou-new-...,1164,"[{'alias': 'chinese', 'title': 'Chinese'}]",4.5,"{'latitude': 40.7174109, 'longitude': -73.9920...","[delivery, pickup]",$,"{'address1': '295 Grand St', 'address2': None,...",12126252532,(212) 625-2532,1590.016594
1,0CjK3esfpFcxIopebzjFxA,joes-shanghai-new-york-2,Joe's Shanghai,https://s3-media3.fl.yelpcdn.com/bphoto/uvRuBT...,False,https://www.yelp.com/biz/joes-shanghai-new-yor...,7034,"[{'alias': 'shanghainese', 'title': 'Shanghain...",4.0,"{'latitude': 40.7156608, 'longitude': -73.9967...","[delivery, pickup]",$$,"{'address1': '46 Bowery St', 'address2': '', '...",12122338888,(212) 233-8888,1763.916423
2,0dDCDQz7DrKkSvu9h2hcQw,chow-house-new-york,Chow House,https://s3-media3.fl.yelpcdn.com/bphoto/kzeCls...,False,https://www.yelp.com/biz/chow-house-new-york?a...,578,"[{'alias': 'szechuan', 'title': 'Szechuan'}, {...",4.5,"{'latitude': 40.72915, 'longitude': -74.00072}","[delivery, restaurant_reservation, pickup]",$$,"{'address1': '181 Bleecker St', 'address2': No...",12128371021,(212) 837-1021,3063.389411
3,X8ZS-dgiMIJvhwf9SaDnjw,wah-fung-no-1-new-york-2,Wah Fung No 1,https://s3-media4.fl.yelpcdn.com/bphoto/uO14Qw...,False,https://www.yelp.com/biz/wah-fung-no-1-new-yor...,1959,"[{'alias': 'hotdogs', 'title': 'Fast Food'}, {...",4.5,"{'latitude': 40.71735, 'longitude': -73.99457}",[delivery],$,"{'address1': '79 Chrystie St', 'address2': '',...",12129255175,(212) 925-5175,1733.960564
4,4DInnPhOyvXFbYpUdO0SMQ,antidote-brooklyn-2,Antidote,https://s3-media1.fl.yelpcdn.com/bphoto/BLX5aF...,False,https://www.yelp.com/biz/antidote-brooklyn-2?a...,294,"[{'alias': 'cocktailbars', 'title': 'Cocktail ...",4.5,"{'latitude': 40.714253413118925, 'longitude': ...","[delivery, pickup]",$$,"{'address1': '66 S 2nd St', 'address2': '', 'a...",17187822585,(718) 782-2585,1412.62753


Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
995,6kJxh1RCQykTHxWor538dA,panda-delight-brooklyn,Panda Delight,https://s3-media3.fl.yelpcdn.com/bphoto/6EBWRX...,False,https://www.yelp.com/biz/panda-delight-brookly...,44,"[{'alias': 'chinese', 'title': 'Chinese'}]",2.5,"{'latitude': 40.687602, 'longitude': -73.989601}","[delivery, pickup]",$$,"{'address1': '121 Smith St', 'address2': '', '...",17184887540,(718) 488-7540,2325.535241
996,8SyjAQ1yTbAAEQsMkElocQ,cathay-22-springfield,Cathay 22,https://s3-media1.fl.yelpcdn.com/bphoto/2l5dVn...,False,https://www.yelp.com/biz/cathay-22-springfield...,251,"[{'alias': 'chinese', 'title': 'Chinese'}, {'a...",4.0,"{'latitude': 40.687587, 'longitude': -74.3171812}",[delivery],$$,"{'address1': '124 Rt. 22 W', 'address2': '', '...",19734678688,(973) 467-8688,28575.717564
997,OpOwJYqJRgRnoBKKPYwdTQ,double-dragon-new-york-3,Double Dragon,https://s3-media4.fl.yelpcdn.com/bphoto/Veoqv_...,False,https://www.yelp.com/biz/double-dragon-new-yor...,5,"[{'alias': 'chinese', 'title': 'Chinese'}]",3.0,"{'latitude': 40.78919, 'longitude': -73.94061}","[delivery, pickup]",,"{'address1': '2037 1st Ave', 'address2': '', '...",12125346399,(212) 534-6399,9704.849522
998,wjIyCBEpCG_HRp8veiI0YQ,ootoya-times-square-new-york-2,Ootoya Times Square,https://s3-media3.fl.yelpcdn.com/bphoto/mBXdPv...,False,https://www.yelp.com/biz/ootoya-times-square-n...,1062,"[{'alias': 'sushi', 'title': 'Sushi Bars'}, {'...",4.0,"{'latitude': 40.75487, 'longitude': -73.98599}","[delivery, pickup]",$$,"{'address1': '141 W 41st St', 'address2': '', ...",19296188835,(929) 618-8835,5362.0108
999,5dYMmwKRrgtaZ835mqK9EQ,new-china-wok-closter-2,New China Wok,https://s3-media2.fl.yelpcdn.com/bphoto/h9Z8k9...,False,https://www.yelp.com/biz/new-china-wok-closter...,47,"[{'alias': 'chinese', 'title': 'Chinese'}]",4.0,"{'latitude': 40.973607796171, 'longitude': -73...","[delivery, pickup]",$,"{'address1': '328 Harrington Ave', 'address2':...",12017848818,(201) 784-8818,29684.270183


In [9]:
# Count rows whose business id already appeared in an earlier row
final_df['id'].duplicated().sum()

1

In [11]:
## Keep only the first row seen for each business id, then confirm that
## no repeated ids remain (expected count: 0)
final_df = final_df[~final_df['id'].duplicated()]
final_df['id'].duplicated().sum()

0

In [12]:
# Write the de-duplicated results to a gzip-compressed CSV, leaving out the index column
final_df.to_csv(
    'Data/final_results_NY_chinese.csv.gz',
    index=False,
    compression='gzip',
)