# Efficient Yelp API Calls 
- Christina Reeder
- 2 Mar 2023

In [7]:
!pip install yelpapi

Collecting yelpapi
  Downloading yelpapi-2.5.0-py3-none-any.whl (7.4 kB)
Installing collected packages: yelpapi
Successfully installed yelpapi-2.5.0


In [22]:
import json, os, math, time
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from yelpapi import YelpAPI
from tqdm.notebook import tqdm_notebook 

## create_json_file Function

In [41]:
# function to create new json file
def create_json_file(JSON_FILE, delete_if_exists=False):
    """Make sure JSON_FILE exists, creating it with an empty list if needed.

    Parameters
    ----------
    JSON_FILE : str
        Path of the JSON file. Any missing parent folders are created.
    delete_if_exists : bool, default False
        When truthy and the file already exists, the file is deleted and
        replaced by a new one holding an empty list. When falsy, an existing
        file is left untouched.

    Returns
    -------
    None
        Progress is reported with print statements only.
    """
    if os.path.isfile(JSON_FILE):
        if not delete_if_exists:
            # keep the existing file (and whatever results it holds)
            print(f'[i] {JSON_FILE} already exists')
            return

        print(f'[!] {JSON_FILE} already exists. Deleting previous file...')
        os.remove(JSON_FILE)
        # fall through to the creation steps below instead of recursing

    # inform user and save empty list
    print(f'[i] {JSON_FILE} not found. Saving empty list to new file')

    # create the parent folder(s) only when the path includes one
    folder = os.path.dirname(JSON_FILE)
    if folder:
        os.makedirs(folder, exist_ok=True)

    # an empty list is the starting point that later results get appended to
    with open(JSON_FILE, 'w') as f:
        json.dump([], f)

## Load Yelp API Key and Create Client

In [9]:
# Path of the JSON file holding the Yelp credentials. Set the
# YELP_API_CREDENTIALS environment variable to use a different file;
# otherwise the original hard-coded location is used.
CREDENTIALS_FILE = os.environ.get(
    'YELP_API_CREDENTIALS',
    'C:/Users/James/Documents/DataEnrichment/.secret/yelp_api.json',
)
with open(CREDENTIALS_FILE) as f:
    login = json.load(f)
# show only the key names so the secret values stay out of the output
login.keys()

dict_keys(['client-id', 'api-key'])

In [11]:
# build the Yelp client from the stored API key, with timeout_s set to 5.0
yelp_api = YelpAPI(
    login['api-key'],
    timeout_s=5.0,
)
yelp_api

<yelpapi.yelpapi.YelpAPI at 0x20fe6be6f10>

## Using Yelp API

In [12]:
# query Yelp for 'Pizza' businesses around 'NY, NY' and keep the response
search_results = yelp_api.search_query(term='Pizza', location='NY, NY')
# list the top-level keys of the response
search_results.keys()

dict_keys(['businesses', 'total', 'region'])

In [16]:
# turn the list of businesses into a dataframe and preview the first two rows
biz = pd.DataFrame(data=search_results['businesses'])
biz.head(n=2)

Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
0,zj8Lq1T8KIC5zwFief15jg,prince-street-pizza-new-york-2,Prince Street Pizza,https://s3-media2.fl.yelpcdn.com/bphoto/I4gm7i...,False,https://www.yelp.com/biz/prince-street-pizza-n...,4791,"[{'alias': 'pizza', 'title': 'Pizza'}, {'alias...",4.5,"{'latitude': 40.72308755605564, 'longitude': -...","[delivery, pickup]",$,"{'address1': '27 Prince St', 'address2': None,...",12129664100,(212) 966-4100,2209.311618
1,ysqgdbSrezXgVwER2kQWKA,julianas-brooklyn-3,Juliana's,https://s3-media2.fl.yelpcdn.com/bphoto/NVoLFl...,False,https://www.yelp.com/biz/julianas-brooklyn-3?a...,2608,"[{'alias': 'pizza', 'title': 'Pizza'}]",4.5,"{'latitude': 40.70274718768062, 'longitude': -...",[delivery],$$,"{'address1': '19 Old Fulton St', 'address2': '...",17185966700,(718) 596-6700,1289.857286


In [15]:
# count the businesses that came back in this single page of results
len(search_results['businesses'])

20

In [17]:
# repeat the search with offset=20 to request the second page of results
search_results = yelp_api.search_query(
    offset=20,
    term='Pizza',
    location='NY, NY',
)

In [18]:
# turn the second page of businesses into its own dataframe and preview it
biz2 = pd.DataFrame(data=search_results['businesses'])
biz2.head(n=2)

Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
0,UB1mGugr1pvgUUP1eTNXrg,stone-bridge-pizza-and-salad-new-york,Stone Bridge Pizza & Salad,https://s3-media1.fl.yelpcdn.com/bphoto/yfY3OA...,False,https://www.yelp.com/biz/stone-bridge-pizza-an...,320,"[{'alias': 'pizza', 'title': 'Pizza'}, {'alias...",4.5,"{'latitude': 40.75225, 'longitude': -73.98061}","[pickup, delivery]",$$,"{'address1': '16 E 41st St', 'address2': None,...",16467915690,(646) 791-5690,5039.358201
1,15k7iqFbhf4h8L01yggLqg,song-e-napule-new-york,Song E Napule,https://s3-media2.fl.yelpcdn.com/bphoto/y9G-nh...,False,https://www.yelp.com/biz/song-e-napule-new-yor...,673,"[{'alias': 'pizza', 'title': 'Pizza'}, {'alias...",4.5,"{'latitude': 40.72813, 'longitude': -74.00188}","[pickup, delivery]",$$,"{'address1': '146 W Houston St', 'address2': N...",12125331242,(212) 533-1242,3034.28645


In [19]:
# stack both pages into a single dataframe, renumbering the index from 0
businesses = pd.concat(objs=[biz, biz2], ignore_index=True)
# preview the first and last three rows of the combined frame
display(businesses.head(n=3), businesses.tail(n=3))

Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
0,zj8Lq1T8KIC5zwFief15jg,prince-street-pizza-new-york-2,Prince Street Pizza,https://s3-media2.fl.yelpcdn.com/bphoto/I4gm7i...,False,https://www.yelp.com/biz/prince-street-pizza-n...,4791,"[{'alias': 'pizza', 'title': 'Pizza'}, {'alias...",4.5,"{'latitude': 40.72308755605564, 'longitude': -...","[delivery, pickup]",$,"{'address1': '27 Prince St', 'address2': None,...",12129664100,(212) 966-4100,2209.311618
1,ysqgdbSrezXgVwER2kQWKA,julianas-brooklyn-3,Juliana's,https://s3-media2.fl.yelpcdn.com/bphoto/NVoLFl...,False,https://www.yelp.com/biz/julianas-brooklyn-3?a...,2608,"[{'alias': 'pizza', 'title': 'Pizza'}]",4.5,"{'latitude': 40.70274718768062, 'longitude': -...",[delivery],$$,"{'address1': '19 Old Fulton St', 'address2': '...",17185966700,(718) 596-6700,1289.857286
2,WG639VkTjmK5dzydd1BBJA,rubirosa-new-york-2,Rubirosa,https://s3-media3.fl.yelpcdn.com/bphoto/F65qqO...,False,https://www.yelp.com/biz/rubirosa-new-york-2?a...,2920,"[{'alias': 'italian', 'title': 'Italian'}, {'a...",4.5,"{'latitude': 40.722766, 'longitude': -73.996233}",[pickup],$$,"{'address1': '235 Mulberry St', 'address2': ''...",12129650500,(212) 965-0500,2268.49195


Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
37,6BC5LQI4Cg3KChkgigo9sA,lo-duca-pizza-brooklyn,Lo Duca Pizza,https://s3-media4.fl.yelpcdn.com/bphoto/zHuQza...,False,https://www.yelp.com/biz/lo-duca-pizza-brookly...,151,"[{'alias': 'pizza', 'title': 'Pizza'}]",4.5,"{'latitude': 40.6349400235262, 'longitude': -7...",[delivery],$,"{'address1': '14 Newkirk Plz', 'address2': '',...",17188591501.0,(718) 859-1501,8131.724136
38,AHDlsNh8fODK9miC3zv3Zg,unregular-pizza-new-york-2,Unregular Pizza,https://s3-media4.fl.yelpcdn.com/bphoto/FiM-ke...,False,https://www.yelp.com/biz/unregular-pizza-new-y...,183,"[{'alias': 'pizza', 'title': 'Pizza'}]",4.5,"{'latitude': 40.73398, 'longitude': -73.98961}","[pickup, delivery]",$$,"{'address1': '135 4th Ave', 'address2': None, ...",16466094699.0,(646) 609-4699,3094.834457
39,aP1Nwq1-7j0Y7hr7oavY2A,upside-pizza-new-york-2,Upside Pizza,https://s3-media3.fl.yelpcdn.com/bphoto/RWcIDI...,False,https://www.yelp.com/biz/upside-pizza-new-york...,118,"[{'alias': 'pizza', 'title': 'Pizza'}]",4.5,"{'latitude': 40.722115, 'longitude': -73.996407}","[pickup, delivery]",,"{'address1': '51 Spring St', 'address2': '', '...",,,2221.489967


In [21]:
# save first two pages of businesses as a records-oriented JSON file
# (to_json only writes to disk when given a path; with no path it just
# returns the JSON string, so nothing was being saved before)
os.makedirs('Data', exist_ok=True)
businesses.to_json('Data/first_two_pages_NY_pizza.json', orient='records')

## Efficient Yelp API Calls

In [29]:
# search parameters shared by every API call below
LOCATION = "WA"    # passed as `location` to search_query
TERM = "Asian"     # passed as `term` to search_query

In [24]:
# path of the results-in-progress JSON file; built from the search parameters
# so the file name always matches LOCATION and TERM (the file itself is
# created later by create_json_file)
JSON_FILE = f"Data/results_in_progress_{LOCATION}_{TERM.lower()}.json"
JSON_FILE

'Data/results_in_progress_WA_asian.json'

In [42]:
# start from a fresh, empty results-in-progress file
create_json_file(JSON_FILE, delete_if_exists=True)
# load the results saved so far; their count is the offset for the next call
with open(JSON_FILE, 'r') as f:
    previous_results = json.load(f)

# set offset based on previous results
n_results = len(previous_results)
print(f'- {n_results} previous results found.')

# use yelp_api variable's search_query to perform API call
results = yelp_api.search_query(location=LOCATION,
                                term=TERM,
                                offset=n_results)
# how many results total?
total_results = results['total']
# how many results do we have details for?
results_per_page = len(results['businesses'])
# number of pages still to fetch; an empty page means there is nothing left,
# so avoid dividing by zero in that case
if results_per_page > 0:
    n_pages = math.ceil((total_results - n_results) / results_per_page)
else:
    n_pages = 0
n_pages

[!] Data/results_in_progress_WA_asian.json already exists. Deleting previous file...
[i] Data/results_in_progress_WA_asian.json not found. Saving empty list to new file
- 0 previous results found.


200

In [43]:
# loop over the remaining pages with a progress bar
for i in tqdm_notebook(range(1, n_pages+1)):
    # read results in progress file and check length
    with open(JSON_FILE, 'r') as f:
        previous_results = json.load(f)
    # save number of results for use as offset
    n_results = len(previous_results)

    # stop before the collected results would pass 1,000
    # NOTE(review): the check counts results, not API calls, despite the
    # wording of the message; the limit is presumably Yelp's cap on results
    # per search - confirm against the Yelp API docs
    if (n_results + results_per_page) > 1000:
        print('Exceeded 1000 api calls. Stopping loop.')
        break

    # use n_results as the offset
    results = yelp_api.search_query(location=LOCATION,
                                    term=TERM,
                                    offset=n_results)

    # an empty page means there is nothing more to fetch; stop here rather
    # than spending further API calls on the same offset
    new_businesses = results['businesses']
    if not new_businesses:
        print('No more results returned. Stopping loop.')
        break

    # append new results to the ones already collected
    previous_results.extend(new_businesses)

    # save the updated results back to the file after every page
    with open(JSON_FILE, 'w') as f:
        json.dump(previous_results, f)

    # add 200ms delay
    time.sleep(.2)

  0%|          | 0/200 [00:00<?, ?it/s]

Exceeded 1000 api calls. Stopping loop.


In [44]:
# read the saved results file back in as a dataframe
df = pd.read_json(path_or_buf=JSON_FILE)
# preview the first and last five rows
display(df.head(n=5), df.tail(n=5))

Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
0,h1Q0Wkx5TUUZeVjJrXTwmQ,chan-seattle-seattle-2,Chan Seattle,https://s3-media2.fl.yelpcdn.com/bphoto/C5Iry3...,False,https://www.yelp.com/biz/chan-seattle-seattle-...,960,"[{'alias': 'asianfusion', 'title': 'Asian Fusi...",4.5,"{'latitude': 47.61312322818342, 'longitude': -...",[delivery],$$,"{'address1': '724 Pine St', 'address2': '', 'a...",14256582626,(425) 658-2626,287.367244
1,UzL8_jvtznfsFDprG-O1UA,biang-biang-noodles-seattle-2,Biang Biang Noodles,https://s3-media1.fl.yelpcdn.com/bphoto/JYa1Xr...,False,https://www.yelp.com/biz/biang-biang-noodles-s...,860,"[{'alias': 'noodles', 'title': 'Noodles'}, {'a...",4.5,"{'latitude': 47.613937, 'longitude': -122.324239}","[delivery, pickup]",$$,"{'address1': '601 E Pike St', 'address2': 'Uni...",12068098999,(206) 809-8999,442.912496
2,rXbU5HJx6mihqazytPTgXA,kedai-makan-seattle-4,Kedai Makan,https://s3-media2.fl.yelpcdn.com/bphoto/y1AOX-...,False,https://www.yelp.com/biz/kedai-makan-seattle-4...,1137,"[{'alias': 'malaysian', 'title': 'Malaysian'},...",4.0,"{'latitude': 47.615109, 'longitude': -122.313107}","[restaurant_reservation, delivery]",$$,"{'address1': '1449 E Pine St', 'address2': '',...",12065562560,(206) 556-2560,1278.101201
3,ZwMgLdqgYDWTztDy3fzGpg,asean-streat-food-hall-seattle,Asean Streat Food Hall,https://s3-media1.fl.yelpcdn.com/bphoto/IeRK5z...,False,https://www.yelp.com/biz/asean-streat-food-hal...,94,"[{'alias': 'malaysian', 'title': 'Malaysian'},...",3.5,"{'latitude': 47.61178538051406, 'longitude': -...",[],$$,"{'address1': '400 Pine St', 'address2': 'Ste 1...",12066952597,(206) 695-2597,629.952334
4,s1KxyLIx8u4ltDavPJgm_g,dough-zone-seattle-downtown-pine-st-seattle-2,Dough Zone - Seattle Downtown Pine St.,https://s3-media2.fl.yelpcdn.com/bphoto/1jKV_t...,False,https://www.yelp.com/biz/dough-zone-seattle-do...,947,"[{'alias': 'shanghainese', 'title': 'Shanghain...",4.0,"{'latitude': 47.613314048958955, 'longitude': ...",[delivery],$$,"{'address1': '815 Pine St', 'address2': '', 'a...",12066826666,(206) 682-6666,196.568883


Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
995,cLx4I2byob84QnBX7OR6tg,king-donuts-seattle,King Donuts,https://s3-media1.fl.yelpcdn.com/bphoto/ceYbiU...,False,https://www.yelp.com/biz/king-donuts-seattle?a...,206,"[{'alias': 'donuts', 'title': 'Donuts'}, {'ali...",4.0,"{'latitude': 47.531918943888115, 'longitude': ...",[delivery],$,"{'address1': '7820 Rainier Ave S', 'address2':...",12067213103,(206) 721-3103,10224.318544
996,SOq4rH48-gsVYxkICLvTCQ,lyons-grocery-seattle,Lyon's Grocery,https://s3-media2.fl.yelpcdn.com/bphoto/cXPYOP...,False,https://www.yelp.com/biz/lyons-grocery-seattle...,105,"[{'alias': 'grocery', 'title': 'Grocery'}, {'a...",4.5,"{'latitude': 47.63787, 'longitude': -122.34314}","[pickup, delivery]",$$$,"{'address1': '2100 Dexter Ave N', 'address2': ...",12062841410,(206) 284-1410,2792.474128
997,v2oKARv9kd0LDlN98-G_VQ,village-sushi-seattle-2,Village Sushi,https://s3-media3.fl.yelpcdn.com/bphoto/Y5nzmH...,False,https://www.yelp.com/biz/village-sushi-seattle...,127,"[{'alias': 'japanese', 'title': 'Japanese'}, {...",4.0,"{'latitude': 47.66684, 'longitude': -122.31325}",[delivery],$$,"{'address1': '5211 University Way NE', 'addres...",12069856870,(206) 985-6870,5972.682815
998,s5QQ1vOJQ6tDNmtAioOICA,kobe-restaurant-bellevue,Kobe Restaurant,https://s3-media3.fl.yelpcdn.com/bphoto/yVZtcU...,False,https://www.yelp.com/biz/kobe-restaurant-belle...,396,"[{'alias': 'korean', 'title': 'Korean'}, {'ali...",3.5,"{'latitude': 47.6180813476017, 'longitude': -1...","[pickup, delivery]",$$,"{'address1': '850 110th Ave NE', 'address2': '...",14254513888,(425) 451-3888,10247.216558
999,JMD4_XypuSKvsI1bfIko9w,samurai-noodle-seattle,Samurai Noodle,https://s3-media3.fl.yelpcdn.com/bphoto/mVxiSF...,False,https://www.yelp.com/biz/samurai-noodle-seattl...,841,"[{'alias': 'ramen', 'title': 'Ramen'}, {'alias...",3.5,"{'latitude': 47.59726, 'longitude': -122.3275}","[pickup, delivery]",$,"{'address1': '606 5th Ave S', 'address2': '', ...",12066249321,(206) 624-9321,1952.324557


In [45]:
# count rows whose business id already appeared earlier in the dataframe
df['id'].duplicated().sum()

0

In [46]:
# write the final results to a gzip-compressed csv, leaving out the index
df.to_csv(
    'Data/final_results_WA_asian.csv.gz',
    index=False,
    compression='gzip',
)