In [1]:
# Standard Imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Additional Imports
import os, json, math, time
from yelpapi import YelpAPI
from tqdm.notebook import tqdm_notebook

In [2]:
# Load API credentials from a JSON file kept outside the project folder.
# Point the YELP_API_CREDENTIALS environment variable at your own file;
# when it is not set, the original local path below is used as a fallback.
CREDENTIALS_PATH = os.environ.get('YELP_API_CREDENTIALS',
                                  '/Users/yupfj/.secret/yelp_api.json')
with open(CREDENTIALS_PATH) as f:
    login = json.load(f)
# Instantiate the YelpAPI client using the 'api-key' entry of the credentials
yelp_api = YelpAPI(login['api-key'], timeout_s=5.0)

In [10]:
def create_json_file(JSON_FILE, delete_if_exists=False):
    """Make sure JSON_FILE exists, creating it as an empty JSON list if needed.

    Parameters
    ----------
    JSON_FILE : str
        Path of the JSON file. It may include folders; missing folders are
        created before the file is written.
    delete_if_exists : bool, default False
        When true and the file already exists, the file is deleted and then
        recreated containing an empty list. When false, an existing file is
        left untouched.

    Returns
    -------
    None
        The function only prints status messages and writes to disk.
    """
    if os.path.isfile(JSON_FILE):
        if not delete_if_exists:
            print(f"[!] {JSON_FILE} already exists.")
            return

        print(f"[!] {JSON_FILE} already exists. Deleting previous file...")
        os.remove(JSON_FILE)
        # Fall through to the creation step below instead of recursing.

    ## The file does not exist (or was just removed): start a fresh one
    print(f"[!] {JSON_FILE} not found. Saving empty list to new file.")

    ## Create any folders named in the path; a bare filename has none
    folder = os.path.dirname(JSON_FILE)
    if folder:
        os.makedirs(folder, exist_ok=True)

    ## Save an empty list to start the json file
    with open(JSON_FILE, 'w') as f:
        json.dump([], f)

In [11]:
# Where the in-progress results are written (a folder prefix is allowed)
JSON_FILE = "Data/results_in_progress_NY_Thai.json"
# Search parameters for the API calls: what to look for, and where
TERM = 'Thai'
LOCATION = 'NY, NY'

In [12]:
## Create a new empty json file (delete the previous one if it exists)
create_json_file(JSON_FILE, delete_if_exists=True)
n_results = 0
# First API call (offset 0): used only to learn the total number of matches
# and how many businesses come back per page.
results = yelp_api.search_query(location=LOCATION,
                                term=TERM,
                                offset=n_results)
## How many results total?
total_results = results['total']
## How many did we get the details for?
results_per_page = len(results['businesses'])
# Use math.ceil to round up for the total number of pages of results.
# An empty first page would otherwise divide by zero, so treat it as 0 pages.
if results_per_page > 0:
    n_pages = math.ceil((total_results - n_results) / results_per_page)
else:
    n_pages = 0
n_pages

[!] Data/results_in_progress_NY_Thai.json already exists. Deleting previous file...
[!] Data/results_in_progress_NY_Thai.json not found. Saving empty list to new file.


145

In [13]:
for i in tqdm_notebook(range(n_pages)):

    ## Read in the results-in-progress file; its length is the next offset,
    ## so the loop resumes from whatever has already been saved to disk.
    with open(JSON_FILE, 'r') as f:
        previous_results = json.load(f)
    n_results = len(previous_results)

    ## Stop before requesting past 1000 results.
    ## NOTE(review): 1000 is presumably the API's cap on offset + page size;
    ## confirm against the current Yelp Fusion documentation.
    if (n_results + results_per_page) > 1000:
        print('Exceeded 1000 api calls. Stopping loop.')
        break

    ## Use n_results as the OFFSET
    results = yelp_api.search_query(location=LOCATION,
                                    term=TERM,
                                    offset=n_results)

    ## An empty page means the offset can no longer advance: every further
    ## iteration would repeat this same request, so stop here.
    if not results['businesses']:
        print('No more results returned. Stopping loop.')
        break

    ## Append new results and save to file
    previous_results.extend(results['businesses'])

    with open(JSON_FILE, 'w') as f:
        json.dump(previous_results, f)

    ## Short pause between consecutive API calls
    time.sleep(.2)

  0%|          | 0/145 [00:00<?, ?it/s]

Exceeded 1000 api calls. Stopping loop.


In [14]:
# Read the accumulated results file into a DataFrame
final_df = pd.read_json(JSON_FILE)
# Preview the first and the last rows
display(final_df.head())
display(final_df.tail())

Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
0,jjJc_CrkB2HodEinB6cWww,lovemama-new-york,LoveMama,https://s3-media1.fl.yelpcdn.com/bphoto/bLlFKT...,False,https://www.yelp.com/biz/lovemama-new-york?adj...,6540,"[{'alias': 'thai', 'title': 'Thai'}, {'alias':...",4.5,"{'latitude': 40.730408722512074, 'longitude': ...","[restaurant_reservation, delivery, pickup]",$$,"{'address1': '174 2nd Ave', 'address2': '', 'a...",12122545370,(212) 254-5370,2859.902795
1,-XYp6w50XbZfS90YddS5ew,soothr-new-york-2,Soothr,https://s3-media2.fl.yelpcdn.com/bphoto/HxjVE7...,False,https://www.yelp.com/biz/soothr-new-york-2?adj...,1124,"[{'alias': 'thai', 'title': 'Thai'}, {'alias':...",4.5,"{'latitude': 40.732259, 'longitude': -73.987363}","[delivery, pickup]",$$,"{'address1': '204 E 13th St', 'address2': '', ...",12128449789,(212) 844-9789,3043.263183
2,B3_K2kUVbYOU0VaLcj_LTw,thai-villa-new-york-2,Thai Villa,https://s3-media1.fl.yelpcdn.com/bphoto/IxshIB...,False,https://www.yelp.com/biz/thai-villa-new-york-2...,4812,"[{'alias': 'thai', 'title': 'Thai'}]",4.5,"{'latitude': 40.73902, 'longitude': -73.99065}","[delivery, pickup]",$$,"{'address1': '5 E 19th St', 'address2': 'G Flo...",12128029999,(212) 802-9999,3744.5704
3,0IFDnYf3bhqxJR6hVrG7Gw,top-thai-vintage-new-york-3,Top Thai Vintage,https://s3-media3.fl.yelpcdn.com/bphoto/-ZoEVV...,False,https://www.yelp.com/biz/top-thai-vintage-new-...,1106,"[{'alias': 'thai', 'title': 'Thai'}, {'alias':...",4.5,"{'latitude': 40.729907419973344, 'longitude': ...","[restaurant_reservation, delivery, pickup]",$$,"{'address1': '55 Carmine St', 'address2': None...",16466092272,(646) 609-2272,2845.705425
4,egDEaHpDumYHzRUZ8JBU-w,pranakhon-thai-restaurant-new-york-2,Pranakhon Thai Restaurant,https://s3-media1.fl.yelpcdn.com/bphoto/XB_CUH...,False,https://www.yelp.com/biz/pranakhon-thai-restau...,285,"[{'alias': 'thai', 'title': 'Thai'}, {'alias':...",4.5,"{'latitude': 40.73369, 'longitude': -73.99316}","[delivery, pickup]",,"{'address1': '88 University Pl', 'address2': N...",12127866789,(212) 786-6789,3144.403563


Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
995,Qgv2vyUV-DwK8YwrhU-jtw,lan-sheng-restaurant-new-york,Lan Sheng Restaurant,https://s3-media4.fl.yelpcdn.com/bphoto/9Sm_9s...,False,https://www.yelp.com/biz/lan-sheng-restaurant-...,218,"[{'alias': 'szechuan', 'title': 'Szechuan'}]",4.0,"{'latitude': 40.7517141867087, 'longitude': -7...","[pickup, delivery]",$$,"{'address1': '128 W 36th St', 'address2': None...",12125758899,(212) 575-8899,5169.140152
996,RoXFN-T1s6moM5smbY_5Gw,mee-noodle-shop-new-york-2,Mee Noodle Shop,https://s3-media3.fl.yelpcdn.com/bphoto/CfzomA...,False,https://www.yelp.com/biz/mee-noodle-shop-new-y...,566,"[{'alias': 'noodles', 'title': 'Noodles'}, {'a...",3.0,"{'latitude': 40.7653751, 'longitude': -73.9879...","[pickup, delivery]",$$,"{'address1': '795 9th Ave', 'address2': '', 'a...",12127652929,(212) 765-2929,6685.637739
997,Clumryf360iOnwpZ2ML8aw,sushi-you-new-york,Sushi You,https://s3-media3.fl.yelpcdn.com/bphoto/1pw9KM...,False,https://www.yelp.com/biz/sushi-you-new-york?ad...,284,"[{'alias': 'sushi', 'title': 'Sushi Bars'}, {'...",4.0,"{'latitude': 40.755555, 'longitude': -73.968736}","[pickup, delivery]",$$,"{'address1': '246 E 51st St', 'address2': None...",12127522987,(212) 752-2987,5974.0693
998,Vbmv3T7hr_owVic5BQXs5w,din-soup-dumplings-brooklyn,Din Soup Dumplings,https://s3-media2.fl.yelpcdn.com/bphoto/HMlvTY...,False,https://www.yelp.com/biz/din-soup-dumplings-br...,203,"[{'alias': 'dimsum', 'title': 'Dim Sum'}, {'al...",4.5,"{'latitude': 40.694191000000004, 'longitude': ...","[pickup, delivery]",$$,"{'address1': '162 Montague St', 'address2': ''...",17185503888,(718) 550-3888,1257.700623
999,NU_bgtavUAmLOPpC2cjkjw,bjork-cafe-and-bistro-new-york,Bjork Cafe & Bistro,https://s3-media2.fl.yelpcdn.com/bphoto/S0lgXd...,False,https://www.yelp.com/biz/bjork-cafe-and-bistro...,14,"[{'alias': 'salad', 'title': 'Salad'}, {'alias...",4.5,"{'latitude': 40.74946490113501, 'longitude': -...",[],,"{'address1': '58 Park Ave', 'address2': '', 'a...",12127793587,(212) 779-3587,5039.839683


In [15]:
# Count rows whose 'id' repeats an earlier row's 'id'
final_df['id'].duplicated().sum()

1

In [21]:
## Drop duplicate ids and confirm there are no more duplicates
final_df = final_df.drop_duplicates(subset='id')
assert final_df['id'].is_unique, "duplicate ids remain after drop_duplicates"
# Build the CSV name from the JSON path minus its extension. Unlike
# str.replace, this cannot return the JSON path unchanged (which would
# overwrite the JSON file) or alter a '.json' inside a folder name.
csv_file = os.path.splitext(JSON_FILE)[0] + '.csv.gz'
# Save the final results to a compressed csv
final_df.to_csv(csv_file, compression='gzip', index=False)