In [1]:
# Standard Imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os, json, math, time
from yelpapi import YelpAPI
from tqdm.notebook import tqdm_notebook



In [4]:
# Load API Credentials
# The credentials file lives in a ".secret" folder inside the user's home
# directory. Resolve the home directory at run time so the notebook does not
# depend on one machine's absolute path.
credentials_path = os.path.join(os.path.expanduser('~'), '.secret', 'yelp_api.json')
with open(credentials_path) as f:
    login = json.load(f)
# Instantiate YelpAPI Variable (the key is read from the "api-key" entry)
yelp_api = YelpAPI(login['api-key'], timeout_s=5.0)



In [5]:
# Search parameters passed to every Yelp API call in this notebook
TERM = 'Ramen'            # what to search for
LOCATION = 'Tempe, AZ'    # where to search

In [6]:
# File that accumulates the results while they are being collected.
# The path may include a folder, and the name spells out the search terms.
JSON_FILE = 'Data/results_in_progress_tempe_ramen.json'
JSON_FILE



'Data/results_in_progress_tempe_ramen.json'

In [7]:
## Is there already an in-progress results file?
file_exists = os.path.isfile(JSON_FILE)

if file_exists:
    ## Nothing to create - just let the user know
    print(f"[i] {JSON_FILE} already exists.")
else:
    ## Create the parent folder when the path names one
    folder = os.path.dirname(JSON_FILE)
    if folder:
        os.makedirs(folder, exist_ok=True)

    ## Let the user know, then start the file off as an empty JSON list
    print(f"[i] {JSON_FILE} not found. Saving empty list to file.")
    with open(JSON_FILE, 'w') as f:
        json.dump([], f)


[i] Data/results_in_progress_tempe_ramen.json not found. Saving empty list to file.


In [8]:
## Read back everything that has been saved so far
with open(JSON_FILE, 'r') as f:
    previous_results = json.load(f)

## The count of saved results is used as the offset of the next request
n_results = len(previous_results)
print(f'- {n_results} previous results found.')



- 0 previous results found.


In [9]:
# Run the search through yelp_api.search_query, starting at the offset
# given by the number of results already saved
results = yelp_api.search_query(term=TERM,
                                location=LOCATION,
                                offset=n_results)
results.keys()



dict_keys(['businesses', 'total', 'region'])

In [10]:
## Value of the 'total' entry in the response
total_results = results['total']
total_results



249

In [11]:
## Number of businesses included in this response (one page of results)
results_per_page = len(results['businesses'])
results_per_page



20

In [12]:
# Import additional packages for controlling our loop
import time, math
# Number of pages still to request: remaining results divided by the page
# size, rounded up with math.ceil.
# A response with no businesses gives a page size of 0; report 0 pages in
# that case instead of failing with ZeroDivisionError.
if results_per_page > 0:
    n_pages = math.ceil((results['total']-n_results)/ results_per_page)
else:
    n_pages = 0
n_pages



13

In [13]:
# Add the businesses from this response to the saved list,
# then write the combined list back to the file
previous_results += results['businesses']
with open(JSON_FILE, 'w') as f:
    json.dump(previous_results, f)


In [14]:
for i in tqdm_notebook( range(1,n_pages+1)):

    ## Read in results in progress file and check the length
    with open(JSON_FILE, 'r') as f:
        previous_results = json.load(f)
    ## the number of saved results is used as the OFFSET of the next request
    n_results = len(previous_results)

    ## n_pages was computed before the first page was saved, so the final
    ## pass would start at or beyond the reported total. Stop here rather
    ## than spend an API call on it.
    if n_results >= results['total']:
        break

    results = yelp_api.search_query(location=LOCATION,
                                    term=TERM,
                                    offset=n_results)

    ## a page without businesses adds nothing, so there is no reason to
    ## rewrite the file or keep requesting
    if not results['businesses']:
        break

    ## append new results and save to file
    previous_results.extend(results['businesses'])

    with open(JSON_FILE,'w') as f:
        json.dump(previous_results,f)

    # add a 200ms pause
    time.sleep(.2)




  0%|          | 0/13 [00:00<?, ?it/s]

In [15]:
## Remove the in-progress file, then check that it is gone (expect False)
os.remove(JSON_FILE)
os.path.isfile(JSON_FILE)



False

In [16]:
def create_json_file(JSON_FILE, delete_if_exists=False):
    """Make sure JSON_FILE exists, creating it as an empty JSON list if needed.

    Args:
        JSON_FILE: Path of the json file; it may include a folder, which is
            created when missing.
        delete_if_exists: When True and the file already exists, the old file
            is deleted and replaced by a new one holding an empty list.
            When False (default) an existing file is left untouched.

    Returns:
        None. The function prints a short status message describing what it
        found.
    """
    if os.path.isfile(JSON_FILE):
        if not delete_if_exists:
            ## Keep the existing file and its contents
            print(f"[i] {JSON_FILE} already exists.")
            return

        print(f"[!] {JSON_FILE} already exists. Deleting previous file...")
        os.remove(JSON_FILE)
        ## Fall through: a fresh empty file is written below, so code that
        ## opens JSON_FILE for reading right after this call does not fail.
    else:
        print(f"[i] {JSON_FILE} not found. Saving empty list to new file.")

    ## Create the parent folder when the path names one
    folder = os.path.dirname(JSON_FILE)
    if len(folder) > 0:
        os.makedirs(folder, exist_ok=True)

    ## Save empty list to start the json file
    with open(JSON_FILE, 'w') as f:
        json.dump([], f)



In [17]:
## Create a new empty json file (deleting the previous one if it exists)
create_json_file(JSON_FILE, delete_if_exists=True)
## Load previous results and use len of results for offset
with open(JSON_FILE,'r') as f:
    previous_results = json.load(f)
    
## set offset based on previous results
n_results = len(previous_results)
print(f'- {n_results} previous results found.')
# use our yelp_api variable's search_query method to perform our API call
results = yelp_api.search_query(location=LOCATION,
                                term=TERM,
                               offset=n_results)
## How many results total?
total_results = results['total']
## How many did we get the details for?
results_per_page = len(results['businesses'])
# Use math.ceil to round up for the total number of pages of results.
# A response with no businesses gives a page size of 0; report 0 pages in
# that case instead of failing with ZeroDivisionError.
if results_per_page > 0:
    n_pages = math.ceil((results['total']-n_results)/ results_per_page)
else:
    n_pages = 0
n_pages



[i] Data/results_in_progress_tempe_ramen.json not found. Saving empty list to new file.
- 0 previous results found.


13

In [18]:
for i in tqdm_notebook( range(1,n_pages+1)):
    
    ## Read in results in progress file and check the length
    with open(JSON_FILE, 'r') as f:
        previous_results = json.load(f)
    ## the number of saved results is used as the OFFSET of the next request
    n_results = len(previous_results)
    
    ## Stop before requesting a page that would reach past result 1000.
    ## The check is on the result offset, not on the number of API calls,
    ## and the message says so.
    if (n_results + results_per_page) > 1000:
        print('Next page would go past 1000 results. Stopping loop.')
        break
    
    results = yelp_api.search_query(location=LOCATION,
                                    term=TERM, 
                                    offset=n_results)
    
    ## a page without businesses adds nothing, so there is no reason to
    ## rewrite the file or keep requesting
    if not results['businesses']:
        break
    
    ## append new results and save to file
    previous_results.extend(results['businesses'])
    
    with open(JSON_FILE,'w') as f:
        json.dump(previous_results,f)
    
    # short pause between requests
    time.sleep(.2)



  0%|          | 0/13 [00:00<?, ?it/s]

In [19]:
# Read the saved JSON list into a DataFrame and preview its first and last rows
final_df = pd.read_json(JSON_FILE)
display(final_df.head())
display(final_df.tail())


Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
0,wa8QgXQu1ZxwPgdRl9lYlg,tampopo-ramen-tempe,Tampopo Ramen,https://s3-media2.fl.yelpcdn.com/bphoto/fzo8dP...,False,https://www.yelp.com/biz/tampopo-ramen-tempe?a...,855,"[{'alias': 'ramen', 'title': 'Ramen'}, {'alias...",4.0,"{'latitude': 33.3938104, 'longitude': -111.908...",[delivery],$$,"{'address1': '3223 S McClintock Dr', 'address2...",14804912177,(480) 491-2177,2372.41745
1,WAWWO9PCDTDiXwkKdkw5yA,ramen-dozo-tempe,Ramen Dozo,https://s3-media2.fl.yelpcdn.com/bphoto/wZxMVg...,False,https://www.yelp.com/biz/ramen-dozo-tempe?adju...,281,"[{'alias': 'ramen', 'title': 'Ramen'}, {'alias...",4.5,"{'latitude': 33.3916792, 'longitude': -111.908...",[delivery],$$,"{'address1': '3415 S McClintock Dr', 'address2...",14806868086,(480) 686-8086,2357.803728
2,5lZpyuWjC7L9uJfLfqIQ_w,hachi-ramen-tempe-2,Hachi Ramen,https://s3-media1.fl.yelpcdn.com/bphoto/18BcFp...,False,https://www.yelp.com/biz/hachi-ramen-tempe-2?a...,521,"[{'alias': 'ramen', 'title': 'Ramen'}]",4.0,"{'latitude': 33.33294, 'longitude': -111.947758}","[delivery, pickup]",$$,"{'address1': '655 W Warner Rd', 'address2': 'S...",14807812603,(480) 781-2603,6370.563911
3,aveZX9jpacMknrg50MuWeQ,azusa-ramen-tempe,Azusa Ramen,https://s3-media1.fl.yelpcdn.com/bphoto/KqlfTc...,False,https://www.yelp.com/biz/azusa-ramen-tempe?adj...,175,"[{'alias': 'ramen', 'title': 'Ramen'}]",4.0,"{'latitude': 33.3948781, 'longitude': -111.940...","[delivery, pickup]",$$,"{'address1': '3128 South Mill Ave', 'address2'...",14805905641,(480) 590-5641,940.131105
4,42eixGgo0CM_fXfD17NDGQ,ramen-time-tempe,Ramen Time,https://s3-media2.fl.yelpcdn.com/bphoto/xUARHj...,False,https://www.yelp.com/biz/ramen-time-tempe?adju...,214,"[{'alias': 'ramen', 'title': 'Ramen'}, {'alias...",4.5,"{'latitude': 33.4503486, 'longitude': -111.926...","[delivery, pickup]",$,"{'address1': '1857 N Scottsdale Rd', 'address2...",14809944888,(480) 994-4888,6924.620928


Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
244,hLMXjL3rHblsw0lwIgiIYQ,7-eleven-scottsdale-2,7-Eleven,https://s3-media3.fl.yelpcdn.com/bphoto/NguET-...,False,https://www.yelp.com/biz/7-eleven-scottsdale-2...,1,"[{'alias': 'convenience', 'title': 'Convenienc...",2.0,"{'latitude': 33.4663571096898, 'longitude': -1...","[pickup, delivery]",$,"{'address1': '8402 E Mcdowell Rd', 'address2':...",14809499534,(480) 949-9534,9148.190537
245,jOMEYEJPjoGez8o4HliG_g,dominos-pizza-scottsdale-11,Domino's Pizza,https://s3-media3.fl.yelpcdn.com/bphoto/3zX6Nt...,False,https://www.yelp.com/biz/dominos-pizza-scottsd...,86,"[{'alias': 'pizza', 'title': 'Pizza'}, {'alias...",2.0,"{'latitude': 33.4805337, 'longitude': -111.935...",[],$,"{'address1': '2902 N 68th St', 'address2': Non...",16029529300,(602) 952-9300,10181.619187
246,1I8XRD2bf2nfRgjQjgeIOQ,7-eleven-phoenix-30,7-Eleven,https://s3-media3.fl.yelpcdn.com/bphoto/KgW9Te...,False,https://www.yelp.com/biz/7-eleven-phoenix-30?a...,10,"[{'alias': 'convenience', 'title': 'Convenienc...",2.5,"{'latitude': 33.4660614, 'longitude': -111.978...","[pickup, delivery]",$,"{'address1': '4748 East Mcdowell Rd', 'address...",16022758452,(602) 275-8452,9547.813957
247,IQGuA-nLqH_WJIzIgOKrbw,don-fito-s-taqueria-gilbert,Don Fito’s Taqueria,https://s3-media2.fl.yelpcdn.com/bphoto/QBNUMK...,False,https://www.yelp.com/biz/don-fito-s-taqueria-g...,6,"[{'alias': 'tacos', 'title': 'Tacos'}, {'alias...",3.5,"{'latitude': 33.35209437972647, 'longitude': -...",[],,"{'address1': '', 'address2': None, 'address3':...",14806512976,(480) 651-2976,12063.471773
248,l2n2Pl4rDW2-wVkJVS2ipg,7-eleven-scottsdale-3,7-Eleven,https://s3-media4.fl.yelpcdn.com/bphoto/un57Vs...,False,https://www.yelp.com/biz/7-eleven-scottsdale-3...,9,"[{'alias': 'convenience', 'title': 'Convenienc...",2.0,"{'latitude': 33.5243014, 'longitude': -111.899...","[pickup, delivery]",$,"{'address1': '8410 East Mcdonald', 'address2':...",14809487363,(480) 948-7363,15370.805513


In [21]:
# Count the rows whose 'id' repeats an earlier row's 'id'
final_df['id'].duplicated().sum()


1

In [22]:
## Keep only the first row for each id, then re-count duplicates (expect 0)
final_df = final_df.drop_duplicates(subset=['id'])
final_df['id'].duplicated().sum()


0

In [23]:
# save the final results to a compressed csv
# The file name now matches this notebook's search (LOCATION 'Tempe, AZ',
# TERM 'Ramen'); the earlier "NY_pizza" name did not describe this data.
final_df.to_csv('Data/final_results_tempe_ramen.csv.gz', compression='gzip',index=False)

