In [1]:
# Standard Imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Additional Imports
import os, json, math, time
from yelpapi import YelpAPI
from tqdm.notebook import tqdm_notebook

In [2]:
# Install tmdbsimple (only need to run once)
!pip install tqdm

Defaulting to user installation because normal site-packages is not writeable


You should consider upgrading via the 'C:\Program Files\Python310\python.exe -m pip install --upgrade pip' command.


In [3]:
# Load API Credentials
# The credentials file is looked up under the current user's home directory
# (~/.secret/yelp_api.json) instead of one machine's absolute path; set the
# YELP_API_CREDENTIALS environment variable to point somewhere else.
credentials_path = os.environ.get(
    'YELP_API_CREDENTIALS',
    os.path.join(os.path.expanduser('~'), '.secret', 'yelp_api.json'),
)
with open(credentials_path) as f:
    login = json.load(f)
# Instantiate YelpAPI Variable using the 'api-key' entry of the credentials file
yelp_api = YelpAPI(login['api-key'], timeout_s=5.0)
yelp_api

<yelpapi.yelpapi.YelpAPI at 0x22ecc929eb0>

In [4]:
# Search parameters passed to the Yelp API calls in this notebook
LOCATION = "San Diego, CA, 92154"  # where to search
TERM = "Ramen"                     # what to search for

In [5]:
# Specifying JSON_FILE filename (can include a folder).
# The search term is embedded in the filename (lower-cased, spaces replaced by
# underscores), so changing TERM writes to a separate in-progress file.
JSON_FILE = f"Data/results_in_progress_{TERM.lower().replace(' ', '_')}.json"
JSON_FILE

'Data/results_in_progress_ramen.json'

In [6]:
## Make sure JSON_FILE exists so the following cells can always load it
file_exists = os.path.isfile(JSON_FILE)
## If it does not exist:
if not file_exists:

    ## CREATE ANY NEEDED FOLDERS
    # Get the folder name only (empty string when JSON_FILE has no folder part)
    folder = os.path.dirname(JSON_FILE)
    ## If JSON_FILE included a folder, create it
    if folder:
        os.makedirs(folder, exist_ok=True)

    ## INFORM USER AND SAVE EMPTY LIST
    print(f"[i] {JSON_FILE} not found. Saving empty list to file.")

    ## start the file as an empty JSON list; results are appended to it later
    with open(JSON_FILE, 'w') as f:
        json.dump([], f)
## If it exists, inform user
else:
    print(f"[i] {JSON_FILE} already exists.")

[i] Data/results_in_progress_ramen.json not found. Saving empty list to file.


In [7]:
## Read back whatever has been saved to JSON_FILE so far
with open(JSON_FILE, 'r') as saved_file:
    previous_results = json.load(saved_file)

## The number of saved results is used as the offset for the next API call
n_results = len(previous_results)
print(f'- {n_results} previous results found.')

- 0 previous results found.


In [8]:
# Run the search through yelp_api.search_query, starting at the offset given
# by the number of results already saved
search_params = dict(location=LOCATION, term=TERM, offset=n_results)
results = yelp_api.search_query(**search_params)
results.keys()

dict_keys(['businesses', 'total', 'region'])

In [9]:
## Value of the 'total' entry in the search response
total_results = results["total"]
total_results

538

In [10]:
## Number of businesses contained in this response (20 in the run shown)
businesses_on_page = results['businesses']
results_per_page = len(businesses_on_page)
results_per_page

20

In [11]:
# math is already imported in the first cell, so it is not re-imported here.
# Results still to be collected: the reported total minus what is already saved.
remaining_results = results['total'] - n_results
# Round up to whole pages. When the response held no businesses (for example
# when everything was already saved), results_per_page is 0, so report 0 pages
# instead of dividing by zero.
n_pages = math.ceil(remaining_results / results_per_page) if results_per_page else 0
n_pages

27

In [12]:
# Append this response's businesses to the running list, then write the whole
# list back to JSON_FILE
previous_results += results['businesses']
with open(JSON_FILE, 'w') as f:
    json.dump(previous_results, f)

In [13]:
from tqdm.notebook import tqdm_notebook
import time

# Progress-bar demonstration: one tick per page with a 200 ms pause per tick
page_range = range(n_pages)
for _page in tqdm_notebook(page_range):
    time.sleep(0.2)

  0%|          | 0/27 [00:00<?, ?it/s]

In [14]:
# n_pages was computed before the first response was saved, so it already
# counts the page written to JSON_FILE by the earlier cell. Only n_pages - 1
# pages remain; range(1, n_pages) yields exactly that many passes and avoids a
# final request whose offset lies past the end of the results.
for i in tqdm_notebook(range(1, n_pages)):
    # 200 ms pause between requests
    time.sleep(.2)
    ## Read in results in progress file and check the length
    with open(JSON_FILE, 'r') as f:
        previous_results = json.load(f)
    ## number of saved results is the OFFSET for the next request
    n_results = len(previous_results)
    ## stop without calling the API once everything reported is already saved
    ## (this also makes re-running the cell harmless)
    if n_results >= results['total']:
        break
    results = yelp_api.search_query(location=LOCATION,
                                    term=TERM,
                                    offset=n_results)
    new_businesses = results.get('businesses', [])
    ## an empty response means there is nothing more to collect
    if not new_businesses:
        break

    ## append new results and save to file
    previous_results.extend(new_businesses)
    with open(JSON_FILE, 'w') as f:
        json.dump(previous_results, f)

  0%|          | 0/27 [00:00<?, ?it/s]

In [15]:
# Read the accumulated results file into a DataFrame and preview both ends
final_df = pd.read_json(JSON_FILE)
first_rows, last_rows = final_df.head(), final_df.tail()
display(first_rows, last_rows)

Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
0,ninJL2knw22u47LFYsYFOw,okama-chula-vista,Okama,https://s3-media2.fl.yelpcdn.com/bphoto/FFHSS3...,False,https://www.yelp.com/biz/okama-chula-vista?adj...,136,"[{'alias': 'sushi', 'title': 'Sushi Bars'}, {'...",4.5,"{'latitude': 32.62000720019226, 'longitude': -...",[],$$,"{'address1': '1620 Millenia Ave', 'address2': ...",16199476988,(619) 947-6988,6389.328211
1,jchAs_r0UQlrvw43mm5EcA,izakaya-naruto-chula-vista,Izakaya Naruto,https://s3-media3.fl.yelpcdn.com/bphoto/CLO81W...,False,https://www.yelp.com/biz/izakaya-naruto-chula-...,889,"[{'alias': 'sushi', 'title': 'Sushi Bars'}, {'...",4.5,"{'latitude': 32.6094521359027, 'longitude': -1...",[pickup],$$,"{'address1': '1216 3rd Ave', 'address2': None,...",16198822880,(619) 882-2880,8568.806263
2,X3DgkjWqQWDezoSIVvgArw,menya-ultra-san-diego-san-diego,Menya Ultra - San Diego,https://s3-media2.fl.yelpcdn.com/bphoto/eMolMO...,False,https://www.yelp.com/biz/menya-ultra-san-diego...,1898,"[{'alias': 'ramen', 'title': 'Ramen'}, {'alias...",4.5,"{'latitude': 32.83231, 'longitude': -117.14709}","[delivery, pickup]",$$,"{'address1': '8199 Clairemont Mesa Blvd', 'add...",18585712010,(858) 571-2010,32981.167095
3,uJbBxJnGhXkAXtLCmxrszg,menya-ultra-mira-mesa-san-diego,Menya Ultra - Mira Mesa,https://s3-media2.fl.yelpcdn.com/bphoto/zMTCp9...,False,https://www.yelp.com/biz/menya-ultra-mira-mesa...,934,"[{'alias': 'ramen', 'title': 'Ramen'}, {'alias...",4.0,"{'latitude': 32.9112446910774, 'longitude': -1...","[delivery, pickup]",$$,"{'address1': '8141 Mira Mesa Blvd', 'address2'...",18583972247,(858) 397-2247,41041.316122
4,WNwwWXy9n-BlD_yxWgugQg,yoshi-ramen-tijuana,Yoshi Ramen,https://s3-media1.fl.yelpcdn.com/bphoto/1Xpph_...,False,https://www.yelp.com/biz/yoshi-ramen-tijuana?a...,22,"[{'alias': 'ramen', 'title': 'Ramen'}]",4.5,"{'latitude': 32.533723, 'longitude': -117.036719}",[],$$,"{'address1': 'Av. Revolución 942', 'address2':...",526642101886,+52 664 210 1886,5539.017023


Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
533,cXcy4BuADVL5w8VfwS4k3Q,7-eleven-san-diego-30,7-Eleven,https://s3-media2.fl.yelpcdn.com/bphoto/TguW_o...,False,https://www.yelp.com/biz/7-eleven-san-diego-30...,26,"[{'alias': 'convenience', 'title': 'Convenienc...",2.5,"{'latitude': 32.7737573, 'longitude': -117.175...","[delivery, pickup]",$,"{'address1': '6615 Linda Vista Rd', 'address2'...",18582790994,(858) 279-0994,28732.785175
534,c-xPzvQdr-YJZ7zETMPhZQ,iceskimo-san-diego-4,Iceskimo,https://s3-media1.fl.yelpcdn.com/bphoto/MHvloN...,False,https://www.yelp.com/biz/iceskimo-san-diego-4?...,1033,"[{'alias': 'icecream', 'title': 'Ice Cream & F...",4.5,"{'latitude': 32.8239, 'longitude': -117.15433}",[delivery],$$,"{'address1': '4609 Convoy St', 'address2': 'St...",18582161111,(858) 216-1111,32432.822497
535,DLpuHffsjIEGHReRg7HoHg,7-eleven-san-diego-34,7-Eleven,https://s3-media1.fl.yelpcdn.com/bphoto/seyRmI...,False,https://www.yelp.com/biz/7-eleven-san-diego-34...,35,"[{'alias': 'convenience', 'title': 'Convenienc...",3.5,"{'latitude': 32.83094, 'longitude': -117.13158}","[delivery, pickup]",$,"{'address1': '9187 Clairemont Mesa Blvd', 'add...",18588746629,(858) 874-6629,32175.932517
536,ey_7Xz0sMV2H6SrJOdSm9Q,veggie-grill-san-diego-5,Veggie Grill,https://s3-media3.fl.yelpcdn.com/bphoto/KpFfsD...,False,https://www.yelp.com/biz/veggie-grill-san-dieg...,701,"[{'alias': 'vegan', 'title': 'Vegan'}, {'alias...",4.0,"{'latitude': 32.869839765251676, 'longitude': ...","[delivery, pickup]",$$,"{'address1': '4353 La Jolla Village Dr', 'addr...",18584580031,(858) 458-0031,39534.56285
537,aaY_sdMzhfC6W3x1siXNRQ,starbucks-san-diego-43,Starbucks,https://s3-media3.fl.yelpcdn.com/bphoto/8h7JIC...,False,https://www.yelp.com/biz/starbucks-san-diego-4...,200,"[{'alias': 'coffee', 'title': 'Coffee & Tea'}]",3.0,"{'latitude': 32.830471, 'longitude': -117.153743}",[delivery],$$,"{'address1': '4898 Convoy St', 'address2': 'St...",18585600337,(858) 560-0337,33040.486378


In [16]:
# Count the rows whose 'id' value repeats that of an earlier row
final_df['id'].duplicated().sum()

0

In [17]:
# Write the final results to a gzip-compressed CSV, leaving out the index column
final_df.to_csv(
    'Data/final_results_ramen.csv.gz',
    compression='gzip',
    index=False,
)