In [1]:
# Standard Imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Additional Imports
import os, json, math, time
from yelpapi import YelpAPI
from tqdm.notebook import tqdm_notebook

In [2]:
# Locate the credentials file under the current user's home directory rather
# than a hard-coded absolute path, so the notebook is not tied to one machine.
# For the original author this resolves to the same file as before.
CREDENTIALS_PATH = os.path.join(os.path.expanduser('~'), '.secret', 'yelp_api.json')

# The JSON file is expected to hold the key under 'api-key' (read below).
with open(CREDENTIALS_PATH) as f:
    login = json.load(f)

# Client used for every search request in this notebook; 5 second timeout.
yelp_api = YelpAPI(login['api-key'], timeout_s=5.0)


In [3]:
# Search parameters shared by every Yelp request in this notebook
LOCATION = "Dearborn,MI"   # passed as location= to search_query
TERM = "Fried Chicken"     # passed as term= to search_query

In [4]:
# Path of the working file that later cells read from and write search results to.
# NOTE(review): "firedchicken" looks like a typo for "friedchicken"; left as-is
# because changing the literal would point the notebook at a different file.
JSON_FILE = "Data/results_in_progress_MI_firedchicken.json"
# Bare expression so the notebook echoes the path as the cell output.
JSON_FILE

'Data/results_in_progress_MI_firedchicken.json'

In [5]:
## Make sure the in-progress results file is present before any paging starts
file_exists = os.path.isfile(JSON_FILE)

if file_exists:
    ## Nothing to create; just report it
    print(f"[i] {JSON_FILE} already exists.")
else:
    ## Build the parent directory first when the path names one
    folder = os.path.dirname(JSON_FILE)
    if folder:
        os.makedirs(folder, exist_ok=True)

    ## Tell the user, then seed the file with an empty JSON list
    print(f'[i] {JSON_FILE} not found. Saving empty list to file.')
    with open(JSON_FILE, 'w') as f:
        json.dump([], f)

[i] Data/results_in_progress_MI_firedchicken.json already exists.


In [11]:
def create_json_file(JSON_FILE, delete_if_exists=False):
    """Ensure JSON_FILE exists, seeding it with an empty JSON list when absent.

    Args:
        JSON_FILE: Path of the JSON file to check or create. Any missing
            parent folder in the path is created first.
        delete_if_exists: When truthy, an existing file is removed and
            replaced by a fresh file holding an empty list. When falsy
            (the default), an existing file is left untouched.

    Returns:
        None. Progress messages are printed to stdout.
    """
    if os.path.isfile(JSON_FILE):
        if not delete_if_exists:
            print(f"[i] {JSON_FILE} already exists.")
            return
        print(f"[!] {JSON_FILE} already exists. Deleting previous file...")
        os.remove(JSON_FILE)
        # Fall through to the creation steps below (no recursive call needed).

    print(f"[i] {JSON_FILE} not found. Saving empty list to new file.")

    # Only create a folder when the path actually contains one.
    folder = os.path.dirname(JSON_FILE)
    if folder:
        os.makedirs(folder, exist_ok=True)

    # Write the empty list regardless of whether a folder was involved;
    # previously a bare filename (no folder) was never created.
    with open(JSON_FILE, 'w') as f:
        json.dump([], f)

In [12]:
## Start from a clean slate: delete any earlier results file and write a new empty one
create_json_file(JSON_FILE, delete_if_exists=True)

## Read back what is stored so far; its length is used as the request offset
with open(JSON_FILE, 'r') as f:
    previous_results = json.load(f)
n_results = len(previous_results)
print(f'- {n_results} previous results found.')

## First request through yelp_api.search_query, starting at that offset
results = yelp_api.search_query(
    location=LOCATION,
    term=TERM,
    offset=n_results,
)

[!] Data/results_in_progress_MI_firedchicken.json already exists. Deleting previous file...
[i] Data/results_in_progress_MI_firedchicken.json not found. Saving empty list to new file.
- 0 previous results found.


In [14]:
## How many results total?
## Reads the 'total' field of the search response and echoes it as the cell output.
total_results = results['total']
total_results

567

In [8]:
## How many results total?
## NOTE(review): this cell repeats the one directly above it verbatim and carries an
## out-of-sequence execution count (In [8]); it looks like a leftover - consider deleting.
total_results = results['total']
total_results

567

In [15]:
## How many did we get the details for?
## Counts the business records contained in this single response; later cells
## use the value as the page size when computing pages and checking the cap.
results_per_page = len(results['businesses'])
results_per_page

20

In [16]:
# Number of further requests needed to page through everything not yet saved.
# (math is already imported in the notebook's first cell, so it is not re-imported here.)
remaining_results = results['total'] - n_results

# Round up so a final partial page still gets its own request. When the first
# response held no businesses the page size is 0; report 0 pages in that case
# instead of raising ZeroDivisionError.
n_pages = math.ceil(remaining_results / results_per_page) if results_per_page else 0
n_pages

29

In [17]:
for i in tqdm_notebook(range(1, n_pages + 1)):

    ## Reload the in-progress file so the offset always reflects what is on disk
    with open(JSON_FILE, 'r') as f:
        previous_results = json.load(f)
    ## The number of saved businesses is the offset for the next request
    n_results = len(previous_results)

    ## The cap checked here is on the number of results, not on API calls.
    ## NOTE(review): 1000 presumably mirrors Yelp's per-search result limit - confirm against the API docs.
    if (n_results + results_per_page) > 1000:
        print('Reached the 1000-result limit for a single search. Stopping loop.')
        break

    ## use n_results as the OFFSET
    results = yelp_api.search_query(location=LOCATION,
                                    term=TERM,
                                    offset=n_results)

    ## An empty page adds nothing, so the offset would never advance and every
    ## remaining iteration would repeat this same request. Stop instead.
    new_businesses = results['businesses']
    if not new_businesses:
        print(f'[i] No businesses returned at offset {n_results}. Stopping loop.')
        break

    ## append new results and save to file
    previous_results.extend(new_businesses)
    with open(JSON_FILE, 'w') as f:
        json.dump(previous_results, f)

    ## Short pause between consecutive requests
    time.sleep(.2)

  0%|          | 0/29 [00:00<?, ?it/s]

In [18]:
 # Read the accumulated results file into a DataFrame
final_df = pd.read_json(JSON_FILE)

# Preview the first and last rows as two separate rich outputs
display(final_df.head())
display(final_df.tail())

Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
0,Cu3KC6x3WUFTzcz32TkbfA,ex-wifes-famous-chicken-dearborn-heights,Ex-Wife's Famous Chicken,https://s3-media4.fl.yelpcdn.com/bphoto/zuWl_g...,False,https://www.yelp.com/biz/ex-wifes-famous-chick...,48,"[{'alias': 'hotdogs', 'title': 'Fast Food'}, {...",4.0,"{'latitude': 42.32736, 'longitude': -83.25863}",[],$$,"{'address1': '22444 Ford Rd', 'address2': None...",13138801180,(313) 880-1180,4003.498796
1,7orZgupOKK9I7fYekdhIhA,daves-hot-chicken-dearborn,Dave's Hot Chicken,https://s3-media2.fl.yelpcdn.com/bphoto/QAKdyn...,False,https://www.yelp.com/biz/daves-hot-chicken-dea...,89,"[{'alias': 'halal', 'title': 'Halal'}, {'alias...",4.0,"{'latitude': 42.30547, 'longitude': -83.2485}","[delivery, pickup]",$$,"{'address1': '22208 Michigan Ave', 'address2':...",13133800699,(313) 380-0699,3046.090322
2,CYj6Xynv7CwUZPSYoUzCVw,mr-chicken-dearborn-heights,Mr Chicken,https://s3-media3.fl.yelpcdn.com/bphoto/tq_4J1...,False,https://www.yelp.com/biz/mr-chicken-dearborn-h...,92,"[{'alias': 'tradamerican', 'title': 'American ...",4.0,"{'latitude': 42.33103, 'longitude': -83.27218}",[delivery],$$,"{'address1': '6000 N Telegraph Rd', 'address2'...",13132770100,(313) 277-0100,5204.792531
3,H-Ek50bSn5KCBQ1_tVmQAA,gus-world-famous-fried-chicken-westland,Gus' World Famous Fried Chicken,https://s3-media4.fl.yelpcdn.com/bphoto/Lr2s0W...,False,https://www.yelp.com/biz/gus-world-famous-frie...,296,"[{'alias': 'southern', 'title': 'Southern'}, {...",4.0,"{'latitude': 42.32409, 'longitude': -83.39072}","[delivery, pickup]",$$,"{'address1': '35505 Ford Rd', 'address2': '', ...",17347286170,(734) 728-6170,14632.818763
4,ejoVEHzo2b0VNpa90uK44g,fat-daddys-hot-chicken-and-waffles-riverview,Fat Daddy's Hot Chicken & Waffles,https://s3-media1.fl.yelpcdn.com/bphoto/u_LlbE...,False,https://www.yelp.com/biz/fat-daddys-hot-chicke...,139,"[{'alias': 'waffles', 'title': 'Waffles'}, {'a...",4.5,"{'latitude': 42.174526, 'longitude': -83.188452}","[delivery, pickup]",,"{'address1': '18283 Fort St', 'address2': '', ...",17342883051,(734) 288-3051,15678.060298


Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
562,RcfSRDYNtRT050paSDCnzw,leos-coney-island-riverview,Leo's Coney Island,https://s3-media4.fl.yelpcdn.com/bphoto/-g1eJ3...,False,https://www.yelp.com/biz/leos-coney-island-riv...,44,"[{'alias': 'diners', 'title': 'Diners'}]",2.5,"{'latitude': 42.168674, 'longitude': -83.189834}","[pickup, delivery]",$,"{'address1': '19230 Fort St', 'address2': '', ...",17347207160,(734) 720-7160,16310.726249
563,zmjbSJiG1x16f1NtTbgy6Q,hong-kong-one-ferndale,Hong Kong One,https://s3-media3.fl.yelpcdn.com/bphoto/fFDNDB...,False,https://www.yelp.com/biz/hong-kong-one-ferndal...,38,"[{'alias': 'cantonese', 'title': 'Cantonese'},...",3.0,"{'latitude': 42.4604994, 'longitude': -83.143901}","[pickup, delivery]",$,"{'address1': '760 W 9 Mile Rd', 'address2': ''...",12485447100,(248) 544-7100,17232.850157
564,fvq6GctVWHECsH3Qr7Cy0w,grand-trunk-pub-detroit-2,Grand Trunk Pub,https://s3-media1.fl.yelpcdn.com/bphoto/mc-KAb...,False,https://www.yelp.com/biz/grand-trunk-pub-detro...,630,"[{'alias': 'pubs', 'title': 'Pubs'}, {'alias':...",3.5,"{'latitude': 42.33064, 'longitude': -83.04554}","[pickup, delivery]",$$,"{'address1': '612 Woodward Ave', 'address2': '...",13139613043,(313) 961-3043,13907.983326
565,6DcFy9QWs72IkwTUnwlMkg,china-inn-livonia,China Inn,https://s3-media2.fl.yelpcdn.com/bphoto/7c2OWJ...,False,https://www.yelp.com/biz/china-inn-livonia?adj...,74,"[{'alias': 'chinese', 'title': 'Chinese'}, {'a...",2.5,"{'latitude': 42.3962102, 'longitude': -83.4142...","[pickup, delivery]",$,"{'address1': '37645 5 Mile Rd', 'address2': ''...",17344626708,(734) 462-6708,18833.737401
566,FGPQd8GdG1zy7X0r1kaDIg,inyo-ferndale,Inyo,https://s3-media3.fl.yelpcdn.com/bphoto/tMqSlo...,False,https://www.yelp.com/biz/inyo-ferndale?adjust_...,428,"[{'alias': 'asianfusion', 'title': 'Asian Fusi...",3.5,"{'latitude': 42.461029663682, 'longitude': -83...",[delivery],$$,"{'address1': '22871 Woodward Ave', 'address2':...",12485439500,(248) 543-9500,17540.390658


In [19]:
# Count rows whose 'id' repeats an earlier row's id (0 means every id is unique)
final_df['id'].duplicated().sum()

0

In [20]:
# Write the final table as a gzip-compressed CSV, leaving out the row index
final_df.to_csv(
    'Data/final_results_MI_firedchicken.csv.gz',
    index=False,
    compression='gzip',
)