In [22]:
# Standard Imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Additional Imports
import os, json, math, time
from yelpapi import YelpAPI
from tqdm.notebook import tqdm_notebook



## Credentials and Accessing the API

In [23]:
# Load API Credentials
# The credentials file is kept outside the notebook. Set the YELP_API_JSON
# environment variable to point at your own copy; when it is not set, the
# original hard-coded location is used so existing setups keep working.
credentials_path = os.environ.get(
    'YELP_API_JSON',
    '/Users/Jeancarlo Garcia/Documents/Data Enrichment/Wk14-Local-Workflow-practice/yelp_api.json')
with open(credentials_path) as f:
    login = json.load(f)
# Instantiate YelpAPI Variable using the 'api-key' entry of the credentials file
yelp_api = YelpAPI(login['api-key'], timeout_s=5.0)

## Define Search

In [40]:
# Search parameters, fixed up front so every API call below uses the same query
TERM = 'Shrimp'
LOCATION = 'Yonkers, NY,10701'

In [42]:
# Specifying JSON_FILE filename (can include a folder)
# The name is built from TERM so each search term gets its own results file
# (lower-cased, spaces replaced by underscores): 'Shrimp' -> '..._shrimp.json'
JSON_FILE = f"Core Assignments/results_in_progress_{TERM.lower().replace(' ', '_')}.json"
JSON_FILE

'Core Assignments/results_in_progress_shrimp.json'

## Check if our JSON_FILE already exists

This will prevent us from accidentally overwriting an existing file.

If it doesn't exist:

Create any folders needed for the file path.
Save an empty list as JSON_FILE.

In [43]:
## Look for an existing results file before touching anything
file_exists = os.path.isfile(JSON_FILE)

if file_exists:
    ## Leave an existing file alone; just report it
    print(f"[i] {JSON_FILE} already exists.")
else:
    ## Make sure the parent folder exists (only when the path includes one)
    folder = os.path.dirname(JSON_FILE)
    if folder:
        os.makedirs(folder, exist_ok=True)

    ## Tell the user, then seed the file with an empty JSON list
    print(f"[i] {JSON_FILE} not found. Saving empty list to file.")
    with open(JSON_FILE, 'w') as f:
        json.dump([], f)

[i] Core Assignments/results_in_progress_shrimp.json not found. Saving empty list to file.


## Determine how many results are already in the file

Load the results file to determine the # of results previously retrieved. Since the file is recently created, you would expect it to be empty.  

Use this as our offset parameter for our API call.  Even if this is the first API call, and the number is 0, we want to define "n_results" based on the length of "previous_results." 

In [44]:
## Read back whatever has been saved so far
with open(JSON_FILE) as f:
    previous_results = json.load(f)

## The number of saved results is the offset for the next API call
n_results = len(previous_results)
print(f'- {n_results} previous results found.')

- 0 previous results found.


In [45]:
# first API call: search for TERM around LOCATION, skipping the n_results entries already saved
results = yelp_api.search_query(location=LOCATION, term=TERM, offset=n_results)
results.keys()

dict_keys(['businesses', 'total', 'region'])

In [46]:
## How many results total?
# 'total' is the match count reported in the API response,
# not the number of businesses returned on this page
total_results = results['total']
total_results

977

In [47]:
## How many did we get the details for?
# number of business records in this single response; used below as the page size
results_per_page = len(results['businesses'])
results_per_page

20

977 results and 20 results per page.  

977 /20 = 48.85

In [48]:
# Number of pages needed to cover every result beyond the n_results already
# saved, rounded up so a partial last page is counted. This count includes the
# page that was just fetched above. (math is already imported in the first cell.)
remaining_results = results['total'] - n_results
# An empty first page would make the page size 0; treat that as "no pages"
# instead of raising ZeroDivisionError.
n_pages = math.ceil(remaining_results / results_per_page) if results_per_page else 0
n_pages

49

## Add this page of results to .json file

Our API returns our results in JSON format, with the businesses in a list of dictionaries. We will append the first page of businesses to our `previous_results` list and save it back to the file.

In [49]:
# add this page's businesses to the running list, then write the whole list back to disk
previous_results += results['businesses']
with open(JSON_FILE, 'w') as f:
    json.dump(previous_results, f)

In [50]:
from tqdm.notebook import tqdm_notebook
import time
# dry run of the progress bar: one tick per page, with a 200 ms pause on each tick
for i in tqdm_notebook(range(0, n_pages)):
    time.sleep(0.2)

  0%|          | 0/49 [00:00<?, ?it/s]

## For Loop to call each page

In [51]:
for i in tqdm_notebook(range(1, n_pages + 1)):
    # 200 ms pause between consecutive API calls
    time.sleep(.2)

    ## Read in the results-in-progress file; its length is the offset for the next page
    with open(JSON_FILE, 'r') as f:
        previous_results = json.load(f)
    n_results = len(previous_results)

    ## n_pages also counts the page fetched before this loop, so the loop can
    ## run longer than needed. Stop once the saved results reach the total
    ## reported by the most recent response instead of issuing another call.
    if n_results >= results['total']:
        break

    ## use n_results as the OFFSET
    results = yelp_api.search_query(location=LOCATION,
                                    term=TERM,
                                    offset=n_results)

    ## An empty page means the offset would not advance, so every further
    ## request would be identical - stop here.
    new_businesses = results['businesses']
    if not new_businesses:
        break

    ## append new results and save to file so progress survives an interruption
    previous_results.extend(new_businesses)
    with open(JSON_FILE, 'w') as f:
        json.dump(previous_results, f)

  0%|          | 0/49 [00:00<?, ?it/s]

## Convert .json to dataframe

Load in the "results in progress" JSON file into a DataFrame:

In [52]:
# read the accumulated results back in as a DataFrame and preview both ends of it
final_df = pd.read_json(JSON_FILE)
display(final_df.head(5), final_df.tail(5))

Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
0,oC-BU83Ag-vlVBcjlwxUZw,pepes-place-new-rochelle,Pepe's Place,https://s3-media2.fl.yelpcdn.com/bphoto/eCZjwI...,False,https://www.yelp.com/biz/pepes-place-new-roche...,412,"[{'alias': 'seafood', 'title': 'Seafood'}, {'a...",4.5,"{'latitude': 40.9209170900794, 'longitude': -7...","[delivery, pickup]",$$,"{'address1': '560 North Ave', 'address2': '', ...",19146327373,(914) 632-7373,8348.369289
1,P0KtNoGAK3G_s43V2qzQfA,off-the-hook-of-yonkers-yonkers,Off The Hook Of Yonkers,https://s3-media3.fl.yelpcdn.com/bphoto/euJ8cB...,False,https://www.yelp.com/biz/off-the-hook-of-yonke...,5,"[{'alias': 'seafood', 'title': 'Seafood'}, {'a...",4.5,"{'latitude': 40.93576, 'longitude': -73.89841}","[delivery, pickup]",,"{'address1': '47 North Broadway', 'address2': ...",19144573371,(914) 457-3371,2238.342639
2,NZzsPPU0FpkFz9hDmq9ZkQ,lusitania-seafood-restaurant-yonkers,Lusitania Seafood Restaurant,https://s3-media3.fl.yelpcdn.com/bphoto/A1-kxI...,False,https://www.yelp.com/biz/lusitania-seafood-res...,38,"[{'alias': 'bars', 'title': 'Bars'}, {'alias':...",4.0,"{'latitude': 40.947667, 'longitude': -73.8759205}","[delivery, pickup]",$$,"{'address1': '15 Lockwood Ave', 'address2': ''...",19149694600,(914) 969-4600,370.258213
3,InC56Q0l9BjdOaCWtD3xAQ,jjr-highridge-fishery-yonkers-2,JJR Highridge Fishery,https://s3-media3.fl.yelpcdn.com/bphoto/fFD0vH...,False,https://www.yelp.com/biz/jjr-highridge-fishery...,41,"[{'alias': 'seafood', 'title': 'Seafood'}, {'a...",4.0,"{'latitude': 40.961373219417446, 'longitude': ...","[delivery, pickup]",$$,"{'address1': '1791 Central Park Ave', 'address...",19143373775,(914) 337-3775,3376.655022
4,sDqDQgV4VBIElhjxMplmow,the-bayou-restaurant-mount-vernon,The Bayou Restaurant,https://s3-media3.fl.yelpcdn.com/bphoto/1imehj...,False,https://www.yelp.com/biz/the-bayou-restaurant-...,296,"[{'alias': 'cajun', 'title': 'Cajun/Creole'}, ...",3.5,"{'latitude': 40.9261252, 'longitude': -73.8357...",[delivery],$$,"{'address1': '580 Gramatan Ave', 'address2': '...",19146682634,(914) 668-2634,4503.962464


Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
967,Cs5-wiJ1F9zgU1LpBKFlLg,the-brick-oven-pizza-dobbs-ferry,The Brick Oven Pizza,https://s3-media4.fl.yelpcdn.com/bphoto/95UiJ1...,False,https://www.yelp.com/biz/the-brick-oven-pizza-...,39,"[{'alias': 'pizza', 'title': 'Pizza'}, {'alias...",3.0,"{'latitude': 41.016168, 'longitude': -73.874191}","[delivery, pickup]",$$,"{'address1': '147 Main St', 'address2': '', 'a...",19146936259,(914) 693-6259,7385.272058
968,MKtgdg_YSMGF06OpzBabkA,palmers-crossing-tenafly,Palmers Crossing,https://s3-media2.fl.yelpcdn.com/bphoto/oKSo7c...,False,https://www.yelp.com/biz/palmers-crossing-tena...,39,"[{'alias': 'newamerican', 'title': 'American (...",3.0,"{'latitude': 40.92221, 'longitude': -73.9649699}",[delivery],$$,"{'address1': '145 Dean Dr', 'address2': '', 'a...",12015674800,(201) 567-4800,7782.876137
969,XAS91xoNLi8AcIbdwreQHQ,new-garden-take-out-hartsdale,New Garden take-out,https://s3-media1.fl.yelpcdn.com/bphoto/BuS5IA...,False,https://www.yelp.com/biz/new-garden-take-out-h...,35,"[{'alias': 'chinese', 'title': 'Chinese'}]",3.5,"{'latitude': 41.0185457020998, 'longitude': -7...",[delivery],$,"{'address1': '17 E Hartsdale Ave', 'address2':...",19142889788,(914) 288-9788,10255.452352
970,kN1lAWAFnsqs9jTngYNBZg,well-being-sushi-dumont,Well-Being Sushi,https://s3-media3.fl.yelpcdn.com/bphoto/-jdJr0...,False,https://www.yelp.com/biz/well-being-sushi-dumo...,199,"[{'alias': 'sushi', 'title': 'Sushi Bars'}, {'...",4.0,"{'latitude': 40.9419908299266, 'longitude': -7...",[delivery],$$,"{'address1': '10 Knickerbocker Rd', 'address2'...",12013855300,(201) 385-5300,8247.690317
971,FdyvHLxlrjpYM0GSJxuQrg,tenafly-classic-diner-tenafly-3,Tenafly Classic Diner,https://s3-media2.fl.yelpcdn.com/bphoto/q5-X09...,False,https://www.yelp.com/biz/tenafly-classic-diner...,269,"[{'alias': 'diners', 'title': 'Diners'}, {'ali...",3.0,"{'latitude': 40.925537, 'longitude': -73.964866}","[delivery, pickup]",$$,"{'address1': '16 W Railroad Ave', 'address2': ...",12015675522,(201) 567-5522,7679.247834


## Check for duplicates

In [53]:
# count rows whose business id already appeared earlier in the frame
final_df['id'].duplicated().sum()

0

## Save the final DataFrame to a .csv

In [55]:
# write the final results to a plain (uncompressed) CSV file, leaving out the DataFrame index
final_df.to_csv(path_or_buf='Core Assignments/final_results_shrimp.csv', index=False)