In [1]:
# Standard Imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Additional Imports
import os, json, math, time
from yelpapi import YelpAPI
from tqdm.notebook import tqdm_notebook

In [3]:
# Load API Credentials
# The key file is looked up under the current user's home directory
# (~/.secret/yelp_api.json) instead of one machine's absolute path, so the
# notebook is not tied to a single user account.
credentials_path = os.path.join(os.path.expanduser('~'), '.secret', 'yelp_api.json')
with open(credentials_path) as f:
    # The JSON file is expected to hold the key under 'api-key' (read below).
    login = json.load(f)
# Instantiate YelpAPI Variable
yelp_api = YelpAPI(login['api-key'], timeout_s=5.0)

In [38]:
# Search parameters reused by every Yelp request in this notebook.
# Define them (and the output filename) before making the first call.
TERM = "pizza"
LOCATION = "Washington DC,DC,20001"

In [39]:
# File where search results accumulate while paging through the API.
# Plain string literal: there is nothing to interpolate, so no f-prefix.
JSON_FILE = "Data/results_in_progress_pizza_DC.json"

In [40]:
## Make sure the results-in-progress file is on disk before paging the API
file_exists = os.path.isfile(JSON_FILE)

if file_exists:
    ## Nothing to create; leave the existing contents alone
    print(f"[i] {JSON_FILE} already exists.")
else:
    ## Create the parent folder first when JSON_FILE includes one
    folder = os.path.dirname(JSON_FILE)
    if folder:
        os.makedirs(folder, exist_ok=True)

    ## Start the file off as an empty JSON list
    print(f"[i] {JSON_FILE} not found. Saving empty list to file.")
    with open(JSON_FILE, 'w') as f:
        json.dump([], f)

[i] Data/results_in_progress_pizza_DC.json not found. Saving empty list to file.


In [43]:
# Getting the length needed for efficient extraction.
# A single initial query reports how many businesses match in total and how
# many come back per page.
# (The variable keeps its original spelling so any other cell that refers to
# `lenght_results` continues to work.)
lenght_results = yelp_api.search_query(location=LOCATION,
                                       term=TERM)
total = lenght_results['total']
print(f'Total Businesses: {total}')
results_per_page = len(lenght_results['businesses'])
print(f'Total Businesses per page: {results_per_page}')

# Use math.ceil to round up for the total number of pages of results.
# If the first page comes back empty there is nothing to page through; guard
# that case so it yields zero pages instead of raising ZeroDivisionError.
n_pages = math.ceil(total / results_per_page) if results_per_page else 0
print(f'Total number of Pages Needed: {n_pages}')

Total Businesses: 788
Total Businesses per page: 20
Total number of Pages Needed: 40


In [44]:
## Read the results-in-progress file once; it may already hold pages saved by
## an earlier (possibly interrupted) run.
with open(JSON_FILE, 'r') as f:
    previous_results = json.load(f)

## Only request the pages that are still missing, so re-running this cell
## resumes the download instead of paging past `total`.
n_missing = max(total - len(previous_results), 0)
pages_remaining = math.ceil(n_missing / results_per_page) if results_per_page else 0
# NOTE(review): the Yelp API limits how deep offset paging can go -- confirm
# the current cap in the API docs before using this with a much larger `total`.

for page in tqdm_notebook(range(pages_remaining)):
    ## brief pause between requests to avoid hammering the API
    time.sleep(.2)
    ## the number of results saved so far is the OFFSET of the next page
    n_results = len(previous_results)
    results = yelp_api.search_query(location=LOCATION,
                                    term=TERM,
                                    offset=n_results)

    ## an empty page means the API has nothing more to return; stop early
    new_businesses = results['businesses']
    if not new_businesses:
        break

    ## append new results and save to file after every page, so an
    ## interruption loses at most the page in flight
    previous_results.extend(new_businesses)
    with open(JSON_FILE, 'w') as f:
        json.dump(previous_results, f)

  0%|          | 0/40 [00:00<?, ?it/s]

In [45]:
# Read the accumulated API results back from disk into a DataFrame
final_df = pd.read_json(JSON_FILE)

# Preview the first and the last rows
first_rows = final_df.head()
last_rows = final_df.tail()
display(first_rows, last_rows)

Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
0,2GWtmU5XZCes8LMisE2zVA,wiseguy-pizza-washington-3,Wiseguy Pizza,https://s3-media1.fl.yelpcdn.com/bphoto/tMwsFv...,False,https://www.yelp.com/biz/wiseguy-pizza-washing...,1207,"[{'alias': 'pizza', 'title': 'Pizza'}]",4.5,"{'latitude': 38.8996423212243, 'longitude': -7...","[delivery, pickup]",$,"{'address1': '300 Massachusetts Ave NW', 'addr...",12024087800,(202) 408-7800,1160.153369
1,QSXp7oMGcmSYOupDHb-ZXg,kouzina-angelinas-pizzeria-shaw-dc-washington-10,Kouzina Angelinas Pizzeria - Shaw DC,https://s3-media3.fl.yelpcdn.com/bphoto/tZwyvC...,False,https://www.yelp.com/biz/kouzina-angelinas-piz...,128,"[{'alias': 'pizza', 'title': 'Pizza'}]",4.0,"{'latitude': 38.9167436222272, 'longitude': -7...","[delivery, pickup]",$,"{'address1': '725 Florida Ave NW', 'address2':...",12025887448,(202) 588-7448,869.419229
2,ukk2Ko-J730EfQFqWFX0pQ,andys-pizza-shaw-washington,Andy's Pizza - Shaw,https://s3-media1.fl.yelpcdn.com/bphoto/smyHG6...,False,https://www.yelp.com/biz/andys-pizza-shaw-wash...,137,"[{'alias': 'pizza', 'title': 'Pizza'}]",4.5,"{'latitude': 38.917545996101545, 'longitude': ...","[delivery, pickup]",,"{'address1': '2016 9th St NW', 'address2': Non...",12025062043,(202) 506-2043,1015.092289
3,g3mIdtLBk9eoHrFGUGnqYw,bacio-pizzeria-washington,Bacio Pizzeria,https://s3-media4.fl.yelpcdn.com/bphoto/VX_RXL...,False,https://www.yelp.com/biz/bacio-pizzeria-washin...,314,"[{'alias': 'pizza', 'title': 'Pizza'}, {'alias...",4.5,"{'latitude': 38.91494, 'longitude': -77.011701}","[delivery, pickup]",$$,"{'address1': '81 Seaton Pl NW', 'address2': ''...",12022322246,(202) 232-2246,749.410591
4,zV4lywalabn7CDRbUoRhMw,pi-pizzeria-washington-3,Pi Pizzeria,https://s3-media2.fl.yelpcdn.com/bphoto/exCEqB...,False,https://www.yelp.com/biz/pi-pizzeria-washingto...,1831,"[{'alias': 'pizza', 'title': 'Pizza'}, {'alias...",4.0,"{'latitude': 38.897154, 'longitude': -77.024709}","[delivery, pickup]",$$,"{'address1': '910 F St NW', 'address2': '', 'a...",12023935484,(202) 393-5484,1555.491406


Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
783,n5xPYURRXHNIoObqvPRuLA,subway-washington-175,Subway,https://s3-media2.fl.yelpcdn.com/bphoto/g-NI58...,False,https://www.yelp.com/biz/subway-washington-175...,12,"[{'alias': 'sandwiches', 'title': 'Sandwiches'}]",2.0,"{'latitude': 38.9362026306009, 'longitude': -7...","[pickup, delivery]",$,"{'address1': '3520 Connecticut Ave NW', 'addre...",12022372424,(202) 237-2424,4636.285135
784,XGKMeelsF3yPbY-jIZxZRA,grand-central-washington,Grand Central,https://s3-media1.fl.yelpcdn.com/bphoto/iQ44GA...,False,https://www.yelp.com/biz/grand-central-washing...,213,"[{'alias': 'bars', 'title': 'Bars'}, {'alias':...",3.0,"{'latitude': 38.92098621688569, 'longitude': -...",[delivery],$$,"{'address1': '2447 18th St NW', 'address2': ''...",12029861742,(202) 986-1742,2489.071544
785,tw7Jj6-NxYBKK2h5NVklug,potbelly-sandwich-shop-washington-36,Potbelly Sandwich Shop,https://s3-media1.fl.yelpcdn.com/bphoto/7x_h57...,False,https://www.yelp.com/biz/potbelly-sandwich-sho...,55,"[{'alias': 'sandwiches', 'title': 'Sandwiches'...",3.5,"{'latitude': 38.9036068371077, 'longitude': -7...","[pickup, delivery]",$,"{'address1': '1900 L St NW', 'address2': '', '...",12024780026,(202) 478-0026,2368.598603
786,xy2BRUHFGnOlRLfzxtcnzw,jyoti-indian-cuisine-washington,Jyoti Indian Cuisine,https://s3-media2.fl.yelpcdn.com/bphoto/_4UiZt...,False,https://www.yelp.com/biz/jyoti-indian-cuisine-...,440,"[{'alias': 'indpak', 'title': 'Indian'}]",3.5,"{'latitude': 38.92138, 'longitude': -77.04192}","[pickup, delivery]",$$,"{'address1': '2433 18th St NW', 'address2': ''...",12025185892,(202) 518-5892,2454.555915
787,tuqWIWbAIjylU4VIh7dCyw,firehook-bakery-washington-4,Firehook Bakery,https://s3-media2.fl.yelpcdn.com/bphoto/-z5Lzv...,False,https://www.yelp.com/biz/firehook-bakery-washi...,305,"[{'alias': 'bakeries', 'title': 'Bakeries'}, {...",3.5,"{'latitude': 38.9112702, 'longitude': -77.043817}",[delivery],$,"{'address1': '1909 Q St NW', 'address2': '', '...",12026847400,(202) 684-7400,2273.3525


In [46]:
# Dimensions of the loaded results as (rows, columns)
final_df.shape

(788, 16)

In [51]:
# Count rows whose business id already appeared in an earlier row
final_df['id'].duplicated().sum()

5

In [53]:
# Remove rows with a repeated business id, keeping the first occurrence of each
final_df = final_df.drop_duplicates(subset=['id'], keep='first')

In [54]:
# Re-check the dimensions as (rows, columns) after the duplicate-id cleanup
final_df.shape

(783, 16)

In [55]:
# Write the final table to CSV, leaving the DataFrame index out of the file
final_df.to_csv(path_or_buf='Data/final_results_pizza_DC.csv', index=False)