In [20]:
# Standard Imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Additional Imports
import os, json, math, time
from yelpapi import YelpAPI
from tqdm.notebook import tqdm_notebook

In [21]:
# Load API credentials from a JSON file kept outside the project folder.
# The path is resolved relative to the current user's home directory so the
# notebook does not depend on one machine's absolute path; adjust it if your
# credentials file lives somewhere else.
with open(os.path.expanduser('~/.secret/yelp_api.json')) as f:
    login = json.load(f)
# Instantiate the Yelp API client with the stored key and a 5-second timeout
yelp_api = YelpAPI(login['api-key'], timeout_s=5.0)

In [22]:
# Search parameters passed to every Yelp API call in this notebook
LOCATION = 'renton'
# NOTE(review): 'appartments' looks like a misspelling of 'apartments'. The
# same spelling is used in the output file names, so confirm before changing it.
TERM = 'appartments'

In [23]:
# Path of the JSON file that holds the results while they are being collected.
# The path may include a folder; include the search terms in the file name.
JSON_FILE = "Data/results_in_progress_appartments_in_renton_washington.json"
# Echo the path as the cell output
JSON_FILE

'Data/results_in_progress_appartments_in_renton_washington.json'

In [24]:
def create_json_file(JSON_FILE, delete_if_exists=False):
    """Make sure JSON_FILE exists, creating it with an empty JSON list if needed.

    Parameters
    ----------
    JSON_FILE : str
        Path of the JSON file. It may include folders; missing folders are
        created.
    delete_if_exists : bool, default False
        If the file already exists: when True it is deleted and replaced by a
        new file containing an empty list; when False it is left untouched.

    Returns
    -------
    None
        The function only prints status messages and touches the file system.
    """
    if os.path.isfile(JSON_FILE):
        if not delete_if_exists:
            # Keep the existing file (and any results already saved in it)
            print(f"[i] {JSON_FILE} already exists.")
            return

        print(f"[!] {JSON_FILE} already exists. Deleting previous file...")
        os.remove(JSON_FILE)
        # Fall through to the creation step below instead of recursing

    ## Inform the user and save an empty list
    print(f"[i] {JSON_FILE} not found. Saving empty list to new file.")

    ## Create any needed folders (dirname is '' when no folder was given)
    folder = os.path.dirname(JSON_FILE)
    if folder:
        os.makedirs(folder, exist_ok=True)

    ## Save an empty list to start the json file
    with open(JSON_FILE, 'w') as f:
        json.dump([], f)
  
        

In [25]:
## Create a new empty json file (deleting the previous one if it exists)
create_json_file(JSON_FILE, delete_if_exists=True)

## Load previous results; their count is used as the offset for the API call
with open(JSON_FILE, 'r') as f:
    previous_results = json.load(f)

n_results = len(previous_results)
print(f'- {n_results} previous results found.')

# Use our yelp_api variable's search_query method to perform the API call.
# This response is only used here to work out how many pages are needed.
results = yelp_api.search_query(location=LOCATION,
                                term=TERM,
                                offset=n_results)

## How many results are there in total?
total_results = results['total']
## How many businesses did this single call return?
results_per_page = len(results['businesses'])

# Use math.ceil to round up so a partially filled last page is still counted.
# If the call returned no businesses there is nothing to page through; the
# guard also avoids a ZeroDivisionError in that case.
if results_per_page > 0:
    n_pages = math.ceil((total_results - n_results) / results_per_page)
else:
    n_pages = 0
n_pages



[!] Data/results_in_progress_appartments_in_renton_washington.json already exists. Deleting previous file...
[i] Data/results_in_progress_appartments_in_renton_washington.json not found. Saving empty list to new file.
- 0 previous results found.


2

In [26]:
for i in tqdm_notebook(range(1, n_pages + 1)):

    # Reload everything saved so far; its length is the offset for this call
    with open(JSON_FILE, 'r') as f:
        previous_results = json.load(f)
    n_results = len(previous_results)

    # Request the results that start right after the ones already saved
    results = yelp_api.search_query(location=LOCATION, term=TERM, offset=n_results)

    # Add the newly returned businesses and write the full list back to disk
    previous_results += results['businesses']
    with open(JSON_FILE, 'w') as f:
        json.dump(previous_results, f)

    # Pause for 200 ms before the next request
    time.sleep(0.2)



  0%|          | 0/2 [00:00<?, ?it/s]

In [27]:
# Read the saved results file into a DataFrame
final_df = pd.read_json(JSON_FILE)
# Preview the first rows, then the last rows
display(final_df.head())
display(final_df.tail())

Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,location,phone,display_phone,distance
0,AnErvo25lq0TpcbXq4G32w,keever-and-associates-bellevue,Keever and Associates,https://s3-media3.fl.yelpcdn.com/bphoto/WK1tA1...,False,https://www.yelp.com/biz/keever-and-associates...,1,"[{'alias': 'contractors', 'title': 'General Co...",5.0,"{'latitude': 47.610377, 'longitude': -122.2006...",[],"{'address1': '3320 168th Pl SE', 'address2': '...",12069470503,(206) 947-0503,14424.879447
1,4fknsMN2ar2BMXFN6dkPwg,the-broadway-building-seattle,The Broadway Building,https://s3-media3.fl.yelpcdn.com/bphoto/0pn4_X...,False,https://www.yelp.com/biz/the-broadway-building...,39,"[{'alias': 'apartments', 'title': 'Apartments'}]",4.0,"{'latitude': 47.6160185, 'longitude': -122.320...",[],"{'address1': '1641 Nagle Pl', 'address2': None...",12063231500,(206) 323-1500,18029.312012
2,MKoQeMeVsYgUTbFzeuJosQ,alaire-apartments-renton,Alaire Apartments,https://s3-media2.fl.yelpcdn.com/bphoto/gMpoFH...,False,https://www.yelp.com/biz/alaire-apartments-ren...,29,"[{'alias': 'apartments', 'title': 'Apartments'}]",2.0,"{'latitude': 47.47663867855423, 'longitude': -...",[],"{'address1': '510 Stevens Ave SW', 'address2':...",14254488375,(425) 448-8375,2847.821151
3,AwC3fQKE0_AdltECzoEjKA,harbor-steps-apartments-seattle-2,Harbor Steps Apartments,https://s3-media3.fl.yelpcdn.com/bphoto/GyTqFJ...,False,https://www.yelp.com/biz/harbor-steps-apartmen...,142,"[{'alias': 'apartments', 'title': 'Apartments'}]",3.0,"{'latitude': 47.6063434, 'longitude': -122.338...",[],"{'address1': '1221 1st Ave', 'address2': 'Ste ...",12066820800,(206) 682-0800,17966.956469
4,-8X9W99_hNSowq_LW0Guuw,harrington-square-apartments-renton,Harrington Square Apartments,https://s3-media2.fl.yelpcdn.com/bphoto/6W3VV7...,False,https://www.yelp.com/biz/harrington-square-apa...,45,"[{'alias': 'apartments', 'title': 'Apartments'}]",2.0,"{'latitude': 47.4979, 'longitude': -122.18094}",[],"{'address1': '950 Harrington Ave NE', 'address...",18335436944,(833) 543-6944,1940.253828


Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,location,phone,display_phone,distance
24,e-ib3NX88R7m8vQpelgemw,avalon-meydenbauer-bellevue,Avalon Meydenbauer,https://s3-media3.fl.yelpcdn.com/bphoto/dkCFIX...,False,https://www.yelp.com/biz/avalon-meydenbauer-be...,77,"[{'alias': 'apartments', 'title': 'Apartments'}]",3.0,"{'latitude': 47.61228, 'longitude': -122.20107}",[],"{'address1': '10410 NE 2nd St', 'address2': ''...",14254413523,(425) 441-3523,14627.36448
25,SkJe4nmuQ0fYNkmKOV0wAA,professional-insulation-services-spanaway,Professional Insulation Services,https://s3-media1.fl.yelpcdn.com/bphoto/RN2ski...,False,https://www.yelp.com/biz/professional-insulati...,7,"[{'alias': 'insulationinstallation', 'title': ...",4.5,"{'latitude': 47.0522362, 'longitude': -122.354...",[],"{'address1': '5610 224th St E', 'address2': No...",12065789201,(206) 578-9201,49318.174632
26,XNa323OYOaP7_5izydTnhg,aboda-woodinville,ABODA,https://s3-media2.fl.yelpcdn.com/bphoto/2tBXVu...,False,https://www.yelp.com/biz/aboda-woodinville?adj...,40,"[{'alias': 'homecleaning', 'title': 'Home Clea...",2.0,"{'latitude': 47.778496, 'longitude': -122.14422}",[],"{'address1': '6525 240th St SE', 'address2': '...",14258610500,(425) 861-0500,33239.658978
27,oZzHJTh99U9JpsEjxGOocw,wave-broadband-bothell-2,Wave Broadband,https://s3-media1.fl.yelpcdn.com/bphoto/11OKz3...,False,https://www.yelp.com/biz/wave-broadband-bothel...,616,"[{'alias': 'isps', 'title': 'Internet Service ...",1.5,"{'latitude': 47.77766, 'longitude': -122.18349}",[],"{'address1': '3700 Monte Villa Pkwy', 'address...",18004278686,(800) 427-8686,32981.516825
28,qDTsRK7o4VaClN3J_GkqTw,jordan-river-moving-and-storage-kirkland-2,Jordan River Moving & Storage,https://s3-media2.fl.yelpcdn.com/bphoto/wd-qJM...,False,https://www.yelp.com/biz/jordan-river-moving-a...,254,"[{'alias': 'movers', 'title': 'Movers'}]",3.0,"{'latitude': 47.71289185630897, 'longitude': -...",[],"{'address1': '12515 Willows Rd NE', 'address2'...",12062360808,(206) 236-0808,25899.449459


In [33]:
## Keep one row per business id, then count how many duplicated ids remain
final_df = final_df.drop_duplicates(subset=['id'])
final_df['id'].duplicated().sum()


0

In [32]:
# Write the final results to a gzip-compressed CSV, leaving out the index
final_df.to_csv(
    'Data/final_results_appartments_in_renton.csv.gz',
    compression='gzip',
    index=False,
)