# **Yelp API Core**

_John Andrew Dixon_

---

##### **Imports**

In [124]:
import json, math, os, time
import numpy as np
import pandas as pd
from yelpapi import YelpAPI
from tqdm.notebook import tqdm_notebook

In [125]:
def create_json_file(JSON_FILE, delete_if_exists=False):
    """Make sure JSON_FILE exists, optionally resetting it to an empty list.

    Parameters
    ----------
    JSON_FILE : str
        Path of the JSON file to create. Missing parent folders are created.
    delete_if_exists : bool, default False
        When True and the file already exists, it is removed and recreated
        containing an empty JSON list. When False an existing file is left
        untouched.

    Returns
    -------
    None
        The function only prints status messages and touches the file system.
    """
    if os.path.isfile(JSON_FILE):
        # Existing file that the caller wants to keep: report and stop here.
        if not delete_if_exists:
            print(f"{JSON_FILE} exists.")
            return

        # Existing file that must be reset: remove it, then fall through to
        # the creation steps below.
        print(f"{JSON_FILE} exists. Deleting it now...")
        os.remove(JSON_FILE)

    # From here on the file is known to be absent.
    print(f"Specified file did not exist: creating {JSON_FILE} now.")

    # Only create a folder when the path actually has a directory component.
    parent_dir = os.path.dirname(JSON_FILE)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    # Seed the new file with an empty list.
    with open(JSON_FILE, "w") as handle:
        json.dump([], handle)

---

## **Efficient YelpAPI Calls**

In [126]:
# Load YelpAPI credentials from a JSON file.
# The file location can be overridden with the YELP_API_CREDENTIALS
# environment variable so the notebook is not tied to one machine; when the
# variable is unset the original path is used.
credentials_path = os.environ.get("YELP_API_CREDENTIALS",
                                  "/Users/johna/.secret/yelp_api.json")
with open(credentials_path) as f:
    login = json.load(f)

# Instantiate YelpAPI object using the 'api-key' entry of the credentials file.
yelp_api = YelpAPI(login['api-key'], timeout_s=5.0)
yelp_api

<yelpapi.yelpapi.YelpAPI at 0x2477e95f160>

In [127]:
# Search parameters: the location and search term sent with each Yelp query.
# Both are also used (with spaces removed) to build the output file names.
LOCATION = "San Diego, CA"
TERM = "beer"
display(LOCATION, TERM)

'San Diego, CA'

'beer'

In [128]:
# Path of the JSON file that accumulates results while pages are fetched.
# Spaces are stripped from the location and term before they are embedded
# in the file name.
JSON_FILE = "Data/results_in_progress_{}_{}.json".format(
    LOCATION.replace(' ', ''),
    TERM.replace(' ', ''),
)
JSON_FILE

'Data/results_in_progress_SanDiego,CA_beer.json'

In [129]:
# Start from an empty results file: with delete_if_exists=True an existing
# file is removed and recreated, so results saved by a previous run are
# discarded.
create_json_file(JSON_FILE, delete_if_exists=True)

Data/results_in_progress_SanDiego,CA_beer.json exists. Deleting it now...
Specified file did not exist: creating Data/results_in_progress_SanDiego,CA_beer.json now.


In [130]:
# Read back whatever has been saved in the results file so far.
with open(JSON_FILE, "r") as results_file:
    previous_results = json.load(results_file)

# Report how many results were already on disk; this count is used as the
# offset of the next API call.
n_results = len(previous_results)
print(f"There are {n_results} previous results.")

There are 0 previous results.


In [131]:
# Perform an API call, offset by the number of results already saved.
search_kwargs = {"location": LOCATION, "term": TERM, "offset": n_results}
results = yelp_api.search_query(**search_kwargs)

# Show the total amount of results for verification and future reference.
total_results = results['total']
print(f"There are {total_results} total results.")

There are 4500 total results.


In [132]:
# Work out how many pages are still to be fetched: take the total number of
# results, subtract the results already saved, and divide by the number of
# results per page, given by len(results["businesses"]).
results_per_page = len(results["businesses"])
if results_per_page:
    n_pages = math.ceil((results["total"] - n_results) / results_per_page)
else:
    # The search returned no businesses, so there is nothing to page through;
    # this also avoids a ZeroDivisionError.
    n_pages = 0
print(f"There are {n_pages} pages.")

There are 225 pages.


In [133]:
# Limits used by the stop condition below. The values are the ones this
# notebook already relied on: 20 results are added per call and the API is
# not queried past a combined offset + page size of 1000.
RESULTS_PER_CALL = 20
MAX_RETRIEVABLE = 1000

# n_pages counts the pages that still have to be fetched, and the results of
# the earlier search call were never written to JSON_FILE, so the loop has to
# run n_pages times (1..n_pages inclusive) to reach the last page.
for i in tqdm_notebook(range(1, n_pages + 1)):

    # Load previous results.
    with open(JSON_FILE, "r") as f:
        previous_results = json.load(f)

    # The number of saved results doubles as the offset of the next page.
    n_results = len(previous_results)

    # Stop before requesting a page that would go past the retrievable limit.
    if (n_results + RESULTS_PER_CALL) > MAX_RETRIEVABLE:
        print("Exceeded offset of 1000. Stopping loop")
        break

    # Perform an API call with the results offset.
    results = yelp_api.search_query(location=LOCATION,
                                    term=TERM,
                                    offset=n_results)

    # An empty page means the API has nothing more to return; repeating the
    # call with the same offset would only waste requests.
    if not results["businesses"]:
        print("No more results returned. Stopping loop")
        break

    # Append new results
    previous_results.extend(results["businesses"])

    # Dump the accumulated results to the target file after every page so
    # progress survives an interruption.
    with open(JSON_FILE, 'w') as f:
        json.dump(previous_results, f)

    # adds 200 ms pause between API calls
    time.sleep(.2)

  0%|          | 0/224 [00:00<?, ?it/s]

Exceeded offset of 1000. Stopping loop


In [134]:
# Load the accumulated results into a DataFrame and preview its first and
# last rows.
final_df = pd.read_json(JSON_FILE)
display(final_df.head())
display(final_df.tail())

Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
0,W0ZlPspAw4-QyMAlwHhyyw,hopnonymous-brewing-company-san-diego,Hopnonymous Brewing Company,https://s3-media2.fl.yelpcdn.com/bphoto/8TMmgM...,False,https://www.yelp.com/biz/hopnonymous-brewing-c...,123,"[{'alias': 'breweries', 'title': 'Breweries'}]",5.0,"{'latitude': 32.834408865, 'longitude': -117.1...",[],$,"{'address1': '7705 Convoy Ct', 'address2': Non...",16198474628.0,(619) 847-4628,5686.154762
1,v1GulCBkuV31WR2K3kFQfA,the-gärten-san-diego,The Gärten,https://s3-media3.fl.yelpcdn.com/bphoto/Q-EMyi...,False,https://www.yelp.com/biz/the-g%C3%A4rten-san-d...,10,"[{'alias': 'beergardens', 'title': 'Beer Garde...",4.5,"{'latitude': 32.764478, 'longitude': -117.199122}",[],,"{'address1': '5322 Banks St', 'address2': '', ...",,,11731.220811
2,xQTGzTqu9AO1czRy1yw4rw,harland-brewing-scripps-ranch-san-diego,Harland Brewing - Scripps Ranch,https://s3-media4.fl.yelpcdn.com/bphoto/UwyWMD...,False,https://www.yelp.com/biz/harland-brewing-scrip...,125,"[{'alias': 'breweries', 'title': 'Breweries'}]",4.5,"{'latitude': 32.9020727428145, 'longitude': -1...",[],$$,"{'address1': '10115 Carroll Canyon Rd', 'addre...",18588004566.0,(858) 800-4566,8810.771101
3,R1HTs48IfVhaCL39KMRwkg,mcilhenney-brewing-alpine,McIlhenney Brewing,https://s3-media1.fl.yelpcdn.com/bphoto/cI3dIR...,False,https://www.yelp.com/biz/mcilhenney-brewing-al...,21,"[{'alias': 'brewpubs', 'title': 'Brewpubs'}]",5.0,"{'latitude': 32.83467, 'longitude': -116.76268}",[],$,"{'address1': '2363 Alpine Blvd', 'address2': '...",,,31083.512766
4,fC-Q6SNAARW0eaKlBo737w,pure-project-balboa-park-san-diego-2,Pure Project - Balboa Park,https://s3-media2.fl.yelpcdn.com/bphoto/U7--bW...,False,https://www.yelp.com/biz/pure-project-balboa-p...,308,"[{'alias': 'beerbar', 'title': 'Beer Bar'}, {'...",4.5,"{'latitude': 32.7351148, 'longitude': -117.160...",[],$$,"{'address1': '2865 Fifth Ave', 'address2': Non...",16193232000.0,(619) 323-2000,11547.73205


Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
995,u1O3IvpF2AVzZ1dMVy3FIQ,second-nature-san-diego-2,Second Nature,https://s3-media2.fl.yelpcdn.com/bphoto/9NuNcl...,False,https://www.yelp.com/biz/second-nature-san-die...,467,"[{'alias': 'breakfast_brunch', 'title': 'Break...",4.0,"{'latitude': 32.8064230252045, 'longitude': -1...","[pickup, delivery]",$$,"{'address1': '5026 Cass St', 'address2': '', '...",18582475236.0,(858) 247-5236,15051.632859
996,RY88vFYtQKnsmMBKgp764A,board-and-brew-poway-poway-2,Board & Brew - Poway,https://s3-media2.fl.yelpcdn.com/bphoto/YmYQqb...,False,https://www.yelp.com/biz/board-and-brew-poway-...,68,"[{'alias': 'beerbar', 'title': 'Beer Bar'}, {'...",3.5,"{'latitude': 32.955528177006634, 'longitude': ...",[],$$,"{'address1': '13501 Poway Rd', 'address2': '',...",,,15585.47968
997,vmki6c3e8OJfT4GNU98gHA,lips-san-diego-san-diego,Lips - San Diego,https://s3-media2.fl.yelpcdn.com/bphoto/5fZM60...,False,https://www.yelp.com/biz/lips-san-diego-san-di...,940,"[{'alias': 'bars', 'title': 'Bars'}, {'alias':...",4.0,"{'latitude': 32.7557199, 'longitude': -117.12923}",[delivery],$$$,"{'address1': '3036 El Cajon Blvd', 'address2':...",16192957900.0,(619) 295-7900,8204.584063
998,u5piYYCVj2RutXlbipzGdA,sótano-suizo-tijuana-2,Sótano Suizo,https://s3-media3.fl.yelpcdn.com/bphoto/gKANCR...,False,https://www.yelp.com/biz/s%C3%B3tano-suizo-tij...,70,"[{'alias': 'pubs', 'title': 'Pubs'}]",4.5,"{'latitude': 32.5277160750546, 'longitude': -1...",[],$$,"{'address1': 'Paseo de los Héroes 9415', 'addr...",526646848834.0,+52 664 684 8834,33575.038722
999,aVQ74QsVsH0oUdl6hC3JUg,the-wine-shop-san-diego,The Wine Shop,https://s3-media2.fl.yelpcdn.com/bphoto/LAjlpD...,False,https://www.yelp.com/biz/the-wine-shop-san-die...,7,"[{'alias': 'wine_bars', 'title': 'Wine Bars'},...",5.0,"{'latitude': 32.75263, 'longitude': -117.19624}",[],,"{'address1': '2548 Congress St', 'address2': '...",,,12318.974634


In [136]:
# Keep only the first row for each business id, then count the ids that are
# still duplicated (expected to be 0 after the drop).
final_df = final_df.drop_duplicates(subset=['id'], keep='first')
final_df['id'].duplicated().sum()

0

In [137]:
# Save the final results to a gzip-compressed CSV for storage efficiency.
# The file name embeds the location and term with their spaces removed.
final_csv_path = "Data/final_results_{}_{}.csv.gz".format(
    LOCATION.replace(' ', ''),
    TERM.replace(' ', ''),
)
final_df.to_csv(final_csv_path, compression='gzip', index=False)