# **Yelp API Core**

_John Andrew Dixon_

---

##### **Imports**

In [34]:
import json, math, os, time
import numpy as np
import pandas as pd
from yelpapi import YelpAPI
from tqdm.notebook import tqdm_notebook

In [None]:
def create_json_file(JSON_FILE, delete_if_exists=False):
    # Check if the specified file exists
    file_exists = os.path.isfile(JSON_FILE)

    # If not
    if not file_exists:
        
        # Get the folder name where the file will be saved.
        folder = os.path.dirname(JSON_FILE)
        # If the file is contained within a folder create it.
        if len(folder) > 0:
            os.makedirs(folder, exist_ok=True)

        # Notify that the file is being created.
        print(f"Specified file did not exist: creating {file_exists} now.")

        # Save an empty list to the file that was created.
        with open(JSON_FILE, "w") as f:
            json.dump([], f)
    # If so
    else:
        # Notify that it exists
        print(f"{JSON_FILE} exists.")

---

## **Efficient YelpAPI Calls**

In [52]:
# Load YelpAPI credentials from the JSON files
with open('/Users/johna/.secret/yelp_api.json') as f:
    login = json.load(f)

# Instantiate YelpAPI object
yelp_api = YelpAPI(login['api-key'], timeout_s=5.0)
yelp_api

<yelpapi.yelpapi.YelpAPI at 0x2477e95f280>

In [53]:
LOCATION = "San Diego, CA"
TERM = "beer"
display(LOCATION, TERM)

'San Diego, CA'

'beer'

In [61]:
JSON_FILE = f"Data/results_in_progress_{LOCATION.replace(' ', '')}_{TERM.replace(' ', '')}.json"
JSON_FILE

'Data/results_in_progress_SanDiego,CA_beer.json'

In [None]:
create_json_file(JSON_FILE)

In [56]:
# Load previous results.
with open(JSON_FILE, "r") as f:
    previous_results = json.load(f)

# Notify how many previous results there were.
n_results = len(previous_results)
print(f"There are {n_results} previous results.")

There are 0 previous results.


In [57]:
# Preform an API call with the results offset.
results = yelp_api.search_query(location=LOCATION,
                                term=TERM,
                                offset=n_results)
# Show the total amount of results for verification and future reference.
print(f"There are {results['total']} total results.")

There are 4500 total results.


In [58]:
# Create the number of pages by getting the total results and subtracting
# the previous results and then diving this by the amount of results per page
# given by len(results["businesses"]).
n_pages = math.ceil((results["total"] - n_results) / len(results["businesses"]))
print(f"There are {n_pages} pages.")

There are 225 pages.


In [59]:
for i in tqdm_notebook(range(1, n_pages)):

    # Load previous results.
    with open(JSON_FILE, "r") as f:
        previous_results = json.load(f)

    # Save the amount of previous results.
    n_results = len(previous_results)

    # if (n_results + 20) > 1000:
    #     print("Exceeded offset of 1000. Stopping loop")

    # Preform an API call with the results offset.
    results = yelp_api.search_query(location=LOCATION,
                                    term=TERM,
                                    offset=n_results)
    
    # Append new results
    previous_results.extend(results["businesses"])

    # adds 200 ms pause
    time.sleep(.2)

  0%|          | 0/224 [00:00<?, ?it/s]