# **Yelp API Core**

_John Andrew Dixon_

---

##### **Imports**

In [163]:
import json, math, os, time
import numpy as np
import pandas as pd
from yelpapi import YelpAPI
from tqdm.notebook import tqdm_notebook

##### **Helper Functions**

In [164]:
def create_json_file(JSON_FILE, delete_if_exists=False):
    """Make sure a JSON file exists at ``JSON_FILE``.

    Parameters
    ----------
    JSON_FILE : str
        Path of the JSON file to check for or create.
    delete_if_exists : bool, default False
        When True and the file already exists, the file is removed and then
        created again holding an empty list.

    Notes
    -----
    A newly created file contains an empty JSON list (``[]``). Any folder
    named in the path is created first. A message describing the action
    taken is printed in every case. Nothing is returned.
    """
    if os.path.isfile(JSON_FILE):

        # The file is already there and the caller wants to keep it:
        # just report that and leave it untouched.
        if not delete_if_exists:
            print(f"{JSON_FILE} exists.")
            return

        # The file is already there and the caller wants a fresh one.
        print(f"{JSON_FILE} exists. Deleting it now...")
        os.remove(JSON_FILE)

        # Call again with the default delete_if_exists so the
        # "file is missing" path below builds the new file.
        create_json_file(JSON_FILE)
        return

    # Reaching this point means the file is missing, so build it.
    print(f"Specified file did not exist: creating {JSON_FILE} now.")

    # Create the containing folder, when the path names one.
    parent_dir = os.path.dirname(JSON_FILE)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    # Start the new file off as an empty JSON list.
    with open(JSON_FILE, "w") as handle:
        json.dump([], handle)

---

## **Efficient YelpAPI Calls**

##### _API Setup_

In [165]:
# Build the credentials path from the current user's home directory rather
# than hard-coding one user's absolute path, so the notebook can run on
# other machines and accounts.
CREDENTIALS_PATH = os.path.join(os.path.expanduser("~"), ".secret", "yelp_api.json")

# Load YelpAPI credentials from the JSON file.
# The file must provide an 'api-key' entry, which is read below.
with open(CREDENTIALS_PATH) as f:
    login = json.load(f)

# Instantiate the YelpAPI object, passing the API key and timeout_s=5.0
yelp_api = YelpAPI(login['api-key'], timeout_s=5.0)
yelp_api

<yelpapi.yelpapi.YelpAPI at 0x2477fc3bb50>

In [166]:
# Define the search location and the search term, then show both
LOCATION, TERM = "San Diego, CA", "beer"
display(LOCATION, TERM)

'San Diego, CA'

'beer'

In [167]:
# Build the in-progress results file name from the search terms,
# with the spaces stripped out of each term
location_slug = LOCATION.replace(' ', '')
term_slug = TERM.replace(' ', '')
JSON_FILE = f"Data/results_in_progress_{location_slug}_{term_slug}.json"
JSON_FILE

'Data/results_in_progress_SanDiego,CA_beer.json'

In [168]:
# Call the helper function to aid in creating the file.
# NOTE: delete_if_exists=True removes an existing results file and recreates
# it holding an empty list, so results saved by an earlier run are discarded.
create_json_file(JSON_FILE, delete_if_exists=True)

Data/results_in_progress_SanDiego,CA_beer.json exists. Deleting it now...
Specified file did not exist: creating Data/results_in_progress_SanDiego,CA_beer.json now.


In [169]:
# Read back whatever results the file already holds.
with open(JSON_FILE) as f:
    previous_results = json.load(f)

# Report how many results were carried over.
n_results = len(previous_results)
print(f"There are {n_results} previous results.")

There are 0 previous results.


In [170]:
# Perform an API call, offset by the number of results already saved.
search_params = {"location": LOCATION, "term": TERM, "offset": n_results}
results = yelp_api.search_query(**search_params)

# Show the total number of results for verification and future reference.
total_results = results['total']
print(f"There are {total_results} total search results.")

There are 4500 total search results.


In [171]:
# Count the businesses on the returned page to get the page size
first_page = results['businesses']
results_per_page = len(first_page)
print(f"There are {results_per_page} per page.")

There are 20 per page.


In [172]:
# Work out how many pages still need to be fetched: take the total results,
# subtract the previous results, and divide by the number of results per page
# (given by len(results["businesses"])), rounding up so that a partial final
# page is still counted.
# Guard against a search that returned no businesses: results_per_page would
# be 0 and the division would raise ZeroDivisionError.
if results_per_page > 0:
    n_pages = math.ceil((results["total"] - n_results) / results_per_page)
else:
    n_pages = 0
print(f"There are {n_pages} pages.")

There are 225 pages.


In [173]:
# Result limit enforced by this loop: no page is requested once it would push
# the saved count past this number (presumably the Yelp API's own cap on how
# deep a search can be paged -- confirm against the Yelp documentation).
MAX_RESULTS = 1000

# Loop once per page that still has to be fetched.
# range(n_pages) is used instead of range(1, n_pages): every iteration fetches
# exactly one page, so starting at 1 would run one iteration short and never
# fetch the final page whenever the search has fewer results than the limit.
for page_index in tqdm_notebook(range(n_pages)):

    # Load previous results.
    with open(JSON_FILE, "r") as f:
        previous_results = json.load(f)

    # Save the amount of previous results; it doubles as the next offset.
    n_results = len(previous_results)

    # Exit the loop if fetching another page would exceed the result limit.
    if (n_results + results_per_page) > MAX_RESULTS:
        print("Exceeded result limit of 1000. Stopping loop...")
        break

    # Perform an API call with the results offset.
    results = yelp_api.search_query(location=LOCATION,
                                    term=TERM,
                                    offset=n_results)

    # Stop if the API returned an empty page: the offset would not advance,
    # so every remaining iteration would repeat the same fruitless request.
    new_businesses = results["businesses"]
    if not new_businesses:
        print("No more results returned. Stopping loop...")
        break

    # Append new results
    previous_results.extend(new_businesses)

    # Dump all results gathered so far to the target file, so progress is
    # kept on disk after every page.
    with open(JSON_FILE, 'w') as f:
        json.dump(previous_results, f)

    # Add a 200 ms pause for better progress bar visualization
    time.sleep(.2)

  0%|          | 0/224 [00:00<?, ?it/s]

Exceeded result limit of 1000. Stopping loop...


In [174]:
# Read the saved results into a DataFrame and preview both ends of it
final_df = pd.read_json(JSON_FILE)
first_rows, last_rows = final_df.head(), final_df.tail()
display(first_rows, last_rows)

Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,location,phone,display_phone,distance,price
0,v1GulCBkuV31WR2K3kFQfA,the-gärten-san-diego,The Gärten,https://s3-media3.fl.yelpcdn.com/bphoto/Q-EMyi...,False,https://www.yelp.com/biz/the-g%C3%A4rten-san-d...,10,"[{'alias': 'beergardens', 'title': 'Beer Garde...",4.5,"{'latitude': 32.764478, 'longitude': -117.199122}",[],"{'address1': '5322 Banks St', 'address2': '', ...",,,11731.220811,
1,fC-Q6SNAARW0eaKlBo737w,pure-project-balboa-park-san-diego-2,Pure Project - Balboa Park,https://s3-media2.fl.yelpcdn.com/bphoto/U7--bW...,False,https://www.yelp.com/biz/pure-project-balboa-p...,308,"[{'alias': 'beerbar', 'title': 'Beer Bar'}, {'...",4.5,"{'latitude': 32.7351148, 'longitude': -117.160...",[],"{'address1': '2865 Fifth Ave', 'address2': Non...",16193232000.0,(619) 323-2000,11547.73205,$$
2,GkMleu5sddDUBmqKJ7tkdA,taproom-beer-co-san-diego,TapRoom Beer Co,https://s3-media4.fl.yelpcdn.com/bphoto/l1gRK3...,False,https://www.yelp.com/biz/taproom-beer-co-san-d...,195,"[{'alias': 'breweries', 'title': 'Breweries'},...",4.5,"{'latitude': 32.75567, 'longitude': -117.14347}","[delivery, pickup]","{'address1': '2000 El Cajon Blvd', 'address2':...",16195397738.0,(619) 539-7738,8819.092243,$$
3,zDRzccJo9qgKYLvAjuGl8w,groundswell-brewing-company-san-diego,Groundswell Brewing Company,https://s3-media4.fl.yelpcdn.com/bphoto/CC1lcc...,False,https://www.yelp.com/biz/groundswell-brewing-c...,131,"[{'alias': 'breweries', 'title': 'Breweries'}]",4.0,"{'latitude': 32.788978269559365, 'longitude': ...",[],"{'address1': '6304 Riverdale St', 'address2': ...",16197952337.0,(619) 795-2337,3867.685781,$
4,yqH8jZ1NncMN48vEVEOBbg,mikkeller-little-italy-san-diego,Mikkeller Little Italy,https://s3-media3.fl.yelpcdn.com/bphoto/T9r-q-...,False,https://www.yelp.com/biz/mikkeller-little-ital...,111,"[{'alias': 'beerbar', 'title': 'Beer Bar'}]",4.5,"{'latitude': 32.72575, 'longitude': -117.16886}",[],"{'address1': '2021 India St', 'address2': '', ...",18583813500.0,(858) 381-3500,12884.420179,$$


Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,location,phone,display_phone,distance,price
995,5Ni6ZRI-SyRh4UOMWPsJsA,sky-lounge-spring-valley-2,Sky Lounge,https://s3-media4.fl.yelpcdn.com/bphoto/E523Nl...,False,https://www.yelp.com/biz/sky-lounge-spring-val...,80,"[{'alias': 'hookah_bars', 'title': 'Hookah Bar...",4.5,"{'latitude': 32.7435853040513, 'longitude': -1...",[delivery],"{'address1': '3515 Sweetwater Springs Blvd', '...",16193035341,(619) 303-5341,14565.359684,$$
996,aHc-QrVN6ZX5SItLddiQag,sabertooth-grill-san-diego,Sabertooth Grill,https://s3-media4.fl.yelpcdn.com/bphoto/B9CdXZ...,False,https://www.yelp.com/biz/sabertooth-grill-san-...,82,"[{'alias': 'tradamerican', 'title': 'American ...",3.0,"{'latitude': 32.73807583669569, 'longitude': -...",[],"{'address1': '2920 Zoo Dr', 'address2': '', 'a...",16192311515,(619) 231-1515,10895.799421,$$
997,1scglHyquV5DxVqZwgiGmw,papananis-deli-san-diego,Papanani's Deli,https://s3-media2.fl.yelpcdn.com/bphoto/NLfoLV...,False,https://www.yelp.com/biz/papananis-deli-san-di...,135,"[{'alias': 'delis', 'title': 'Delis'}, {'alias...",4.0,"{'latitude': 32.72661, 'longitude': -117.2007}","[delivery, pickup]","{'address1': '1450 Harbor Island Dr', 'address...",16192970273,(619) 297-0273,14683.899639,$$
998,eGhOLzFFIhrvF6QjSSpJsg,phils-bbq-san-diego-2,Phil's BBQ,https://s3-media2.fl.yelpcdn.com/bphoto/IijLAP...,False,https://www.yelp.com/biz/phils-bbq-san-diego-2...,15856,"[{'alias': 'bbq', 'title': 'Barbeque'}, {'alia...",4.5,"{'latitude': 32.7547792020658, 'longitude': -1...",[delivery],"{'address1': '3750 Sports Arena Blvd', 'addres...",16192266333,(619) 226-6333,13649.045923,$$
999,f_FOATuK6tLUwP3ZytdCrA,harbor-market-san-diego,Harbor Market,https://s3-media1.fl.yelpcdn.com/bphoto/e4A8Hm...,False,https://www.yelp.com/biz/harbor-market-san-die...,44,"[{'alias': 'beer_and_wine', 'title': 'Beer, Wi...",3.5,"{'latitude': 32.716901, 'longitude': -117.171804}","[delivery, pickup]","{'address1': '901 Bayfront Ct', 'address2': ''...",16194321358,(619) 432-1358,13919.718045,


In [175]:
# As a precaution, drop rows that repeat an 'id', then count how many
# repeated ids are left
id_column = ['id']
final_df = final_df.drop_duplicates(subset=id_column)
final_df.duplicated(subset=id_column).sum()

0

In [176]:
# Write the final results to a gzip-compressed csv for storage efficiency.
# The file name carries the search terms with their spaces stripped out.
location_part = LOCATION.replace(' ', '')
term_part = TERM.replace(' ', '')
final_csv_path = f"Data/final_results_{location_part}_{term_part}.csv.gz"
final_df.to_csv(final_csv_path, compression='gzip', index=False)

In [177]:
# Print a quick summary of the final DataFrame: each column's dtype and
# non-null count, plus the memory usage
final_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1000 entries, 0 to 999
Data columns (total 16 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   id             1000 non-null   object 
 1   alias          1000 non-null   object 
 2   name           1000 non-null   object 
 3   image_url      1000 non-null   object 
 4   is_closed      1000 non-null   bool   
 5   url            1000 non-null   object 
 6   review_count   1000 non-null   int64  
 7   categories     1000 non-null   object 
 8   rating         1000 non-null   float64
 9   coordinates    1000 non-null   object 
 10  transactions   1000 non-null   object 
 11  location       1000 non-null   object 
 12  phone          1000 non-null   object 
 13  display_phone  1000 non-null   object 
 14  distance       1000 non-null   float64
 15  price          847 non-null    object 
dtypes: bool(1), float64(2), int64(1), object(12)
memory usage: 126.0+ KB
