# Yelp 

In [30]:
#Importing necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm.notebook import tqdm_notebook
import os, math, json, time
from yelpapi import YelpAPI

# Credentials and Accessing the API


In [31]:
#Loading in API credentials
with open("/Users/echo/Documents/0424_Data_Enrichment/.secret/yelp_api.json") as f:
    login = json.load(f)
# # Instantiate YelpAPI Variable
yelp_api = YelpAPI(login["api-key"], timeout_s=5.0)

# Define Search

In [32]:
#Setting search definitions
LOCATION = "Austin, TX"
TERM = "dessert"

In [33]:
#Searching yelp api
results = yelp_api.search_query(location=LOCATION, term=TERM)
results.keys()

dict_keys(['businesses', 'total', 'region'])

In [37]:
#Getting total results
total_results = results["total"]
total_results

4300

In [38]:
#Getting results per page
results_per_page = len(results["businesses"])
results_per_page

20

# Create JSON File Variable

In [39]:
JSON_FILE = "Data/results_in_austin.json"
JSON_FILE

'Data/results_in_austin.json'

In [44]:
## Check if JSON_FILE exists
file_exists = os.path.isfile(JSON_FILE)
## If it does not exist: 
if file_exists == False:
    ## CREATE ANY NEEDED FOLDERS
    # Get the Folder Name only
    folder = os.path.dirname(JSON_FILE)
    if len(folder) > 0:
        # create the folder
        os.makedirs(folder, exist_ok=True)
    
    ## INFORM USER AND SAVE EMPTY LIST
    print(f"[i] {JSON_FILE} not found. Saving empty list to file.")
    
    # save an empty list
    with open(JSON_FILE, "w") as f:
        json.dump([],f)
# If it exists, inform user
else:
    print(f"[i] {JSON_FILE} already exists.")

[i] Data/results_in_austin.json not found. Saving empty list to file.


# Reading in JSON File & Setting Offset

In [46]:
with open(JSON_FILE, "r") as f:
    previous_results = json.load(f)
## set offset based on previous results
n_results = len(previous_results)    

# Number of Pages Needed

In [55]:
# Use math.ceil to round up for the total number of pages of results.
n_pages = math.ceil((total_results/results_per_page))
n_pages

215

# Creating Function

In [48]:
def create_json_file(JSON_FILE, delete_if_exists=False):
    ## Check if JSON_FILE exists
    file_exists = os.path.isfile(JSON_FILE)
    ## If it DOES exist:
    if file_exists == True:
        ## Check if user wants to delete if exists
        if delete_if_exists == True:
            print(f"[!] {JSON_FILE} already exists. Deleting previous file ...")
            ## delete file and confirm it no longer exits.
            os.remove(JSON_FILE)
            ## Recursive call to function after old file deleted
            create_json_file(JSON_FILE, delete_if_exists=False)
        else:
            print(f"[i] {JSON_FILE} already exists.")
                
                
    ## If it does NOT exist:                
    else:
        ## INFORM USER AND SAVE EMPTY LIST
        print(f"[i] {JSON_FILE} not found. Saving empty list to new file.")
        ## CREATE ANY NEEDED FOLDERS
        # Get the Folder Name only
        folder = os.path.dirname(JSON_FILE)
        ## If JSON_FILE included a folder:
        if len(folder) > 0:
            # create the folder
            os.makedirs(folder, exist_ok=True)
            ## Save empty list to start the json file
            with open(JSON_FILE, "w") as f:
                json.dump([],f)

# Using Custom Function

In [58]:
## Create a new empty json file (delete the previous if it exists)
create_json_file(JSON_FILE, delete_if_exists=True)
## Load previous results and use len of results for offset
with open(JSON_FILE, "r") as f:
    previous_results = json.load(f)
## set offset based on previous results
n_results = len(previous_results)
print(f"- {n_results} previous results found.")
# use our yelp_api variable's search_query method to perform our API call
results = yelp_api.search_query(location=LOCATION, term=TERM, offset=n_results+1
## How many results total?
total_results = results["total"]
## How many did we get the details for?
results_per_page = len(results["businesses"])
# Use math.ceil to round up for the total number of pages of results.
n_pages = math.ceil((results["total"abs]-n_results/results_per_page))
n_pages

SyntaxError: invalid syntax (682905662.py, line 12)

# Query Loop

In [61]:
for i in tqdm_notebook(range(1,n_pages+1)):
    ## Read in results in progress file and check the length
    with open(JSON_FILE, "r") as f:
        previous_results = json.load(f)
    ## save number of results for to use as offset
    n_results = len(previous_results)
    results = yelp_api.search_query(location=LOCATION, term=TERM, offset=n_results+1)
    ## append new results and save to file    
    previous_results.extend(results["businesses"])
    # display(previous_results)
    with open(JSON_FILE, "w") as f:
        json.dump(previous_results, f)
    time.sleep(.2)    

  0%|          | 0/215 [00:00<?, ?it/s]

YelpAPIError: VALIDATION_ERROR: 1741 is greater than the maximum of 1000

# JSON File Contents
I was able to return the results of 1,741 places before hitting the Yelp API Limit.

In [70]:
df = pd.read_json(JSON_FILE)
df

Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
0,3sLc73JikDYNwE3vd7-cZw,gourdoughs-big-fat-donuts-austin,Gourdough's Big Fat Donuts,https://s3-media1.fl.yelpcdn.com/bphoto/GKh1IY...,False,https://www.yelp.com/biz/gourdoughs-big-fat-do...,3769,"[{'alias': 'donuts', 'title': 'Donuts'}]",4.5,"{'latitude': 30.24968, 'longitude': -97.75474}","[delivery, pickup]",$$,"{'address1': '1503 South 1st St', 'address2': ...",+15129129070,(512) 912-9070,6525.238348
1,l01HLUtX0_D-w0pfIxi2AA,uncle-tetsu-austin,Uncle Tetsu,https://s3-media2.fl.yelpcdn.com/bphoto/UdApoX...,False,https://www.yelp.com/biz/uncle-tetsu-austin?ad...,6,"[{'alias': 'desserts', 'title': 'Desserts'}]",4.5,"{'latitude': 30.336683909673457, 'longitude': ...",[],,"{'address1': '6929 Airport Blvd', 'address2': ...",+17372738639,(737) 273-8639,4559.922850
2,BaOt2n1PawRKbwlDiuh14w,sweet-memes-austin-austin,Sweet Memes - Austin,https://s3-media4.fl.yelpcdn.com/bphoto/3uxbhT...,False,https://www.yelp.com/biz/sweet-memes-austin-au...,94,"[{'alias': 'desserts', 'title': 'Desserts'}, {...",4.5,"{'latitude': 30.22602, 'longitude': -97.76184}",[],$$,"{'address1': '3801 S Congress Ave', 'address2'...",+15128933389,(512) 893-3389,9198.419727
3,HPYRea5mrOFv9VvOdP4AsQ,pie-bar-austin,Pie Bar,https://s3-media2.fl.yelpcdn.com/bphoto/IImh3h...,False,https://www.yelp.com/biz/pie-bar-austin?adjust...,92,"[{'alias': 'desserts', 'title': 'Desserts'}, {...",4.5,"{'latitude': 30.16954977494928, 'longitude': -...","[delivery, pickup]",$$,"{'address1': '8900 S Congress Ave', 'address2'...",+15125820098,(512) 582-0098,15837.882592
4,hYWsMDz0ms7TOnFTcsxYcw,manolis-ice-cream-pastries-and-cakes-austin,"Manolis Ice Cream, Pastries, & Cakes",https://s3-media2.fl.yelpcdn.com/bphoto/nJYF1s...,False,https://www.yelp.com/biz/manolis-ice-cream-pas...,522,"[{'alias': 'desserts', 'title': 'Desserts'}, {...",5.0,"{'latitude': 30.235612131073413, 'longitude': ...",[delivery],$$,"{'address1': '8907 Circle Dr', 'address2': Non...",+15123875045,(512) 387-5045,17559.809589
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1735,rFG5eziVKsonxHOrI_qVMA,crêpeful-austin-2,crêpeful,https://s3-media2.fl.yelpcdn.com/bphoto/wjbwV_...,False,https://www.yelp.com/biz/cr%C3%AApeful-austin-...,66,"[{'alias': 'creperies', 'title': 'Creperies'}]",5.0,"{'latitude': 30.405795, 'longitude': -97.874045}","[delivery, pickup]",,"{'address1': '6550 Comanche Trl', 'address2': ...",+15125753595,(512) 575-3595,16044.175373
1736,bMUllZks5Vo6ut6NdLRpWw,bésame-austin,Bésame,https://s3-media2.fl.yelpcdn.com/bphoto/wZ74lc...,False,https://www.yelp.com/biz/b%C3%A9same-austin?ad...,45,"[{'alias': 'icecream', 'title': 'Ice Cream & F...",4.5,"{'latitude': 30.2090767, 'longitude': -97.7301...",[],,"{'address1': '3901 Promontory Point Dr', 'addr...",,,11256.209743
1737,yzNUwYQyPgxwoEa--nu5ew,whipped-bakery-and-cafe-leander,Whipped Bakery & Cafe,https://s3-media2.fl.yelpcdn.com/bphoto/VswTPw...,False,https://www.yelp.com/biz/whipped-bakery-and-ca...,193,"[{'alias': 'bakeries', 'title': 'Bakeries'}, {...",4.5,"{'latitude': 30.56350374498521, 'longitude': -...",[],$$,"{'address1': '15609 Ronald Reagan Blvd', 'addr...",+15129867988,(512) 986-7988,28797.413727
1738,3L0v19ibM9bOqE0Ys75YhA,the-salty-donut-austin-2,The Salty Donut,https://s3-media1.fl.yelpcdn.com/bphoto/RvnMVA...,False,https://www.yelp.com/biz/the-salty-donut-austi...,287,"[{'alias': 'donuts', 'title': 'Donuts'}, {'ali...",4.5,"{'latitude': 30.24419, 'longitude': -97.75207}","[delivery, pickup]",,"{'address1': '2000 S Congress Ave', 'address2'...",+15124944148,(512) 494-4148,7120.977110


In [69]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1740 entries, 0 to 1739
Data columns (total 16 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   id             1740 non-null   object 
 1   alias          1740 non-null   object 
 2   name           1740 non-null   object 
 3   image_url      1740 non-null   object 
 4   is_closed      1740 non-null   bool   
 5   url            1740 non-null   object 
 6   review_count   1740 non-null   int64  
 7   categories     1740 non-null   object 
 8   rating         1740 non-null   float64
 9   coordinates    1740 non-null   object 
 10  transactions   1740 non-null   object 
 11  price          1044 non-null   object 
 12  location       1740 non-null   object 
 13  phone          1740 non-null   object 
 14  display_phone  1740 non-null   object 
 15  distance       1740 non-null   float64
dtypes: bool(1), float64(2), int64(1), object(12)
memory usage: 205.7+ KB


# Converting to .csv.gz

In [91]:
#convert the file to .csv.gz
csv_file = JSON_FILE.replace(".json",".csv.gz")
csv_file
#Save it as a compressed csv (to save space)
df.to_csv(csv_file, compression="gzip", index=False)