# Yelp Search in Austin, TX

In [118]:
#Importing necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm.notebook import tqdm_notebook
import os, math, json, time
from yelpapi import YelpAPI

# Credentials and Accessing the API


In [119]:
#Loading in API credentials
with open("/Users/echo/Documents/0424_Data_Enrichment/.secret/yelp_api.json") as f:
    login = json.load(f)
# # Instantiate YelpAPI Variable
yelp_api = YelpAPI(login["api-key"], timeout_s=5.0)

# Define Search

In [120]:
#Setting search definitions
LOCATION = "Austin, TX"
TERM = "dessert"

In [121]:
#Searching yelp api
results = yelp_api.search_query(location=LOCATION, term=TERM)
results.keys()

dict_keys(['businesses', 'total', 'region'])

In [122]:
#Getting total results
total_results = results["total"]
total_results

3300

In [123]:
#Getting results per page
results_per_page = len(results["businesses"])
results_per_page

20

# Create JSON File Variable

In [124]:
JSON_FILE = "Data/results_in_austin.json"
JSON_FILE

'Data/results_in_austin.json'

In [125]:
## Check if JSON_FILE exists
file_exists = os.path.isfile(JSON_FILE)
## If it does not exist: 
if file_exists == False:
    ## CREATE ANY NEEDED FOLDERS
    # Get the Folder Name only
    folder = os.path.dirname(JSON_FILE)
    if len(folder) > 0:
        # create the folder
        os.makedirs(folder, exist_ok=True)
    
    ## INFORM USER AND SAVE EMPTY LIST
    print(f"[i] {JSON_FILE} not found. Saving empty list to file.")
    
    # save an empty list
    with open(JSON_FILE, "w") as f:
        json.dump([],f)
# If it exists, inform user
else:
    print(f"[i] {JSON_FILE} already exists.")

[i] Data/results_in_austin.json already exists.


# Number of Pages Needed

In [126]:
# Use math.ceil to round up for the total number of pages of results.
n_pages = math.ceil((total_results/results_per_page))
n_pages

165

# Creating Function

In [127]:
def create_json_file(JSON_FILE, delete_if_exists=False):
    ## Check if JSON_FILE exists
    file_exists = os.path.isfile(JSON_FILE)
    ## If it DOES exist:
    if file_exists == True:
        ## Check if user wants to delete if exists
        if delete_if_exists == True:
            print(f"[!] {JSON_FILE} already exists. Deleting previous file ...")
            ## delete file and confirm it no longer exits.
            os.remove(JSON_FILE)
            ## Recursive call to function after old file deleted
            create_json_file(JSON_FILE, delete_if_exists=False)
        else:
            print(f"[i] {JSON_FILE} already exists.")
                
                
    ## If it does NOT exist:                
    else:
        ## INFORM USER AND SAVE EMPTY LIST
        print(f"[i] {JSON_FILE} not found. Saving empty list to new file.")
        ## CREATE ANY NEEDED FOLDERS
        # Get the Folder Name only
        folder = os.path.dirname(JSON_FILE)
        ## If JSON_FILE included a folder:
        if len(folder) > 0:
            # create the folder
            os.makedirs(folder, exist_ok=True)
            ## Save empty list to start the json file
            with open(JSON_FILE, "w") as f:
                json.dump([],f)

# Using Custom Function

In [128]:
## Create a new empty json file (delete the previous if it exists)
create_json_file(JSON_FILE, delete_if_exists=True)
## Load previous results and use len of results for offset
with open(JSON_FILE, "r") as f:
    previous_results = json.load(f)
## set offset based on previous results
n_results = len(previous_results)
print(f"- {n_results} previous results found.")
# use our yelp_api variable's search_query method to perform our API call
results = yelp_api.search_query(location=LOCATION, term=TERM, offset=n_results+1)
## How many results total?
total_results = results["total"]
## How many did we get the details for?
results_per_page = len(results["businesses"])
# Use math.ceil to round up for the total number of pages of results.
n_pages = math.ceil((results["total"]-n_results/results_per_page))
n_pages

[!] Data/results_in_austin.json already exists. Deleting previous file ...
[i] Data/results_in_austin.json not found. Saving empty list to new file.
- 0 previous results found.


3300

# Query Loop

In [129]:
for i in tqdm_notebook(range(1,n_pages+1)):
    
    ## Read in results in progress file and check the length
    with open(JSON_FILE, 'r') as f:
        previous_results = json.load(f)
    ## save number of results for to use as offset
    n_results = len(previous_results)
    print(n_results)
    
    if (n_results + results_per_page) > 1000:
        print('Exceeded 1000 api calls. Stopping loop.')
        break
    
    ## use n_results as the OFFSET 
    results = yelp_api.search_query(location=LOCATION,
                                    term=TERM, 
                                    offset=n_results)

    
    ## append new results and save to file
    previous_results.extend(results['businesses'])

    # display(previous_results)
    with open(JSON_FILE,'w') as f:
        json.dump(previous_results,f)

    time.sleep(.2)



  0%|          | 0/3300 [00:00<?, ?it/s]

0
20
40
60
80
100
120
140
160
180
200
220
240
260
280
300
320
340
360
380
400
420
440
460
480
500
520
540
560
580
600
620
640
660
680
700
720
740
760
780
800
820
840
860
880
900
920
940
960
980
1000
Exceeded 1000 api calls. Stopping loop.


# JSON File Contents
I was able to return the results of 1,741 places before hitting the Yelp API Limit.

In [130]:
df = pd.read_json(JSON_FILE)
df

Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,location,phone,display_phone,distance,price
0,l01HLUtX0_D-w0pfIxi2AA,uncle-tetsu-austin,Uncle Tetsu,https://s3-media2.fl.yelpcdn.com/bphoto/UdApoX...,False,https://www.yelp.com/biz/uncle-tetsu-austin?ad...,11,"[{'alias': 'desserts', 'title': 'Desserts'}]",4.5,"{'latitude': 30.336683909673457, 'longitude': ...",[],"{'address1': '6929 Airport Blvd', 'address2': ...",+17372738639,(737) 273-8639,4559.922850,
1,BaOt2n1PawRKbwlDiuh14w,sweet-memes-austin-austin,Sweet Memes - Austin,https://s3-media4.fl.yelpcdn.com/bphoto/3uxbhT...,False,https://www.yelp.com/biz/sweet-memes-austin-au...,96,"[{'alias': 'desserts', 'title': 'Desserts'}, {...",4.5,"{'latitude': 30.22602, 'longitude': -97.76184}",[],"{'address1': '3801 S Congress Ave', 'address2'...",+15128933389,(512) 893-3389,9198.419727,$$
2,3sLc73JikDYNwE3vd7-cZw,gourdoughs-big-fat-donuts-austin,Gourdough's Big Fat Donuts,https://s3-media1.fl.yelpcdn.com/bphoto/GKh1IY...,False,https://www.yelp.com/biz/gourdoughs-big-fat-do...,3769,"[{'alias': 'donuts', 'title': 'Donuts'}]",4.5,"{'latitude': 30.24968, 'longitude': -97.75474}","[pickup, delivery]","{'address1': '1503 South 1st St', 'address2': ...",+15129129070,(512) 912-9070,6525.238348,$$
3,HPYRea5mrOFv9VvOdP4AsQ,pie-bar-austin,Pie Bar,https://s3-media2.fl.yelpcdn.com/bphoto/IImh3h...,False,https://www.yelp.com/biz/pie-bar-austin?adjust...,92,"[{'alias': 'desserts', 'title': 'Desserts'}, {...",4.5,"{'latitude': 30.16954977494928, 'longitude': -...","[pickup, delivery]","{'address1': '8900 S Congress Ave', 'address2'...",+15125820098,(512) 582-0098,15837.882592,$$
4,hYWsMDz0ms7TOnFTcsxYcw,manolis-ice-cream-pastries-and-cakes-austin,"Manolis Ice Cream, Pastries, & Cakes",https://s3-media2.fl.yelpcdn.com/bphoto/nJYF1s...,False,https://www.yelp.com/biz/manolis-ice-cream-pas...,523,"[{'alias': 'desserts', 'title': 'Desserts'}, {...",5.0,"{'latitude': 30.235612131073413, 'longitude': ...",[delivery],"{'address1': '8907 Circle Dr', 'address2': Non...",+15123875045,(512) 387-5045,17559.809589,$$
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,g67nMRFiR2cJHk2qinbH_w,shan-china-bistro-and-bar-no-title-2,Shan China Bistro and Bar,https://s3-media1.fl.yelpcdn.com/bphoto/pVtUpX...,False,https://www.yelp.com/biz/shan-china-bistro-and...,67,"[{'alias': 'chinese', 'title': 'Chinese'}, {'a...",4.0,"{'latitude': 30.381386, 'longitude': -97.945108}","[delivery, pickup]","{'address1': '1700 Ranch Rd 620 N', 'address2'...",+15122849909,(512) 284-9909,19881.715843,
996,KOesD8BicfGM3ArkbqaTxA,tandoori-lounge-austin,Tandoori Lounge,https://s3-media3.fl.yelpcdn.com/bphoto/-HF7Jy...,False,https://www.yelp.com/biz/tandoori-lounge-austi...,37,"[{'alias': 'indpak', 'title': 'Indian'}]",4.5,"{'latitude': 30.21426, 'longitude': -97.83208}","[delivery, pickup]","{'address1': '3601 W William Canon Dr', 'addre...",+15126084013,(512) 608-4013,12379.078297,$$
997,0B63WxEUmCX0ZehLKfZySA,masa-y-más-austin,Masa y Más,https://s3-media2.fl.yelpcdn.com/bphoto/qs4pef...,False,https://www.yelp.com/biz/masa-y-m%C3%A1s-austi...,117,"[{'alias': 'tacos', 'title': 'Tacos'}]",4.0,"{'latitude': 30.24966, 'longitude': -97.76647}",[],"{'address1': '1817 S Lamar Blvd', 'address2': ...",+15123541655,(512) 354-1655,6227.830911,
998,cm5YHAOL_T1jhSDFRfn3Ww,d-jour-austin-3,D'Jour,https://s3-media4.fl.yelpcdn.com/bphoto/mnis76...,False,https://www.yelp.com/biz/d-jour-austin-3?adjus...,27,"[{'alias': 'catering', 'title': 'Caterers'}, {...",5.0,"{'latitude': 30.263265437947215, 'longitude': ...",[],"{'address1': '', 'address2': '', 'address3': '...",+15126532381,(512) 653-2381,6250.206821,


In [131]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 16 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   id             1000 non-null   object 
 1   alias          1000 non-null   object 
 2   name           1000 non-null   object 
 3   image_url      1000 non-null   object 
 4   is_closed      1000 non-null   bool   
 5   url            1000 non-null   object 
 6   review_count   1000 non-null   int64  
 7   categories     1000 non-null   object 
 8   rating         1000 non-null   float64
 9   coordinates    1000 non-null   object 
 10  transactions   1000 non-null   object 
 11  location       1000 non-null   object 
 12  phone          1000 non-null   object 
 13  display_phone  1000 non-null   object 
 14  distance       1000 non-null   float64
 15  price          690 non-null    object 
dtypes: bool(1), float64(2), int64(1), object(12)
memory usage: 118.3+ KB


# Converting to .csv.gz

In [132]:
#convert the file to .csv.gz
csv_file = JSON_FILE.replace(".json",".csv.gz")
csv_file
#Save it as a compressed csv (to save space)
df.to_csv(csv_file, compression="gzip", index=False)