In [2]:
!pip install YelpAPI

Collecting YelpAPI
  Downloading yelpapi-2.5.0-py3-none-any.whl (7.4 kB)
Installing collected packages: YelpAPI
Successfully installed YelpAPI-2.5.0


In [5]:
# imports

import os, json, math, time
from yelpapi import YelpAPI
from tqdm.notebook import tqdm_notebook
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [6]:
# json is needed to parse the credentials file
import json

# read the yelp api credentials from the local secrets file into a dict
with open('.secret/yelp_api.json') as creds_file:
    login = json.load(creds_file)

# show which entries were loaded (key names only, not their values)
login.keys()


dict_keys(['client-id', 'api-key'])

In [7]:
# import yelpapi class (also imported in the imports cell at the top)
from yelpapi import YelpAPI

# create the api client using the 'api-key' entry of the loaded credentials
# NOTE(review): timeout_s is presumably a per-request timeout in seconds —
# confirm against the yelpapi documentation
yelp_api = YelpAPI(login['api-key'], timeout_s = 5.0)
yelp_api

<yelpapi.yelpapi.YelpAPI at 0x10471eb80>

In [8]:
# define location and term
# (both are passed to the yelp search query and are used to build the name
# of the in-progress results file)
LOCATION = 'Los Angeles, CA'
TERM = 'Korean'

In [9]:
# folder that holds every data file written by this notebook
FOLDER = 'Data/'

# create the folder; no error if it is already there
os.makedirs(FOLDER, exist_ok = True)

# in-progress results file, named after the city part of LOCATION and the TERM
JSON_FILE = f"{FOLDER}results_in_progress_{LOCATION.split(',')[0]}_{TERM}.json"
JSON_FILE

'Data/results_in_progress_Los Angeles_Korean.json'

In [10]:
# define function to create and save an empty JSON file
# use parameter delete_if_exists = True if want to start over
# (if there is a previous file)
def create_json_file(JSON_FILE, delete_if_exists = False):
    """Create JSON_FILE holding an empty list, unless the file already exists.

    Parameters
    ----------
    JSON_FILE : str
        Path of the JSON file to create. Any folders in the path are created
        if needed; a bare filename (no folder) is created in the current
        working directory.
    delete_if_exists : bool, default False
        If True and JSON_FILE already exists, the old file is deleted and a
        new empty one is written. If False, an existing file is left as is.

    Returns
    -------
    None
    """
    # if the file is already there, either keep it or remove it to start over
    if os.path.isfile(JSON_FILE):

        # keep the existing file untouched unless the user asked to start over
        if not delete_if_exists:
            return

        print(f"[!] {JSON_FILE} already exists. Deleting previous file.")
        os.remove(JSON_FILE)

    # from here on the file does not exist: inform user and save empty list
    print(f"[!] {JSON_FILE} not found. Saving empty list to new file.")

    # create any needed folders specified in the JSON_FILE path
    folder = os.path.dirname(JSON_FILE)
    if len(folder) > 0:
        os.makedirs(folder, exist_ok = True)

    # save empty list to start the json file
    # (done whether or not the path included a folder)
    with open(JSON_FILE, 'w') as f:
        json.dump([], f)

In [11]:
# create new empty json file (delete if already exists)
# NOTE: delete_if_exists = True removes any results saved in JSON_FILE by a
# previous run; pass delete_if_exists = False to keep an existing file
create_json_file(JSON_FILE, delete_if_exists = True)


[!] Data/results_in_progress_Los Angeles_Korean.json not found. Saving empty list to new file.


In [12]:
# read back whatever has been saved so far; its length becomes the offset
with open(JSON_FILE, 'r') as json_in:
    previous_results = json.load(json_in)

previous_results

[]

In [13]:
# set offset based on previous results
# (the number of businesses already saved is where the next api call starts)
n_results = len(previous_results)

print(f"{n_results} previous results found.")

0 previous results found.


In [14]:
# first api call: search for TERM in LOCATION, starting at the saved offset
results = yelp_api.search_query(
    location = LOCATION,
    term = TERM,
    offset = n_results,
)

# total number of matches reported by the api for this search
total_results = results['total']
total_results

4400

In [15]:
# how many results did we get in api call?
# (the number of businesses in one response is used as the page size below)
results_per_page = len(results['businesses'])
results_per_page

20

In [16]:
# calculate number of calls needed for total results
# (guard: a search that returns no businesses gives results_per_page == 0,
# which would otherwise raise ZeroDivisionError; no pages are needed then)
if results_per_page > 0:
    n_pages = math.ceil((results['total'] - n_results) / results_per_page)
else:
    n_pages = 0
n_pages

220

In [17]:
# loop to get all results by calling api multiple times
# (progress is written to JSON_FILE after every page)
for i in tqdm_notebook(range(1, n_pages + 1)):

    # read in results in progress and check length
    with open(JSON_FILE, 'r') as f:
        previous_results = json.load(f)

    # save number of results to use as offset
    n_results = len(previous_results)

    # stop before the next page would go past 1000 total results
    # (note: the check is on the number of results collected, not on the
    # number of api calls, despite the wording of the message)
    if (n_results + results_per_page) > 1000:
        print("Exceeded 1000 API calls. Stopping loop.")
        break

    # call api, use n_results as the offset
    results = yelp_api.search_query(location = LOCATION,
                                   term = TERM,
                                   offset = n_results)

    # stop if the api returned an empty page: nothing new would be saved and
    # the offset would never advance, so every remaining iteration would
    # repeat the same request
    new_businesses = results['businesses']
    if len(new_businesses) == 0:
        print("No more results returned. Stopping loop.")
        break

    # append new results to previous results
    previous_results.extend(new_businesses)

    # save the combined results back to the json file
    with open(JSON_FILE, 'w') as f:
        json.dump(previous_results, f)

    # pause
    time.sleep(0.2)


  0%|          | 0/220 [00:00<?, ?it/s]

Exceeded 1000 API calls. Stopping loop.


In [18]:
# load the saved results file into a dataframe
final_df = pd.read_json(JSON_FILE)

# preview the first and the last rows
display(final_df.head())
display(final_df.tail())

Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
0,h1R2iKYdm2lwukzMJvJqDw,hangari-kalguksu-los-angeles-4,Hangari Kalguksu,https://s3-media3.fl.yelpcdn.com/bphoto/X_U65O...,False,https://www.yelp.com/biz/hangari-kalguksu-los-...,2442,"[{'alias': 'korean', 'title': 'Korean'}, {'ali...",4.5,"{'latitude': 34.0628602582049, 'longitude': -1...","[delivery, pickup]",$$,"{'address1': '3470 W 6th St', 'address2': 'Ste...",12133882326,(213) 388-2326,11451.915903
1,3C2wUqSkpY4-N0O5ArVjwQ,yuk-dae-jang-los-angeles-2,Yuk Dae Jang,https://s3-media2.fl.yelpcdn.com/bphoto/5y-hXE...,False,https://www.yelp.com/biz/yuk-dae-jang-los-ange...,720,"[{'alias': 'korean', 'title': 'Korean'}, {'ali...",4.5,"{'latitude': 34.0640411376953, 'longitude': -1...","[delivery, pickup]",$$,"{'address1': '3033 W 6th St', 'address2': 'Ste...",12133521331,(213) 352-1331,12305.727721
2,uzAbw27XQTXTivjgf2bN2w,han-bat-sul-lung-tang-los-angeles-2,Han Bat Sul Lung Tang,https://s3-media2.fl.yelpcdn.com/bphoto/YttPox...,False,https://www.yelp.com/biz/han-bat-sul-lung-tang...,2695,"[{'alias': 'korean', 'title': 'Korean'}, {'ali...",4.5,"{'latitude': 34.065414, 'longitude': -118.3095...",[delivery],$$,"{'address1': '4163 W 5th St', 'address2': '', ...",12133839499,(213) 383-9499,10588.083046
3,KtEMG1Aln3vQzte92QJxlw,genwa-korean-bbq-mid-wilshire-los-angeles,Genwa Korean BBQ Mid Wilshire,https://s3-media2.fl.yelpcdn.com/bphoto/n6VHst...,False,https://www.yelp.com/biz/genwa-korean-bbq-mid-...,3073,"[{'alias': 'korean', 'title': 'Korean'}, {'ali...",4.5,"{'latitude': 34.062321315088, 'longitude': -11...","[delivery, pickup]",$$$,"{'address1': '5115 Wilshire Blvd', 'address2':...",13235490760,(323) 549-0760,7958.053069
4,0SerWqGwzNWpTA2RBLVdUg,sun-nong-dan-los-angeles-4,Sun Nong Dan,https://s3-media3.fl.yelpcdn.com/bphoto/wGqvSZ...,False,https://www.yelp.com/biz/sun-nong-dan-los-ange...,2293,"[{'alias': 'korean', 'title': 'Korean'}, {'ali...",4.0,"{'latitude': 34.0630233116146, 'longitude': -1...","[delivery, pickup]",$$$,"{'address1': '3470 W 6th St', 'address2': 'Ste...",12133650303,(213) 365-0303,11461.989315


Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
995,q_NvC1kq3P3RCYlBiFVjDw,mitsuwa-marketplace-northridge-3,Mitsuwa Marketplace,https://s3-media2.fl.yelpcdn.com/bphoto/_QbDph...,False,https://www.yelp.com/biz/mitsuwa-marketplace-n...,77,"[{'alias': 'intlgrocery', 'title': 'Internatio...",3.5,"{'latitude': 34.232888, 'longitude': -118.553025}",[],$$,"{'address1': '8940 Tampa Ave', 'address2': '',...",18186269373,(818) 626-9373,27034.103145
996,rA5Af_n9bG32dVUn6mEb_g,sojuya-cypress-2,Sojuya,https://s3-media2.fl.yelpcdn.com/bphoto/kye1m-...,False,https://www.yelp.com/biz/sojuya-cypress-2?adju...,446,"[{'alias': 'japanese', 'title': 'Japanese'}, {...",4.0,"{'latitude': 33.831278, 'longitude': -118.05494}",[delivery],$$,"{'address1': '4498 Lincoln Ave', 'address2': '...",17148285959,(714) 828-5959,38961.066837
997,oK3If9fgoMhmmHPpMwqolg,w-brazilian-steakhouse-los-angeles,W Brazilian Steakhouse,https://s3-media1.fl.yelpcdn.com/bphoto/Aq7gUc...,False,https://www.yelp.com/biz/w-brazilian-steakhous...,141,"[{'alias': 'brazilian', 'title': 'Brazilian'},...",4.5,"{'latitude': 34.07441, 'longitude': -118.30931}","[delivery, pickup]",$$$,"{'address1': '147 N Western Ave', 'address2': ...",13233807352,(323) 380-7352,11111.771894
998,AyCE1u1Vfxpw_lXr1Te1uw,lab-88-el-monte-5,Lab 88,https://s3-media2.fl.yelpcdn.com/bphoto/WEwIjU...,False,https://www.yelp.com/biz/lab-88-el-monte-5?adj...,278,"[{'alias': 'lounges', 'title': 'Lounges'}, {'a...",4.5,"{'latitude': 34.09166083988975, 'longitude': -...","[delivery, pickup]",$$,"{'address1': '10602 Lower Azusa Rd', 'address2...",16266720034,(626) 672-0034,35212.873772
999,H0eQSsPSEyh8--KaU3x3Dw,shik-do-rak-irvine,Shik Do Rak,https://s3-media2.fl.yelpcdn.com/bphoto/OZMql9...,False,https://www.yelp.com/biz/shik-do-rak-irvine?ad...,1795,"[{'alias': 'korean', 'title': 'Korean'}, {'ali...",3.5,"{'latitude': 33.68806, 'longitude': -117.77158}","[delivery, pickup]",$$,"{'address1': '14805 Jeffrey Rd', 'address2': '...",19496537668,(949) 653-7668,69619.615148


In [19]:
# count rows whose business id already appeared in an earlier row
final_df['id'].duplicated().sum()

0

In [20]:
# keep only the first row for each business id
final_df = final_df.drop_duplicates(subset = ['id'])

# confirm that no repeated ids remain
final_df['id'].duplicated().sum()

0

In [21]:
# save final results to csv
# (the file name is built from FOLDER, LOCATION and TERM, like the json file,
# so that changing the search does not overwrite another search's results;
# spaces in the city name are replaced with '-')
CSV_FILE = FOLDER + f"final_results_{LOCATION.split(',')[0].replace(' ', '-')}_{TERM}.csv"
final_df.to_csv(CSV_FILE,
               index = False)