# Efficient Yelp API Calls

### Imports

In [1]:
# Standard Imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Additional Imports
#os for saving and loading files
#json - to work with json files
#math - to round up results
#time - to add a short pause to not overwhelm the server
import os, json, math, time

#to make yelpapi calls
from yelpapi import YelpAPI

#progress bar
from tqdm.notebook import tqdm_notebook

In [2]:
!pip install yelpapi
!pip install tqdm



### Importing Yelp API Credentials

In [3]:
#load API credentials
with open('/Users/Daisy/.secret/yelp_api.json','r') as f: #change the path to match YOUR path!!
    login = json.load(f)
login.keys()

dict_keys(['client-id', 'api-key'])

In [4]:
#Instantiate YelpAPI Variable
yelp = YelpAPI(login['api-key'], timeout_s = 5.0)

### Define Search Terms and File Paths

In [5]:
#set API call parameters and filename prior to fist call
location = 'San Antonio, TX 78213'
term = 'pizza'

In [6]:
location.split(',')[0]

'San Antonio'

In [7]:
#specify folder for saving data
FOLDER = 'Data/'

#creating folder (if exists_ok so you don't get error if exists)
os.makedirs(FOLDER, exist_ok =True)

#specify JSON_FILE filename (can include folder)
JSON_FILE = FOLDER+f"{location.split(',')[0]}-{term}.json"


In [8]:
JSON_FILE

'Data/San Antonio-pizza.json'

### Checking if Json File Exists and Create it if it doesn't

In [9]:
# check if json file exists
file_exists = os.path.isfile(JSON_FILE)
## If does not exist:
if file_exists == False:
    #create needed folders and get folder name only
    folder = os.path.dirname(JSON_FILE)
    
    # If JSON File included a folder:
    if len(folder)>0:
        os.makedirs(folder, exist_ok = True)
        
    #inform user and save empty list
    print(f"[i] {JSON_FILE} not found. Saving empty list to file.")
    
    #save the first page of results
    with open(JSON_FILE, 'w') as f:
        json.dump([],f)

# if it exists, inform user
else:
    print(f"[i] {JSON_FILE} already exists.")

[i] Data/San Antonio-pizza.json already exists.


### Make first API call and first page of data

In [10]:
#use yelp_api variable's 'search_query' method to perform API call
results = yelp.search_query(location=location,term=term)

In [11]:
#checking type
type(results)

dict

In [12]:
#verifying the keys
results.keys()

dict_keys(['businesses', 'total', 'region'])

In [13]:
#checking how many total results and saving as a variable
total_results = results['total']
total_results

509

In [14]:
#checking how many businesses results
results_perpage = len(results['businesses'])
results_perpage

20

In [15]:
#checking how many pages of results will need rounding up
n_pages = math.ceil(total_results/ results_perpage)
n_pages

26

In [16]:

for i in tqdm_notebook(range(1,n_pages+1)):
    try:
        time.sleep(.2)
        
        #read in results in progress file and check the length
        with open(JSON_FILE, 'r')as f:
            previous_results = json.load(f)
        
        #save # of results to use as offset
        n_results = len(previous_results)
        
        #use n_results as the offset
        results = yelp.search_query(location=location, term=term,
                                     offset = n_results+1)
        
        #append new results and save to file
        previous_results.extend(results['businesses'])
        
        with open(JSON_FILE,'w') as f:
            json.dump(previous_results,f)
    
    #what to do if we get an error/exemption
    except Exception as e:
        print('[i] ERROR',e)
    

  0%|          | 0/26 [00:00<?, ?it/s]

### Convert JSON file to dataframe

In [17]:
df = pd.read_json(JSON_FILE)

In [18]:
df.head()

Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
0,mE8oeT-264q0MRKhVo7j8A,dough-pizzeria-napoletana-san-antonio,Dough Pizzeria Napoletana,https://s3-media2.fl.yelpcdn.com/bphoto/nMZExv...,False,https://www.yelp.com/biz/dough-pizzeria-napole...,1775,"[{'alias': 'pizza', 'title': 'Pizza'}, {'alias...",4.0,"{'latitude': 29.5195143, 'longitude': -98.5069...",[delivery],$$,"{'address1': '6989 Blanco Rd', 'address2': '',...",12109796565,(210) 979-6565,2093.693197
1,Ht62-ZGuv8gzRJ_pO-kV_w,via-313-pizza-gateway-san-antonio,Via 313 Pizza - Gateway,https://s3-media1.fl.yelpcdn.com/bphoto/_sRd4q...,False,https://www.yelp.com/biz/via-313-pizza-gateway...,177,"[{'alias': 'pizza', 'title': 'Pizza'}]",4.5,"{'latitude': 29.520115, 'longitude': -98.5745}",[],$$,"{'address1': '8435 Wurzbach Rd', 'address2': '...",12109851313,(210) 985-1313,4420.514542
2,3QZsFaM_sdzQ8uRITu3MwQ,yaghis-pizzeria-san-antonio,Yaghi's Pizzeria,https://s3-media1.fl.yelpcdn.com/bphoto/C2aWKe...,False,https://www.yelp.com/biz/yaghis-pizzeria-san-a...,284,"[{'alias': 'pizza', 'title': 'Pizza'}, {'alias...",4.5,"{'latitude': 29.5272754228422, 'longitude': -9...",[delivery],$,"{'address1': '5910 Babcock Rd', 'address2': 'S...",12105588787,(210) 558-8787,7234.937657
3,UJaura9CFZAvGt-KfJohDQ,poppys-pizza-san-antonio-2,Poppys Pizza,https://s3-media4.fl.yelpcdn.com/bphoto/Yx4Rat...,False,https://www.yelp.com/biz/poppys-pizza-san-anto...,351,"[{'alias': 'pizza', 'title': 'Pizza'}, {'alias...",4.0,"{'latitude': 29.522676, 'longitude': -98.506518}","[pickup, delivery]",$,"{'address1': '7115 Blanco Rd', 'address2': 'St...",12103664000,(210) 366-4000,2160.173616
4,dNQQiYoZUcnl20G3pxUzag,goombas-pizzeria-san-antonio-3,Goomba's Pizzeria,https://s3-media2.fl.yelpcdn.com/bphoto/VpBM-d...,False,https://www.yelp.com/biz/goombas-pizzeria-san-...,231,"[{'alias': 'pizza', 'title': 'Pizza'}, {'alias...",4.0,"{'latitude': 29.5325103526716, 'longitude': -9...","[pickup, delivery]",$,"{'address1': '9825 Ih 10 W', 'address2': '', '...",12105611600,(210) 561-1600,3414.669145


In [19]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 508 entries, 0 to 507
Data columns (total 16 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   id             508 non-null    object 
 1   alias          508 non-null    object 
 2   name           508 non-null    object 
 3   image_url      508 non-null    object 
 4   is_closed      508 non-null    bool   
 5   url            508 non-null    object 
 6   review_count   508 non-null    int64  
 7   categories     508 non-null    object 
 8   rating         508 non-null    float64
 9   coordinates    508 non-null    object 
 10  transactions   508 non-null    object 
 11  price          442 non-null    object 
 12  location       508 non-null    object 
 13  phone          508 non-null    object 
 14  display_phone  508 non-null    object 
 15  distance       508 non-null    float64
dtypes: bool(1), float64(2), int64(1), object(12)
memory usage: 60.2+ KB


### Save file as .csv.gz

In [21]:
#convert from json file to csv.gz file
csv_file = JSON_FILE.replace('.json','.csv.gz')
csv_file

'Data/San Antonio-pizza.csv.gz'

In [22]:
#save it as a compressed csv to save space
df.to_csv(csv_file,compression = 'gzip', index=False)