# <center><u>Efficient Yelp API Calls (Core)</u>
* Authored By: Eric N. Valdez
* Date: 1/22/2024

In [1]:
# Standard Imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# additional imports
import json, os, math, time

# to make yelpapi calls
from yelpapi import YelpAPI

#progress bar from tqdm_notebook
from tqdm.notebook import tqdm_notebook

In [2]:
!pip install yelpapi
!pip install tqdm



## <u>Load Credentials and Create Yelp API Object

In [3]:
# Load API Credentials
with open ('/Users/Valde/.secret/yelp_api.json', 'r') as f:
    login = json.load(f)

In [4]:
login.keys()

dict_keys(['client-id', 'api-key'])

In [9]:
# login.items()
# login['api-key']

'sAM-z6jxwvNnaowWcZeLz55gNMgXaLor4PpDJQ_wmeid29pENW6sAoT-agduSnu8ylcW5WGw4VTjWCnjjAK2YPHA2dMr2hOPbv3PmT4Rmgy2n7Xf5Cd5LpyPd1tAZXYx'

In [10]:
# Instantiate YelpAPI Variable
yelp = YelpAPI(login['api-key'], timeout_s = 5.0)

## <u>Define Search Term and File Paths

In [11]:
# Set our API call Parameters and filename before the first call
location = 'Phillips Ranch, CA 91766'
term = 'sushi'

In [14]:
location.split(',') [0]

'Phillips Ranch'

In [17]:
# Specify folder for saving data
FOLDER = 'Data/'

os.makedirs(FOLDER, exist_ok = True)
# Specifying JSON_FILE filename (can include a folder)
JSON_FILE = FOLDER+f"{location.split(',')[0]}-{term}.json"

In [18]:
JSON_FILE

'Data/Phillips Ranch-sushi.json'

## <u>Check if Json File exists and Create it if it doesn't

In [20]:
# Check if JSON_FILE exists
file_exists = os.path.isfile(JSON_FILE)

# If it does not exist: 
if file_exists == False:
    
    # CREATE ANY NEEDED FOLDERS
    # Get the Folder Name only
    folder = os.path.dirname(JSON_FILE)
    
    # If JSON_FILE included a folder:
    if len(folder)>0:
        # Create the folder
        os.makedirs(folder, exist_ok = True)
        
        
    # INFORM USER AND SAVE EMPTY LIST
    print(f"[i] {JSON_FILE} not found. Saving empty list to file.")
    
    
    # save the first page of results
    with open(JSON_FILE, 'w') as f:
        json.dump([], f)
        
# If it exists, inform user
else:
    print(f"[i] {JSON_FILE} already exists.")

[i] Data/Phillips Ranch-sushi.json already exists.


## <u>Load JSON_FILE and account for previous resultstime?

### Make the first API call to get the first page of data
* We will use this first result to check:
    * How many total results there are?
    * Where is the actual data we want to save?
    * How many results do we get at a time?

In [21]:
# use our yelp_api variable's search_query method to perform our API call
results = yelp.search_query(term = term, location = location)

In [22]:
type(results)

dict

In [23]:
len(results)

3

In [24]:
results.keys()

dict_keys(['businesses', 'total', 'region'])

In [25]:
results['total']

548

In [26]:
results['region']

{'center': {'longitude': -117.74734497070312, 'latitude': 34.061718406896176}}

In [27]:
results['businesses']

[{'id': 'JlN5BhjbErkp6myx-8Adkw',
  'alias': 'kazama-sushi-claremont',
  'name': 'Kazama Sushi',
  'image_url': 'https://s3-media1.fl.yelpcdn.com/bphoto/Rl8QJx9lKwOdXI1j-2AemQ/o.jpg',
  'is_closed': False,
  'url': 'https://www.yelp.com/biz/kazama-sushi-claremont?adjust_creative=7c6m2EFuzdcQsaqbzKeHUw&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=7c6m2EFuzdcQsaqbzKeHUw',
  'review_count': 1482,
  'categories': [{'alias': 'sushi', 'title': 'Sushi Bars'},
   {'alias': 'japanese', 'title': 'Japanese'},
   {'alias': 'beerbar', 'title': 'Beer Bar'}],
  'rating': 4.5,
  'coordinates': {'latitude': 34.095166, 'longitude': -117.719514},
  'transactions': ['delivery'],
  'price': '$$',
  'location': {'address1': '101 N Indian Hill Blvd',
   'address2': 'Ste C1-104',
   'address3': '',
   'city': 'Claremont',
   'zip_code': '91711',
   'country': 'US',
   'state': 'CA',
   'display_address': ['101 N Indian Hill Blvd',
    'Ste C1-104',
    'Claremont, CA 91711']},
  'phon

In [29]:
# How many results total?
pd.DataFrame(results['businesses'])

Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
0,JlN5BhjbErkp6myx-8Adkw,kazama-sushi-claremont,Kazama Sushi,https://s3-media1.fl.yelpcdn.com/bphoto/Rl8QJx...,False,https://www.yelp.com/biz/kazama-sushi-claremon...,1482,"[{'alias': 'sushi', 'title': 'Sushi Bars'}, {'...",4.5,"{'latitude': 34.095166, 'longitude': -117.719514}",[delivery],$$,"{'address1': '101 N Indian Hill Blvd', 'addres...",19094502505,(909) 450-2505,4516.914865
1,eh26nTBs70zvxlL8mFOq6g,roll-to-go-sushi-pomona-3,Roll To Go Sushi,https://s3-media2.fl.yelpcdn.com/bphoto/dVd3sW...,False,https://www.yelp.com/biz/roll-to-go-sushi-pomo...,2,"[{'alias': 'sushi', 'title': 'Sushi Bars'}]",5.0,"{'latitude': 34.06199689430613, 'longitude': -...","[pickup, delivery]",,"{'address1': '642 E Holt Ave', 'address2': '',...",16264782188,(626) 478-2188,844.554815
2,v4mgrnnrqMCCEyLnzd5tgw,spicy-fish-pomona-2,Spicy Fish,https://s3-media2.fl.yelpcdn.com/bphoto/3-fr-k...,False,https://www.yelp.com/biz/spicy-fish-pomona-2?a...,160,"[{'alias': 'japanese', 'title': 'Japanese'}, {...",4.5,"{'latitude': 34.05437, 'longitude': -117.76271}",[delivery],$,"{'address1': '1084 W Mission Blvd', 'address2'...",19097668254,(909) 766-8254,1634.369337
3,n49shr4AyXEdZHwrM-kjgQ,kisetsu-restaurant-claremont,Kisetsu Restaurant,https://s3-media3.fl.yelpcdn.com/bphoto/8hhBo3...,False,https://www.yelp.com/biz/kisetsu-restaurant-cl...,68,"[{'alias': 'japanese', 'title': 'Japanese'}, {...",4.5,"{'latitude': 34.1063217, 'longitude': -117.732...",[],$$$,"{'address1': '950 W Foothill Blvd', 'address2'...",19096256250,(909) 625-6250,5134.57794
4,KCsnVV2LkHqNV7wu5r4FZw,heemo-sushi-chino-hills-chino-hills,Heemo Sushi-Chino Hills,https://s3-media2.fl.yelpcdn.com/bphoto/hYBwPV...,False,https://www.yelp.com/biz/heemo-sushi-chino-hil...,159,"[{'alias': 'sushi', 'title': 'Sushi Bars'}, {'...",4.5,"{'latitude': 34.01625920335954, 'longitude': -...",[],$$,"{'address1': '12959 Peyton Dr', 'address2': No...",19092229331,(909) 222-9331,5058.071402
5,ySzi-7ZESSMMteQQmHXHDA,kaya-asian-cuisine-pomona,Kaya Asian Cuisine,https://s3-media2.fl.yelpcdn.com/bphoto/n43zul...,False,https://www.yelp.com/biz/kaya-asian-cuisine-po...,358,"[{'alias': 'japanese', 'title': 'Japanese'}, {...",4.5,"{'latitude': 34.05398, 'longitude': -117.77543}","[pickup, delivery]",$$,"{'address1': '1640 W Mission Blvd', 'address2'...",19096222689,(909) 622-2689,2859.324205
6,NjgpL_T9TmEHxWVv2-RH5A,culichi-roll-pomona-3,Culichi Roll,https://s3-media4.fl.yelpcdn.com/bphoto/3joqVy...,False,https://www.yelp.com/biz/culichi-roll-pomona-3...,272,"[{'alias': 'sushi', 'title': 'Sushi Bars'}, {'...",4.0,"{'latitude': 34.06270300956817, 'longitude': -...","[pickup, delivery]",$$,"{'address1': '171 E Holt Ave', 'address2': 'St...",19097668006,(909) 766-8006,150.753304
7,4JWqfEnsDsRS3BeH0uyliw,sushi-one-spot-montclair-3,Sushi One Spot,https://s3-media4.fl.yelpcdn.com/bphoto/fKiaLM...,False,https://www.yelp.com/biz/sushi-one-spot-montcl...,306,"[{'alias': 'sushi', 'title': 'Sushi Bars'}, {'...",4.5,"{'latitude': 34.083119, 'longitude': -117.696473}",[],$$,"{'address1': '9405 Monte Vista Ave', 'address2...",19096260070,(909) 626-0070,5255.51553
8,CxVU-Pap9AVkHTSZGPOlAQ,senor-sushi-pomona-2,Senor Sushi,https://s3-media2.fl.yelpcdn.com/bphoto/_AVDH_...,False,https://www.yelp.com/biz/senor-sushi-pomona-2?...,114,"[{'alias': 'sushi', 'title': 'Sushi Bars'}]",4.0,"{'latitude': 34.0553180510785, 'longitude': -1...",[],$$,"{'address1': '101 W Mission Blvd', 'address2':...",19094616412,(909) 461-6412,766.212414
9,bM61lLChoc5_5x2gyWXu4Q,oni-sushi-claremont-2,Oni Sushi,https://s3-media3.fl.yelpcdn.com/bphoto/uy9dxS...,False,https://www.yelp.com/biz/oni-sushi-claremont-2...,328,"[{'alias': 'sushi', 'title': 'Sushi Bars'}, {'...",4.5,"{'latitude': 34.09686, 'longitude': -117.71889}","[pickup, delivery]",$$,"{'address1': '300 N Indian Hill Blvd', 'addres...",19096212772,(909) 621-2772,4686.277205


* Where is the actual data we want to save?

In [30]:
# How many did we get the details for?
results_per_page = len(results['businesses'])
results_per_page

20

* Calculate how many pages of results needed to cover the total_results

In [31]:
(results['total'])/results_per_page

27.4

In [32]:
# Use math.ceil to round up for the total number of pages of results.
n_pages = math.ceil((results['total'])/results_per_page)
n_pages

28

In [34]:
for i in tqdm_notebook(range(1,n_pages+1)):
    # The block of code we want to TRY to run
    try:

        

        time.sleep(.2)


    
        # Read in results in progress file and check the length
        with open (JSON_FILE, 'r') as f:
            previous_results = json.load(f)
            
        # Save number of results for to use as offset
        n_results = len(previous_results)
        
        
        # Use n_results as the OFFSET 
        results = yelp.search_query(location = location, term = term,
                                   offset = n_results+1)

        # Append new results and save to file
        previous_results.extend(results['businesses'])

        with open(JSON_FILE, 'w') as f:
            json.dump(previous_results, f)



            
    # What to do if we get an error/exception.
    except Exception as e:
        print(' [!] ERROR', e)
        

  0%|          | 0/28 [00:00<?, ?it/s]

## <u>Open the Final JSON File with Pandas

In [36]:
df = pd.read_json(JSON_FILE)

In [37]:
df.head()

Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,location,phone,display_phone,distance,price
0,eh26nTBs70zvxlL8mFOq6g,roll-to-go-sushi-pomona-3,Roll To Go Sushi,https://s3-media2.fl.yelpcdn.com/bphoto/dVd3sW...,False,https://www.yelp.com/biz/roll-to-go-sushi-pomo...,2,"[{'alias': 'sushi', 'title': 'Sushi Bars'}]",5.0,"{'latitude': 34.06199689430613, 'longitude': -...","[delivery, pickup]","{'address1': '642 E Holt Ave', 'address2': '',...",16264782188,(626) 478-2188,844.554815,
1,v4mgrnnrqMCCEyLnzd5tgw,spicy-fish-pomona-2,Spicy Fish,https://s3-media2.fl.yelpcdn.com/bphoto/3-fr-k...,False,https://www.yelp.com/biz/spicy-fish-pomona-2?a...,160,"[{'alias': 'japanese', 'title': 'Japanese'}, {...",4.5,"{'latitude': 34.05437, 'longitude': -117.76271}",[delivery],"{'address1': '1084 W Mission Blvd', 'address2'...",19097668254,(909) 766-8254,1634.369337,$
2,n49shr4AyXEdZHwrM-kjgQ,kisetsu-restaurant-claremont,Kisetsu Restaurant,https://s3-media3.fl.yelpcdn.com/bphoto/8hhBo3...,False,https://www.yelp.com/biz/kisetsu-restaurant-cl...,68,"[{'alias': 'japanese', 'title': 'Japanese'}, {...",4.5,"{'latitude': 34.1063217, 'longitude': -117.732...",[],"{'address1': '950 W Foothill Blvd', 'address2'...",19096256250,(909) 625-6250,5134.57794,$$$
3,KCsnVV2LkHqNV7wu5r4FZw,heemo-sushi-chino-hills-chino-hills,Heemo Sushi-Chino Hills,https://s3-media2.fl.yelpcdn.com/bphoto/hYBwPV...,False,https://www.yelp.com/biz/heemo-sushi-chino-hil...,159,"[{'alias': 'sushi', 'title': 'Sushi Bars'}, {'...",4.5,"{'latitude': 34.01625920335954, 'longitude': -...",[],"{'address1': '12959 Peyton Dr', 'address2': No...",19092229331,(909) 222-9331,5058.071402,$$
4,ySzi-7ZESSMMteQQmHXHDA,kaya-asian-cuisine-pomona,Kaya Asian Cuisine,https://s3-media2.fl.yelpcdn.com/bphoto/n43zul...,False,https://www.yelp.com/biz/kaya-asian-cuisine-po...,358,"[{'alias': 'japanese', 'title': 'Japanese'}, {...",4.5,"{'latitude': 34.05398, 'longitude': -117.77543}","[delivery, pickup]","{'address1': '1640 W Mission Blvd', 'address2'...",19096222689,(909) 622-2689,2859.324205,$$


In [38]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 547 entries, 0 to 546
Data columns (total 16 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   id             547 non-null    object 
 1   alias          547 non-null    object 
 2   name           547 non-null    object 
 3   image_url      547 non-null    object 
 4   is_closed      547 non-null    bool   
 5   url            547 non-null    object 
 6   review_count   547 non-null    int64  
 7   categories     547 non-null    object 
 8   rating         547 non-null    float64
 9   coordinates    547 non-null    object 
 10  transactions   547 non-null    object 
 11  location       547 non-null    object 
 12  phone          547 non-null    object 
 13  display_phone  547 non-null    object 
 14  distance       547 non-null    float64
 15  price          492 non-null    object 
dtypes: bool(1), float64(2), int64(1), object(12)
memory usage: 64.8+ KB


In [39]:
# convert the filename to a .csv.gz
csv_file = JSON_FILE.replace('.json', '.csv.gz')
csv_file

'Data/Phillips Ranch-sushi.csv.gz'

In [40]:
# Save it as a compressed csv (to save space)
df.to_csv(csv_file, compression = 'gzip', index = False)

## Compare filesize with os module's os.path.getsize

In [41]:
size_json = os.path.getsize(JSON_FILE)
size_csv_gz = os.path.getsize(JSON_FILE.replace('.json','.csv.gz'))

print(f'JSON FILE: {size_json:,} Bytes')
print(f'CSV.GZ FILE: {size_csv_gz:,} Bytes')

print(f'the csv.gz is {size_json/size_csv_gz} times smaller!')

JSON FILE: 542,424 Bytes
CSV.GZ FILE: 80,203 Bytes
the csv.gz is 6.7631385359649885 times smaller!
