# Import Library

In [1]:
# Standard Imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Additional Imports
import os, json, math, time
from yelpapi import YelpAPI
from tqdm.notebook import tqdm_notebook

# Load Credentials and Create Yelp API Object

In [2]:
# Load API Credentials
with open('/Users/ericakitano/.secret/yelp_api.json') as f:   #use your path here!
    login = json.load(f)

In [3]:
# check the keys stored in the yelp_api.json file (dictionary)
login.keys()

dict_keys(['client-id', 'api-key'])

In [4]:
# Instantiate YelpAPI Variable
yelp_api = YelpAPI(login['api-key'], timeout_s=5.0)

# API Call Parameters (Search Terms)



In [5]:
# set API call parameters 
LOCATION = 'Sunnyvale, CA'
TERM = 'pancakes'

In [6]:
# check
LOCATION.split(',')[0]

'Sunnyvale'

# File Path

In [7]:
## Specify folder for saving data
FOLDER = 'Data/'

os.makedirs(FOLDER, exist_ok = True)


# Specifying JSON_FILE filename (can include a folder)
JSON_FILE = FOLDER+f"{LOCATION.split(',')[0]}-{TERM}.json"

In [8]:
# check
JSON_FILE

'Data/Sunnyvale-pancakes.json'

# Check if Json File exists and Create it if it doesn't

In [9]:
## Check if JSON_FILE exists
file_exists = os.path.isfile(JSON_FILE)
## If it does not exist: 
if file_exists == False:  
    ## CREATE ANY NEEDED FOLDERS
    # Get the Folder Name only
    folder = os.path.dirname(JSON_FILE)
    
    ## If JSON_FILE included a folder:
    if len(folder)>0:
        # create the folder
        os.makedirs(folder, exist_ok = True)
        
        
    ## INFORM USER AND SAVE EMPTY LIST
    print(f"[i] {JSON_FILE} not found. Saving empty list to file.")
    
    
    ## save the first page of results
    with open(JSON_FILE, 'w') as f:
          json.dump([], f)
        
## If it exists, inform user
else:
    print(f"[i] {JSON_FILE} already exists.")

[i] Data/Sunnyvale-pancakes.json not found. Saving empty list to file.


# Make the first API call to get the first page of data

In [10]:
# use our yelp_api variable's search_query method to perform our API call
results = yelp_api.search_query(term = TERM, location = LOCATION)

In [11]:
# check the dtype of "results"
type(results)

dict

In [12]:
# check the keys in "results" 
results.keys()

dict_keys(['businesses', 'total', 'region'])

In [13]:
# check the total number of results
results['total']

963

In [14]:
# Place the results' businesses data into a dataframe
pd.DataFrame(results['businesses'])

Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
0,ZpkgFuJ3CFKgNZTObTDw6Q,hanabusa-café-sunnyvale,Hanabusa Café,https://s3-media1.fl.yelpcdn.com/bphoto/TO3kER...,False,https://www.yelp.com/biz/hanabusa-caf%C3%A9-su...,599,"[{'alias': 'desserts', 'title': 'Desserts'}, {...",3.5,"{'latitude': 37.378639, 'longitude': -122.04662}",[],$$,"{'address1': '201 S Mary Ave', 'address2': 'St...",,,2363.696746
1,TANWKEUxyZvNnWu_Yv0ARg,the-breakfast-club-san-jose,The Breakfast Club,https://s3-media3.fl.yelpcdn.com/bphoto/Yhf4z3...,False,https://www.yelp.com/biz/the-breakfast-club-sa...,3696,"[{'alias': 'breakfast_brunch', 'title': 'Break...",4.0,"{'latitude': 37.323197697534354, 'longitude': ...","[delivery, pickup]",$$,"{'address1': '1432 W San Carlos St', 'address2...",14085647150.0,(408) 564-7150,12132.233897
2,hl2yriEDrXYTj1XFjaImGg,fambrinis-cafe-palo-alto,Fambrini's Cafe,https://s3-media2.fl.yelpcdn.com/bphoto/JaBB-P...,False,https://www.yelp.com/biz/fambrinis-cafe-palo-a...,992,"[{'alias': 'sandwiches', 'title': 'Sandwiches'...",4.5,"{'latitude': 37.42404, 'longitude': -122.14513}","[delivery, pickup]",$$,"{'address1': '2500 El Camino Real', 'address2'...",16508581268.0,(650) 858-1268,11370.99053
3,wNLTGsHB8eNv0itXv-CpXA,bloom-santa-clara,Bloom,https://s3-media4.fl.yelpcdn.com/bphoto/BJYm0E...,False,https://www.yelp.com/biz/bloom-santa-clara?adj...,999,"[{'alias': 'tradamerican', 'title': 'American ...",4.5,"{'latitude': 37.32831, 'longitude': -121.9661}","[delivery, pickup]",$$,"{'address1': '202 Saratoga Ave', 'address2': '...",14083452667.0,(408) 345-2667,8515.446126
4,ngfWoe6BTj57tS6PkI-wqA,holders-country-inn-cupertino-cupertino,Holder's Country Inn - Cupertino,https://s3-media2.fl.yelpcdn.com/bphoto/88p4yS...,False,https://www.yelp.com/biz/holders-country-inn-c...,778,"[{'alias': 'breakfast_brunch', 'title': 'Break...",4.0,"{'latitude': 37.32491464630353, 'longitude': -...","[delivery, pickup]",$$,"{'address1': '10088 N Wolfe Rd', 'address2': '...",14082442798.0,(408) 244-2798,7263.086286
5,MBsmVkpfos-50Ai9XBnraQ,mochi-waffle-corner-mountain-view-2,Mochi Waffle Corner,https://s3-media3.fl.yelpcdn.com/bphoto/vJU4KK...,False,https://www.yelp.com/biz/mochi-waffle-corner-m...,75,"[{'alias': 'waffles', 'title': 'Waffles'}, {'a...",4.5,"{'latitude': 37.37566, 'longitude': -122.06196}","[delivery, pickup]",$,"{'address1': '805 E El Camino Real', 'address2...",16505673737.0,(650) 567-3737,3732.298367
6,f37oYcZBET2K2B-gYlpB-g,uncle-johns-pancake-house-winchester-campbell,Uncle John's Pancake House - Winchester,https://s3-media1.fl.yelpcdn.com/bphoto/enyQ6N...,False,https://www.yelp.com/biz/uncle-johns-pancake-h...,1214,"[{'alias': 'breakfast_brunch', 'title': 'Break...",4.0,"{'latitude': 37.2852098196745, 'longitude': -1...","[delivery, pickup]",$$,"{'address1': '2125 S Winchester Blvd', 'addres...",14087249835.0,(408) 724-9835,13310.868328
7,xKIvht45p4ANXI9SQIu6Qg,lele-cake-los-gatos,Lele Cake,https://s3-media3.fl.yelpcdn.com/bphoto/56Oug-...,False,https://www.yelp.com/biz/lele-cake-los-gatos?a...,249,"[{'alias': 'coffee', 'title': 'Coffee & Tea'},...",4.5,"{'latitude': 37.23602248578784, 'longitude': -...","[delivery, pickup]",$$,"{'address1': '14178 Blossom Hill Rd', 'address...",16692916986.0,(669) 291-6986,19632.872239
8,F3Pycq08OR10mUYizSSlgQ,uncle-johns-pancake-house-the-alameda-san-jose,Uncle John's Pancake House - The Alameda,https://s3-media2.fl.yelpcdn.com/bphoto/gUQahd...,False,https://www.yelp.com/biz/uncle-johns-pancake-h...,406,"[{'alias': 'breakfast_brunch', 'title': 'Break...",4.5,"{'latitude': 37.33153, 'longitude': -121.91265}","[delivery, pickup]",$$,"{'address1': '1205 The Alameda', 'address2': '...",14088994071.0,(408) 899-4071,11763.895798
9,qD3nKDbli6uwkuouHAsDRQ,sweet-maple-palo-alto,Sweet Maple,https://s3-media4.fl.yelpcdn.com/bphoto/TyZlv4...,False,https://www.yelp.com/biz/sweet-maple-palo-alto...,745,"[{'alias': 'breakfast_brunch', 'title': 'Break...",4.0,"{'latitude': 37.443992, 'longitude': -122.163031}",[restaurant_reservation],$$,"{'address1': '150 University Ave', 'address2':...",16505210764.0,(650) 521-0764,13693.70216


In [15]:
## Check how many results were returned in my initial API call
results_per_page = len(results['businesses'])
results_per_page

20

In [16]:
# Calculate how many pages of results needed to cover the total_results
n_pages = math.ceil((results['total'])/ results_per_page)
n_pages

49

# Loop to obtain all results for the search terms

In [18]:
for i in tqdm_notebook(range(1,n_pages+1)):
    ## The block of code we want to TRY to run
    try:
        
        time.sleep(.2)
        
        ## Read in results in progress file and check the length
        with open(JSON_FILE, 'r') as f:
            previous_results = json.load(f)
        
        ## save number of results to use as offset
        n_results = len(previous_results)
        
        
        ## use n_results as the OFFSET 
        results = yelp_api.search_query(location = LOCATION, term = TERM,
                                   offset = n_results+1)

        ## append new results and save to file
        previous_results.extend(results['businesses'])
        
        with open(JSON_FILE, 'w') as f:
            json.dump(previous_results, f)


            
    ## What to do if we get an error/exception.
    except Exception as e:
        print(' [!] ERROR', e)

  0%|          | 0/49 [00:00<?, ?it/s]

# Open the Final JSON File with Pandas

In [19]:
df = pd.read_json(JSON_FILE)

In [20]:
df.head()

Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
0,TANWKEUxyZvNnWu_Yv0ARg,the-breakfast-club-san-jose,The Breakfast Club,https://s3-media3.fl.yelpcdn.com/bphoto/Yhf4z3...,False,https://www.yelp.com/biz/the-breakfast-club-sa...,3696,"[{'alias': 'breakfast_brunch', 'title': 'Break...",4.0,"{'latitude': 37.323197697534354, 'longitude': ...","[delivery, pickup]",$$,"{'address1': '1432 W San Carlos St', 'address2...",14085647150,(408) 564-7150,12132.233897
1,hl2yriEDrXYTj1XFjaImGg,fambrinis-cafe-palo-alto,Fambrini's Cafe,https://s3-media2.fl.yelpcdn.com/bphoto/JaBB-P...,False,https://www.yelp.com/biz/fambrinis-cafe-palo-a...,992,"[{'alias': 'sandwiches', 'title': 'Sandwiches'...",4.5,"{'latitude': 37.42404, 'longitude': -122.14513}","[delivery, pickup]",$$,"{'address1': '2500 El Camino Real', 'address2'...",16508581268,(650) 858-1268,11370.99053
2,wNLTGsHB8eNv0itXv-CpXA,bloom-santa-clara,Bloom,https://s3-media4.fl.yelpcdn.com/bphoto/BJYm0E...,False,https://www.yelp.com/biz/bloom-santa-clara?adj...,999,"[{'alias': 'tradamerican', 'title': 'American ...",4.5,"{'latitude': 37.32831, 'longitude': -121.9661}","[delivery, pickup]",$$,"{'address1': '202 Saratoga Ave', 'address2': '...",14083452667,(408) 345-2667,8515.446126
3,ngfWoe6BTj57tS6PkI-wqA,holders-country-inn-cupertino-cupertino,Holder's Country Inn - Cupertino,https://s3-media2.fl.yelpcdn.com/bphoto/88p4yS...,False,https://www.yelp.com/biz/holders-country-inn-c...,778,"[{'alias': 'breakfast_brunch', 'title': 'Break...",4.0,"{'latitude': 37.32491464630353, 'longitude': -...","[delivery, pickup]",$$,"{'address1': '10088 N Wolfe Rd', 'address2': '...",14082442798,(408) 244-2798,7263.086286
4,MBsmVkpfos-50Ai9XBnraQ,mochi-waffle-corner-mountain-view-2,Mochi Waffle Corner,https://s3-media3.fl.yelpcdn.com/bphoto/vJU4KK...,False,https://www.yelp.com/biz/mochi-waffle-corner-m...,75,"[{'alias': 'waffles', 'title': 'Waffles'}, {'a...",4.5,"{'latitude': 37.37566, 'longitude': -122.06196}","[delivery, pickup]",$,"{'address1': '805 E El Camino Real', 'address2...",16505673737,(650) 567-3737,3732.298367


In [21]:
# check the dataframe
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 962 entries, 0 to 961
Data columns (total 16 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   id             962 non-null    object 
 1   alias          962 non-null    object 
 2   name           962 non-null    object 
 3   image_url      962 non-null    object 
 4   is_closed      962 non-null    bool   
 5   url            962 non-null    object 
 6   review_count   962 non-null    int64  
 7   categories     962 non-null    object 
 8   rating         962 non-null    float64
 9   coordinates    962 non-null    object 
 10  transactions   962 non-null    object 
 11  price          890 non-null    object 
 12  location       962 non-null    object 
 13  phone          962 non-null    object 
 14  display_phone  962 non-null    object 
 15  distance       962 non-null    float64
dtypes: bool(1), float64(2), int64(1), object(12)
memory usage: 113.8+ KB


In [22]:
## convert the filename to a .csv.gz
csv_file = JSON_FILE.replace('.json','.csv.gz')
csv_file

'Data/Sunnyvale-pancakes.csv.gz'

In [23]:
## Save it as a compressed csv (to save space)
df.to_csv(csv_file, compression = 'gzip', index = False)