In [1]:
# Standard Imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Additional Imports
import os, json, math, time
from yelpapi import YelpAPI
from tqdm.notebook import tqdm_notebook

In [2]:
from yelpapi import YelpAPI

In [3]:
# Load the Yelp API credentials from a JSON file kept outside the project folder.
# The path is built from the current user's home directory so the notebook is
# not tied to one machine's absolute path; change the sub-path below if your
# credentials file lives somewhere else.
credentials_path = os.path.join(os.path.expanduser('~'), '.secret', 'yelp_api.json')
with open(credentials_path) as f:
    login = json.load(f)
# Display only the key names -- never the secret values themselves
login.keys()

dict_keys(['client-id', 'api-key'])

In [4]:
# Build the API client from the 'api-key' entry of the loaded credentials.
# NOTE(review): timeout_s=5.0 is presumably a per-request timeout in seconds --
# confirm against the yelpapi documentation.
yelp_api = YelpAPI(login['api-key'], timeout_s=5.0)
yelp_api

<yelpapi.yelpapi.YelpAPI at 0x1bb27f07820>

In [5]:
# Search parameters passed as location= / term= to the search_query calls below
LOCATION = 'Minneapolis, MN'
TERM = 'Sushi'

In [6]:
# Path of the JSON file (YELP_FILE) that accumulates the businesses collected so far
YELP_FILE = "Data/results_in_progress_mpls_sushi.json"
YELP_FILE

'Data/results_in_progress_mpls_sushi.json'

In [7]:
## Make sure YELP_FILE exists before any later cell tries to read it
file_exists = os.path.isfile(YELP_FILE)

if file_exists:
    # Leave an existing file untouched so earlier results are kept
    print(f"[i] {YELP_FILE} already exists.")
else:
    # Create the parent folder first when the path includes one
    folder = os.path.dirname(YELP_FILE)
    if folder:
        os.makedirs(folder, exist_ok=True)

    print(f'[i] {YELP_FILE} not found. Saving empty list to file.')

    # Start the file off as an empty JSON list
    with open(YELP_FILE, 'w') as f:
        json.dump([], f)

[i] Data/results_in_progress_mpls_sushi.json already exists.


In [8]:
## Read the list saved in YELP_FILE; it holds everything collected so far
with open(YELP_FILE,'r') as f:
    previous_results = json.load(f)
    
## The number of saved entries is used as the offset of the next API call
n_results = len(previous_results)
print(f'- {n_results} previous results found.')

- 0 previous results found.


In [9]:
# Request one page of results with the shared LOCATION/TERM constants,
# starting after the n_results entries that are already saved
results = yelp_api.search_query(location=LOCATION,
                                term=TERM,
                               offset=n_results)
# Show the top-level keys of the response
results.keys()

dict_keys(['businesses', 'total', 'region'])

In [10]:
# 'total' entry of the response (presumably the number of matches for the
# whole search, not just this page -- confirm against the Yelp API docs)
results['total']

416

In [11]:
# Turn the first page's 'businesses' list into a DataFrame and preview two rows
mpls_sushi = pd.DataFrame(results['businesses'])
mpls_sushi.head(2) 

Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
0,IQpWgpVj6Z4BzvcdATBI_Q,momo-sushi-minneapolis-2,Momo Sushi,https://s3-media4.fl.yelpcdn.com/bphoto/TwKhmv...,False,https://www.yelp.com/biz/momo-sushi-minneapoli...,246,"[{'alias': 'japanese', 'title': 'Japanese'}, {...",4.5,"{'latitude': 45.00734, 'longitude': -93.24697}","[pickup, delivery]",$$,"{'address1': '1839 Central Ave NE', 'address2'...",16127899190,(612) 789-9190,5765.263936
1,AsYyD4Ya27UNqT5iFvCMaw,kataki-minneapolis,Kataki,https://s3-media1.fl.yelpcdn.com/bphoto/UL12kW...,False,https://www.yelp.com/biz/kataki-minneapolis?ad...,49,"[{'alias': 'ramen', 'title': 'Ramen'}, {'alias...",4.5,"{'latitude': 44.886911, 'longitude': -93.277679}","[pickup, delivery]",$$,"{'address1': '6401 Nicollet Ave', 'address2': ...",16128665034,(612) 866-5034,8804.665838


In [12]:
## Number of businesses contained in this single response (one page)
len(results['businesses'])

20

In [13]:
# Fetch the next page. Reuse the shared LOCATION/TERM constants instead of
# repeating the literals, and derive the offset from what is already covered:
# the saved results (n_results) plus the page returned by the previous request.
search_results = yelp_api.search_query(location=LOCATION,
                                       term=TERM,
                                       offset=n_results + len(results['businesses']))

In [14]:
# Turn the second page's 'businesses' list into a DataFrame and preview two rows
mpls_sushi20 = pd.DataFrame(search_results['businesses'])
mpls_sushi20.head(2) 

Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
0,3hS2Oyi0pBQKUatLWZzO3A,ten-sushi-japanese-restaurant-maple-grove,Ten Sushi Japanese Restaurant,https://s3-media1.fl.yelpcdn.com/bphoto/Oavsu_...,False,https://www.yelp.com/biz/ten-sushi-japanese-re...,219,"[{'alias': 'japanese', 'title': 'Japanese'}, {...",4.5,"{'latitude': 45.12677, 'longitude': -93.487985}","[delivery, pickup]",$$,"{'address1': '16362 County Rd 30', 'address2':...",17637105522,(763) 710-5522,23710.356077
1,Ri9rdddF6ma4CGedCQkJkg,kkinaco-nikkei-and-pisco-bar-hopkins,K'kinaco Nikkei & Pisco Bar,https://s3-media3.fl.yelpcdn.com/bphoto/Wk9Gt-...,False,https://www.yelp.com/biz/kkinaco-nikkei-and-pi...,46,"[{'alias': 'peruvian', 'title': 'Peruvian'}, {...",4.5,"{'latitude': 44.92423172068961, 'longitude': -...",[],,"{'address1': '922 Mainstreet', 'address2': '',...",19527379866,(952) 737-9866,10656.781754


In [15]:
## Stack the first and second result pages into one frame;
## ignore_index=True renumbers the rows instead of repeating each page's index
businesses = pd.concat([mpls_sushi, mpls_sushi20],
                      ignore_index=True)
# Preview both ends of the combined frame
display(businesses.head(3), businesses.tail(3))

Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
0,IQpWgpVj6Z4BzvcdATBI_Q,momo-sushi-minneapolis-2,Momo Sushi,https://s3-media4.fl.yelpcdn.com/bphoto/TwKhmv...,False,https://www.yelp.com/biz/momo-sushi-minneapoli...,246,"[{'alias': 'japanese', 'title': 'Japanese'}, {...",4.5,"{'latitude': 45.00734, 'longitude': -93.24697}","[pickup, delivery]",$$,"{'address1': '1839 Central Ave NE', 'address2'...",16127899190,(612) 789-9190,5765.263936
1,AsYyD4Ya27UNqT5iFvCMaw,kataki-minneapolis,Kataki,https://s3-media1.fl.yelpcdn.com/bphoto/UL12kW...,False,https://www.yelp.com/biz/kataki-minneapolis?ad...,49,"[{'alias': 'ramen', 'title': 'Ramen'}, {'alias...",4.5,"{'latitude': 44.886911, 'longitude': -93.277679}","[pickup, delivery]",$$,"{'address1': '6401 Nicollet Ave', 'address2': ...",16128665034,(612) 866-5034,8804.665838
2,mTnoCM3BrLttWb7m9P5SQQ,wakame-sushi-and-asian-bistro-minneapolis,Wakame Sushi & Asian Bistro,https://s3-media3.fl.yelpcdn.com/bphoto/U6IaCo...,False,https://www.yelp.com/biz/wakame-sushi-and-asia...,857,"[{'alias': 'sushi', 'title': 'Sushi Bars'}, {'...",4.0,"{'latitude': 44.946845, 'longitude': -93.322218}",[],$$,"{'address1': '3070 Excelsior Blvd', 'address2'...",16128862484,(612) 886-2484,3250.890712


Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
37,nDIYo73lwqlojmOP79ui3w,sapporo-shakopee,Sapporo,https://s3-media2.fl.yelpcdn.com/bphoto/p03bX0...,False,https://www.yelp.com/biz/sapporo-shakopee?adju...,58,"[{'alias': 'sushi', 'title': 'Sushi Bars'}, {'...",4.5,"{'latitude': 44.78176357813101, 'longitude': -...","[delivery, pickup]",$$,"{'address1': '1114 Vierling Dr E', 'address2':...",19522338800.0,(952) 233-8800,26741.875041
38,n-YJcCcqEteJwGD5WqihtA,masu-sushi-and-robata-minneapolis,Masu Sushi & Robata,https://s3-media1.fl.yelpcdn.com/bphoto/4OOyPP...,False,https://www.yelp.com/biz/masu-sushi-and-robata...,494,"[{'alias': 'sushi', 'title': 'Sushi Bars'}, {'...",3.5,"{'latitude': 44.9883318186803, 'longitude': -9...","[delivery, pickup]",$$,"{'address1': '330 E Hennepin Ave', 'address2':...",16123326278.0,(612) 332-6278,3759.943803
39,H06C8jnpYxuKdHSgWv_TUQ,wave-sushi-minneapolis,Wave Sushi,https://s3-media2.fl.yelpcdn.com/bphoto/A9zOAR...,False,https://www.yelp.com/biz/wave-sushi-minneapoli...,4,"[{'alias': 'sushi', 'title': 'Sushi Bars'}]",4.0,"{'latitude': 44.9772343, 'longitude': -93.2670...",[],,"{'address1': '200 S 6th St', 'address2': '', '...",,,2245.149435


In [16]:
def create_json_file(YELP_FILE,  delete_if_exists=False):
    """Ensure a JSON file containing an empty list exists at ``YELP_FILE``.

    Parameters
    ----------
    YELP_FILE : str
        Path of the JSON file. Any missing parent folder is created.
    delete_if_exists : bool, default False
        When True and the file already exists, the file is removed and
        replaced by a fresh one holding an empty list. When False an
        existing file is left untouched.

    Returns
    -------
    None
        The function only prints status messages and touches the filesystem.
    """
    already_present = os.path.isfile(YELP_FILE)

    if already_present and delete_if_exists == True:
        # Caller asked for a clean start: drop the old file, then fall
        # through to the creation step below.
        print(f"[!] {YELP_FILE} already exists. Deleting previous file...")
        os.remove(YELP_FILE)
    elif already_present:
        # Keep the existing file and its contents as they are
        print(f"[i] {YELP_FILE} already exists.")
        return

    ## From here on the file is known to be absent
    print(f"[i] {YELP_FILE} not found. Saving empty list to new file.")

    # Create the containing folder when the path includes one
    parent_dir = os.path.dirname(YELP_FILE)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    # An empty list is the starting point the later cells extend
    with open(YELP_FILE, 'w') as handle:
        json.dump([], handle)

In [18]:
## Create a new empty json file (deleting the previous one if it exists)
create_json_file(YELP_FILE, delete_if_exists=True)

## Load previous results and use len of results for offset
with open(YELP_FILE,'r') as f:
    previous_results = json.load(f)

## set offset based on previous results
n_results = len(previous_results)
print(f'- {n_results} previous results found.')

# This request is only used to size the download loop: it reports the total
# and the page length. Its businesses are not saved here.
results = yelp_api.search_query(location=LOCATION,
                                term=TERM,
                                offset=n_results)

## How many results total?
total_results = results['total']
## How many did we get the details for?
results_per_page = len(results['businesses'])

# Round up so a final partial page is still requested. An empty first page
# would make the division raise ZeroDivisionError, so treat it as "no pages".
if results_per_page == 0:
    n_pages = 0
else:
    n_pages = math.ceil((total_results - n_results) / results_per_page)
n_pages

[!] Data/results_in_progress_mpls_sushi.json already exists. Deleting previous file...
[i] Data/results_in_progress_mpls_sushi.json not found. Saving empty list to new file.
- 0 previous results found.


21

In [19]:
## Download the result pages one by one, writing the file after every request
## so the collected businesses are always on disk.
for i in tqdm_notebook( range(1,n_pages+1)):

    ## Read in results in progress file; its length is the next offset
    with open(YELP_FILE, 'r') as f:
        previous_results = json.load(f)
    n_results = len(previous_results)

    ## Stop before offset + page size goes past 1000. This counts RESULTS,
    ## not API calls, so the message says so.
    ## NOTE(review): 1000 is the cap used by the original check -- confirm it
    ## against the current Yelp API documentation.
    if (n_results + results_per_page) > 1000:
        print('Reached the 1000 result limit. Stopping loop.')
        break

    ## use n_results as the OFFSET
    results = yelp_api.search_query(location=LOCATION,
                                    term=TERM,
                                    offset=n_results)

    ## An empty page means nothing more is available at this offset. Without
    ## this guard the file would not grow and every remaining iteration
    ## would repeat the very same request.
    new_businesses = results['businesses']
    if len(new_businesses) == 0:
        print('No more results returned. Stopping loop.')
        break

    ## append new results and save to file
    previous_results.extend(new_businesses)
    with open(YELP_FILE,'w') as f:
        json.dump(previous_results,f)

    ## short pause between consecutive requests
    time.sleep(.2)

  0%|          | 0/21 [00:00<?, ?it/s]

In [20]:
# Load the accumulated results file into a DataFrame and preview both ends.
# NOTE(review): pd.read_json may infer column dtypes while loading -- verify
# that identifier-like columns such as 'phone' come back in the expected type.
final_df = pd.read_json(YELP_FILE)
display(final_df.head(), final_df.tail())

Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
0,IQpWgpVj6Z4BzvcdATBI_Q,momo-sushi-minneapolis-2,Momo Sushi,https://s3-media4.fl.yelpcdn.com/bphoto/TwKhmv...,False,https://www.yelp.com/biz/momo-sushi-minneapoli...,246,"[{'alias': 'japanese', 'title': 'Japanese'}, {...",4.5,"{'latitude': 45.00734, 'longitude': -93.24697}","[pickup, delivery]",$$,"{'address1': '1839 Central Ave NE', 'address2'...",16127899190.0,(612) 789-9190,5765.263936
1,AsYyD4Ya27UNqT5iFvCMaw,kataki-minneapolis,Kataki,https://s3-media1.fl.yelpcdn.com/bphoto/UL12kW...,False,https://www.yelp.com/biz/kataki-minneapolis?ad...,49,"[{'alias': 'ramen', 'title': 'Ramen'}, {'alias...",4.5,"{'latitude': 44.886911, 'longitude': -93.277679}","[pickup, delivery]",$$,"{'address1': '6401 Nicollet Ave', 'address2': ...",16128665034.0,(612) 866-5034,8804.665838
2,mTnoCM3BrLttWb7m9P5SQQ,wakame-sushi-and-asian-bistro-minneapolis,Wakame Sushi & Asian Bistro,https://s3-media3.fl.yelpcdn.com/bphoto/U6IaCo...,False,https://www.yelp.com/biz/wakame-sushi-and-asia...,857,"[{'alias': 'sushi', 'title': 'Sushi Bars'}, {'...",4.0,"{'latitude': 44.946845, 'longitude': -93.322218}",[],$$,"{'address1': '3070 Excelsior Blvd', 'address2'...",16128862484.0,(612) 886-2484,3250.890712
3,kcf7Bc1KKk-qoGJ2QIQVvw,billy-sushi-minneapolis,Billy Sushi,https://s3-media3.fl.yelpcdn.com/bphoto/jkj72q...,False,https://www.yelp.com/biz/billy-sushi-minneapol...,259,"[{'alias': 'japanese', 'title': 'Japanese'}, {...",4.0,"{'latitude': 44.984156, 'longitude': -93.26878...",[],$$$,"{'address1': '116 N First Ave', 'address2': No...",,,2675.306395
4,ddpjLv0P6iu7p1dRGCPWWw,sushi-takatsu-minneapolis,Sushi Takatsu,https://s3-media1.fl.yelpcdn.com/bphoto/WVD-r_...,False,https://www.yelp.com/biz/sushi-takatsu-minneap...,147,"[{'alias': 'sushi', 'title': 'Sushi Bars'}, {'...",4.5,"{'latitude': 44.9760103, 'longitude': -93.2709...","[pickup, delivery]",$,"{'address1': '733 Marquette Ave', 'address2': ...",16123395981.0,(612) 339-5981,1880.037584


Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
411,ndOy72EyaLboBvrQf-qSZA,lunds-and-byerlys-saint-paul,Lunds & Byerlys,https://s3-media2.fl.yelpcdn.com/bphoto/uIKe3R...,False,https://www.yelp.com/biz/lunds-and-byerlys-sai...,55,"[{'alias': 'grocery', 'title': 'Grocery'}]",3.5,"{'latitude': 44.91786128653253, 'longitude': -...",[],$$$,"{'address1': '2170 Ford Pkwy', 'address2': '',...",16516985845,(651) 698-5845,9377.875334
412,XQVIX_M2ZRJido8r-qkiRw,hy-vee-plymouth-2,Hy-Vee,https://s3-media3.fl.yelpcdn.com/bphoto/5OlULn...,False,https://www.yelp.com/biz/hy-vee-plymouth-2?adj...,22,"[{'alias': 'grocery', 'title': 'Grocery'}]",2.5,"{'latitude': 45.01966789825101, 'longitude': -...","[pickup, delivery]",$$,"{'address1': '16705 County Rd 24', 'address2':...",17633837060,(763) 383-7060,17016.027174
413,RjHYK_hiEFD44uADIsGWaw,cub-white-bear-lake-white-bear-lake,Cub - White Bear Lake,https://s3-media1.fl.yelpcdn.com/bphoto/TJ8UP5...,False,https://www.yelp.com/biz/cub-white-bear-lake-w...,14,"[{'alias': 'grocery', 'title': 'Grocery'}]",3.0,"{'latitude': 45.038845, 'longitude': -93.020922}",[],$$,"{'address1': '1920 Buerkle Rd', 'address2': ''...",16517777899,(651) 777-7899,22699.77782
414,hmxrb4bK-ommQSDOrWvzwQ,cub-maple-grove-maple-grove,Cub - Maple Grove,https://s3-media3.fl.yelpcdn.com/bphoto/KvAsk7...,False,https://www.yelp.com/biz/cub-maple-grove-maple...,21,"[{'alias': 'grocery', 'title': 'Grocery'}]",2.5,"{'latitude': 45.1027486, 'longitude': -93.4482...",[],$$,"{'address1': '8150 Wedgewood Ln N', 'address2'...",17634948364,(763) 494-8364,19656.837594
415,3I5O0GUO57uOCcLKqiaIJQ,ihop-maplewood-2,IHOP,https://s3-media2.fl.yelpcdn.com/bphoto/e0AFyZ...,False,https://www.yelp.com/biz/ihop-maplewood-2?adju...,62,"[{'alias': 'breakfast_brunch', 'title': 'Break...",2.0,"{'latitude': 45.02836, 'longitude': -93.01963}","[pickup, delivery]",$$,"{'address1': '1935 Beam Ave', 'address2': '', ...",16517481700,(651) 748-1700,22406.296989


In [21]:
# Count the rows whose 'id' repeats the 'id' of an earlier row
final_df.duplicated(subset='id').sum()

0

In [22]:
## Drop rows with a repeated 'id', then re-count to confirm no duplicates remain.
## Note: this rebinds final_df, so the frame loaded above is replaced.
final_df = final_df.drop_duplicates(subset='id')
final_df.duplicated(subset='id').sum()

0