In [1]:
import json
from pathlib import Path

# Load the Yelp API credentials from a JSON file kept outside the project
# folder. The location is resolved relative to the current user's home
# directory rather than a hard-coded absolute path, so the notebook also runs
# for any account that keeps the file in ~/.secret/.
CREDENTIALS_PATH = Path.home() / '.secret' / 'yelp_api.json'
with open(CREDENTIALS_PATH, encoding='utf-8') as f:
    login = json.load(f)
# Display only the key names, never the values, so no secret lands in the
# saved notebook output.
login.keys()

dict_keys(['client-id', 'api-key'])

In [2]:
# Standard Imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Additional Imports
import os, json, math, time
from yelpapi import YelpAPI
from tqdm.notebook import tqdm_notebook

In [3]:
pip install yelpapi

Note: you may need to restart the kernel to use updated packages.


In [4]:
# Build the Yelp client from the 'api-key' entry of the credentials loaded
# in the first cell, passing timeout_s=5.0 to the constructor.
# NOTE(review): YelpAPI is already imported in the imports cell, so this
# repeat is redundant.
from yelpapi import YelpAPI
yelp_api = YelpAPI(login['api-key'], timeout_s=5.0)
# Echo the object to confirm it was created.
yelp_api

<yelpapi.yelpapi.YelpAPI at 0x19b59c610>

In [5]:
# define search
# Both constants are passed unchanged to every yelp_api.search_query call
# in the cells that follow.
LOCATION = 'LA, CA'
TERM = 'Taco'

In [6]:
# Path of the results-in-progress JSON file. This cell only defines the path;
# the file itself is created later by create_json_file and is re-read and
# re-written by the paging loop.
JSON_LA_TACOS = 'Data/results_in_progress_LA_tacos.json'
JSON_LA_TACOS

'Data/results_in_progress_LA_tacos.json'

In [13]:
def create_json_file(JSON_LA_TACOS,  delete_if_exists=False):
    """Make sure a JSON file containing a list exists at ``JSON_LA_TACOS``.

    Parameters
    ----------
    JSON_LA_TACOS : str or os.PathLike
        Path of the JSON file. Any missing parent folders are created.
    delete_if_exists : bool, default False
        If true and the file already exists, the file is removed and replaced
        by a new file holding an empty list. If false, an existing file is
        left untouched.

    Returns
    -------
    None
        The function works through its side effects: printed status messages
        and the file on disk.
    """
    if os.path.isfile(JSON_LA_TACOS):
        if not delete_if_exists:
            # Nothing to do: keep the existing file and its contents.
            print(f"[i] {JSON_LA_TACOS} already exists.")
            return

        print(f"[!] {JSON_LA_TACOS} already exists. Deleting previous file...")
        os.remove(JSON_LA_TACOS)
        # Fall through to the creation code below (no recursive call needed).

    ## INFORM USER AND SAVE EMPTY LIST
    print(f"[i] {JSON_LA_TACOS} not found. Saving empty list to new file.")

    ## CREATE ANY NEEDED FOLDERS
    # dirname is '' for a bare file name; makedirs('') would raise, so skip it.
    folder = os.path.dirname(JSON_LA_TACOS)
    if folder:
        os.makedirs(folder, exist_ok=True)

    ## Save empty list to start the json file
    with open(JSON_LA_TACOS, 'w') as f:
        json.dump([], f)


In [14]:
# Create a new empty JSON file (replacing the previous one if it exists)
create_json_file(JSON_LA_TACOS, delete_if_exists=True)
## Load previous results and use len of results for offset
with open(JSON_LA_TACOS,'r') as f:
    previous_results = json.load(f)

# set offset based on previous results
n_results = len(previous_results)
print(f'- {n_results} previous results found.')
# use our yelp_api variable's search_query method to perform our API call
results = yelp_api.search_query(location=LOCATION,
                                term=TERM,
                                offset=n_results)
# How many results total?
total_results = results['total']
## How many did we get the details for?
results_per_page = len(results['businesses'])
# Use math.ceil to round up for the total number of pages of results.
# If the first page came back empty there is nothing to page through, and
# dividing by results_per_page would raise ZeroDivisionError.
if results_per_page:
    n_pages = math.ceil((total_results - n_results) / results_per_page)
else:
    n_pages = 0
n_pages


[!] Data/results_in_progress_LA_tacos.json already exists. Deleting previous file...
[i] Data/results_in_progress_LA_tacos.json not found. Saving empty list to new file.
- 0 previous results found.


615

In [15]:
# Run the search again with the same location, term and offset, then list the
# top-level keys of the response dictionary.
results = yelp_api.search_query(
    location=LOCATION,
    term=TERM,
    offset=n_results,
)
results.keys()


dict_keys(['businesses', 'total', 'region'])

In [16]:
# How many results in total: the value stored under the 'total' key of the
# search response.
total_results = results['total']
total_results

12300

In [17]:
## How many did we get the details for?
# i.e. the number of entries in the 'businesses' list of this one response.
results_per_page = len(results['businesses'])
results_per_page

20

In [18]:
# import additional packages for controlling loop
import time, math
# Use math.ceil to round up for the total number of pages of results.
# An empty page means there is nothing to page through; guarding here also
# avoids a ZeroDivisionError when results_per_page is 0.
if results_per_page:
    n_pages = math.ceil((results['total'] - n_results) / results_per_page)
else:
    n_pages = 0
n_pages

615

In [19]:
# Progress-bar demo: tqdm_notebook wraps the range of pages and each
# iteration only pauses for 200 ms (no request is made in this cell).
for i in tqdm_notebook(range(n_pages)):
    time.sleep(0.2)

  0%|          | 0/615 [00:00<?, ?it/s]

In [20]:
for i in tqdm_notebook(range(1, n_pages + 1)):

    ## Read in results in progress file and check the length
    with open(JSON_LA_TACOS, 'r') as f:
        previous_results = json.load(f)
    ## the number of saved results doubles as the OFFSET of the next request
    n_results = len(previous_results)

    ## Stop before the next page would reach past 1000 results.
    ## NOTE(review): the message says "api calls", but the quantity compared is
    ## a count of results (offset + page size); presumably this mirrors a cap
    ## of the Yelp search endpoint -- confirm against the API documentation.
    if (n_results + results_per_page) > 1000:
        print('Exceeded 1000 api calls. Stopping loop.')
        break

    ## use n_results as the OFFSET
    results = yelp_api.search_query(location=LOCATION,
                                    term=TERM,
                                    offset=n_results)

    ## An empty page adds nothing to the file, so the offset would never
    ## advance and every remaining iteration would repeat the same request.
    new_businesses = results['businesses']
    if not new_businesses:
        print('No businesses returned. Stopping loop.')
        break

    ## append new results and save to file, so progress survives an interruption
    previous_results.extend(new_businesses)
    with open(JSON_LA_TACOS,'w') as f:
        json.dump(previous_results,f)

    ## short pause between requests
    time.sleep(.2)

  0%|          | 0/615 [00:00<?, ?it/s]

Exceeded 1000 api calls. Stopping loop.


In [21]:
# Convert json to df
# Read the saved results file into a DataFrame and show its first and last rows.
# NOTE(review): in the output below `phone` is shown as a number (NaN / trailing
# .0) rather than text; presumably read_json inferred a numeric dtype -- consider
# an explicit dtype for that column if it is used later. TODO confirm
results_df = pd.read_json(JSON_LA_TACOS)
display(results_df.head(), results_df.tail())

Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
0,QKovUc1TmSNtZh0j5ZEagw,leos-tacos-truck-los-angeles,Leo's Tacos Truck,https://s3-media1.fl.yelpcdn.com/bphoto/ca0rQt...,False,https://www.yelp.com/biz/leos-tacos-truck-los-...,2332,"[{'alias': 'foodtrucks', 'title': 'Food Trucks...",4.5,"{'latitude': 34.046438, 'longitude': -118.345718}",[],$,"{'address1': '1515 S La Brea Ave', 'address2':...",13233462001,(323) 346-2001,2804.549875
1,tpg43fTxsQ92XkRngtlK3Q,villas-tacos-los-angeles,Villa's Tacos,https://s3-media3.fl.yelpcdn.com/bphoto/7fDXSG...,False,https://www.yelp.com/biz/villas-tacos-los-ange...,403,"[{'alias': 'tacos', 'title': 'Tacos'}]",4.5,"{'latitude': 34.108734, 'longitude': -118.196636}",[],$$,"{'address1': '5455 N Figueroa St', 'address2':...",18187418011,(818) 741-8011,12603.715792
2,BDRVlHnK4l0T0ANb7M-Eqg,guisados-los-angeles-3,Guisados,https://s3-media1.fl.yelpcdn.com/bphoto/JqGxcZ...,False,https://www.yelp.com/biz/guisados-los-angeles-...,2429,"[{'alias': 'mexican', 'title': 'Mexican'}]",4.5,"{'latitude': 34.0702395145121, 'longitude': -1...","[delivery, pickup]",$$,"{'address1': '1261 W Sunset Blvd', 'address2':...",12132507600,(213) 250-7600,6604.197847
3,TZFTVm9WbsWILRdY98SYtA,tacos-el-compita-los-angeles-4,Tacos El Compita,https://s3-media1.fl.yelpcdn.com/bphoto/0KHZ-N...,False,https://www.yelp.com/biz/tacos-el-compita-los-...,387,"[{'alias': 'streetvendors', 'title': 'Street V...",4.5,"{'latitude': 34.0485400612365, 'longitude': -1...","[delivery, pickup]",$,"{'address1': '4477 W Pico Blvd', 'address2': '...",13239350490,(323) 935-0490,1788.74597
4,FvG80RRM9jYBK8T-NL_-sg,papi-tacos-and-churros-los-angeles-2,Papi Tacos & Churros,https://s3-media3.fl.yelpcdn.com/bphoto/ixVx5j...,False,https://www.yelp.com/biz/papi-tacos-and-churro...,306,"[{'alias': 'desserts', 'title': 'Desserts'}, {...",4.5,"{'latitude': 34.0951, 'longitude': -118.32645}",[],$$,"{'address1': '1320 Vine St', 'address2': None,...",13235369800,(323) 536-9800,3763.355024


Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
995,SsOlRN_7cKsHlek_RyohvQ,la-tostaderia-los-angeles,La Tostaderia,https://s3-media4.fl.yelpcdn.com/bphoto/D6tHvi...,False,https://www.yelp.com/biz/la-tostaderia-los-ang...,452,"[{'alias': 'seafood', 'title': 'Seafood'}, {'a...",4.0,"{'latitude': 34.050713, 'longitude': -118.248882}","[pickup, delivery]",$$,"{'address1': '317 S Broadway', 'address2': '',...",12136283430.0,(213) 628-3430,6787.457874
996,PrvGlIM0L99VY-AgkFJRMw,juana-la-cubana-los-angeles,Juana La Cubana,https://s3-media4.fl.yelpcdn.com/bphoto/LvEJto...,False,https://www.yelp.com/biz/juana-la-cubana-los-a...,19,"[{'alias': 'mexican', 'title': 'Mexican'}, {'a...",4.0,"{'latitude': 34.0310740470886, 'longitude': -1...",[],$,"{'address1': '1946 S Hill St', 'address2': '',...",,,6018.341894
997,ODtZklkDPudH3_Zze5wQ8w,tacos-el-guero-arcadia-4,Tacos El Guero,https://s3-media3.fl.yelpcdn.com/bphoto/OxU0ZQ...,False,https://www.yelp.com/biz/tacos-el-guero-arcadi...,6,"[{'alias': 'tacos', 'title': 'Tacos'}]",5.0,"{'latitude': 34.124260743233584, 'longitude': ...",[],,"{'address1': '729 W Naomi Ave', 'address2': No...",13237403541.0,(323) 740-3541,25385.571318
998,tqjJYK3KyBjHRDsMT9kCpw,oi-asian-fusion-los-angeles-4,Oi Asian Fusion,https://s3-media2.fl.yelpcdn.com/bphoto/HVIgNp...,False,https://www.yelp.com/biz/oi-asian-fusion-los-a...,634,"[{'alias': 'filipino', 'title': 'Filipino'}, {...",4.5,"{'latitude': 34.1015109972374, 'longitude': -1...","[pickup, delivery]",$,"{'address1': '4734 Hollywood Blvd', 'address2'...",12134581945.0,(213) 458-1945,5148.452044
999,ZzUhGHxbs506GVy-WLb4mw,la-morenita-restaurant-los-angeles,La Morenita Restaurant,https://s3-media3.fl.yelpcdn.com/bphoto/M32SBA...,False,https://www.yelp.com/biz/la-morenita-restauran...,103,"[{'alias': 'mexican', 'title': 'Mexican'}]",4.0,"{'latitude': 34.07328, 'longitude': -118.2149}","[pickup, delivery]",$,"{'address1': '2620 N Broadway', 'address2': ''...",13232211687.0,(323) 221-1687,9897.004117


In [23]:
# Check for duplicates
# Count the rows whose 'id' repeats the 'id' of an earlier row.
results_df.duplicated(subset='id').sum()

154

In [24]:
# drop duplicates
# Keep one row per 'id' (results_df is reassigned), then re-count the
# duplicates to confirm none remain.
results_df = results_df.drop_duplicates(subset='id')
results_df.duplicated(subset='id').sum()

0

In [25]:
# Write results_df to a gzip-compressed CSV file, leaving out the index column.
results_df.to_csv(
    'Data/final_results_LA_tacos.csv.gz',
    compression='gzip',
    index=False,
)