# Assignment

For this assignment, you will be working with the Yelp API.

As before, you will use the Yelp API to search your favorite city for a cuisine type of your choice.

Extract all of the results from your search and compile them into one dataframe using a for loop as shown in the lesson "Code for Efficient API Extraction"

Save your notebook, commit the change to your repository and submit the repository URL for this assignment.

# Import Libraries

In [8]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Additional Imports
import os, json, math, time
from yelpapi import YelpAPI
from tqdm.notebook import tqdm_notebook

In [3]:
with open('/Users/jonnu/.secret/yelp_api.json') as f:
    login = json.load(f)
# Instantiate YelpAPI Variable
yelp_api = YelpAPI(login['api-key'], timeout_s=5.0)

In [4]:
LOCATION = 'San Antonio, TX'
TERM = 'Tacos'

# Create Results-in-Progress .json File

In [5]:
json_file = "Data/results_in_progress_satx_tacos.json"
json_file

'Data/results_in_progress_satx_tacos.json'

In [6]:
# Code copied from Coding Dojo Learning Platform
# If error occurs and need to restart results .json file

def create_json_file(JSON_FILE,  delete_if_exists=False):

    ## Check if JSON_FILE exists
    file_exists = os.path.isfile(JSON_FILE)

    ## If it DOES exist:
    if file_exists == True:

        ## Check if user wants to delete if exists
        if delete_if_exists==True:

            print(f"[!] {JSON_FILE} already exists. Deleting previous file...")
            ## delete file and confirm it no longer exits.
            os.remove(JSON_FILE)
            ## Recursive call to function after old file deleted
            create_json_file(JSON_FILE,delete_if_exists=False)
        else:
            print(f"[i] {JSON_FILE} already exists.")            


    ## If it does NOT exist:
    else:

        ## INFORM USER AND SAVE EMPTY LIST
        print(f"[i] {JSON_FILE} not found. Saving empty list to new file.")

        ## CREATE ANY NEEDED FOLDERS
        # Get the Folder Name only
        folder = os.path.dirname(JSON_FILE)

        ## If JSON_FILE included a folder:
        if len(folder)>0:
            # create the folder
            os.makedirs(folder,exist_ok=True)
        ## Save empty list to start the json file
        with open(JSON_FILE,'w') as f:
            json.dump([],f)  

In [7]:
create_json_file(json_file,  delete_if_exists=True)

[i] Data/results_in_progress_satx_tacos.json not found. Saving empty list to new file.


# Create Variables for Results and Pages

In [10]:
with open(json_file,'r') as f:
    previous_results = json.load(f)
    
## set offset based on previous results
n_results = len(previous_results)
print(f'- {n_results} previous results found.')
# use our yelp_api variable's search_query method to perform our API call
results = yelp_api.search_query(location=LOCATION,
                                term=TERM,
                               offset=n_results)
## How many results total?
total_results = results['total']
## How many did we get the details for?
results_per_page = len(results['businesses'])
# Use math.ceil to round up for the total number of pages of results.
n_pages = math.ceil((results['total']-n_results)/ results_per_page)
n_pages

- 0 previous results found.


155

#  Run For Loop to Retrieve and Save Results
- Account for Yelp Result limit of 1000

In [12]:
for i in tqdm_notebook( range(1,n_pages+1)):
    
    ## Read in results in progress file and check the length
    with open(json_file, 'r') as f:
        previous_results = json.load(f)
    ## save number of results for to use as offset
    n_results = len(previous_results)
    
    if (n_results + results_per_page) > 1000:
        print('Exceeded 1000 api calls. Stopping loop.')
        break
    
    ## use n_results as the OFFSET 
    results = yelp_api.search_query(location=LOCATION,
                                    term=TERM, 
                                    offset=n_results)
    
    
    
    ## append new results and save to file
    previous_results.extend(results['businesses'])
    
    # display(previous_results)
    with open(json_file,'w') as f:
        json.dump(previous_results,f)
    
    time.sleep(.2)

  0%|          | 0/155 [00:00<?, ?it/s]

Exceeded 1000 api calls. Stopping loop.


# Final Results as DataFrame

In [13]:
final_df = pd.read_json(json_file)
display(final_df.head(), final_df.tail())

Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
0,zulD0Ns_524wMuCAFlXxRA,tlahco-mexican-kitchen-san-antonio,Tlahco Mexican kitchen,https://s3-media4.fl.yelpcdn.com/bphoto/PAEto8...,False,https://www.yelp.com/biz/tlahco-mexican-kitche...,755,"[{'alias': 'mexican', 'title': 'Mexican'}]",4.5,"{'latitude': 29.5006724654481, 'longitude': -9...","[delivery, pickup]",$$,"{'address1': '6702 San Pedro Ave', 'address2':...",12104550135.0,(210) 455-0135,2931.033269
1,fD626-6wEZgfGP2YT3wSIA,taquitos-west-ave-san-antonio,Taquitos West Ave,https://s3-media2.fl.yelpcdn.com/bphoto/yWNgVE...,False,https://www.yelp.com/biz/taquitos-west-ave-san...,755,"[{'alias': 'mexican', 'title': 'Mexican'}]",4.5,"{'latitude': 29.48637, 'longitude': -98.52573}",[delivery],$,"{'address1': '2818 West Ave', 'address2': None...",12105259888.0,(210) 525-9888,1118.116301
2,j2Ym1bd-tRyM-G5qXZ1yCw,petes-tako-house-san-antonio,Pete's Tako House,https://s3-media1.fl.yelpcdn.com/bphoto/EzV5R6...,False,https://www.yelp.com/biz/petes-tako-house-san-...,1297,"[{'alias': 'breakfast_brunch', 'title': 'Break...",4.5,"{'latitude': 29.4328575134277, 'longitude': -9...","[delivery, pickup]",$,"{'address1': '502 Brooklyn Ave', 'address2': '...",12102242911.0,(210) 224-2911,6094.469626
3,Omclc_9AiOvb2gCEQSHLjg,tacos-vitali-san-antonio-2,Tacos Vitali,https://s3-media1.fl.yelpcdn.com/bphoto/PJq-W1...,False,https://www.yelp.com/biz/tacos-vitali-san-anto...,56,"[{'alias': 'mexican', 'title': 'Mexican'}]",4.0,"{'latitude': 29.54676444696915, 'longitude': -...",[],$$,"{'address1': '6433 Babcock Rd', 'address2': ''...",,,12020.951105
4,jJQBNqgL4pz4xg4KqZd37w,taqueria-el-trompo-san-antonio,Taqueria El Trompo,https://s3-media3.fl.yelpcdn.com/bphoto/Mehe4p...,False,https://www.yelp.com/biz/taqueria-el-trompo-sa...,230,"[{'alias': 'mexican', 'title': 'Mexican'}]",4.5,"{'latitude': 29.5096948824644, 'longitude': -9...",[],$$,"{'address1': '7863 Callaghan Rd', 'address2': ...",12106268032.0,(210) 626-8032,4956.083578


Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
995,C-LHPeo9N6hLY4yBQ4-nUg,chabela-san-antonio,Chabela,https://s3-media4.fl.yelpcdn.com/bphoto/_W_Skb...,False,https://www.yelp.com/biz/chabela-san-antonio?a...,2,"[{'alias': 'mexican', 'title': 'Mexican'}]",3.0,"{'latitude': 29.488223, 'longitude': -98.576172}","[delivery, pickup]",,"{'address1': '6031 Callaghan Rd', 'address2': ...",12103682226.0,(210) 368-2226,5794.377903
996,Ij9TVorQnOuLwn5ZJo7cCg,taco-palenque-new-braunfels-new-braunfels,Taco Palenque - New Braunfels,https://s3-media2.fl.yelpcdn.com/bphoto/E5lklB...,False,https://www.yelp.com/biz/taco-palenque-new-bra...,129,"[{'alias': 'mexican', 'title': 'Mexican'}]",4.0,"{'latitude': 29.70091, 'longitude': -98.09207}",[delivery],$,"{'address1': '1270 Ih 35 S', 'address2': '', '...",18302140561.0,(830) 214-0561,47904.773354
997,QJLHaDnZoeHO1-q0jy7cfg,teka-molino-mobile-san-antonio,Teka Molino Mobile,https://s3-media3.fl.yelpcdn.com/bphoto/AoC-Fl...,False,https://www.yelp.com/biz/teka-molino-mobile-sa...,9,"[{'alias': 'foodtrucks', 'title': 'Food Trucks'}]",4.0,"{'latitude': 29.4265551, 'longitude': -98.4937...",[],$$,"{'address1': '211 N Main', 'address2': '', 'ad...",,,6319.650794
998,ghtR1FxRDdbcxJhTmFMh4g,sandia-mexican-restaurant-san-antonio,Sandia Mexican Restaurant,https://s3-media2.fl.yelpcdn.com/bphoto/ETLvsA...,False,https://www.yelp.com/biz/sandia-mexican-restau...,77,"[{'alias': 'mexican', 'title': 'Mexican'}]",3.5,"{'latitude': 29.4345272, 'longitude': -98.5996...",[delivery],$,"{'address1': '106 S Callaghan Rd', 'address2':...",12104323400.0,(210) 432-3400,9432.725272
999,bC2wMY9dYAFEqO5i1_4XYA,lolas-mexican-food-seguin-2,Lola's Mexican Food,https://s3-media4.fl.yelpcdn.com/bphoto/qBHluc...,False,https://www.yelp.com/biz/lolas-mexican-food-se...,42,"[{'alias': 'mexican', 'title': 'Mexican'}]",5.0,"{'latitude': 29.6050350662487, 'longitude': -9...",[],$,"{'address1': '3470 E US Hwy 90', 'address2': '...",18303798812.0,(830) 379-8812,59981.413229


## Check for Duplicates
- Need to use subset='id' since some Yelp dataframe columns contain lists

In [14]:
final_df.duplicated(subset='id').sum()

3

In [15]:
final_df = final_df.drop_duplicates(subset='id')
final_df.duplicated(subset='id').sum()

0

# Save as .csv.gz File
- Compressed to account for data upload limits

In [16]:
final_df.to_csv('Data/final_results_SATX_tacos.csv.gz', compression='gzip',index=False)

In [None]:
pd.read_csv('Data/final_results_SATX_tacos.csv.gz', )