# James Jones
### 11-16-2022

In [8]:
# Import libraies necessary for efficient API calls
import numpy as np
import pandas as pd
import seaborn as sns
import os, json, math, time
from yelpapi import YelpAPI
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm_notebook

In [4]:
# Load API credentials
with open('/Users/jamesjones/.secret/yelp_api.json') as f:
    login = json.load(f)

# Instantiate YelpAPI
yelp_api = YelpAPI(login['api-key'], timeout_s = 5.0)    

In [5]:
# Decide what to search. I will search hot chicken in nashville
LOCATION = 'Nashvile, TN'
TERM = "Hot Chicken"

In [6]:
# Specify a json filename to create 
JSON_FILE = "Data/results_in_progress_Nashville_Hot.json"
JSON_FILE

'Data/results_in_progress_Nashville_Hot.json'

In [7]:
# Check if file exists, and create it if not
    # This is for practice, because this file does not already exist

file_exists = os.path.isfile(JSON_FILE)

if file_exists == False:
    folder = os.path.dirname(JSON_FILE)
    
    if len(folder) > 0:
        os.makedirs(folder, exist_ok = True)
    
    print(f'[i] {JSON_FILE} not found. Saving empty list to file.')
    
    with open(JSON_FILE, 'w') as f:
        json.dump([], f)
else:
    print(f'[i] {JSON_FILE} already exists.')

[i] Data/results_in_progress_Nashville_Hot.json not found. Saving empty list to file.


In [13]:
# Use our yelp_api variable's search_query method to perform our API call
    # Currently 0 as no call has been made yet

with open(JSON_FILE, 'r') as f:
    previous_results = json.load(f)
    
n_results = len(previous_results)
print(f'- {n_results} previous results found.')

- 0 previous results found.


In [15]:
# What is the total number of results?
total_results = results['total']
total_results

923

#### Yelp API will let us make 1000 calls at a time with the free version of their API, therefore we should be able to gather all of these results

In [17]:
# How many results are there per page? (will be 20 by stabdard pagination)
results_per_page = len(results['businesses'])
results_per_page

20

In [19]:
# Now, let's use 'math.ceil' to find, and round up our total number of pages
n_pages = math.ceil((results['total'] - n_results) / results_per_page)
n_pages

47

In [22]:
# Now, let's create a loop to gather all of the information connected to:
    # Hot Chicken in Nashville, TN

for i in tqdm_notebook(range(1, n_pages)):
    with open(JSON_FILE, 'r') as f:
        previous_results = json.load(f)
        
    if (n_results + results_per_page) > 1000:
        print('Exceeded 1000 api calls. Stopping loop.')
        break
        
    previous_results.extend(results['businesses'])
        
    with open(JSON_FILE, 'w') as f:
        json.dump(previous_results, f)
    
    time.sleep(0.2) # Adds a 200ms stop so we don't bog down the sever as much

  0%|          | 0/46 [00:00<?, ?it/s]

In [23]:
# Convert .json to a DF for visualization
chicken_df = pd.read_json(JSON_FILE)
display(chicken_df.head(4), chicken_df.tail(4))

Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
0,AVf4RO3bh2AAoZeCiPTQNg,hattie-bs-hot-chicken-nashville-melrose-nashvi...,Hattie B's Hot Chicken - Nashville Melrose,https://s3-media1.fl.yelpcdn.com/bphoto/YqfYFz...,False,https://www.yelp.com/biz/hattie-bs-hot-chicken...,970,"[{'alias': 'chicken_wings', 'title': 'Chicken ...",4.5,"{'latitude': 36.1290083208725, 'longitude': -8...",[delivery],$$,"{'address1': '2222 8th Ave S', 'address2': '',...",16157224700,(615) 722-4700,983.70979
1,GXFMD0Z4jEVZBCsbPf4CTQ,hattie-b-s-hot-chicken-nashville-midtown-nashv...,Hattie B’s Hot Chicken - Nashville - Midtown,https://s3-media1.fl.yelpcdn.com/bphoto/0KANbu...,False,https://www.yelp.com/biz/hattie-b-s-hot-chicke...,6687,"[{'alias': 'chicken_wings', 'title': 'Chicken ...",4.5,"{'latitude': 36.1513871522975, 'longitude': -8...",[delivery],$$,"{'address1': '112 19th Ave S', 'address2': '',...",16158025700,(615) 802-5700,2429.649393
2,xB3MxRBF6l8rd9xxJVnLhw,hattie-b-s-hot-chicken-nashville-lower-broadwa...,Hattie B’s Hot Chicken - Nashville - Lower Bro...,https://s3-media1.fl.yelpcdn.com/bphoto/2Fdane...,False,https://www.yelp.com/biz/hattie-b-s-hot-chicke...,405,"[{'alias': 'chickenshop', 'title': 'Chicken Sh...",4.5,"{'latitude': 36.160336468836555, 'longitude': ...",[],$$,"{'address1': '5069 Broadway Pl', 'address2': '...",16155768700,(615) 576-8700,2541.584388
3,16tjKOvMw_nOgNViq2LwOQ,hattie-bs-hot-chicken-nashville-west-nashville-2,Hattie B's Hot Chicken - Nashville West,https://s3-media2.fl.yelpcdn.com/bphoto/rfNW9g...,False,https://www.yelp.com/biz/hattie-bs-hot-chicken...,1446,"[{'alias': 'chicken_wings', 'title': 'Chicken ...",4.5,"{'latitude': 36.15157, 'longitude': -86.85082}",[delivery],$$,"{'address1': '5209 Charlotte Ave', 'address2':...",16155763900,(615) 576-3900,6932.44368


Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
916,3_WGRjRrnZX2b4ZVpjmL6w,arnolds-country-kitchen-nashville,Arnold's Country Kitchen,https://s3-media3.fl.yelpcdn.com/bphoto/bmvzep...,False,https://www.yelp.com/biz/arnolds-country-kitch...,1168,"[{'alias': 'southern', 'title': 'Southern'}, {...",4.5,"{'latitude': 36.1514, 'longitude': -86.77958}","[delivery, pickup]",$$,"{'address1': '605 8th Ave S', 'address2': '', ...",16152564455,(615) 256-4455,1573.667137
917,6Pu5MKIWbn6FO-oebSSrYQ,the-southern-steak-and-oyster-nashville-2,The Southern Steak & Oyster,https://s3-media3.fl.yelpcdn.com/bphoto/gd7QME...,False,https://www.yelp.com/biz/the-southern-steak-an...,2429,"[{'alias': 'southern', 'title': 'Southern'}, {...",4.0,"{'latitude': 36.1598191068257, 'longitude': -8...",[delivery],$$$,"{'address1': '150 3rd Ave S', 'address2': 'Ste...",16157241762,(615) 724-1762,2460.859842
918,zwGzwkVeYXE-tRisb8if7A,smokin-thighs-wedgewood-nashville-2,Smokin Thighs - Wedgewood,https://s3-media3.fl.yelpcdn.com/bphoto/ewWJJ-...,False,https://www.yelp.com/biz/smokin-thighs-wedgewo...,576,"[{'alias': 'chicken_wings', 'title': 'Chicken ...",4.0,"{'latitude': 36.133133, 'longitude': -86.769974}","[delivery, pickup]",$$,"{'address1': '611 Wedgewood Ave', 'address2': ...",16156012582,(615) 601-2582,709.31739
919,GST3wg-wej15vHeCvaXE6w,acme-feed-and-seed-nashville,Acme Feed & Seed,https://s3-media2.fl.yelpcdn.com/bphoto/1SdisP...,False,https://www.yelp.com/biz/acme-feed-and-seed-na...,1849,"[{'alias': 'bars', 'title': 'Bars'}, {'alias':...",4.0,"{'latitude': 36.16187, 'longitude': -86.77432}",[delivery],$$,"{'address1': '101 Broadway', 'address2': None,...",16159150888,(615) 915-0888,2705.629935


In [24]:
# Check these results for duplicate id's
chicken_df.duplicated(subset = 'id').sum()

900

### This seems exorbitant, but, having lived in Nashville, I am inclined to believe that this is correct (or mostly correct). If this was the case for Pizza in New York, I would have more reservations

In [25]:
# Drop duplicate id's and confirm that there are no more duplicates
chicken_df = chicken_df.drop_duplicates(subset = 'id')
chicken_df.duplicated(subset = 'id').sum()

0

In [26]:
# Now save this final df to the file we previously created
    # This will be a small file (since we dropped so many duplicates)
    # We will, however, still compress this as standard of practice

chicken_df.to_csv('Data/final_results_Nashville_Hot.csv.gz',
                  compression = 'gzip', index = False)