In [1]:
#import necessary dependencies
import pandas as pd
import requests
import json
from pprint import pprint
from urllib.error import HTTPError
from urllib.parse import quote
from urllib.parse import urlencode
import sys
import time

In [2]:
#determine global variables
api_key = "YOUR CODE HERE"
api_host = 'https://api.yelp.com'
match_path = '/v3/businesses/matches'
detail_path = '/v3/businesses/'
detail_list = []

In [3]:
#use a function to pull the yelp ids
def request(host, path, api_key, url_params=None):
    """Given your API_KEY, send a GET request to the API.
    Args:
        host (str): The domain host of the API.
        path (str): The path of the API after the domain.
        API_KEY (str): Your API Key.
        url_params (dict): An optional set of query parameters in the request.
    Returns:
        dict: The JSON response from the request.
    Raises:
        HTTPError: An error occurs from the HTTP request.
    """
    url_params = url_params or {}
    url = '{0}{1}'.format(host, quote(path.encode('utf8')))
    headers = {
        'Authorization': 'Bearer %s' % api_key,
    }

    response = requests.request('GET', url, headers=headers, params=url_params)

    return response.json()

In [4]:
#use a function to get the business details
def get_business(api_key, business_id):
    """Query the Business API by a business ID.
    Args:
        business_id (str): The ID of the business to query.
    Returns:
        dict: The JSON response from the request.
    """
    business_path = detail_path + business_id

    return request(api_host, business_path, api_key)

In [5]:
#import the csv you plan to search with
#MAKE SURE TO CHANGE THIS TO THE CSV YOU NEED TO READ
#file name to use: parsed/#la_restaurants.csv (replace # with the file number you are calling)

r_target = pd.read_csv('YOUR FILE HERE')



In [6]:
total_calls = int(r_target['program_name'].count())
print(total_calls)

2496


In [7]:
#iterate over the DataFrame to call the API for each business in the csv
for i in r_target.index:
    
    #display which pull the loop is on in order to track progress
    pull = i+1
    completed = round((pull/total_calls)*100,1)
    sys.stdout.write(f"\rCalling {pull} of {total_calls} -- {completed}% complete")
    
    
    #pull the search criteria for the API
    name = r_target.loc[i,'program_name']
    add1 = r_target.loc[i,'facility_address']
    city = r_target.loc[i,'facility_city']
    state = "CA"
    country = "US"
    url_params = {'name': name,
                 'address1': add1,
                 'city': city,
                 'state': state,
                 'country': country}
    
    #call the api to find the business
    get_id = request(api_host, match_path, api_key, url_params)
    
    #extract the yelp id for the business, if there is one
    try:
        if len(get_id['businesses']) > 0:
            y_id = get_id['businesses'][0]['id']
        else:
            y_id = "none"
    except Exception as e: 
            y_id = "key_error"
        
       
    #if the api found the business, call the api to pull the necessary details
    #use the restaurant address as a unqiue identifier so the resulting DataFrame can be merged
    if y_id != "none" and y_id !="key_error":
        detail = get_business(api_key, y_id)
        if 'price' in detail:
            price = detail['price']
        else:
            price = "blank"
        if 'rating' in detail:
            rating = detail['rating']
        else:
            rating = "blank"
        if 'review_count' in detail:
            review_count = detail['review_count']
        else:
            review_count = 'blank'
            
        r_details = {'facility_address': add1,
                    'yelp_id': y_id,
                    'price': price,
                    'rating': rating,
                    'review_count': review_count,
                    'food_type': [d['title'] for d in detail['categories']],
                    'latitude': detail['coordinates']['latitude'],
                    'longitude': detail['coordinates']['longitude'],
                    'in_business': detail['is_closed']}
        detail_list.append(r_details)
        
        #clear the previous status update so the new one may be displayed
        sys.stdout.flush()

Calling 2500 of 2496 -- 100.2% complete

In [8]:
#store the yelp id query results in a dataframe
details_pd = pd.DataFrame(detail_list)
details_pd.head()

Unnamed: 0,facility_address,food_type,in_business,latitude,longitude,price,rating,review_count,yelp_id
0,10000 W PICO BLVD,[Country Clubs],False,34.051907,-118.40699,blank,4.5,17,_9cy8o3ZaRcLSrgv2BZjuA
1,1760 N HILLHURST AVE,"[Breakfast & Brunch, Coffee & Tea, American (N...",False,34.103134,-118.287297,$$,4.0,2433,eoOJbi94rj9277dG7WIv0A
2,1750 N HILLHURST AVE,"[Coffee & Tea, Tapas/Small Plates, Cocktail Bars]",False,34.102836,-118.287373,$$,4.0,260,iYLyVP1uRs56wZ3FK_lCdw
3,662 N SEPULVEDA BLVD,"[American (New), Seafood, Cocktail Bars]",False,34.07731,-118.468819,$$$,4.0,171,7s0s_rBFKPxt2B9xBMmQsw
4,1700 HILLHURST AVE,"[Mediterranean, American (New), Bars]",False,34.101936,-118.287244,$$,3.5,789,_LJ2Ub-G-UBQI9pHQvE-bA


In [9]:
#merge the DataFrames
yelp_scrub = r_target.merge(details_pd, on='facility_address')

In [10]:
#for the love of all things good, save your calls to a csv!
#MAKE SURE TO CHANGE THE NUMBER ON THE END TO THE CORRESPONDING NUMBER OF THE CSV YOU INITIALLY READ
#file name to use: "called/la_restaurants_yelped#.csv" (replace # with the file number you are calling)
yelp_scrub.to_csv("YOUR FILE HERE")