In [2]:
import pandas as pd
import requests
import numpy as np
import os

In [95]:
address = pd.read_csv('../data/name_address.csv')
concat_address = address.loc[:,'facility_address'] +', ' + address.loc[:,'facility_city'] +', '+ 'CA' +' '+address.loc[:,'facility_zip'].astype(str) 
address = pd.concat([address, concat_address.rename('concat_address')], axis = 1)

In [174]:
address.head()

Unnamed: 0,facility_name,program_name,facility_address,facility_city,facility_zip,concat_address
0,GEN KOREAN BBQ,GEN KOREAN BBQ,24301 CRENSHAW BLVD,TORRANCE,90505,"24301 CRENSHAW BLVD, TORRANCE, CA 90505"
1,DENNY'S,DENNY'S,15553 E VALLEY BLVD,LA PUENTE,91744,"15553 E VALLEY BLVD, LA PUENTE, CA 91744"
2,TEA LAB,TEA LAB,18912 E GALE AVE STE #A,ROWLAND HEIGHTS,91748,"18912 E GALE AVE STE #A, ROWLAND HEIGHTS, CA 9..."
3,EL BOTANITAS,EL BOTANITAS,3614 MLK JR BLVD,LYNWOOD,90262,"3614 MLK JR BLVD, LYNWOOD, CA 90262"
4,POLLY'S PIES,POLLY'S PIES,17198 S NORWALK BLVD,CERRITOS,90703,"17198 S NORWALK BLVD, CERRITOS, CA 90703"


In [141]:
# API AND HEADER CODE
api_key = ''
headers = {'Authorization': 'Bearer %s' % api_key}


In [236]:
# Function to extract Yelp ID from Yelp Business Match and insert into Yelp Business Details API

def get_business_details(name, address_1, city, state, country):
    ''' 
    Obtains ratings, review count, latitude/longitude, price, and restaurant categories 
    from Yelp Business Details Endpoint using the Yelp Business ID obtained from 
    Yelp Business Matches Endpoint
    
    name: name of business - string input
    address_1: Street Address e.g. 2436 Columbine Circle - string input
    city: city in abbreviated form e.g. CA - string input
    country: country - string input
    
    Yelp Only Allows for 5000 requests per day. This code calls upon the YELP API twice.
    '''
    
    # Get Yelp ID from Business Matches Endpoint
    url_bm = 'https://api.yelp.com/v3/businesses/matches'
    
    # Parameters for Business Match in Dictionary Format
    parameters_bm = {\
                 'name': name,\
                 'address1' : address_1,\
                 'city': city,\
                 'state': state,\
                 'country': country
             }
    
    # Call on Yelp Business Match API
    r_bm = requests.get(url = url_bm, params = parameters_bm, headers = headers)
    json_data_bm = r_bm.json()
    
    # Extract Yelp ID. 
    
    if (json_data_bm.get('businesses')==[]) == True:
        yelp_id = None
    else:
        yelp_id = json_data_bm['businesses'][0]['id']

    
    # Call on Yelp Business Details API to get necessary info
    url_bd = 'https://api.yelp.com/v3/businesses/{}'.format(yelp_id)
    
    
    # Yelp ID was not obtained, output would display None. If it has a yelp ID, fetch data from Yelp API
    if yelp_id == None:
        output = {
            'name': None,
            'address': None,
            'review_count': None,
            'rating' : None,
            'price': None,
            'categories': None,
            'latitude': None,
            'longitude': None,
            'yelp_id': None,
            'hours': None
    
}
    else:
        
        # Request info from Yelp Business Details Endpoint
        r_bd = requests.get(url = url_bd, headers = headers)
        json_data_bd = r_bd.json()
        
        answer = json_data_bd
        categories = [sub['title'] for sub in answer.get('categories')]
        output = {
            'name': answer.get('name'),
            'address': ', '.join([str(elem)for elem in answer.get('location').get('display_address')]),
            'review_count': answer.get('review_count'),
            'rating' : answer.get('rating'),
            'price': answer.get('price'),
            'categories': ', '.join([str(elem) for elem in [sub['title'] for sub in answer.get('categories')]]),
            'latitude': answer.get('coordinates').get('latitude'),
            'longitude': answer.get('coordinates').get('longitude'),
            'yelp_id': answer.get('id')
    
}
    output['name_input'] = name
    output['address_input'] = address_1
    output['city_input'] = city
    
    return output

In [232]:
# Testing if code works
get_business_details(address.iloc[2,1], address.iloc[2,2], address.iloc[2,3], 'CA', 'US')

{'name': "Denny's",
 'address': '15553 Valley Blvd, La Puente, CA 91744',
 'review_count': 92,
 'rating': 2.5,
 'price': '$',
 'categories': 'Diners, Breakfast & Brunch, American (Traditional)',
 'latitude': 34.0207864,
 'longitude': -117.9586692,
 'yelp_id': 'U8Gfec-186rQ93-3jbLdCQ',
 'name_input': "DENNY'S",
 'address_input': '15553 E VALLEY BLVD',
 'city_input': 'LA PUENTE'}

In [238]:
# Create For Loop To Iterate Over 

# Create a list out of the columns of interest
# Since there is a daily limit of 5000 calls per day will. Need to slice the list in appropriate increments

res_name = (address['program_name']).tolist()[100:501]
res_address = (address['facility_address']).tolist()[100:501]
res_city = (address['facility_city'].tolist())[100:501]

# Output File Name
output_filename = '../data/yelp_data_100to501.csv'

results = []

for name, address_1, city in zip(res_name, res_address, res_city):
    bd_results = get_business_details(name, address_1, city, 'CA', 'US')
    results.append(bd_results)
    
    if len(results) % 500 == 0:
        pd.DataFrame(results).to_csv("{}_bak".format(output_filename))
        
# Convert list result to dataframe
pd.DataFrame(results).to_csv(output_filename, encoding = 'utf8')


In [None]:
## Ignore Beyond This Point!

In [226]:
res_name = (address['program_name']).tolist()
res_address = (address['facility_address']).tolist()
res_city = (address['facility_city'].tolist())
res_address

['24301 CRENSHAW BLVD',
 '15553 E VALLEY BLVD',
 '18912 E GALE AVE STE #A',
 '3614 MLK JR BLVD',
 '17198 S NORWALK BLVD',
 '1320 N VINE ST',
 '7445 SUNSET BLVD # B',
 '1250 E HARVARD RD',
 '2114 S SAWTELLE BLVD # 211',
 '9811 SANTA MONICA BLVD',
 '18427 S AVALON BLVD',
 '307 LINCOLN BLVD',
 '305 E 8TH ST 103',
 '618 SHOPPERS LN',
 'OLVERA ST E17',
 '1460 3RD ST',
 '20311 VALLEY BLVD E',
 '12440 E IMPERIAL HWY',
 '1133 GLENOAKS BLVD #A',
 '9727 CULVER BLVD',
 '2350 E 223RD ST',
 '700 S SANTA FE AVE',
 '1802 E HUNTINGTON DR STE #B',
 '134 S JAPANESE VILLAGE PLZ',
 '3321 W BURBANK BLVD',
 '738 ROSE AVE',
 '1455 W 3RD ST',
 '1555 W SEPULVEDA BLVD',
 '2881 GAGE AVE',
 '3020 W MAIN ST',
 '13720 E RAMONA BLVD',
 '2615 E CARSON ST',
 '22300 S AVALON CIR',
 '4315 N PECK RD',
 '23705 HAWTHORNE BLVD',
 '5523 E SOUTH ST',
 '6333 W 3RD ST 336',
 '1201 W VENICE BLVD',
 '317 S BROADWAY ST 101E1',
 'OLVERA ST 26',
 '21812 VENTURA BLVD',
 '970 N WESTERN AVE',
 '1001 N S F BLVD UNIT 110',
 '15330 AMAR R

In [204]:
# Business Match Parameters to get Yelp ID 
url_bm = 'https://api.yelp.com/v3/businesses/matches'

parameters_bm = {\
                 'name': address.iloc[1,1],\
                 'address1' : address.iloc[1,2],\
                 'city': address.iloc[1,3],\
                 'state': 'CA',\
                 'country': 'US'
             }

In [205]:
# Use Requests.get and convert to JSON from Yelp Business Match Endpoint
r_bm = requests.get(url = url_bm, params = parameters_bm, headers = headers)
json_data_bm = r_bm.json()

In [220]:
json_data_bm


{'businesses': [{'id': 'U8Gfec-186rQ93-3jbLdCQ',
   'alias': 'dennys-la-puente',
   'name': "Denny's",
   'coordinates': {'latitude': 34.0207864, 'longitude': -117.9586692},
   'location': {'address1': '15553 Valley Blvd',
    'address2': '',
    'address3': '',
    'city': 'La Puente',
    'zip_code': '91744',
    'country': 'US',
    'state': 'CA',
    'display_address': ['15553 Valley Blvd', 'La Puente, CA 91744']},
   'phone': '+16269683930',
   'display_phone': '(626) 968-3930'}]}

In [219]:
json_data_bm['businesses']

[{'id': 'U8Gfec-186rQ93-3jbLdCQ',
  'alias': 'dennys-la-puente',
  'name': "Denny's",
  'coordinates': {'latitude': 34.0207864, 'longitude': -117.9586692},
  'location': {'address1': '15553 Valley Blvd',
   'address2': '',
   'address3': '',
   'city': 'La Puente',
   'zip_code': '91744',
   'country': 'US',
   'state': 'CA',
   'display_address': ['15553 Valley Blvd', 'La Puente, CA 91744']},
  'phone': '+16269683930',
  'display_phone': '(626) 968-3930'}]

In [152]:
# Extract Yelp ID and append yelp ID to addresses
ans_bm = json_data_bm['businesses'][0]
yelp_id = ans_bm.get('id')

In [153]:
#Business Details Parameters to get rating, review count, price, categories, latitude, and longitude
url_bd = 'https://api.yelp.com/v3/businesses/{}'.format(yelp_id)

In [154]:
# Request from Yelp Business Details Endpoint
r_bd = requests.get(url = url_bd, headers = headers)
json_data_bd = r_bd.json()

In [191]:
json_data_bm['businesses'] == []


True

In [172]:
# Business Details Output
answer = json_data_bd
categories = [sub['title'] for sub in answer.get('categories')]
output = {
    'name': answer.get('name'),
    'address': ', '.join([str(elem)for elem in answer.get('location').get('display_address')]),
    'review_count': answer.get('review_count'),
    'rating' : answer.get('rating'),
    'price': answer.get('price'),
    'categories': ', '.join([str(elem) for elem in [sub['title'] for sub in answer.get('categories')]]),
    'latitude': answer.get('coordinates').get('latitude'),
    'longitude': answer.get('coordinates').get('longitude'),
    'yelp_id': answer.get('id'),
    'hours': answer.get('hours')[0].get('hours_type')
    
}

In [173]:
output

{'name': "Denny's",
 'address': '15553 Valley Blvd, La Puente, CA 91744',
 'review_count': 92,
 'rating': 2.5,
 'price': '$',
 'categories': 'Diners, Breakfast & Brunch, American (Traditional)',
 'latitude': 34.0207864,
 'longitude': -117.9586692,
 'yelp_id': 'U8Gfec-186rQ93-3jbLdCQ',
 'hours': 'REGULAR'}