In [1]:
import pandas as pd
import requests
import numpy as np
import os

In [2]:
address = pd.read_csv('../data/name_address.csv')
concat_address = address.loc[:,'facility_address'] +', ' + address.loc[:,'facility_city'] +', '+ 'CA' +' '+address.loc[:,'facility_zip'].astype(str) 
address = pd.concat([address, concat_address.rename('concat_address')], axis = 1)

In [3]:
address.head()

Unnamed: 0,facility_name,program_name,facility_address,facility_city,facility_zip,concat_address
0,GEN KOREAN BBQ,GEN KOREAN BBQ,24301 CRENSHAW BLVD,TORRANCE,90505,"24301 CRENSHAW BLVD, TORRANCE, CA 90505"
1,DENNY'S,DENNY'S,15553 E VALLEY BLVD,LA PUENTE,91744,"15553 E VALLEY BLVD, LA PUENTE, CA 91744"
2,TEA LAB,TEA LAB,18912 E GALE AVE STE #A,ROWLAND HEIGHTS,91748,"18912 E GALE AVE STE #A, ROWLAND HEIGHTS, CA 9..."
3,EL BOTANITAS,EL BOTANITAS,3614 MLK JR BLVD,LYNWOOD,90262,"3614 MLK JR BLVD, LYNWOOD, CA 90262"
4,POLLY'S PIES,POLLY'S PIES,17198 S NORWALK BLVD,CERRITOS,90703,"17198 S NORWALK BLVD, CERRITOS, CA 90703"


In [11]:
# API AND HEADER CODE
api_key = ''
headers = {'Authorization': 'Bearer %s' % api_key}


In [12]:
# Function to extract Yelp ID from Yelp Business Match and insert into Yelp Business Details API

def get_business_details(name, address_1, city, state, country):
    ''' 
    Obtains ratings, review count, latitude/longitude, price, and restaurant categories 
    from Yelp Business Details Endpoint using the Yelp Business ID obtained from 
    Yelp Business Matches Endpoint
    
    name: name of business - string input
    address_1: Street Address e.g. 2436 Columbine Circle - string input
    city: city in abbreviated form e.g. CA - string input
    country: country - string input
    
    Yelp Only Allows for 5000 requests per day. This code calls upon the YELP API twice.
    '''
    
    # Get Yelp ID from Business Matches Endpoint
    url_bm = 'https://api.yelp.com/v3/businesses/matches'
    
    # Parameters for Business Match in Dictionary Format
    parameters_bm = {\
                 'name': name,\
                 'address1' : address_1,\
                 'city': city,\
                 'state': state,\
                 'country': country
             }
    
    # Call on Yelp Business Match API
    r_bm = requests.get(url = url_bm, params = parameters_bm, headers = headers)
    json_data_bm = r_bm.json()
    
    # Extract Yelp ID. 
    
    if 'businesses' in json_data_bm.keys():
    
        if json_data_bm.get('businesses')==[]:
            yelp_id = None
        else:
            yelp_id = json_data_bm['businesses'][0]['id']
    else:
        yelp_id = None
        
        #try except 

    
    # Call on Yelp Business Details API to get necessary info
    url_bd = 'https://api.yelp.com/v3/businesses/{}'.format(yelp_id)
    
    
    # Yelp ID was not obtained, output would display None. If it has a yelp ID, fetch data from Yelp API
    if yelp_id == None:
        output = {
            'name': None,
            'address': None,
            'review_count': None,
            'rating' : None,
            'price': None,
            'categories': None,
            'latitude': None,
            'longitude': None,
            'yelp_id': None,
            'hours': None
    
}
    else:
        
        # Request info from Yelp Business Details Endpoint
        r_bd = requests.get(url = url_bd, headers = headers)
        json_data_bd = r_bd.json()
        
        answer = json_data_bd
        
        if 'name' in answer.keys():
            output =  {'name': answer.get('name')}
        else:
            output = {'name': None}
            
        if 'address' in answer.keys(): 
            output['address'] =  ', '.join([str(elem)for elem in answer.get('location').get('display_address')])
        else:
            output['address'] = None
        
        if 'categories' in answer.keys(): 
            categories = [sub['title'] for sub in answer.get('categories')]
            output['categories'] =  ', '.join([str(elem) for elem in [sub['title'] for sub in answer.get('categories')]])
        else:
            output['categories'] = None
        
        if 'review_count' in answer.keys():
            output['review_count'] = answer.get('review_count')
        else:
            output['review_count'] = None
        
        if 'rating' in answer.keys():
            output['rating'] = answer.get('rating')
        else:
            output['rating'] = None
        
        if 'price' in answer.keys():
            output['price'] = answer.get('price')
        else:
            output['price'] = None
            
        if 'coordinates' in answer.keys():
            output['latitude_yelp'] = answer.get('coordinates').get('latitude')
            output['longitude_yelp'] = answer.get('coordinates').get('longitude')
        else:
            output['latitude'] = None
            output['longitude'] = None
            
    output['yelp_id'] = yelp_id
    output['program_name'] = name
    output['facility_address'] = address_1
    output['facility_city'] = city
    
    return output

In [14]:
# Testing if code works
get_business_details(address.iloc[1001,1], address.iloc[1001,2], address.iloc[1001,3], 'CA', 'US')

{'name': None,
 'address': None,
 'review_count': None,
 'rating': None,
 'price': None,
 'categories': None,
 'latitude': None,
 'longitude': None,
 'yelp_id': None,
 'hours': None,
 'name_input': 'FREEBIRDS WORLD BURRITO',
 'address_input': '4020 LINCOLN BLVD STE #A',
 'city_input': 'MARINA DEL REY'}

In [13]:
# Create For Loop To Iterate Over 

# Create a list out of the columns of interest
# Since there is a daily limit of 5000 calls per day will. Need to slice the list in appropriate increments

res_name = (address['program_name']).tolist()[18540:18760]
res_address = (address['facility_address']).tolist()[18540:18760]
res_city = (address['facility_city'].tolist())[18540:18760]

# Output File Name
output_filename = '../data/yelp_data_18540to18760.csv'

results = []

for name, address_1, city in zip(res_name, res_address, res_city):
    bd_results = get_business_details(name, address_1, city, 'CA', 'US')
    results.append(bd_results)
    
    if len(results) % 500 == 0:
        pd.DataFrame(results).to_csv("{}_bak".format(output_filename))
        
# Convert list result to dataframe
pd.DataFrame(results).to_csv(output_filename, encoding = 'utf8')


In [None]:
###### Compile all data into one file
file_list = []
path = '../data/'
compiled_filename = '../data/compiled_yelp_data.csv'
for file in os.listdir(path):
    if file.startswith('yelp_data'):
        path_to_file = str(path+file)
        df = pd.read_csv(path_to_file, encoding = 'latin-1')
        file_list.append(df)
    
compiled_yelp_data = pd.concat(file_list, ignore_index = True)
compiled_yelp_data.to_csv(compiled_filename)

In [23]:
os.listdir(path)


['address.csv',
 'geocode_address.csv',
 'geocode_address.csv_bak',
 'name.csv',
 'name_address.csv',
 'yelp_data_0to100.csv',
 'yelp_data_1000to2391.csv',
 'yelp_data_100to1000.csv',
 'yelp_data_10401to12901.csv',
 'yelp_data_12901to13120.csv',
 'yelp_data_13120to13340.csv',
 'yelp_data_2391to2541.csv',
 'yelp_data_2541to5041.csv',
 'yelp_data_5041to5401.csv',
 'yelp_data_5401to7901.csv',
 'yelp_data_7901to10401.csv']

In [32]:
str(path+file)

'../data/yelp_data_5401to7901.csv'

In [None]:
## Ignore Beyond This Point!

In [29]:
a=pd.read_csv('../data/yelp_data_0to100.csv')

In [30]:
a.head()

Unnamed: 0.1,Unnamed: 0,name,address,categories,review_count,rating,price,latitude,longitude,yelp_id,name_input,address_input,city_input,hours
0,0,Gen Korean BBQ House,,"Korean, Barbeque, Asian Fusion",5398.0,4.0,$$,33.805338,33.805338,C6rP97tf5yi7INJCUH9A5w,GEN KOREAN BBQ,24301 CRENSHAW BLVD,TORRANCE,
1,1,Denny's,,"Diners, Breakfast & Brunch, American (Traditio...",92.0,2.5,$,34.020786,34.020786,U8Gfec-186rQ93-3jbLdCQ,DENNY'S,15553 E VALLEY BLVD,LA PUENTE,
2,2,Ten Ren's Tea Time,,"Bubble Tea, Taiwanese",272.0,3.5,$,33.995954,33.995954,,TEA LAB,18912 E GALE AVE STE #A,ROWLAND HEIGHTS,
3,3,El Botanitas,,"Mexican, Seafood, Cocktail Bars",40.0,2.5,$$,33.931955,33.931955,k14x6AsgKlzbEB33wVCUwQ,EL BOTANITAS,3614 MLK JR BLVD,LYNWOOD,
4,4,Polly's Pies Restaurant,,"Bakeries, American (Traditional), Breakfast & ...",346.0,3.5,$$,33.87495,33.87495,ouxSotRraoDXHGHvquM-fA,POLLY'S PIES,17198 S NORWALK BLVD,CERRITOS,
