In [3]:
import json # Facilitates handling of json objects
import logging # Facilitates logging
import os # Facilitates building file paths and reading environment variables

import pandas as pd
import psycopg2
import requests # Facilitates making http requests
import sqlalchemy as db

import get_records # Facilitates reading phones from the restaurant requests table

In [30]:
# Reload the already-imported get_records module and rebind the name to the
# reloaded module object, so edits made to the module's source are picked up
# without restarting the kernel. importlib is aliased to `imp` in this cell.
import importlib as imp
get_records = imp.reload(get_records)

In [4]:
# Keys read directly from each business record in the Yelp response
# ('price' gets special handling when the response is parsed).
database_fields_level_1 = [
    'id', 'name', 'phone', 'price',
    'review_count', 'rating', 'categories', 'is_closed',
]

# Keys read from the nested 'location' object of a business record.
database_fields_level_2 = ['country', 'state', 'city', 'zip_code', 'address1']

# Keys read from the nested 'coordinates' object of a business record.
database_fields_level_3 = ['longitude', 'latitude']

In [20]:
# Temporary staging table for per-row request outcomes; ON COMMIT DROP removes
# it when the transaction that created it commits
create_status_update_table_statement = """
CREATE TEMPORARY TABLE status_update (row_id INTEGER, request_status VARCHAR, error_message VARCHAR) 
ON COMMIT DROP
"""

# Parameterised insert into the staging table; the three %s placeholders are
# filled from (row_id, request_status, error_message) tuples at execution time
insert_status_update_table_statement = """
INSERT INTO status_update (row_id, request_status, error_message) 
VALUES(%s, %s, %s)
"""

# Copy the staged status and error message onto the restaurants_requests rows
# that share the same row_id
update_restaurants_requests_statement = """
UPDATE restaurants_requests
SET 
    phone_request_status = status_update.request_status,
    phone_error_message = status_update.error_message
FROM status_update
WHERE 
    status_update.row_id = restaurants_requests.row_id;
"""

In [6]:
class Yelp_Match:
    """ This class facilitates requests to Yelp by phone number or address.

    Only the phone look-up is implemented in this class. The 'address' match
    type is still accepted by the constructor, but no endpoint is configured
    for it, so `search_url` is None for such instances.

    Attributes:
        api_key: Yelp API key used to build the bearer header.
        headers: HTTP headers sent with every request.
        match_type: 'phone' or 'address', as passed to the constructor.
        search_url: Endpoint for the match type, or None if none is configured.
        timeout: Seconds to wait for Yelp before the request is abandoned.
        logger: Logger obtained with logging.getLogger(__name__).
    """

    # Endpoints keyed by match type; a match type without an entry gets no URL
    _SEARCH_URLS = {
        'phone': 'https://api.yelp.com/v3/businesses/search/phone',
    }

    def __init__(self, api_key, match_type = 'phone', timeout = 30):
        """Store the credentials and pick the endpoint for `match_type`.

        Args:
            api_key: Yelp API key.
            match_type: 'phone' (default) or 'address'.
            timeout: Seconds passed to requests as the request timeout.

        Raises:
            AssertionError: if `match_type` is not one of the accepted values.
        """

        # Make sure the match type is valid
        assert match_type in ('phone', 'address'), (
            "match_type must be 'phone' or 'address'")

        # Initiate instance of a class with API key, logger and urls
        self.api_key = api_key
        self.headers = {'Authorization': 'Bearer %s' % self.api_key}
        self.match_type = match_type
        # Always define search_url so a missing endpoint is detectable as None
        # instead of surfacing later as a missing attribute
        self.search_url = self._SEARCH_URLS.get(match_type)
        self.timeout = timeout
        self.logger = logging.getLogger(__name__)

    def set_output_folder_path(self, output_folder_path):
        """Set the folder that json_to_file writes into."""
        self.output_folder_path = output_folder_path
        # Lazy %-formatting also works when the path is not a plain str
        self.logger.debug('Output folder set to %s', output_folder_path)

    def search_by_phone(self, phone):
        """Send a phone look-up to Yelp and return the decoded JSON body.

        The decoded body is also kept on `self.json` for json_to_file. The
        HTTP status is deliberately not checked here: the caller inspects the
        body for an 'error' entry.

        Args:
            phone: Phone number sent as the `phone` query parameter.

        Returns:
            The decoded JSON response.

        Raises:
            RuntimeError: if this instance has no search endpoint configured.
        """
        if self.search_url is None:
            raise RuntimeError(
                "No search endpoint configured for match_type %r"
                % (self.match_type,))
        # A timeout keeps one stalled connection from blocking the whole run
        response = requests.get(url = self.search_url,
                params = {'phone' : phone},
                headers = self.headers,
                timeout = self.timeout
                )
        self.json = response.json()
        return self.json

    def json_to_file(self, item_number):
        """Write the last JSON response to '<output folder>/item<N>.json'.

        Args:
            item_number: Identifier used in the file name; converted to text.
        """
        # os.path.join gives the same path whether or not the configured
        # folder ends with a separator
        output_file_path = os.path.join(self.output_folder_path,
                'item%s.json' % item_number)
        with open(output_file_path, 'w') as output_file:
            json.dump(self.json, output_file)
        self.logger.debug('Information for item %s obtained.', item_number)

In [7]:
def _blank_result_row(item_number, sg_id, request_status, error_message = None):
    """ Build a one-row frame with the given status and every Yelp field set to None. """
    values = {
        'row_id': item_number,
        'sname_place_id': sg_id,
        'request_status': request_status,
        'error_message': error_message,
    }
    for f in (database_fields_level_1
              + database_fields_level_2
              + database_fields_level_3):
        values[f] = None
    return pd.DataFrame([values])


def _price_level(business):
    """ Return the length of the business 'price' value, -1 if absent, None if unusable. """
    try:
        return len(business['price'])
    except KeyError:
        # Business has no price entry at all
        return -1
    except TypeError:
        # Price is present but has no length (e.g. None): log it and still
        # return a value so the 'price' column is always populated
        logging.getLogger(__name__).error('Error in price handling. ', exc_info = True)
        return None


def request_phone_save(row):
    """ This function makes a request and parses response based on phone.

    The raw response is written to disk through the module-level `searcher`
    before it is parsed.

    Args:
        row: Mapping-like record providing 'row_id', 'sname_place_id' and
            'r_phone_number'.

    Returns:
        A pandas DataFrame with the columns row_id, sname_place_id,
        request_status, error_message followed by the level 1, 2 and 3
        database fields. It holds one row per matched business with
        request_status 'success', or a single row of None fields with
        request_status 'zero' (no match) or 'error' (error in response).
    """

    item_number = row['row_id']
    sg_id = row['sname_place_id']
    phone = row['r_phone_number']
    response = searcher.search_by_phone(phone)
    searcher.json_to_file(str(item_number))

    if 'businesses' not in response:
        if 'error' in response:
            # If an error in response, save error message
            error = response['error']
            if isinstance(error, dict):
                message = error.get('description')
            else:
                message = str(error)
            return _blank_result_row(item_number, sg_id, 'error', message)
        # If no businesses in response mark result as no success
        return _blank_result_row(item_number, sg_id, 'zero')

    if not response['businesses']:
        return _blank_result_row(item_number, sg_id, 'zero')

    # Parse the businesses in responses
    businesses = []
    for b in response['businesses']:
        values = {
            'row_id': item_number,
            'sname_place_id': sg_id,
            'request_status': 'success',
            'error_message': None,
        }
        for f in database_fields_level_1:
            # Get the level - 1 fields; absent keys are stored as None
            values[f] = _price_level(b) if f == 'price' else b.get(f)
        # Get the level - 2 fields; tolerate a missing or null 'location'
        location = b.get('location') or {}
        for f in database_fields_level_2:
            values[f] = location.get(f)
        # Get the level - 3 fields; tolerate a missing or null 'coordinates'
        coordinates = b.get('coordinates') or {}
        for f in database_fields_level_3:
            values[f] = coordinates.get(f)
        businesses.append(values)
    return pd.DataFrame(businesses)

In [8]:
# Run configuration
# The API key is taken from the YELP_API_KEY environment variable when it is
# set, so the secret does not have to live in the notebook; the literal below
# is only the fallback
api_key = os.environ.get('YELP_API_KEY', '***REMOVED***')
# Row count handed to get_records.get_request_phones
n_records = 15
# Folder that receives one raw JSON response file per request
output_folder_path = '/home/user/projects/urban/data/input/Yelp/phone_address/phone/'
# File that the logging FileHandler writes to
log_file_path = '/home/user/projects/urban/code/yelp-more-download/logs/yelp_phone_requests.log'

In [9]:
# Inititate an instance of a Yelp_Match class
searcher = Yelp_Match(api_key, match_type = 'phone')
searcher.set_output_folder_path(output_folder_path)

In [10]:
# Set the logging options
logging.basicConfig(level=logging.DEBUG)
logger = searcher.logger
logger.setLevel(logging.INFO)
# Write log to file (DEBUG)
fh = logging.FileHandler(log_file_path)
fh.setLevel(logging.INFO)
# Write log to console (ERROR)
ch = logging.StreamHandler()
ch.setLevel(logging.DEBUG)
# Create formatter and add it to the handlers
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
fh.setFormatter(formatter)
ch.setFormatter(formatter)
# Add the handlers to the logger
logger.addHandler(fh)
logger.addHandler(ch)

In [11]:
# Mark the start of the request run in the log
# NOTE(review): the message reads as truncated (perhaps "based on phone") — confirm intended wording
logger.info('Making requests based.')

2019-12-14 13:03:47,906 - __main__ - INFO - Making requests based.
INFO:__main__:Making requests based.


In [71]:
# Connect to the database via SQLalchemy
engine = db.create_engine('postgresql://{user}:{user_pass}@{host}/{dataname1}')
connection = engine.connect()

# Connect via psycopg2
conn = psycopg2.connect('dbname=dataname1 user={user} password={user_pass}')

In [72]:
# Load the request rows to process; get_request_phones receives the row count
# and both database handles
# (presumably returns a DataFrame — the cells below use it as one; TODO confirm)
requests_table = get_records.get_request_phones(n_records, engine, conn)

In [21]:
# NOTE(review): overwrites the phone number at index label 1 with a malformed
# value, presumably to exercise the error path — looks like a leftover test
# cell; confirm it should not run in a real download
requests_table.loc[1, 'r_phone_number'] = '+170498T32760'

In [73]:
# Display the request rows that are about to be sent to Yelp
requests_table

Unnamed: 0,row_id,sname_place_id,r_phone_number,r_location_name,r_zip_code,r_street_address,r_city,r_state,y_id,phone_request_status,address_request_status,phone_error_message,address_error_message
0,22,sg:8ac66769f353436bbf82ee45e5db1f24,15159869182,McDonald's,50111,101 gateway drive,grimes,ia,,success,needed,,
1,24,sg:39abe1ba075243fa8a51ee8d256bd7f6,17634948844,Highlander,55311,7801 county road 101 rush creek golf club,maple grove,mn,,success,needed,,
2,27,sg:d7ff7fbdd361445992c009d82edf901a,14054701261,Industry Gastro Lounge,73142,2800 north west 140th street,oklahoma city,ok,,success,needed,,
3,46,sg:3c22536af466408fa4019fb65e5bb44b,19077477827,Subway,99835,327 seward street,sitka,ak,5FutJzRMyhdPdzmQUIJdxg,success,needed,,
4,47,sg:7ca8916a836942baa1939493bcba5c4f,14809213500,Noodles & Company,85281,2000 east rio salado parkway,tempe,az,,success,needed,,


In [74]:
# Call request_phone_save once per request row (axis = 1 passes each row);
# every call returns its own DataFrame, which the next cell concatenates
result = requests_table.apply(request_phone_save, axis = 1)

DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): api.yelp.com:443
DEBUG:urllib3.connectionpool:https://api.yelp.com:443 "GET /v3/businesses/search/phone?phone=%2B15159869182 HTTP/1.1" 200 None
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): api.yelp.com:443
DEBUG:urllib3.connectionpool:https://api.yelp.com:443 "GET /v3/businesses/search/phone?phone=%2B15159869182 HTTP/1.1" 200 None
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): api.yelp.com:443
DEBUG:urllib3.connectionpool:https://api.yelp.com:443 "GET /v3/businesses/search/phone?phone=%2B17634948844 HTTP/1.1" 200 None
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): api.yelp.com:443
DEBUG:urllib3.connectionpool:https://api.yelp.com:443 "GET /v3/businesses/search/phone?phone=%2B14054701261 HTTP/1.1" 200 None
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): api.yelp.com:443
DEBUG:urllib3.connectionpool:https://api.yelp.com:443 "GET /v3/businesses/search/phone?

In [75]:
# Stack the per-row frames into a single table
# NOTE(review): this rebinds `result` from the per-row collection to the
# combined frame, so the cell presumably cannot be re-run without first
# re-running the cell above — confirm
result = pd.concat(result.values)

In [76]:
# Display the combined table of parsed Yelp responses
result

Unnamed: 0,row_id,sname_place_id,request_status,error_message,id,name,phone,price,review_count,rating,categories,is_closed,country,state,city,zip_code,address1,longitude,latitude
0,22,sg:8ac66769f353436bbf82ee45e5db1f24,success,,LhuWY0VAMc3oAMPCzhfncw,McDonald's,15159869182,1,8,2.5,"[{'alias': 'hotdogs', 'title': 'Fast Food'}, {...",False,US,IA,Grimes,50111,101 Gateway Dr,-93.779335,41.687938
0,24,sg:39abe1ba075243fa8a51ee8d256bd7f6,success,,V5EQM-PKSYeh3Ly2HHPnSA,Highlander,17634948844,-1,9,3.0,"[{'alias': 'newamerican', 'title': 'American (...",False,US,MN,Maple Grove,55311,7801 County Road 101,-93.5176,45.09795
0,27,sg:d7ff7fbdd361445992c009d82edf901a,success,,ZzE5Worwf02UqHoD3r6tWg,Industry Gastro Lounge,14054701261,2,104,2.5,"[{'alias': 'gastropubs', 'title': 'Gastropubs'...",False,US,OK,Oklahoma City,73142,2800 NW 140th St,-97.565437,35.613757
0,46,sg:3c22536af466408fa4019fb65e5bb44b,success,,5FutJzRMyhdPdzmQUIJdxg,Subway Restaurants,19077477827,1,5,3.0,"[{'alias': 'sandwiches', 'title': 'Sandwiches'}]",False,US,AK,Sitka,99835,327 Seward St,-135.33464,57.05153
0,47,sg:7ca8916a836942baa1939493bcba5c4f,success,,CO8MpD1bSkBPArXKBKkIfA,Noodles & Company,14809213500,2,160,3.5,"[{'alias': 'noodles', 'title': 'Noodles'}, {'a...",False,US,AZ,Tempe,85281,2000 E Rio Salado Pkwy,-111.901593,33.433435


In [77]:
# Rows whose request did not succeed ('zero' or 'error')
failures = result.loc[result['request_status'] != 'success']
# One (row_id, request_status, error_message) tuple per failed request
rows = list(zip(failures.row_id, failures.request_status, failures.error_message))
# `with conn` commits when the block finishes and rolls back if any statement
# raises, so a failure does not leave the connection stuck in an aborted
# transaction; `with conn.cursor()` closes the cursor in both cases
with conn:
    with conn.cursor() as cur:
        # Stage the statuses in the temporary table, then copy them onto
        # restaurants_requests in a single UPDATE
        cur.execute(create_status_update_table_statement)
        cur.executemany(insert_status_update_table_statement, rows)
        cur.execute(update_restaurants_requests_statement)

In [78]:
# Close the psycopg2 connection after the status update above
conn.close()

In [79]:
# Keep the successful matches, drop the bookkeeping columns and renumber the
# rows. Built as one non-mutating chain: dropping columns in place on a slice
# of `result` risks a SettingWithCopyWarning and makes the cell depend on
# hidden state
success = (
    result.loc[result['request_status'] == 'success']
    .drop(['request_status', 'error_message'], axis = 1)
    .reset_index(drop = True)
)
# Append the matches to more_yelp_restaurants; 'categories' holds a list of
# dicts per row, so it is stored with the JSON column type
success.to_sql('more_yelp_restaurants',
        con = engine,
        index = False,
        if_exists = 'append',
        dtype = {'categories' : db.types.JSON})

In [80]:
# Release the SQLAlchemy engine's connection pool
engine.dispose()
# NOTE(review): conn was already closed in an earlier cell, so this second
# close looks redundant — confirm and remove one of them
conn.close()