## GraphQL API

While GraphQL seemed promising, we found it much easier to exhaust the maximum allotted requests with it than with the REST API. Perhaps in a professional setting, GraphQL would be the better choice.

We are including our code in this notebook in case anyone wants to see how our functions could be adapted to this format.

In [1]:
import pandas as pd 
import numpy as np
import json
import requests
import time, datetime
import math

from gql import gql, Client
from gql.transport.requests import RequestsHTTPTransport

In [17]:
# NOTHING IN THIS CELL NEEDS TO GET CHANGED
# IF YOU HAVE YOUR API KEY IN 'creds.json' IN ../Assets

# format your json file as a dictionary containing api key with DOUBLE QUOTES
# {"api": "your_super_long_api_key"}

# load credentials into variable; the context manager closes the file handle
# as soon as the JSON is parsed instead of leaving it open for the kernel's lifetime
with open('../Assets/creds.json') as CREDS_FILE:
    YELP_CREDENTIALS = json.load(CREDS_FILE)

API_KEY = YELP_CREDENTIALS['api']
HEADERS = {'Authorization': f'Bearer {API_KEY}',
    'Content-Type': 'application/json',
    'Accept-Language': 'en-US'}

# every query in this notebook is sent to the GraphQL endpoint below
# (REST business search reference, for comparison:
#  https://www.yelp.com/developers/documentation/v3/business_search)

TRANSPORT = RequestsHTTPTransport(url='https://api.yelp.com/v3/graphql', headers=HEADERS, use_json=True)

# NOTE(review): fetch_schema_from_transport=True asks gql to fetch the schema from the
# endpoint; the HTTP 500 recorded under this cell may come from that request or from
# an invalid/exhausted API key -- confirm before relying on `client`.
client = Client(transport=TRANSPORT, fetch_schema_from_transport=True)

HTTPError: 500 Server Error: Internal Server Error for url: https://api.yelp.com/v3/graphql

In [11]:
def get_coordinates(zipcode):
    '''
    Converts a five-digit USPS zip code to geographic coordinates.
    
    Arguments:
        zipcode:            a five-digit string
        
    Returns:
        a tuple containing (latitude, longitude), in decimal degrees
        
    Raises:
        ValueError:         if zipcode is not a five-character numeric string
        KeyError:           if the zip code has no entry in the lookup file
    '''
    # validate before touching the disk so bad input fails fast with a clear message
    # (previously this fell through to an UnboundLocalError at the return statement)
    if not (isinstance(zipcode, str) and len(zipcode) == 5 and zipcode.isnumeric()):
        raise ValueError(f'zipcode must be a five-digit string, got {zipcode!r}')

    # context manager guarantees the lookup file is closed after parsing
    with open('../Assets/zip_code_coordinates.json') as zip_file:
        zipcoords = json.load(zip_file)

    entry = zipcoords[zipcode]
    return entry['latitude'], entry['longitude']

In [12]:
# https://en.wikipedia.org/wiki/Earth_radius

def get_earth_radius(lat):
    '''
    Geocentric radius of the Earth at a given latitude, modelling the planet
    as an oblate spheroid.
    
    Arguments:
        lat (float):        latitude, in decimal degrees
        
    Returns:
        radius (float) in meters
    '''
    equatorial = 6_378_137      # semi-major axis, meters
    polar = 6_356_752.3         # semi-minor axis, meters

    phi = math.radians(lat)
    cos_phi = math.cos(phi)
    sin_phi = math.sin(phi)

    # R(phi) = sqrt(((a^2 cos)^2 + (b^2 sin)^2) / ((a cos)^2 + (b sin)^2))
    top = (equatorial**2 * cos_phi)**2 + (polar**2 * sin_phi)**2
    bottom = (equatorial * cos_phi)**2 + (polar * sin_phi)**2

    return math.sqrt(top / bottom)

In [13]:
def get_new_point(lat1, lon1, distance, bearing):
    '''
    Computes the coordinate reached by travelling a given spherical distance
    from a starting point along a given bearing.
    Use only for short distances (< 100_000).
    
    Arguments:
        lat1 (float):       starting latitude, in decimal degrees
        lon1 (float):       starting longitude, in decimal degrees
        distance (float):   distance travelled from the starting point, in meters
        bearing (float):    direction from starting point, in RADIANS
                            (passed straight to math.sin / math.cos, no conversion)
        
    Returns:
        tuple (latitude, longitude) of the destination, in decimal degrees
        
    Dependencies:
        get_earth_radius()
    '''
    # local Earth radius is evaluated at the starting latitude (still in degrees here)
    radius = get_earth_radius(lat1)

    phi1 = math.radians(lat1)
    lam1 = math.radians(lon1)

    sin_phi1, cos_phi1 = math.sin(phi1), math.cos(phi1)
    sin_delta = math.sin(distance / radius)     # distance / radius = angular distance
    cos_delta = math.cos(distance / radius)

    # destination-point formulae from https://www.movable-type.co.uk/scripts/latlong.html
    phi2 = math.asin(sin_phi1 * cos_delta + cos_phi1 * sin_delta * math.cos(bearing))

    lam2 = lam1 + math.atan2(
        math.sin(bearing) * sin_delta * cos_phi1,
        cos_delta - sin_phi1 * math.sin(phi2))

    return math.degrees(phi2), math.degrees(lam2)

In [14]:
# picking seven smaller circles to "equal" one big circle
# https://en.wikipedia.org/wiki/Haversine_formula
# https://stackoverflow.com/questions/639695/
# https://stackoverflow.com/questions/7222382/
# https://www2.stetson.edu/~efriedma/circovcir/

def get_six_points(lat1, lon1, distance, rotation=0):
    '''
    Generates 6 additional geographic coordinates hexagonally arranged some distance from starting point.
    
    Arguments:
        lat1 (float):       starting latitude, in decimal degrees
        lon1 (float):       starting longitude, in decimal degrees
        distance (float):   distance to closest point, in meters
        rotation (float):   rotational offset, in degrees (recommended range -30 to 30)
        
    Returns:
        list of exactly six dictionaries with {'latitude': lat2, 'longitude': lon2},
        excluding starting point
        
    Dependencies:
        get_new_point()
        get_earth_radius()
    '''
    # Always six bearings, 60 degrees apart, starting at `rotation`.
    # The previous range(rotation, 360, 60) produced 7 bearings for negative
    # rotations (e.g. -30 and 330 coincide), fewer than 6 for rotation >= 60,
    # and raised TypeError for the float rotations the docstring allows.
    bearings = [math.radians(rotation + 60 * step) for step in range(6)]

    points = []
    for bearing in bearings:
        # get_new_point() takes the bearing in radians
        lat2, lon2 = get_new_point(lat1, lon1, distance, bearing)
        points.append({'latitude': lat2, 'longitude': lon2})

    return points

In [15]:
def get_eighteen_points(lat1, lon1, distance, rotation=0):
    '''
    Builds 18 coordinates around a starting point as three hexagonal rings
    of six points each.
    
    Arguments:
        lat1 (float):       starting latitude, in decimal degrees
        lon1 (float):       starting longitude, in decimal degrees
        distance (float):   distance to closest point, in meters
        rotation (float):   rotational offset, in degrees (recommended range -30 to 30)
        
    Returns:
        list of dictionaries with {'latitude': lat2, 'longitude': lon2}, excluding starting point;
        ordered inner ring, outer ring, in-between ring
        
    Dependencies:
        get_six_points()
        get_new_point()
        get_earth_radius()
    '''
    # (ring distance, ring rotation) for each hexagon of circle centers
    rings = (
        (distance, rotation),                           # inner hexagon
        (distance * 2, rotation),                       # outer hexagon, along the six "axes"
        (math.sqrt(3) * distance, rotation + 30),       # in-between hexagon, between the "axes"
    )

    points = []
    for ring_distance, ring_rotation in rings:
        points.extend(get_six_points(lat1, lon1, ring_distance, ring_rotation))

    return points

In [4]:
# https://stackoverflow.com/questions/45965007/

def gql_make_query(params):
    '''
    Builds the GraphQL business-search query for one page of results.
    
    Arguments:
        params:             dictionary with 'latitude', 'longitude' and 'radius' keys,
                            plus an optional 'offset' key (defaults to 0)
        
    Returns:
        the query object produced by gql(), requesting up to 50 businesses
        sorted by distance
        
    Raises:
        KeyError:           if 'latitude', 'longitude' or 'radius' is missing
    '''
    latitude  = params['latitude']
    longitude = params['longitude']
    radius    = int(params['radius'])      # interpolated into the query as a whole number

    # explicit default instead of a bare `except:` that would also hide unrelated errors
    offset = params.get('offset', 0)

    query = gql('''
    {\n'''
        f'search(latitude: {latitude},\n'
                f'longitude: {longitude},\n'
                f'radius: {radius},\n'
                f'offset: {offset},\n'
               '''limit: 50,
                  sort_by: "distance") {
            business {
                name
                id
                coordinates {                      
                        latitude                      
                        longitude                     
                    }
                price
                review_count
                rating
                location {
                    postal_code
                    city
                }
                categories {
                    alias
                }
            }
        }
    }
    ''')
    
    return query

In [5]:
def gql_get_total(params):
    '''
    Asks the API how many businesses match a search area.
    
    Arguments:
        params:             dictionary with 'latitude', 'longitude' and 'radius' keys
        
    Returns:
        the 'total' field of the search response
        
    Dependencies:
        module-level `client`
    '''
    lat, lon, search_radius = (params[key] for key in ('latitude', 'longitude', 'radius'))

    query_text = ('''
    {\n'''
        f'search(latitude: {lat},\n'
        f'longitude: {lon},\n'
        'limit: 50,\n'
        f'radius: {search_radius}) ' '''{
            total
        }
    }
    ''')

    response = client.execute(gql(query_text))
    return response['search']['total']

In [6]:
def gql_add_businesses(params, business_list):
    '''
    Runs one search query and appends the returned businesses to a list container.
    
    Arguments:
        params:             search parameters passed to gql_make_query()
        business_list:      an empty (or existing) list container
        
    Returns:
        the same list container, extended in place
        
    Dependencies:
        gql_make_query()
        module-level `client`
    '''
    response = client.execute(gql_make_query(params))

    # each entry is a dictionary holding the fields requested for one business
    business_list.extend(list(response['search']['business']))

    return business_list

In [7]:
def gql_offset_iterator(params, business_list, total, start=0):
    '''
    Pages through a search 50 results at a time and collects every page.
    
    Arguments:
        params:             search parameters; its 'offset' key is overwritten on every page
        business_list:      list container that receives the businesses
        total:              offset at which to stop (exclusive)
        start:              first offset to request
        
    Returns:
        the list container with all fetched businesses added
        
    Dependencies:
        gql_add_businesses()
    '''
    page_offsets = range(start, total, 50)

    for page_offset in page_offsets:
        params['offset'] = page_offset
        business_list = gql_add_businesses(params, business_list)

        # pause between requests
        time.sleep(.5)

        # progress report on every offset that is a multiple of 100
        if not page_offset % 100:
            print(f'{page_offset} out of {total}')

    return business_list

In [8]:
def gql_get_businesses(params, force=False):
    '''
    Grabs as many businesses as allowed by the Yelp API given geographic coordinates and search radius.
    
    Arguments:
        params:         dictionary with 'latitude', 'longitude' (decimal degrees)
                        and 'radius' (meters) keys
        force:          forces the function to gather results even if over 1000 businesses are returned
        
    Returns:
        a list containing dictionaries of each business listing (at most the first 1000),
        or None when the search matches more than 1000 businesses and force is False
    
    Dependencies:
        gql_get_total()
        gql_offset_iterator()
        gql_add_businesses()
    '''
    total = gql_get_total(params)

    # The old test `(total > 1000) & force == False` parsed as
    # `((total > 1000) & force) == False`, which bailed out for EVERY search
    # with 1000 or fewer results, forced or not.
    if total > 1000 and not force:
        return None

    start = time.time()
    display_total = min(total, 1000)
    businesses = gql_offset_iterator(params, [], display_total)
    elapsed = time.time() - start

    print(f'finished! {display_total} out of {total}')

    # number of pages actually requested; zero when the search matched nothing,
    # in which case there is no per-request timing to report (and no division by zero)
    request_count = len(range(0, display_total, 50))
    if request_count:
        print(f'performance: {round(elapsed / request_count, 2)} seconds per request.')

    return businesses

In [9]:
def gql_shotgun(lat, lon, radius, spray=6, rotation=0):
    '''
    Grabs businesses using gql_get_businesses(), but aggregates search queries with
    smaller radii to circumvent 1000-business limit imposed by API.
    
    Arguments:
        lat:            latitude, in decimal degrees
        lon:            longitude, in decimal degrees
        radius:         search radius to emulate, in meters
        spray:          6 to use get_six_points(), 18 to use get_eighteen_points()
        rotation:       parameter to rotate the geographic coordinates, in degrees
        
    Returns:
        a list containing dictionaries of each business listing, de-duplicated,
        or None when the full-radius search does not exceed 1000 listings
        
    Raises:
        ValueError:     if spray is neither 6 nor 18
    
    Dependencies:
        gql_get_total()
        gql_get_businesses()
        gql_add_businesses()
        get_six_points()
        get_eighteen_points()
        get_new_point()
        get_earth_radius()
        remove_duplicates()   (not defined in this section; must already be in scope)
    '''
    start = time.time()
    params = {
        'latitude': lat,
        'longitude': lon,
        'radius': radius,
        'offset': 0
    }

    total = gql_get_total(params)

    if total <= 1000:
        print('query does not exceed 1000 listings. please use get_businesses() instead.')
        return None

    # round to the nearest thousand; the old `{total, -3}` printed the tuple "(total, -3)"
    print(f'query returns approximately {round(total, -3)} listings.')
    print('attempting shotgun')

    # the center point is always searched, followed by the surrounding ring(s)
    points = [{'latitude': lat, 'longitude': lon}]

    if spray == 6:
        distance = radius * math.sqrt(3) / 2
        points.extend(get_six_points(lat, lon, distance, rotation))
    elif spray == 18:
        distance = radius * math.sqrt(3) / math.sqrt(13)
        points.extend(get_eighteen_points(lat, lon, distance, rotation))
    else:
        # previously fell through and crashed later with an unbound `distance`
        raise ValueError(f'spray must be 6 or 18, got {spray!r}')

    # radius of each small search circle
    small_radius = distance / math.sqrt(3)

    businesses = []
    for i, point in enumerate(points):
        pellet_params = {
            'latitude': point['latitude'],
            'longitude': point['longitude'],
            'radius': small_radius,
            'offset': 0
        }

        print(f'=========== {i+1} of {len(points)} ============')
        pellet = gql_get_businesses(pellet_params, force=True)

        # gql_get_businesses() can hand back None; skip instead of crashing in extend()
        if pellet:
            businesses.extend(pellet)
        time.sleep(.5)

    # overlapping circles return the same listing more than once
    cleaned = remove_duplicates(businesses)
    end = time.time()
    print(f'elapsed time: {round((end - start) / 60, 1)} min.')
    return cleaned

In [18]:
# Demo run: look up the coordinates for zip code 90004, then emulate a
# 4000 m search with the seven-circle shotgun (center point + six surrounding points).
# gql_shotgun() needs the module-level `client` from the credentials cell; if that
# cell did not finish, this call fails with a NameError.
lat, lon = get_coordinates('90004')
temp_list_3 = gql_shotgun(lat, lon, 4000, spray=6)

NameError: name 'client' is not defined