In [1]:
from api_key import YELP_API_KEY

To get a Yelp API key, follow [these instructions](https://www.yelp.com/developers/documentation/v3/authentication).

In [2]:
import json
import os
from urllib.parse import urlencode

import pandas as pd
import requests

class Client():
    """Minimal client for the Yelp business-search endpoint.

    Stores an API key and uses it to send authenticated GET requests.
    """

    # Base URL of the search endpoint; the trailing '?' lets the encoded
    # query string be appended directly.
    endpoint = 'https://api.yelp.com/v3/businesses/search?'
    # Keyword names accepted by search_businesses. The attribute keeps its
    # original spelling so existing references keep working.
    valid_seach_keys = ['term','location','latitude','longitude','radius','categories','locale','limit','offset','sort_by','price','open_now','open_at','attributes']

    def __init__(self, api_key):
        """Remember the API key used for the Authorization header."""
        self.api_key = api_key

    def request(self, endpoint):
        """Send a GET request to ``endpoint`` with the bearer-token header.

        Args:
            endpoint: Full URL to request, including any query string.

        Returns:
            The response object from ``requests.get``; its status is not
            checked here.
        """
        headers = {"Authorization": f"Bearer {self.api_key}"}
        return requests.get(endpoint, headers=headers)

    def search_businesses(self, **kwargs):
        """Query the search endpoint with the given keyword parameters.

        Args:
            **kwargs: Search parameters; every name must appear in
                ``valid_seach_keys``.

        Returns:
            Whatever ``request`` returns for the assembled URL.

        Raises:
            AssertionError: If a keyword is not a supported search key.
        """
        for k in kwargs:
            assert k in self.valid_seach_keys, f'unsupported search key: {k!r}'
        # Percent-encode names and values so spaces, '&', '=' or non-ASCII
        # characters in a value cannot corrupt the query string.
        url = self.endpoint + urlencode(kwargs)
        return self.request(url)
    
def save_to_json(obj, fpath='yelp.json'):
    """Write ``obj`` to ``fpath`` as JSON.

    Args:
        obj: Any JSON-serializable object.
        fpath: Destination file path. Missing parent folders are created so
            a long download is not lost to a ``FileNotFoundError`` at save
            time.
    """
    parent_dir = os.path.dirname(fpath)
    if parent_dir:
        # exist_ok avoids failing when the folder is already there.
        os.makedirs(parent_dir, exist_ok=True)
    # Explicit encoding keeps the output independent of the platform default.
    with open(fpath, 'w', encoding='utf-8') as f:
        json.dump(obj, f)

def load_from_json(fpath='yelp.json'):
    """Read and parse the JSON file at ``fpath``.

    Args:
        fpath: Path of the JSON file to read.

    Returns:
        The decoded Python object (whatever ``json.load`` produces).
    """
    # JSON text is UTF-8; do not depend on the platform's default encoding.
    with open(fpath, 'r', encoding='utf-8') as f:
        return json.load(f)

In [3]:
# Centre point of the search: M8 bridge.
latitude = 55.863937
longitude = -4.270185

# Remaining search parameters, passed to the search call as-is.
radius = 5000
categories = 'restaurants'
limit = 50  # page size; max=50

In [4]:
# Build the API client with the key imported from the api_key module.
c = Client(api_key=YELP_API_KEY)

In [None]:
offset = 0
last_search_results = limit  # seeded with `limit` so the loop runs at least once
MAX_SIZE = 6000  # upper bound on the offset this loop will request
all_businesses = []

# Page through the results: continue while the previous page came back full
# (a shorter page means there is nothing left) and the offset is below
# MAX_SIZE.
while last_search_results >= limit and offset < MAX_SIZE:
    r = c.search_businesses(
        latitude=latitude,
        longitude=longitude,
        radius=radius,
        categories=categories,
        offset=offset,
        limit=limit
    )
    # Decode the body once per page instead of re-parsing it on every access.
    payload = r.json()
    if 'error' in payload:
        # Show the error and stop paging; what was collected so far is
        # still saved below.
        print(payload)
        break

    found_businesses = payload['businesses']
    last_search_results = len(found_businesses)
    offset += last_search_results

    all_businesses.extend(found_businesses)
    print(f'added: {last_search_results}, curr len={len(all_businesses)}')

save_to_json(obj=all_businesses, fpath='data/yelp.json')

# Convert JSON to a table

In [10]:
# Read the saved businesses back from data/yelp.json.
all_businesses = load_from_json(fpath='data/yelp.json')

def convert_item_to_record(b):
    """Flatten a (possibly nested) dict into a single-level record.

    Scalar values (int, float, bool, str) are copied under their own key.
    Nested dicts are flattened recursively and merged into the result, so a
    nested key overwrites an earlier key with the same name. Any other value
    type (lists, None, ...) is dropped, as is the 'display_address' key.

    Args:
        b: Mapping to flatten.

    Returns:
        A new flat dict; ``b`` is not modified.
    """
    skipped_keys = ('display_address',)
    scalar_types = (int, float, bool, str)

    flat = {}
    for key, value in b.items():
        if key in skipped_keys:
            continue
        if isinstance(value, scalar_types):
            flat[key] = value
        elif isinstance(value, dict):
            # Merge the flattened child straight into the parent record.
            flat.update(convert_item_to_record(value))
    return flat

In [11]:
# Flatten every business into a single-level dict, then build the table.
records = list(map(convert_item_to_record, all_businesses))
df = pd.DataFrame(records)

# Column order used for the exported table.
ordered_col = [
    # headline information
    'name',
    'rating',
    'review_count',
    'price',
    # position
    'latitude',
    'longitude',
    'zip_code',
    # identifiers
    'id',
    'alias',
    # postal address
    'address1',
    'address2',
    'address3',
    'city',
    'state',
    'country',
    # contact details and links
    'display_phone',
    'image_url',
    'phone',
    'url',
]

In [12]:
# Export the selected columns in the chosen order. reindex() is used instead
# of df[ordered_col] so that a column missing from every record is written
# as an empty column rather than raising KeyError.
df.reindex(columns=ordered_col).to_csv('data/yelp.csv', index=False)