In [1]:
#Import the required dependencies
import requests
import pandas as pd
import json
from urllib.parse import urlparse
import time

#Import the API Key
from config import yelp_key

In [2]:
#Define the header
headers = {
        'Authorization': 'Bearer %s' %yelp_key
    }

#Create a list to store the data retrieved
complete_business_data = []

#Starting the URL for yelp businesses API call
url = "https://api.yelp.com/v3/businesses/search"

#Set counters
record_count = 0

#Carry out the process until we get an offset of 1000, according to the yelp documentation

for offset in range(0,1000,50): #previous code stoped at retrieval 9(450), then at 5 therefore 850
    parameters = {'location': 'San Francisco',
                    'term':'restaurants',
                    'limit':50,
                    'radius':30000,
                   'offset':offset
                  }
    record_count+=1
    print("-----------------------------------------------------")
    print(f"Starting retrieval number {record_count}")
    

    #Make a request to Yelp API
    response = requests.get(url= url, params= parameters, headers= headers)
    business_data = response.json()

    print(business_data)
    #Iterate through every response from the API to store the data retrieved
    for business in business_data['businesses']:

        try:
            business_id = business['id']
            business_name = business['name']
            business_image = business['image_url']
            business_url_yelp = business['url']
            business_review_count = business['review_count']
            business_category = business['categories'][0]['title']
            business_rating = business['rating']
            business_price = business['price']
            business_lat = business['coordinates']['latitude']
            business_lng = business['coordinates']['longitude']
            business_address = business['location']['address1']
            business_city =  business['location']['city']
            business_state =  business['location']['state']
            business_country =  business['location']['country']
            business_phone = business['display_phone']

            complete_business_data.append({ 'ID': business_id,
                                           'Name':business_name,
                                          'Image':business_image,
                                          'Url':business_url_yelp,
                                           'Review count': business_review_count,
                                          'Category':business_category,
                                           'Rating': business_rating,
                                          'Price':business_price,
                                          'Latitude':business_lat,
                                          'Longitude':business_lng,
                                          'Address':business_address,
                                          'City':business_city,
                                           'State': business_state,
                                           'Country':business_country,
                                          'Phone':business_phone})
        except:
            print('This restaurant has missing information. Skipping...')
            pass

    #Make a request to the Yelp businesses reviews API
    for dict in complete_business_data:
        dict['Review'] = []
        res_url = dict['Url']
        res_id = dict['ID']
        endpoint = 'https://api.yelp.com/v3/businesses/{}/reviews'.format(res_id)
        response = requests.get(url= endpoint, headers= headers)
        business_reviews = response.json() #https://www.yelp-support.com/article/How-is-the-order-of-reviews-determined?}

        #Parse the urls from the restaurant and the review    
        for review in business_reviews['reviews']:
                review_text = review['text']
                review_url = review['url']
                parse_res_url = urlparse(res_url)
                parse_rev_url = urlparse(review_url)

                #Compare URL from restaurants and reviews to pair them up
                if parse_res_url[2] == parse_rev_url[2]:
                    dict['Review'].append(review_text)

    print(f"Finished retrieval number {record_count}")                             
    time.sleep(5)

print("Data retrieval complete")

-----------------------------------------------------
Starting retrieval number 1
{'businesses': [{'id': 'HHtpR0RslupSQ99GIIwW5A', 'alias': 'marufuku-ramen-san-francisco-5', 'name': 'Marufuku Ramen', 'image_url': 'https://s3-media4.fl.yelpcdn.com/bphoto/ouK2VmW0SrI70jsJpTxJhw/o.jpg', 'is_closed': False, 'url': 'https://www.yelp.com/biz/marufuku-ramen-san-francisco-5?adjust_creative=iuMW-YN0A5e1JsA5cAgIDQ&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=iuMW-YN0A5e1JsA5cAgIDQ', 'review_count': 4122, 'categories': [{'alias': 'ramen', 'title': 'Ramen'}], 'rating': 4.5, 'coordinates': {'latitude': 37.78511637816802, 'longitude': -122.43200834862841}, 'transactions': ['delivery', 'pickup'], 'price': '$$', 'location': {'address1': '1581 Webster St', 'address2': 'Ste 235', 'address3': None, 'city': 'San Francisco', 'zip_code': '94115', 'country': 'US', 'state': 'CA', 'display_address': ['1581 Webster St', 'Ste 235', 'San Francisco, CA 94115']}, 'phone': '+14158729786', 'd

Finished retrieval number 1
-----------------------------------------------------
Starting retrieval number 2
{'businesses': [{'id': 'RNy3_hU1N2qyS5PVc9RaYQ', 'alias': 'perilla-san-francisco-2', 'name': 'Perilla', 'image_url': 'https://s3-media2.fl.yelpcdn.com/bphoto/gEn6tWCzzZtCmhrjpfcUZA/o.jpg', 'is_closed': False, 'url': 'https://www.yelp.com/biz/perilla-san-francisco-2?adjust_creative=iuMW-YN0A5e1JsA5cAgIDQ&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=iuMW-YN0A5e1JsA5cAgIDQ', 'review_count': 1477, 'categories': [{'alias': 'vietnamese', 'title': 'Vietnamese'}], 'rating': 4.0, 'coordinates': {'latitude': 37.76415, 'longitude': -122.46711}, 'transactions': ['pickup', 'delivery'], 'price': '$$', 'location': {'address1': '836 Irving St', 'address2': '', 'address3': '', 'city': 'San Francisco', 'zip_code': '94122', 'country': 'US', 'state': 'CA', 'display_address': ['836 Irving St', 'San Francisco, CA 94122']}, 'phone': '+14155649980', 'display_phone': '(415) 564

Finished retrieval number 2
-----------------------------------------------------
Starting retrieval number 3
{'businesses': [{'id': 'XhJ6ZYagcWNHpJ1H1KvNlQ', 'alias': 'a-mano-san-francisco-2', 'name': 'a Mano', 'image_url': 'https://s3-media4.fl.yelpcdn.com/bphoto/ezG_zJwov2QNZwyK14EkwA/o.jpg', 'is_closed': False, 'url': 'https://www.yelp.com/biz/a-mano-san-francisco-2?adjust_creative=iuMW-YN0A5e1JsA5cAgIDQ&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=iuMW-YN0A5e1JsA5cAgIDQ', 'review_count': 1382, 'categories': [{'alias': 'italian', 'title': 'Italian'}, {'alias': 'cocktailbars', 'title': 'Cocktail Bars'}, {'alias': 'pizza', 'title': 'Pizza'}], 'rating': 4.0, 'coordinates': {'latitude': 37.776963880538, 'longitude': -122.423868469312}, 'transactions': ['delivery'], 'price': '$$', 'location': {'address1': '450 Hayes St', 'address2': None, 'address3': '', 'city': 'San Francisco', 'zip_code': '94102', 'country': 'US', 'state': 'CA', 'display_address': ['450 Hayes 

Finished retrieval number 3
-----------------------------------------------------
Starting retrieval number 4
{'businesses': [{'id': 'zj09K037XrZ_51Yh5qsIAw', 'alias': 'kui-shin-bo-san-francisco-2', 'name': 'Kui Shin Bo', 'image_url': 'https://s3-media3.fl.yelpcdn.com/bphoto/Tk90nxYDH5tW0c2G0Tq88A/o.jpg', 'is_closed': False, 'url': 'https://www.yelp.com/biz/kui-shin-bo-san-francisco-2?adjust_creative=iuMW-YN0A5e1JsA5cAgIDQ&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=iuMW-YN0A5e1JsA5cAgIDQ', 'review_count': 889, 'categories': [{'alias': 'japanese', 'title': 'Japanese'}, {'alias': 'sushi', 'title': 'Sushi Bars'}], 'rating': 4.0, 'coordinates': {'latitude': 37.785367, 'longitude': -122.429465}, 'transactions': ['delivery', 'pickup'], 'price': '$$', 'location': {'address1': '22 Peace Plz', 'address2': 'Fl 2', 'address3': None, 'city': 'San Francisco', 'zip_code': '94115', 'country': 'US', 'state': 'CA', 'display_address': ['22 Peace Plz', 'Fl 2', 'San Francisco, C

Finished retrieval number 4
-----------------------------------------------------
Starting retrieval number 5
{'businesses': [{'id': 'DQy0uYB0kym-s3oZZRxs5Q', 'alias': 'house-of-pancakes-san-francisco', 'name': 'House Of Pancakes', 'image_url': 'https://s3-media3.fl.yelpcdn.com/bphoto/naBJf3lEfqGJyqGhB54T0w/o.jpg', 'is_closed': False, 'url': 'https://www.yelp.com/biz/house-of-pancakes-san-francisco?adjust_creative=iuMW-YN0A5e1JsA5cAgIDQ&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=iuMW-YN0A5e1JsA5cAgIDQ', 'review_count': 1121, 'categories': [{'alias': 'noodles', 'title': 'Noodles'}, {'alias': 'taiwanese', 'title': 'Taiwanese'}, {'alias': 'pancakes', 'title': 'Pancakes'}], 'rating': 4.0, 'coordinates': {'latitude': 37.742904, 'longitude': -122.476349}, 'transactions': ['delivery'], 'price': '$$', 'location': {'address1': '937 Taraval St', 'address2': '', 'address3': '', 'city': 'San Francisco', 'zip_code': '94116', 'country': 'US', 'state': 'CA', 'display_addres

Finished retrieval number 5
-----------------------------------------------------
Starting retrieval number 6
{'businesses': [{'id': 'KarMlfJURqPo70kQZ4LzFg', 'alias': 'tarragon-cafe-san-francisco', 'name': 'Tarragon Cafe', 'image_url': 'https://s3-media4.fl.yelpcdn.com/bphoto/lnYmJA9sKbu6DyLMTWV2ZQ/o.jpg', 'is_closed': False, 'url': 'https://www.yelp.com/biz/tarragon-cafe-san-francisco?adjust_creative=iuMW-YN0A5e1JsA5cAgIDQ&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=iuMW-YN0A5e1JsA5cAgIDQ', 'review_count': 69, 'categories': [{'alias': 'cafes', 'title': 'Cafes'}], 'rating': 4.5, 'coordinates': {'latitude': 37.77139, 'longitude': -122.43014}, 'transactions': [], 'price': '$$', 'location': {'address1': '200 Fillmore St', 'address2': None, 'address3': '', 'city': 'San Francisco', 'zip_code': '94117', 'country': 'US', 'state': 'CA', 'display_address': ['200 Fillmore St', 'San Francisco, CA 94117']}, 'phone': '+14156243318', 'display_phone': '(415) 624-3318', 'dis

Finished retrieval number 6
-----------------------------------------------------
Starting retrieval number 7
{'businesses': [{'id': '4eaK2-Q0dEXWMr5SOoV_Sg', 'alias': 'roam-artisan-burgers-san-francisco-3', 'name': 'Roam Artisan Burgers', 'image_url': 'https://s3-media4.fl.yelpcdn.com/bphoto/WWc1dK4hMO6knOQe-y_6yg/o.jpg', 'is_closed': False, 'url': 'https://www.yelp.com/biz/roam-artisan-burgers-san-francisco-3?adjust_creative=iuMW-YN0A5e1JsA5cAgIDQ&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=iuMW-YN0A5e1JsA5cAgIDQ', 'review_count': 1321, 'categories': [{'alias': 'burgers', 'title': 'Burgers'}], 'rating': 4.0, 'coordinates': {'latitude': 37.787682, 'longitude': -122.433853}, 'transactions': ['delivery', 'pickup'], 'price': '$$', 'location': {'address1': '1923 Fillmore St', 'address2': '', 'address3': '', 'city': 'San Francisco', 'zip_code': '94115', 'country': 'US', 'state': 'CA', 'display_address': ['1923 Fillmore St', 'San Francisco, CA 94115']}, 'phone': '+

Finished retrieval number 7
-----------------------------------------------------
Starting retrieval number 8
{'businesses': [{'id': 'mydKjdG8gJOcRQ9cZwkQpQ', 'alias': 'ryokos-san-francisco', 'name': "Ryoko's", 'image_url': 'https://s3-media3.fl.yelpcdn.com/bphoto/w3i_dVIjGTyIeiYXUzumkA/o.jpg', 'is_closed': False, 'url': 'https://www.yelp.com/biz/ryokos-san-francisco?adjust_creative=iuMW-YN0A5e1JsA5cAgIDQ&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=iuMW-YN0A5e1JsA5cAgIDQ', 'review_count': 3250, 'categories': [{'alias': 'sushi', 'title': 'Sushi Bars'}, {'alias': 'japanese', 'title': 'Japanese'}, {'alias': 'bars', 'title': 'Bars'}], 'rating': 4.0, 'coordinates': {'latitude': 37.7881987776243, 'longitude': -122.412059158087}, 'transactions': ['delivery'], 'price': '$$', 'location': {'address1': '619 Taylor St', 'address2': '', 'address3': '', 'city': 'San Francisco', 'zip_code': '94102', 'country': 'US', 'state': 'CA', 'display_address': ['619 Taylor St', 'San Fr

Finished retrieval number 8
-----------------------------------------------------
Starting retrieval number 9
{'businesses': [{'id': 'mvPT__X1KlyDx78Ol2E4dQ', 'alias': 'brazuca-grill-san-francisco-2', 'name': 'Brazuca Grill', 'image_url': 'https://s3-media3.fl.yelpcdn.com/bphoto/DdYj6Ya3w6qLoUbIu0oowA/o.jpg', 'is_closed': False, 'url': 'https://www.yelp.com/biz/brazuca-grill-san-francisco-2?adjust_creative=iuMW-YN0A5e1JsA5cAgIDQ&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=iuMW-YN0A5e1JsA5cAgIDQ', 'review_count': 26, 'categories': [{'alias': 'brazilian', 'title': 'Brazilian'}, {'alias': 'foodtrucks', 'title': 'Food Trucks'}, {'alias': 'acaibowls', 'title': 'Acai Bowls'}], 'rating': 4.5, 'coordinates': {'latitude': 37.75804, 'longitude': -122.39167}, 'transactions': [], 'price': '$$', 'location': {'address1': '998 Indiana St', 'address2': None, 'address3': None, 'city': 'San Francisco', 'zip_code': '94107', 'country': 'US', 'state': 'CA', 'display_address': ['99

Finished retrieval number 9
-----------------------------------------------------
Starting retrieval number 10
{'businesses': [{'id': 'vqY8QoWhNTiqy4lhld54zQ', 'alias': 'wilder-san-francisco', 'name': 'Wilder', 'image_url': 'https://s3-media2.fl.yelpcdn.com/bphoto/0FGXZYWsdanvdf7cIOZVkA/o.jpg', 'is_closed': False, 'url': 'https://www.yelp.com/biz/wilder-san-francisco?adjust_creative=iuMW-YN0A5e1JsA5cAgIDQ&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=iuMW-YN0A5e1JsA5cAgIDQ', 'review_count': 113, 'categories': [{'alias': 'newamerican', 'title': 'American (New)'}, {'alias': 'comfortfood', 'title': 'Comfort Food'}, {'alias': 'breakfast_brunch', 'title': 'Breakfast & Brunch'}], 'rating': 4.0, 'coordinates': {'latitude': 37.798799, 'longitude': -122.435654}, 'transactions': ['restaurant_reservation', 'pickup', 'delivery'], 'price': '$$', 'location': {'address1': '3154 Fillmore St', 'address2': None, 'address3': '', 'city': 'San Francisco', 'zip_code': '94123', 'count

Finished retrieval number 10
-----------------------------------------------------
Starting retrieval number 11
{'businesses': [{'id': 'RxIFKVvc8iVBPIVpe55Byg', 'alias': 'bon-nene-san-francisco', 'name': 'Bon, Nene', 'image_url': 'https://s3-media2.fl.yelpcdn.com/bphoto/d0vHFnUrWeT8HcFVx9rCNQ/o.jpg', 'is_closed': False, 'url': 'https://www.yelp.com/biz/bon-nene-san-francisco?adjust_creative=iuMW-YN0A5e1JsA5cAgIDQ&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=iuMW-YN0A5e1JsA5cAgIDQ', 'review_count': 311, 'categories': [{'alias': 'japanese', 'title': 'Japanese'}, {'alias': 'asianfusion', 'title': 'Asian Fusion'}], 'rating': 4.0, 'coordinates': {'latitude': 37.75762490446, 'longitude': -122.411629484421}, 'transactions': ['delivery', 'pickup'], 'price': '$$', 'location': {'address1': '2850 21st St', 'address2': '', 'address3': None, 'city': 'San Francisco', 'zip_code': '94110', 'country': 'US', 'state': 'CA', 'display_address': ['2850 21st St', 'San Francisco, CA 9

Finished retrieval number 11
-----------------------------------------------------
Starting retrieval number 12
{'businesses': [{'id': 'p6bLw7PS63Kbge_wyIOP8w', 'alias': 'garibaldis-san-francisco-2', 'name': 'Garibaldis', 'image_url': 'https://s3-media3.fl.yelpcdn.com/bphoto/zRYOYgtiLopeZcF2mcgi-w/o.jpg', 'is_closed': False, 'url': 'https://www.yelp.com/biz/garibaldis-san-francisco-2?adjust_creative=iuMW-YN0A5e1JsA5cAgIDQ&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=iuMW-YN0A5e1JsA5cAgIDQ', 'review_count': 581, 'categories': [{'alias': 'newamerican', 'title': 'American (New)'}], 'rating': 4.0, 'coordinates': {'latitude': 37.7883799, 'longitude': -122.44729}, 'transactions': ['delivery'], 'price': '$$$', 'location': {'address1': '347 Presidio Ave', 'address2': '', 'address3': '', 'city': 'San Francisco', 'zip_code': '94115', 'country': 'US', 'state': 'CA', 'display_address': ['347 Presidio Ave', 'San Francisco, CA 94115']}, 'phone': '+14155638841', 'display_phon

Finished retrieval number 12
-----------------------------------------------------
Starting retrieval number 13
{'businesses': [{'id': 'gc-92vNLNa-Wyn-W3T5Lcg', 'alias': 'cafe-réveille-san-francisco-9', 'name': 'Cafe Réveille', 'image_url': 'https://s3-media2.fl.yelpcdn.com/bphoto/6tcpuSiydzGz_QavWlgaLw/o.jpg', 'is_closed': False, 'url': 'https://www.yelp.com/biz/cafe-r%C3%A9veille-san-francisco-9?adjust_creative=iuMW-YN0A5e1JsA5cAgIDQ&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=iuMW-YN0A5e1JsA5cAgIDQ', 'review_count': 108, 'categories': [{'alias': 'breakfast_brunch', 'title': 'Breakfast & Brunch'}, {'alias': 'cafes', 'title': 'Cafes'}], 'rating': 4.0, 'coordinates': {'latitude': 37.79485, 'longitude': -122.421361}, 'transactions': [], 'price': '$$', 'location': {'address1': '1998 Polk St', 'address2': '', 'address3': None, 'city': 'San Francisco', 'zip_code': '94109', 'country': 'US', 'state': 'CA', 'display_address': ['1998 Polk St', 'San Francisco, CA 94109

Finished retrieval number 13
-----------------------------------------------------
Starting retrieval number 14
{'businesses': [{'id': 'SdI0uCelI4ihCwGyMLv03A', 'alias': 'little-skillet-san-francisco-2', 'name': 'Little Skillet', 'image_url': 'https://s3-media3.fl.yelpcdn.com/bphoto/G0zO3gXTdS-heSMnIIFtsg/o.jpg', 'is_closed': False, 'url': 'https://www.yelp.com/biz/little-skillet-san-francisco-2?adjust_creative=iuMW-YN0A5e1JsA5cAgIDQ&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=iuMW-YN0A5e1JsA5cAgIDQ', 'review_count': 2102, 'categories': [{'alias': 'breakfast_brunch', 'title': 'Breakfast & Brunch'}, {'alias': 'sandwiches', 'title': 'Sandwiches'}, {'alias': 'waffles', 'title': 'Waffles'}], 'rating': 3.5, 'coordinates': {'latitude': 37.778851, 'longitude': -122.393937}, 'transactions': ['delivery', 'pickup'], 'price': '$$', 'location': {'address1': '360 Ritch St', 'address2': '', 'address3': '', 'city': 'San Francisco', 'zip_code': '94107', 'country': 'US', 'stat

Finished retrieval number 14
-----------------------------------------------------
Starting retrieval number 15
{'businesses': [{'id': '7Q3QIZz5UX-JSTSKKkRqDg', 'alias': 'waterbar-san-francisco', 'name': 'Waterbar', 'image_url': 'https://s3-media2.fl.yelpcdn.com/bphoto/9Q0auyPrzkJyZcWLcchZRA/o.jpg', 'is_closed': False, 'url': 'https://www.yelp.com/biz/waterbar-san-francisco?adjust_creative=iuMW-YN0A5e1JsA5cAgIDQ&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=iuMW-YN0A5e1JsA5cAgIDQ', 'review_count': 3526, 'categories': [{'alias': 'seafood', 'title': 'Seafood'}, {'alias': 'newamerican', 'title': 'American (New)'}, {'alias': 'raw_food', 'title': 'Live/Raw Food'}], 'rating': 3.5, 'coordinates': {'latitude': 37.7904941686413, 'longitude': -122.389039464859}, 'transactions': ['pickup', 'delivery'], 'price': '$$$', 'location': {'address1': '399 The Embarcadero', 'address2': '', 'address3': '', 'city': 'San Francisco', 'zip_code': '94105', 'country': 'US', 'state': 'CA',

KeyError: 'reviews'

In [3]:
#Check the length of the data retrieved
len(complete_business_data)

624

In [4]:
#Store the data in a dataframe
restaurants_df = pd.DataFrame(complete_business_data)
restaurants_df.head(2)

Unnamed: 0,ID,Name,Image,Url,Review count,Category,Rating,Price,Latitude,Longitude,Address,City,State,Country,Phone,Review
0,HHtpR0RslupSQ99GIIwW5A,Marufuku Ramen,https://s3-media4.fl.yelpcdn.com/bphoto/ouK2Vm...,https://www.yelp.com/biz/marufuku-ramen-san-fr...,4122,Ramen,4.5,$$,37.785116,-122.432008,1581 Webster St,San Francisco,CA,US,(415) 872-9786,"[Long long overdue review, but I loved this pl..."
1,f-m7-hyFzkf0HSEeQ2s-9A,Fog Harbor Fish House,https://s3-media2.fl.yelpcdn.com/bphoto/by8Hh6...,https://www.yelp.com/biz/fog-harbor-fish-house...,8324,Seafood,4.5,$$,37.808988,-122.410297,39 Pier,San Francisco,CA,US,(415) 969-2010,[Great food and not a long wait. Perfect on a...


In [5]:
#Store the information in a CSV file
restaurants_df.to_csv('Restaurants_reviews_SF_full_',index=False,header=True)