<a href="https://colab.research.google.com/github/elizabethavargas/cloud-hw1/blob/master/yelp.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Query Restaurant Info from Yelp API by Cuisine
Add your Yelp API key as a Colab secret named `YELP_API_KEY`: in the left panel of Colab, click the "🔑" (Secrets) icon.


In [1]:
# import and installs
!pip install requests
import requests
from google.colab import userdata

# load api key from secrets
# userdata.get raises (instead of returning None) when the secret does not
# exist or the notebook has not been granted access to it, so those cases are
# caught here to make the friendly error message below reachable.
try:
    YELP_API_KEY = userdata.get('YELP_API_KEY')
except (userdata.SecretNotFoundError, userdata.NotebookAccessError):
    YELP_API_KEY = None

# An empty secret is as unusable as a missing one, so treat both the same way.
if not YELP_API_KEY:
    print("Error: YELP_API_KEY not found in Colab secrets. Please add it.")
else:
    print("Yelp API Key loaded successfully.")

Yelp API Key loaded successfully.


In [3]:
import requests
import time

# Collect restaurant records from the Yelp business-search endpoint, one
# paginated query per cuisine, de-duplicated by business id across cuisines.
# Results accumulate in `all_businesses_data` (a list of the raw business dicts
# returned by the API).
API_HOST = 'https://api.yelp.com'
SEARCH_PATH = '/v3/businesses/search'

# Seconds to wait for a response; without a timeout requests.get can block
# indefinitely on a stalled connection.
REQUEST_TIMEOUT = 10

headers = {
    'Authorization': f'Bearer {YELP_API_KEY}'
}

# Values passed as the `categories` search parameter, one search per entry.
cuisines = [
    'newamerican',
    'indpak',
    'chinese',
    'mexican',
    'italian'
]

all_businesses_data = []
seen_ids = set()  # prevent duplicates across cuisines

# offset + limit is kept at or below MAX_RESULTS
# (presumably a Yelp API ceiling -- confirm against the API plan in use).
MAX_RESULTS = 240
LIMIT = 50

for cuisine in cuisines:
    print(f"\nFetching cuisine: {cuisine}")

    for offset in range(0, MAX_RESULTS, LIMIT):  # 0, 50, 100, 150, 200
        # adjust limit so offset + limit never exceeds 240
        current_limit = min(LIMIT, MAX_RESULTS - offset)

        params = {
            'term': 'restaurant',
            'location': 'Manhattan',
            'categories': cuisine,
            'limit': current_limit,
            'offset': offset
        }

        try:
            response = requests.get(
                f'{API_HOST}{SEARCH_PATH}',
                headers=headers,
                params=params,
                timeout=REQUEST_TIMEOUT
            )
        except requests.RequestException as exc:
            # Transport failures (timeout, DNS, connection reset) are handled
            # like HTTP errors: report, stop paging this cuisine, and keep
            # whatever has been collected so far.
            print(f"Error: request failed - {exc}")
            break

        if response.status_code != 200:
            print(f"Error: {response.status_code} - {response.text}")
            break

        data = response.json()
        businesses = data.get('businesses', [])

        # An empty page means there are no more results for this cuisine.
        if not businesses:
            break

        print(f"Fetched {len(businesses)} businesses at offset {offset} (limit={current_limit})")

        for biz in businesses:
            if biz['id'] not in seen_ids:
                all_businesses_data.append(biz)
                seen_ids.add(biz['id'])

        # Brief pause between pages to go easy on the API.
        time.sleep(0.5)

print(f"\nTotal unique businesses collected: {len(all_businesses_data)}")


Fetching cuisine: newamerican
Fetched 50 businesses at offset 0 (limit=50)
Fetched 50 businesses at offset 50 (limit=50)
Fetched 50 businesses at offset 100 (limit=50)
Fetched 50 businesses at offset 150 (limit=50)
Fetched 40 businesses at offset 200 (limit=40)

Fetching cuisine: indpak
Fetched 50 businesses at offset 0 (limit=50)
Fetched 50 businesses at offset 50 (limit=50)
Fetched 50 businesses at offset 100 (limit=50)
Fetched 50 businesses at offset 150 (limit=50)
Fetched 40 businesses at offset 200 (limit=40)

Fetching cuisine: chinese
Fetched 50 businesses at offset 0 (limit=50)
Fetched 50 businesses at offset 50 (limit=50)
Fetched 50 businesses at offset 100 (limit=50)
Fetched 50 businesses at offset 150 (limit=50)
Fetched 40 businesses at offset 200 (limit=40)

Fetching cuisine: mexican
Fetched 50 businesses at offset 0 (limit=50)
Fetched 50 businesses at offset 50 (limit=50)
Fetched 50 businesses at offset 100 (limit=50)
Fetched 50 businesses at offset 150 (limit=50)
Fetched 

In [5]:
# Spot-check a single collected record to inspect the fields Yelp returns.
# NOTE(review): index 33 looks arbitrary; this raises IndexError if fewer than
# 34 businesses were collected.
all_businesses_data[33]

{'id': 'tnxb_qM5e7J5wIfYwJB3Ig',
 'alias': 'the-elgin-new-york',
 'name': 'The Elgin',
 'image_url': 'https://s3-media0.fl.yelpcdn.com/bphoto/q2u2H8F1KhHX9PZL-99YjQ/o.jpg',
 'is_closed': False,
 'url': 'https://www.yelp.com/biz/the-elgin-new-york?adjust_creative=p6mhJwsttIzMzP5Ln3malA&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=p6mhJwsttIzMzP5Ln3malA',
 'review_count': 418,
 'categories': [{'alias': 'cocktailbars', 'title': 'Cocktail Bars'},
  {'alias': 'beerbar', 'title': 'Beer Bar'},
  {'alias': 'newamerican', 'title': 'New American'}],
 'rating': 4.3,
 'coordinates': {'latitude': 40.758339026848695,
  'longitude': -73.98082697379752},
 'transactions': ['delivery', 'pickup'],
 'price': '$$$',
 'location': {'address1': '64 West 48th St',
  'address2': None,
  'address3': '',
  'city': 'New York',
  'zip_code': '10036',
  'country': 'US',
  'state': 'NY',
  'display_address': ['64 West 48th St', 'New York, NY 10036']},
 'phone': '+12122212100',
 'display_phone

In [6]:
# Persist the collected business records (all_businesses_data) to disk as a
# pickle file so they can be reloaded without re-querying the API.
import pickle

output_path = 'yelp_restaurants.pickle'
with open(output_path, 'wb') as pickle_file:
    pickle.dump(all_businesses_data, pickle_file)