In [20]:
# most used short keys
# ctrl + / = turn into a comment
# ctrl + enter = run code
# ctrl + m, b = add a new code line below
# ctrl + m, a = add a new code line below
# ctrl + m, m = turn code block into a text block
# ctrl + m, y = turn code block into a text block



# Load Imports and Libraries

In [21]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [22]:
# install packages
!pip install yelpapi --quiet


In [23]:
# standard imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as pit
import seaborn as sns

# additional imports
import os,json,math,time
from yelpapi import YelpAPI
from tqdm.notebook import tqdm_notebook

In [24]:
# load yelp API credentials
with open ('/content/drive/MyDrive/Colab Notebooks/credentials/yelp_api.json')as file:
  yelp_credentials = json.load(file)

  yelp_api = YelpAPI(yelp_credentials['api-key'], timeout_s = 5.0)

# Defining the Search Terms and File Path

In [25]:
# Define API call parameters and output file path
LOCATION = 'Greenville, SC'
TERM = 'Sushi'
JSON_FILE = '/content/drive/MyDrive/Colab Notebooks/Data/results_SC_sushi.json'

# display the file path where the data will be saved
print(f'Data will be saved to: {JSON_FILE}')
# when you write a print() with f'your message here' this allows you to print
# the value in a variable {variable here}

Data will be saved to: /content/drive/MyDrive/Colab Notebooks/Data/results_SC_sushi.json


# Check if JSON file exists else create

In [26]:
# check if JSON_FILE exists otherwise create
if not os.path.isfile(JSON_FILE):

  # create the directory if it doesn't exists
  os.makedirs(os.path.dirname(JSON_FILE), exist_ok=True)

  # inform the user and save an empty list to file
  print(f'[i] {JSON_FILE} not found. Saving empty list to file')
  with open(JSON_FILE, 'w') as file:
    json.dump([], file)

else:
  # inform the user if the file already exists
  print(f'[i] {JSON_FILE} already exists.')

[i] /content/drive/MyDrive/Colab Notebooks/Data/results_SC_sushi.json not found. Saving empty list to file


In [27]:
# load previous results and set offset based on the number of results
with open(JSON_FILE, 'r') as file:
  previous_results = json.load(file)

n_results = len(previous_results)

print(f'{n_results} previous results found')

0 previous results found


# Making the first API call to get the first page of data

In [28]:
# use your yelp_api variable search_query method to perform your API call
results = yelp_api.search_query(location = LOCATION,
                                term = TERM,
                                offset = n_results)
results.keys()

dict_keys(['businesses', 'total', 'region'])

In [29]:
# how many results did we get?
total_results = results['total']

# call the results
total_results

111

In [30]:
business_results = results['businesses']

# specify the filename where you want to save the data
json_file_path = JSON_FILE

#  save the business data as JSON file
with open(JSON_FILE, 'w') as file:
  json.dump(business_results, file, indent = 4)

In [31]:
# how many details did we get?
results_per_page = len(business_results)
print(f'number of results retrieved per page', results_per_page)

number of results retrieved per page 20


In [32]:
# use the math.ceil to round up the total number of pages
n_pages = math.ceil(total_results/results_per_page)
print(f'total number of pages: {n_pages}')

total number of pages: 6


In [33]:
# create a loop to extract the data
for i in tqdm_notebook(range(1, total_results + 1)):
  try:
    time.sleep(0.2) # short delay to respect API rate limits

    # load existing results to append new data
    with open(JSON_FILE, 'r') as file:
      previous_results = json.load(file)

    # Fetch new results
    new_results = yelp_api.search_query(location = LOCATION,
                                        term = TERM,
                                        offset = len(previous_results))

    # append and save the updated results
    updated_results = previous_results + new_results['businesses']
    with open(JSON_FILE, 'w') as file:
      json.dump(updated_results, file)

  except Exception as e:
    if 'Too Many Requests for url' in str(e):
      print('Rate limit exceede. Stopping data collection')
      break # exit the loop if the rate limit is exceeded

    else:
      print(f'An error occured: {e}')
      continue # continue to the next iteration in case of other errors

  0%|          | 0/111 [00:00<?, ?it/s]

# Open the final JSON file with Pandas


In [34]:
# load the final JSON file
df = pd.read_json(JSON_FILE)

# display the first 5 rows of the data
df.head()

Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,location,phone,display_phone,distance,price
0,2jXS4oZkMhAONtd2j7L5Yg,chef-21-sushi-burger-and-korean-bbq-greenville-3,Chef 21 Sushi Burger & Korean BBQ,https://s3-media4.fl.yelpcdn.com/bphoto/DbV4BU...,False,https://www.yelp.com/biz/chef-21-sushi-burger-...,38,"[{'alias': 'korean', 'title': 'Korean'}, {'ali...",4.5,"{'latitude': 34.847671, 'longitude': -82.394229}","[pickup, delivery]","{'address1': '500 E McBee Ave', 'address2': 'S...",18642633018,(864) 263-3018,3341.861901,
1,RGRk1ioORwm_FIX8PM732Q,konnichiwa-greenville,Konnichiwa,https://s3-media3.fl.yelpcdn.com/bphoto/p47H0_...,False,https://www.yelp.com/biz/konnichiwa-greenville...,71,"[{'alias': 'sushi', 'title': 'Sushi Bars'}, {'...",4.1,"{'latitude': 34.845952342825115, 'longitude': ...",[],"{'address1': '101 Falls Park Dr', 'address2': ...",18642524436,(864) 252-4436,4184.255183,
2,zG_XOAFi9Y560WJ1RvghBw,sushi-masa-japanese-restaurant-greenville,Sushi-Masa Japanese Restaurant,https://s3-media1.fl.yelpcdn.com/bphoto/zsRavZ...,False,https://www.yelp.com/biz/sushi-masa-japanese-r...,163,"[{'alias': 'sushi', 'title': 'Sushi Bars'}]",4.4,"{'latitude': 34.8512725830078, 'longitude': -8...",[delivery],"{'address1': '8590 Pelham Rd', 'address2': 'St...",18642882227,(864) 288-2227,11481.830881,$$
3,7cJxOV-ANX1qLThK3yV96w,otto-izakaya-greenville-4,Otto Izakaya,https://s3-media1.fl.yelpcdn.com/bphoto/TdPhFy...,False,https://www.yelp.com/biz/otto-izakaya-greenvil...,449,"[{'alias': 'japanese', 'title': 'Japanese'}, {...",4.2,"{'latitude': 34.8228218820722, 'longitude': -8...",[delivery],"{'address1': '15 Market Point Dr', 'address2':...",18645688009,(864) 568-8009,5933.485357,$$
4,Kx1x7Kf6C2gtogQErWSu0A,o-ku-greenville,O-Ku,https://s3-media2.fl.yelpcdn.com/bphoto/7dR0xy...,False,https://www.yelp.com/biz/o-ku-greenville?adjus...,45,"[{'alias': 'sushi', 'title': 'Sushi Bars'}, {'...",4.0,"{'latitude': 34.847954222223294, 'longitude': ...",[],"{'address1': '30 W Broad St', 'address2': None...",18643264812,(864) 326-4812,3931.009612,


# Save the file in the directory

In [35]:
# specify the directory
directory = 'Data'
filename = 'final_results_SC_sushi.csv.gz' # include the .csv.gz extension
path = os.path.join(directory, filename)

# ensure that the 'Data' directory exists
os.makedirs(directory, exist_ok=True)

# save the dataframe as a compressed csv file (to save space)
df.to_csv(path, compression='gzip', index=False)

In [36]:
# save as JSON file
json_file = 'Data/final_results_SC_sushi.json'

# sav