In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import os, json, math, time

from tqdm.notebook import tqdm_notebook

In [2]:
#Load in API credentials 

import json
with open ('/Users/bandj/.secret/yelp_api.json') as f: 
    login = json.load(f)

login.keys()

dict_keys(['client-id', 'api-key'])

In [3]:
from yelpapi import YelpAPI
yelp = YelpAPI(login['api-key'], timeout_s = 5.0)
yelp

<yelpapi.yelpapi.YelpAPI at 0x21751534e20>

In [4]:
location = 'Austin, TX 78652'
term = 'sushi'

In [5]:
location.split(',')[0]

'Austin'

In [6]:
#Specify folder for saving the data
folder = 'Data/'

os.makedirs(folder, exist_ok=True)
json_file = folder+f"{location.split(',')[0]}-{term}.json"

In [7]:
json_file

'Data/Austin-sushi.json'

In [8]:
#Check if JSON file exists, create it if it doesnt
file_exists = os.path.isfile(json_file)

#if file doesn't exist
if file_exists == False:
    folder = os.path.dirname(json_file)
    
    if len(folder) > 0:
        os.makedirs(folder, exist_ok = True)
        
#Inform user and save in empty list

    print(f'[i]{json_file} not found. Saving empty list to file.')
    
    with open (json_file, 'w') as f:
        json.dump([],f)
        
else:
    print(f'[i] {json_file} already exists.')

[i] Data/Austin-sushi.json already exists.


In [9]:
#Make first API call to get the first page of data
results = yelp.search_query(term=term, location = location)
type(results)

dict

In [10]:
len(results)

3

In [11]:
results.keys()

dict_keys(['businesses', 'total', 'region'])

In [12]:
results['total']

193

In [13]:
results['region']

{'center': {'longitude': -97.87651062011719, 'latitude': 30.133210656698505}}

In [14]:
results['businesses']

[{'id': 'de4NckKM0aX-sPJ8KrxWjA',
  'alias': 'bluefin-sushi-bar-and-ramen-sunset-valley',
  'name': 'Bluefin Sushi Bar & Ramen',
  'image_url': 'https://s3-media1.fl.yelpcdn.com/bphoto/mzOyl-JJNkC9V9FZneZ_mg/o.jpg',
  'is_closed': False,
  'url': 'https://www.yelp.com/biz/bluefin-sushi-bar-and-ramen-sunset-valley?adjust_creative=M_n_c0bs2T-9fpMzepv-5g&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=M_n_c0bs2T-9fpMzepv-5g',
  'review_count': 178,
  'categories': [{'alias': 'sushi', 'title': 'Sushi Bars'},
   {'alias': 'ramen', 'title': 'Ramen'}],
  'rating': 4.5,
  'coordinates': {'latitude': 30.2307, 'longitude': -97.82042},
  'transactions': ['delivery', 'pickup'],
  'price': '$$',
  'location': {'address1': '5400 Brodie Ln',
   'address2': 'Ste 1200',
   'address3': None,
   'city': 'Sunset Valley',
   'zip_code': '78745',
   'country': 'US',
   'state': 'TX',
   'display_address': ['5400 Brodie Ln',
    'Ste 1200',
    'Sunset Valley, TX 78745']},
  'phone': '+

In [15]:
#how many results total?
pd.DataFrame(results['businesses'])

Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
0,de4NckKM0aX-sPJ8KrxWjA,bluefin-sushi-bar-and-ramen-sunset-valley,Bluefin Sushi Bar & Ramen,https://s3-media1.fl.yelpcdn.com/bphoto/mzOyl-...,False,https://www.yelp.com/biz/bluefin-sushi-bar-and...,178,"[{'alias': 'sushi', 'title': 'Sushi Bars'}, {'...",4.5,"{'latitude': 30.2307, 'longitude': -97.82042}","[delivery, pickup]",$$,"{'address1': '5400 Brodie Ln', 'address2': 'St...",15129531200.0,(512) 953-1200,11756.566905
1,NRPemqVb4qpWFF0Avq_6OQ,eurasia-sushi-bar-and-seafood-austin-2,Eurasia Sushi Bar & Seafood,https://s3-media2.fl.yelpcdn.com/bphoto/u0zypd...,False,https://www.yelp.com/biz/eurasia-sushi-bar-and...,483,"[{'alias': 'sushi', 'title': 'Sushi Bars'}, {'...",4.5,"{'latitude': 30.23475765103238, 'longitude': -...","[delivery, pickup]",$$,"{'address1': '7101 W Hwy 71', 'address2': 'Ste...",15123820968.0,(512) 382-0968,11292.666778
2,cWgni4c-EZhsbQBoCwANhw,nanami-sushi-bar-and-grill-austin,Nanami Sushi Bar & Grill,https://s3-media3.fl.yelpcdn.com/bphoto/5KqkN4...,False,https://www.yelp.com/biz/nanami-sushi-bar-and-...,415,"[{'alias': 'sushi', 'title': 'Sushi Bars'}, {'...",3.5,"{'latitude': 30.1934934475843, 'longitude': -9...","[delivery, pickup]",$$,"{'address1': '9001 Brodie Ln', 'address2': 'St...",15122924228.0,(512) 292-4228,7439.741259
3,DrfvW9_9pytbYzqRgNTeYw,haiku-japanese-restaurant-austin,Haiku Japanese Restaurant,https://s3-media1.fl.yelpcdn.com/bphoto/ReVb1V...,False,https://www.yelp.com/biz/haiku-japanese-restau...,418,"[{'alias': 'japanese', 'title': 'Japanese'}, {...",3.5,"{'latitude': 30.161115, 'longitude': -97.791604}",[delivery],$$,"{'address1': '9600 S Interstate 35', 'address2...",15122915600.0,(512) 291-5600,8733.885038
4,khuHvlcz53RqjzfjrFbvmg,yoshiko-austin,Yoshiko,https://s3-media2.fl.yelpcdn.com/bphoto/XcHHpA...,False,https://www.yelp.com/biz/yoshiko-austin?adjust...,100,"[{'alias': 'ramen', 'title': 'Ramen'}, {'alias...",4.0,"{'latitude': 30.169597027649644, 'longitude': ...","[delivery, pickup]",$$,"{'address1': '8900 S Congress Ave', 'address2'...",,,9395.047198
5,7IuVOU8gHGmpG9xydhgxqQ,muse-fusion-sushi-austin,MUSE Fusion+Sushi,https://s3-media2.fl.yelpcdn.com/bphoto/E0NLY2...,False,https://www.yelp.com/biz/muse-fusion-sushi-aus...,94,"[{'alias': 'sushi', 'title': 'Sushi Bars'}, {'...",4.5,"{'latitude': 30.23445, 'longitude': -97.79379}",[],$$,"{'address1': '4211 S Lamar Blvd', 'address2': ...",15122913637.0,(512) 291-3637,13757.773316
6,vPlAUK-QMfetFvVwm9PGig,dk-sushi-south-and-seoul-korean-restaurant-aus...,DK Sushi South & Seoul Korean Restaurant,https://s3-media3.fl.yelpcdn.com/bphoto/whgJwS...,False,https://www.yelp.com/biz/dk-sushi-south-and-se...,329,"[{'alias': 'sushi', 'title': 'Sushi Bars'}, {'...",4.0,"{'latitude': 30.20029477492128, 'longitude': -...","[delivery, pickup]",$$,"{'address1': '6400 S 1st St', 'address2': 'Ste...",15123265807.0,(512) 326-5807,11443.329697
7,Du_ky4T3VAuObjwA3IuswA,rose-s-kitchen-austin,Rose’s Kitchen,https://s3-media3.fl.yelpcdn.com/bphoto/rV7xkn...,False,https://www.yelp.com/biz/rose-s-kitchen-austin...,30,"[{'alias': 'thai', 'title': 'Thai'}, {'alias':...",5.0,"{'latitude': 30.15754, 'longitude': -97.83403}","[delivery, pickup]",,"{'address1': '11200 Menchaca Rd', 'address2': ...",15127856615.0,(512) 785-6615,4934.457764
8,vDalkq9fz5XY_fr-FGZQuQ,umi-sushi-bar-and-grill-austin,Umi Sushi Bar & Grill,https://s3-media2.fl.yelpcdn.com/bphoto/hwJINW...,False,https://www.yelp.com/biz/umi-sushi-bar-and-gri...,946,"[{'alias': 'japanese', 'title': 'Japanese'}, {...",4.0,"{'latitude': 30.200633, 'longitude': -97.764297}","[delivery, pickup]",$$,"{'address1': '5510 I-35', 'address2': 'Ste 400...",15123838681.0,(512) 383-8681,13013.788797
9,R9n_wjS-C5nWY_OQt7cKdg,tadashi-bee-cave,Tadashi,https://s3-media1.fl.yelpcdn.com/bphoto/1-F_DF...,False,https://www.yelp.com/biz/tadashi-bee-cave?adju...,299,"[{'alias': 'sushi', 'title': 'Sushi Bars'}, {'...",3.5,"{'latitude': 30.3093715923974, 'longitude': -9...","[delivery, pickup]",$$,"{'address1': '12820 Hill Country Blvd', 'addre...",15122633200.0,(512) 263-3200,20545.982385


In [16]:
#Where is the actual data we want to save?
#How many did we get the details for?
results_per_page = len(results['businesses'])
results_per_page

20

In [17]:
#Calculate how many pages of results needed to cover the total_results
n_pages = math.ceil((results['total'])/results_per_page)
n_pages

10

In [18]:
for i in tqdm_notebook(range(1,n_pages+1)):
    #the block of code we need to TRY to run
    try:
        time.sleep(.2)
        
        #Read in results in progress file and check the length
        with open (json_file, 'r') as f:
            previous_results = json.load(f)
            
        #Save number of results for to use as offset
        n_results = len(previous_results)
        
        results = yelp.search_query(location=location, term=term,
                                   offset = n_results+1)
        
        #append new results and save to file
        previous_results.extend(results['businesses'])
        
        with open (json_file, 'w') as f:
            json.dump(previous_results, f)
        
    #What to do if we get an error / exception:
    except Exception as e:
        print(' [!] ERROR', e)

  0%|          | 0/10 [00:00<?, ?it/s]

In [19]:
#Open the final json file with pandas
df = pd.read_json(json_file)
df.head()

Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
0,NRPemqVb4qpWFF0Avq_6OQ,eurasia-sushi-bar-and-seafood-austin-2,Eurasia Sushi Bar & Seafood,https://s3-media2.fl.yelpcdn.com/bphoto/u0zypd...,False,https://www.yelp.com/biz/eurasia-sushi-bar-and...,483,"[{'alias': 'sushi', 'title': 'Sushi Bars'}, {'...",4.5,"{'latitude': 30.23475765103238, 'longitude': -...","[pickup, delivery]",$$,"{'address1': '7101 W Hwy 71', 'address2': 'Ste...",15123820968.0,(512) 382-0968,11292.666778
1,cWgni4c-EZhsbQBoCwANhw,nanami-sushi-bar-and-grill-austin,Nanami Sushi Bar & Grill,https://s3-media3.fl.yelpcdn.com/bphoto/5KqkN4...,False,https://www.yelp.com/biz/nanami-sushi-bar-and-...,415,"[{'alias': 'sushi', 'title': 'Sushi Bars'}, {'...",3.5,"{'latitude': 30.1934934475843, 'longitude': -9...","[pickup, delivery]",$$,"{'address1': '9001 Brodie Ln', 'address2': 'St...",15122924228.0,(512) 292-4228,7439.741259
2,DrfvW9_9pytbYzqRgNTeYw,haiku-japanese-restaurant-austin,Haiku Japanese Restaurant,https://s3-media1.fl.yelpcdn.com/bphoto/ReVb1V...,False,https://www.yelp.com/biz/haiku-japanese-restau...,418,"[{'alias': 'japanese', 'title': 'Japanese'}, {...",3.5,"{'latitude': 30.161115, 'longitude': -97.791604}",[delivery],$$,"{'address1': '9600 S Interstate 35', 'address2...",15122915600.0,(512) 291-5600,8733.885038
3,vPlAUK-QMfetFvVwm9PGig,dk-sushi-south-and-seoul-korean-restaurant-aus...,DK Sushi South & Seoul Korean Restaurant,https://s3-media3.fl.yelpcdn.com/bphoto/whgJwS...,False,https://www.yelp.com/biz/dk-sushi-south-and-se...,329,"[{'alias': 'sushi', 'title': 'Sushi Bars'}, {'...",4.0,"{'latitude': 30.20029477492128, 'longitude': -...","[pickup, delivery]",$$,"{'address1': '6400 S 1st St', 'address2': 'Ste...",15123265807.0,(512) 326-5807,11443.329697
4,khuHvlcz53RqjzfjrFbvmg,yoshiko-austin,Yoshiko,https://s3-media2.fl.yelpcdn.com/bphoto/XcHHpA...,False,https://www.yelp.com/biz/yoshiko-austin?adjust...,100,"[{'alias': 'ramen', 'title': 'Ramen'}, {'alias...",4.0,"{'latitude': 30.169597027649644, 'longitude': ...","[pickup, delivery]",$$,"{'address1': '8900 S Congress Ave', 'address2'...",,,9395.047198


In [20]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 192 entries, 0 to 191
Data columns (total 16 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   id             192 non-null    object 
 1   alias          192 non-null    object 
 2   name           192 non-null    object 
 3   image_url      192 non-null    object 
 4   is_closed      192 non-null    bool   
 5   url            192 non-null    object 
 6   review_count   192 non-null    int64  
 7   categories     192 non-null    object 
 8   rating         192 non-null    float64
 9   coordinates    192 non-null    object 
 10  transactions   192 non-null    object 
 11  price          163 non-null    object 
 12  location       192 non-null    object 
 13  phone          192 non-null    object 
 14  display_phone  192 non-null    object 
 15  distance       192 non-null    float64
dtypes: bool(1), float64(2), int64(1), object(12)
memory usage: 22.8+ KB


In [21]:
#convert the filename to a .csv.gz
csv_file = json_file.replace('.json', 'csv.gz')
csv_file

'Data/Austin-sushicsv.gz'

In [22]:
#Save it as a compressed csv to save space
df.to_csv(csv_file, compression='gzip', index=False)

In [24]:
df.duplicated(subset='id').sum()

0