In [1]:
# Standard Imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Additional Imports
import os, json, math, time

# to make yelpapi calls
from yelpapi import YelpAPI

# progress bar from tqdm_notebook
from tqdm.notebook import tqdm_notebook

In [2]:
!pip install yelpapi
!pip install tqdm



# Load Credentials and Create Yelp API Object

In [3]:
# Load API Credentials
with open('/Users/huytran_1/.secret/yelp_api.json', 'r') as f:  
    login = json.load(f)

In [4]:
# Instantiate YelpAPI Variable
yelp = YelpAPI(login['api-key'], timeout_s=5.0)

# Define Search Terms and File Paths

In [5]:
# set our API call parameters and filename before the first call
location = 'Lafayette, LA 70508'
term = 'seafood'

In [6]:
location.split(',')[0]

'Lafayette'

In [7]:
## Specify folder for saving data
FOLDER = 'Data/'

os.makedirs(FOLDER, exist_ok = True)
# Specifying JSON_FILE filename (can include a folder)
JSON_FILE = FOLDER+f"{location.split(',')[0]}-{term}.json"

In [8]:
JSON_FILE

'Data/Lafayette-seafood.json'

# Check if Json File exists and Create it if it doesn't

In [9]:
## Check if JSON_FILE exists
file_exists = os.path.isfile(JSON_FILE)
## If it does not exist: 
if file_exists == False:    
    ## CREATE ANY NEEDED FOLDERS
    # Get the Folder Name only
    folder = os.path.dirname(JSON_FILE)
    
    ## If JSON folder name is not empty:
    if len(folder)>0:
        # create the folder
        os.makedirs(folder, exist_ok = True)
        
        
    ## INFORM USER AND SAVE EMPTY LIST
    print(f"[i] {JSON_FILE} not found. Saving empty list to file.")
    
    
    ## save the first page of results
    with open(JSON_FILE, 'w') as f:
          json.dump([], f)
        
## If it exists, inform user
else:
    print(f"[i] {JSON_FILE} already exists.")

[i] Data/Lafayette-seafood.json not found. Saving empty list to file.


In [10]:
os.path.isfile(JSON_FILE)

True

# Load JSON FIle and account for previous results

# Make the first API call to get the first page of data

In [11]:
# use our yelp_api variable's search_query method to perform our API call
results = yelp.search_query(term = term, location = location)

In [12]:
type(results)

dict

In [13]:
len(results)

3

In [14]:
results.keys()

dict_keys(['businesses', 'total', 'region'])

In [15]:
results['total']

222

In [16]:
results['region']

{'center': {'longitude': -92.03384399414062, 'latitude': 30.158149285486516}}

In [17]:
results['businesses']

[{'id': 'l2X390e5Reoxid5v8jOvvQ',
  'alias': 'the-cajun-table-lafayette',
  'name': 'The Cajun Table',
  'image_url': 'https://s3-media3.fl.yelpcdn.com/bphoto/DgnTYQ3aADIcqgJ62FNkow/o.jpg',
  'is_closed': False,
  'url': 'https://www.yelp.com/biz/the-cajun-table-lafayette?adjust_creative=GAXTYfb4qtwMRsCd0avxFw&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=GAXTYfb4qtwMRsCd0avxFw',
  'review_count': 158,
  'categories': [{'alias': 'cajun', 'title': 'Cajun/Creole'}],
  'rating': 4.5,
  'coordinates': {'latitude': 30.1542038, 'longitude': -92.0485576},
  'transactions': ['delivery'],
  'price': '$$',
  'location': {'address1': '4510 Ambassador Caffery',
   'address2': 'Ste D',
   'address3': None,
   'city': 'Lafayette',
   'zip_code': '70508',
   'country': 'US',
   'state': 'LA',
   'display_address': ['4510 Ambassador Caffery',
    'Ste D',
    'Lafayette, LA 70508']},
  'phone': '+13378069565',
  'display_phone': '(337) 806-9565',
  'distance': 1481.115837374508

In [18]:
## How many results total?
pd.DataFrame(results['businesses'])

Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
0,l2X390e5Reoxid5v8jOvvQ,the-cajun-table-lafayette,The Cajun Table,https://s3-media3.fl.yelpcdn.com/bphoto/DgnTYQ...,False,https://www.yelp.com/biz/the-cajun-table-lafay...,158,"[{'alias': 'cajun', 'title': 'Cajun/Creole'}]",4.5,"{'latitude': 30.1542038, 'longitude': -92.0485...",[delivery],$$,"{'address1': '4510 Ambassador Caffery', 'addre...",13378069565,(337) 806-9565,1481.115837
1,_eUkcZsTcwpgQ4g3NtJ8sg,bon-temps-grill-lafayette,Bon Temps Grill,https://s3-media4.fl.yelpcdn.com/bphoto/vgoqns...,False,https://www.yelp.com/biz/bon-temps-grill-lafay...,952,"[{'alias': 'cajun', 'title': 'Cajun/Creole'}, ...",4.5,"{'latitude': 30.2024736, 'longitude': -92.0140...","[delivery, pickup]",$$,"{'address1': '1211 W Pinhook Rd', 'address2': ...",13377068850,(337) 706-8850,5283.372659
2,uoE_f470PCOozaATDyRpjA,dons-seafood-lafayette-lafayette,Don's Seafood - Lafayette,https://s3-media2.fl.yelpcdn.com/bphoto/DfnwRx...,False,https://www.yelp.com/biz/dons-seafood-lafayett...,348,"[{'alias': 'seafood', 'title': 'Seafood'}, {'a...",4.0,"{'latitude': 30.19525, 'longitude': -92.05694}","[delivery, pickup]",$$,"{'address1': '4309 Johnston St', 'address2': '...",13379811141,(337) 981-1141,4684.858755
3,3PZX6hODVlvbaSKAO1j4Ww,the-crawfish-spot-lafayette,The Crawfish Spot,https://s3-media3.fl.yelpcdn.com/bphoto/a5KYi8...,False,https://www.yelp.com/biz/the-crawfish-spot-laf...,120,"[{'alias': 'cajun', 'title': 'Cajun/Creole'}, ...",4.5,"{'latitude': 30.1899, 'longitude': -92.01352}","[delivery, pickup]",$$,"{'address1': '1899 W Pinhook Rd', 'address2': ...",13372378533,(337) 237-8533,4034.645168
4,79Jr4VhRUe_k687DqEGasA,mandezs-seafood-bar-and-grill-lafayette,Mandez's Seafood Bar & Grill,https://s3-media3.fl.yelpcdn.com/bphoto/Hu_t6e...,False,https://www.yelp.com/biz/mandezs-seafood-bar-a...,79,"[{'alias': 'seafood', 'title': 'Seafood'}, {'a...",4.5,"{'latitude': 30.2021924927875, 'longitude': -9...","[delivery, pickup]",$$,"{'address1': '110 Doucet Rd', 'address2': '', ...",13377693917,(337) 769-3917,5081.836529
5,oe356jU3hGaw2caDZcD-aQ,half-shell-oyster-house-lafayette,Half Shell Oyster House,https://s3-media4.fl.yelpcdn.com/bphoto/q3wNWJ...,False,https://www.yelp.com/biz/half-shell-oyster-hou...,135,"[{'alias': 'cajun', 'title': 'Cajun/Creole'}, ...",4.0,"{'latitude': 30.1539250703709, 'longitude': -9...",[delivery],$$,"{'address1': '109 Old Camp Rd', 'address2': 'S...",13374841120,(337) 484-1120,1030.500193
6,n0RkGJQTMCcdIy-AKk03VQ,uncle-ts-oyster-bar-scott,Uncle T's Oyster Bar,https://s3-media1.fl.yelpcdn.com/bphoto/gA0o_2...,False,https://www.yelp.com/biz/uncle-ts-oyster-bar-s...,221,"[{'alias': 'bars', 'title': 'Bars'}, {'alias':...",4.5,"{'latitude': 30.235258, 'longitude': -92.09492}",[delivery],$$,"{'address1': '1001 St Mary St', 'address2': ''...",13375042285,(337) 504-2285,10390.825149
7,eRdYDjaZcOjpPVF1sAR1cQ,louisiana-crawfish-time-lafayette,Louisiana Crawfish Time,https://s3-media2.fl.yelpcdn.com/bphoto/11SSJL...,False,https://www.yelp.com/biz/louisiana-crawfish-ti...,75,"[{'alias': 'cajun', 'title': 'Cajun/Creole'}, ...",4.5,"{'latitude': 30.156499764300143, 'longitude': ...",[delivery],$$,"{'address1': '2019 Verot School Rd', 'address2...",13379882645,(337) 988-2645,982.927653
8,xUmy6KQQluX7enVlSgEEow,prejeans-lafayette-3,Prejean's,https://s3-media2.fl.yelpcdn.com/bphoto/GMMw6X...,False,https://www.yelp.com/biz/prejeans-lafayette-3?...,723,"[{'alias': 'cajun', 'title': 'Cajun/Creole'}, ...",4.0,"{'latitude': 30.306143044990996, 'longitude': ...","[delivery, pickup]",$$,"{'address1': '3480 NE Evangeline Trwy', 'addre...",13378963247,(337) 896-3247,16465.93879
9,yjbNUqfgeJbkVZzk43frtA,fiery-crab-lafayette,Fiery Crab,https://s3-media2.fl.yelpcdn.com/bphoto/_KjOLP...,False,https://www.yelp.com/biz/fiery-crab-lafayette?...,88,"[{'alias': 'seafood', 'title': 'Seafood'}]",2.5,"{'latitude': 30.160371120892343, 'longitude': ...","[delivery, pickup]",,"{'address1': '2330 Kaliste Saloom Rd', 'addres...",13375348118,(337) 534-8118,1109.705111


- Where is the actual data we want to save?

In [19]:
## How many did we get the details for?
results_per_page = len(results['businesses'])
results_per_page

20

- Calculate how many pages of results needed to cover the total_results

In [20]:
(results['total'])/ results_per_page

11.1

In [21]:
# Use math.ceil to round up for the total number of pages of results.
n_pages = math.ceil((results['total'])/ results_per_page)
n_pages

12

In [22]:
for i in tqdm_notebook(range(1,n_pages+1)):
    ## The block of code we want to TRY to run
    try:
        
        time.sleep(.2)
        
        ## Read in results in progress file and check the length
        with open(JSON_FILE, 'r') as f:
            previous_results = json.load(f)
        
        ## save number of results to use as offset
        n_results = len(previous_results)
        
        
        ## use n_results as the OFFSET 
        results = yelp.search_query(location = location, term = term,
                                   offset = n_results+1)

        ## append new results and save to file
        previous_results.extend(results['businesses'])
        
        with open(JSON_FILE, 'w') as f:
            json.dump(previous_results, f)

            
    ## What to do if we get an error/exception.
    except Exception as e:
        print(' [!] ERROR', e)

  0%|          | 0/12 [00:00<?, ?it/s]

# Open the Final JSON File with Pandas

In [23]:
df = pd.read_json(JSON_FILE)

In [24]:
df.head()

Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
0,_eUkcZsTcwpgQ4g3NtJ8sg,bon-temps-grill-lafayette,Bon Temps Grill,https://s3-media4.fl.yelpcdn.com/bphoto/vgoqns...,False,https://www.yelp.com/biz/bon-temps-grill-lafay...,952,"[{'alias': 'cajun', 'title': 'Cajun/Creole'}, ...",4.5,"{'latitude': 30.2024736, 'longitude': -92.0140...","[delivery, pickup]",$$,"{'address1': '1211 W Pinhook Rd', 'address2': ...",13377068850,(337) 706-8850,5283.372659
1,uoE_f470PCOozaATDyRpjA,dons-seafood-lafayette-lafayette,Don's Seafood - Lafayette,https://s3-media2.fl.yelpcdn.com/bphoto/DfnwRx...,False,https://www.yelp.com/biz/dons-seafood-lafayett...,348,"[{'alias': 'seafood', 'title': 'Seafood'}, {'a...",4.0,"{'latitude': 30.19525, 'longitude': -92.05694}","[delivery, pickup]",$$,"{'address1': '4309 Johnston St', 'address2': '...",13379811141,(337) 981-1141,4684.858755
2,3PZX6hODVlvbaSKAO1j4Ww,the-crawfish-spot-lafayette,The Crawfish Spot,https://s3-media3.fl.yelpcdn.com/bphoto/a5KYi8...,False,https://www.yelp.com/biz/the-crawfish-spot-laf...,120,"[{'alias': 'cajun', 'title': 'Cajun/Creole'}, ...",4.5,"{'latitude': 30.1899, 'longitude': -92.01352}","[delivery, pickup]",$$,"{'address1': '1899 W Pinhook Rd', 'address2': ...",13372378533,(337) 237-8533,4034.645168
3,79Jr4VhRUe_k687DqEGasA,mandezs-seafood-bar-and-grill-lafayette,Mandez's Seafood Bar & Grill,https://s3-media3.fl.yelpcdn.com/bphoto/Hu_t6e...,False,https://www.yelp.com/biz/mandezs-seafood-bar-a...,79,"[{'alias': 'seafood', 'title': 'Seafood'}, {'a...",4.5,"{'latitude': 30.2021924927875, 'longitude': -9...","[delivery, pickup]",$$,"{'address1': '110 Doucet Rd', 'address2': '', ...",13377693917,(337) 769-3917,5081.836529
4,oe356jU3hGaw2caDZcD-aQ,half-shell-oyster-house-lafayette,Half Shell Oyster House,https://s3-media4.fl.yelpcdn.com/bphoto/q3wNWJ...,False,https://www.yelp.com/biz/half-shell-oyster-hou...,135,"[{'alias': 'cajun', 'title': 'Cajun/Creole'}, ...",4.0,"{'latitude': 30.1539250703709, 'longitude': -9...",[delivery],$$,"{'address1': '109 Old Camp Rd', 'address2': 'S...",13374841120,(337) 484-1120,1030.500193


In [25]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 221 entries, 0 to 220
Data columns (total 16 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   id             221 non-null    object 
 1   alias          221 non-null    object 
 2   name           221 non-null    object 
 3   image_url      221 non-null    object 
 4   is_closed      221 non-null    bool   
 5   url            221 non-null    object 
 6   review_count   221 non-null    int64  
 7   categories     221 non-null    object 
 8   rating         221 non-null    float64
 9   coordinates    221 non-null    object 
 10  transactions   221 non-null    object 
 11  price          140 non-null    object 
 12  location       221 non-null    object 
 13  phone          221 non-null    object 
 14  display_phone  221 non-null    object 
 15  distance       221 non-null    float64
dtypes: bool(1), float64(2), int64(1), object(12)
memory usage: 26.2+ KB


# Check for duplicates

In [26]:
# check for duplicate ID's 
df.duplicated(subset='id').sum()

0

In [27]:
## convert the filename to a .csv.gz
csv_file = JSON_FILE.replace('.json','.csv.gz')
csv_file

'Data/Lafayette-seafood.csv.gz'

In [28]:
## Save it as a compressed csv (to save space)
df.to_csv(csv_file, compression = 'gzip', index = False)

# Compare filesize with os module's os.path.getsize

In [29]:
size_json = os.path.getsize(JSON_FILE)
size_csv_gz = os.path.getsize(JSON_FILE.replace('.json','.csv.gz'))

print(f'JSON FILE: {size_json:,} Bytes')
print(f'CSV.GZ FILE: {size_csv_gz:,} Bytes')

print(f'the csv.gz is {size_json/size_csv_gz} times smaller!')

JSON FILE: 216,656 Bytes
CSV.GZ FILE: 31,521 Bytes
the csv.gz is 6.873385996637163 times smaller!
