# Yelp Extraction 

## Imports

In [1]:
# Standard Imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Additional Imports
# os - for saving and loading files
# json - to work with json files
# math - to round up results
# time - to add a short pause to not overwhelm the server
import os, json, math, time

# to make yelpapi calls
from yelpapi import YelpAPI

# progress bar from tqdm_notebook
from tqdm.notebook import tqdm_notebook

In [2]:
!pip install yelpapi
!pip install tqdm



## Loading credentials 

In [3]:
import json
with open('/Users/blass/.secret/yelp_api.json') as f: #change the path to match YOUR path!!
    login = json.load(f)
login.keys()

dict_keys(['client-id', 'api-key'])

In [5]:
# Instantiate YelpAPI Variable
yelp = YelpAPI(login['api-key'], timeout_s = 5.0)

## Define Search

In [6]:
# set our API call parameters and filename before the first call
location = 'Chicago, IL 60064'
term = 'seafood'

In [7]:
location.split(',')[0]

'Chicago'

In [8]:
## Specify folder for saving data
FOLDER = 'Data/'

os.makedirs(FOLDER, exist_ok = True)
# Specifying JSON_FILE filename (can include a folder)
JSON_FILE = FOLDER+f"{location.split(',')[0]}-{term}.json"

In [9]:
JSON_FILE

'Data/Chicago-seafood.json'

## Check if JSON file exist or Create if it does not

In [10]:
## Check if JSON_FILE exists
file_exists = os.path.isfile(JSON_FILE)

## If it does not exist: 
if file_exists == False:    
    ## CREATE ANY NEEDED FOLDERS
    # Get the Folder Name only
    folder = os.path.dirname(JSON_FILE)
    
    ## If JSON folder name is not empty:
    if len(folder)>0:
        # create the folder
        os.makedirs(folder, exist_ok = True)
        
        
    ## INFORM USER AND SAVE EMPTY LIST
    print(f"[i] {JSON_FILE} not found. Saving empty list to file.")

    
    ## save the first page of results
    with open(JSON_FILE, 'w') as f:
          json.dump([], f)
        
## If it exists, inform user
else:
    print(f"[i] {JSON_FILE} already exists.")

[i] Data/Chicago-seafood.json not found. Saving empty list to file.


In [11]:
os.path.isfile(JSON_FILE)


True

## Load JSON FIle and account for results already in filea

In [12]:
# use our yelp_api variable's search_query method to perform our API call
results = yelp.search_query(term = term, location = location)

In [13]:
type(results)

dict

In [14]:
len(results)

3

In [15]:
results.keys()

dict_keys(['businesses', 'total', 'region'])

In [16]:
results['total']

11

In [17]:
results['region']

{'center': {'longitude': -87.8602409362793, 'latitude': 42.31742885246409}}

In [18]:
results['businesses']

[{'id': 'uqCP-Z9dLEaIVLjRZ4kb7A',
  'alias': 'ostioneria-briza-azul-waukegan',
  'name': 'Ostioneria Briza Azul',
  'image_url': 'https://s3-media3.fl.yelpcdn.com/bphoto/djekuseCBcPX78ENNnay8g/o.jpg',
  'is_closed': False,
  'url': 'https://www.yelp.com/biz/ostioneria-briza-azul-waukegan?adjust_creative=UxOuRlH9oN_SMuVG_RiNhA&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=UxOuRlH9oN_SMuVG_RiNhA',
  'review_count': 28,
  'categories': [{'alias': 'seafood', 'title': 'Seafood'}],
  'rating': 3.5,
  'coordinates': {'latitude': 42.350855, 'longitude': -87.852107},
  'transactions': ['delivery', 'pickup'],
  'price': '$$',
  'location': {'address1': '1611 Belvidere Rd',
   'address2': '',
   'address3': '',
   'city': 'Waukegan',
   'zip_code': '60085',
   'country': 'US',
   'state': 'IL',
   'display_address': ['1611 Belvidere Rd', 'Waukegan, IL 60085']},
  'phone': '+18472445944',
  'display_phone': '(847) 244-5944',
  'distance': 3776.47034182254},
 {'id': '5kXCJ5O

In [19]:
## How many results total?
pd.DataFrame(results['businesses'])

Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
0,uqCP-Z9dLEaIVLjRZ4kb7A,ostioneria-briza-azul-waukegan,Ostioneria Briza Azul,https://s3-media3.fl.yelpcdn.com/bphoto/djekus...,False,https://www.yelp.com/biz/ostioneria-briza-azul...,28,"[{'alias': 'seafood', 'title': 'Seafood'}]",3.5,"{'latitude': 42.350855, 'longitude': -87.852107}","[delivery, pickup]",$$,"{'address1': '1611 Belvidere Rd', 'address2': ...",18472445944,(847) 244-5944,3776.470342
1,5kXCJ5OEutdr7kfeLmgBSQ,eleven19-kitchen-and-cocktails-north-chicago,Eleven19 Kitchen & Cocktails,https://s3-media3.fl.yelpcdn.com/bphoto/hIoYnx...,False,https://www.yelp.com/biz/eleven19-kitchen-and-...,6,"[{'alias': 'seafood', 'title': 'Seafood'}, {'a...",4.0,"{'latitude': 42.341193853587214, 'longitude': ...","[delivery, pickup]",,"{'address1': '1119 10TH ST', 'address2': None,...",18476724265,(847) 672-4265,2891.625701
2,SBW7e8uLIOLCp3Fq9RCcRQ,da-local-boy-waukegan,Da Local Boy,https://s3-media3.fl.yelpcdn.com/bphoto/xf-uyN...,False,https://www.yelp.com/biz/da-local-boy-waukegan...,18,"[{'alias': 'foodtrucks', 'title': 'Food Trucks...",5.0,"{'latitude': 42.353329559078, 'longitude': -87...",[],,"{'address1': None, 'address2': None, 'address3...",18083593614,(808) 359-3614,4045.471682
3,dz5MxYb75uD4-EBzA0VS2Q,maevery-public-house-lake-bluff,Maevery Public House,https://s3-media3.fl.yelpcdn.com/bphoto/CSluKk...,False,https://www.yelp.com/biz/maevery-public-house-...,235,"[{'alias': 'cocktailbars', 'title': 'Cocktail ...",4.0,"{'latitude': 42.279872, 'longitude': -87.8451719}",[delivery],$$,"{'address1': '20 E Scranton Ave', 'address2': ...",18476043952,(847) 604-3952,4356.148158
4,8hwBKiiP-u3yOKkA2Gk7dQ,taqueria-toluca-waukegan,Taqueria Toluca,https://s3-media2.fl.yelpcdn.com/bphoto/8y50JE...,False,https://www.yelp.com/biz/taqueria-toluca-wauke...,59,"[{'alias': 'mexican', 'title': 'Mexican'}]",4.5,"{'latitude': 42.3414349, 'longitude': -87.8416...","[delivery, pickup]",$,"{'address1': '822 10th St', 'address2': '', 'a...",18472447851,(847) 244-7851,3081.812062
5,rVEI0ZlhOSPvnETtvW49mw,los-altos-mexican-restaurant-north-chicago,Los Altos Mexican Restaurant,https://s3-media1.fl.yelpcdn.com/bphoto/sBuvI9...,False,https://www.yelp.com/biz/los-altos-mexican-res...,7,"[{'alias': 'mexican', 'title': 'Mexican'}]",3.0,"{'latitude': 42.31968959112019, 'longitude': -...","[delivery, pickup]",,"{'address1': '2500 Martin Luther King Jr Dr', ...",12246373471,(224) 637-3471,314.502271
6,wIKSv-kL1MVQWm1bsO1Ufg,the-silo-lake-bluff,The Silo Restaurant,https://s3-media2.fl.yelpcdn.com/bphoto/WFxZkG...,False,https://www.yelp.com/biz/the-silo-lake-bluff?a...,552,"[{'alias': 'pizza', 'title': 'Pizza'}, {'alias...",4.0,"{'latitude': 42.280318, 'longitude': -87.874765}","[delivery, pickup]",$$,"{'address1': '625 Rockland Rd', 'address2': ''...",18472347456,(847) 234-7456,4295.948591
7,R_isoF7061_7MIa1obOlBQ,inovasi-lake-bluff,Inovasi,https://s3-media1.fl.yelpcdn.com/bphoto/gZcZUn...,False,https://www.yelp.com/biz/inovasi-lake-bluff?ad...,282,"[{'alias': 'newamerican', 'title': 'American (...",3.5,"{'latitude': 42.279528, 'longitude': -87.844572}","[delivery, pickup]",$$$,"{'address1': '28 E Center Ave', 'address2': ''...",18472951000,(847) 295-1000,4407.812172
8,5M978_AsNKLPFT3u21AgAA,browns-chicken-waukegan,Brown's Chicken,https://s3-media2.fl.yelpcdn.com/bphoto/ESVMLO...,False,https://www.yelp.com/biz/browns-chicken-waukeg...,40,"[{'alias': 'chicken_wings', 'title': 'Chicken ...",3.5,"{'latitude': 42.349395, 'longitude': -87.879966}",[],$,"{'address1': '3150 Belvedere Rd', 'address2': ...",18476621820,(847) 662-1820,3902.094233
9,nBl3DVObC6RezR2NsH7b4w,hunan-hibachi-buffet-waukegan,Hunan Hibachi Buffet,https://s3-media1.fl.yelpcdn.com/bphoto/r0jPJp...,False,https://www.yelp.com/biz/hunan-hibachi-buffet-...,94,"[{'alias': 'japanese', 'title': 'Japanese'}, {...",2.5,"{'latitude': 42.3462905883789, 'longitude': -8...","[delivery, pickup]",$$,"{'address1': '3900 Northpoint Blvd', 'address2...",18476889999,(847) 688-9999,4512.221315


In [20]:
## How many did we get the details for?
results_per_page = len(results['businesses'])
results_per_page

11

In [21]:
(results['total'])/ results_per_page

1.0

In [22]:
# Use math.ceil to round up for the total number of pages of results.
n_pages = math.ceil((results['total'])/ results_per_page)
n_pages

1

In [23]:
for i in tqdm_notebook(range(1,n_pages+1)):
    ## The block of code we want to TRY to run
    try:
        
        time.sleep(.2)
        
        ## Read in results in progress file and check the length
        with open(JSON_FILE, 'r') as f:
            previous_results = json.load(f)
        
        ## save number of results to use as offset
        n_results = len(previous_results)
        
        
        ## use n_results as the OFFSET 
        results = yelp.search_query(location = location, term = term,
                                   offset = n_results+1)

        ## append new results and save to file
        previous_results.extend(results['businesses'])
        
        with open(JSON_FILE, 'w') as f:
            json.dump(previous_results, f)

            
    ## What to do if we get an error/exception.
    except Exception as e:
        print(' [!] ERROR', e)

  0%|          | 0/1 [00:00<?, ?it/s]

## Final Call on JSON Filewith Pandas

In [24]:
df = pd.read_json(JSON_FILE)
df.head()

Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,location,phone,display_phone,distance,price
0,5kXCJ5OEutdr7kfeLmgBSQ,eleven19-kitchen-and-cocktails-north-chicago,Eleven19 Kitchen & Cocktails,https://s3-media3.fl.yelpcdn.com/bphoto/hIoYnx...,False,https://www.yelp.com/biz/eleven19-kitchen-and-...,6,"[{'alias': 'seafood', 'title': 'Seafood'}, {'a...",4.0,"{'latitude': 42.341193853587214, 'longitude': ...","[pickup, delivery]","{'address1': '1119 10TH ST', 'address2': None,...",18476724265,(847) 672-4265,2891.625701,
1,SBW7e8uLIOLCp3Fq9RCcRQ,da-local-boy-waukegan,Da Local Boy,https://s3-media3.fl.yelpcdn.com/bphoto/xf-uyN...,False,https://www.yelp.com/biz/da-local-boy-waukegan...,18,"[{'alias': 'foodtrucks', 'title': 'Food Trucks...",5.0,"{'latitude': 42.353329559078, 'longitude': -87...",[],"{'address1': None, 'address2': None, 'address3...",18083593614,(808) 359-3614,4045.471682,
2,dz5MxYb75uD4-EBzA0VS2Q,maevery-public-house-lake-bluff,Maevery Public House,https://s3-media3.fl.yelpcdn.com/bphoto/CSluKk...,False,https://www.yelp.com/biz/maevery-public-house-...,235,"[{'alias': 'cocktailbars', 'title': 'Cocktail ...",4.0,"{'latitude': 42.279872, 'longitude': -87.8451719}",[delivery],"{'address1': '20 E Scranton Ave', 'address2': ...",18476043952,(847) 604-3952,4356.148158,$$
3,8hwBKiiP-u3yOKkA2Gk7dQ,taqueria-toluca-waukegan,Taqueria Toluca,https://s3-media2.fl.yelpcdn.com/bphoto/8y50JE...,False,https://www.yelp.com/biz/taqueria-toluca-wauke...,59,"[{'alias': 'mexican', 'title': 'Mexican'}]",4.5,"{'latitude': 42.3414349, 'longitude': -87.8416...","[pickup, delivery]","{'address1': '822 10th St', 'address2': '', 'a...",18472447851,(847) 244-7851,3081.812062,$
4,rVEI0ZlhOSPvnETtvW49mw,los-altos-mexican-restaurant-north-chicago,Los Altos Mexican Restaurant,https://s3-media1.fl.yelpcdn.com/bphoto/sBuvI9...,False,https://www.yelp.com/biz/los-altos-mexican-res...,7,"[{'alias': 'mexican', 'title': 'Mexican'}]",3.0,"{'latitude': 42.31968959112019, 'longitude': -...","[pickup, delivery]","{'address1': '2500 Martin Luther King Jr Dr', ...",12246373471,(224) 637-3471,314.502271,


In [25]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10 entries, 0 to 9
Data columns (total 16 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   id             10 non-null     object 
 1   alias          10 non-null     object 
 2   name           10 non-null     object 
 3   image_url      10 non-null     object 
 4   is_closed      10 non-null     bool   
 5   url            10 non-null     object 
 6   review_count   10 non-null     int64  
 7   categories     10 non-null     object 
 8   rating         10 non-null     float64
 9   coordinates    10 non-null     object 
 10  transactions   10 non-null     object 
 11  location       10 non-null     object 
 12  phone          10 non-null     int64  
 13  display_phone  10 non-null     object 
 14  distance       10 non-null     float64
 15  price          6 non-null      object 
dtypes: bool(1), float64(2), int64(2), object(11)
memory usage: 1.3+ KB


## Convert file and save new dataframe

In [26]:
## convert the filename to a .csv.gz
csv_file = JSON_FILE.replace('.json','.csv.gz')
csv_file

'Data/Chicago-seafood.csv.gz'

In [27]:
## Save it as a compressed csv (to save space)
df.to_csv(csv_file, compression = 'gzip', index = False)