In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os, json, math, time
from yelpapi import YelpAPI
from tqdm.notebook import tqdm_notebook

In [2]:
with open("/Users/carlo/.secret/yelp_api.json", 'r') as f:
    login = json.load(f)
login.keys()

dict_keys(['client-id', 'api-key'])

In [3]:
yelp = YelpAPI(login['api-key'], timeout_s=5.0)

In [4]:
location = 'Los Angeles, CA 90027'
term = 'italian'

In [6]:
location.split(',')[0]

'Los Angeles'

In [9]:
FOLDER = 'Data/'
os.makedirs(FOLDER, exist_ok=True)

JSON_FILE = FOLDER+f"{location.split(',')[0]}-{term}.json"
JSON_FILE

'Data/Los Angeles-italian.json'

In [11]:
file_exists = os.path.isfile(JSON_FILE)
if file_exists == False:
    folder = os.path.dirname(JSON_FILE)
    if len(folder)>0:
        os.makedirs(folder, exist_ok=True)
    print(f"[i] {JSON_FILE} not found. Saving empty list to file.")
    with open(JSON_FILE, 'w') as f:
        json.dump([], f)
else:
    print(f"[i] {JSON_FILE} already exists.")

[i] Data/Los Angeles-italian.json not found. Saving empty list to file.


In [13]:
results = yelp.search_query(term=term, location=location)
type(results)

dict

In [14]:
len(results)

3

In [15]:
results.keys()

dict_keys(['businesses', 'total', 'region'])

In [16]:
results['total']

1100

In [17]:
results['region']

{'center': {'longitude': -118.28910827636719, 'latitude': 34.12827883313062}}

In [18]:
results['businesses']

[{'id': '-KqNRM-drDM7Nu9i9xEnfA',
  'alias': 'la-pergoletta-los-angeles-4',
  'name': 'La Pergoletta',
  'image_url': 'https://s3-media3.fl.yelpcdn.com/bphoto/G1XjeBOpUByZsPHFFiNFgQ/o.jpg',
  'is_closed': False,
  'url': 'https://www.yelp.com/biz/la-pergoletta-los-angeles-4?adjust_creative=mXw-LHOZWkYoDBfSh2oMDQ&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=mXw-LHOZWkYoDBfSh2oMDQ',
  'review_count': 1346,
  'categories': [{'alias': 'italian', 'title': 'Italian'}],
  'rating': 4.5,
  'coordinates': {'latitude': 34.1037812, 'longitude': -118.2872479},
  'transactions': ['restaurant_reservation', 'pickup', 'delivery'],
  'price': '$$',
  'location': {'address1': '1802 Hillhurst Ave',
   'address2': None,
   'address3': '',
   'city': 'Los Angeles',
   'zip_code': '90027',
   'country': 'US',
   'state': 'CA',
   'display_address': ['1802 Hillhurst Ave', 'Los Angeles, CA 90027']},
  'phone': '+13236648259',
  'display_phone': '(323) 664-8259',
  'distance': 2730.591

In [19]:
pd.DataFrame(results['businesses'])

Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
0,-KqNRM-drDM7Nu9i9xEnfA,la-pergoletta-los-angeles-4,La Pergoletta,https://s3-media3.fl.yelpcdn.com/bphoto/G1XjeB...,False,https://www.yelp.com/biz/la-pergoletta-los-ang...,1346,"[{'alias': 'italian', 'title': 'Italian'}]",4.5,"{'latitude': 34.1037812, 'longitude': -118.287...","[restaurant_reservation, pickup, delivery]",$$,"{'address1': '1802 Hillhurst Ave', 'address2':...",13236648259.0,(323) 664-8259,2730.59101
1,IMvn6wIq4YvJ9Mk5rqdX8g,spina-los-angeles,Spina,https://s3-media2.fl.yelpcdn.com/bphoto/GO39QC...,False,https://www.yelp.com/biz/spina-los-angeles?adj...,13,"[{'alias': 'italian', 'title': 'Italian'}, {'a...",4.5,"{'latitude': 34.11792, 'longitude': -118.26131}",[],,"{'address1': '3193 Glendale Blvd', 'address2':...",,,2803.730389
2,WylKsZv1qKCDqrHd-joiOA,michelangelo-ristorante-los-angeles-2,Michelangelo Ristorante,https://s3-media2.fl.yelpcdn.com/bphoto/ZRopJJ...,False,https://www.yelp.com/biz/michelangelo-ristoran...,961,"[{'alias': 'italian', 'title': 'Italian'}, {'a...",4.5,"{'latitude': 34.1069829190952, 'longitude': -1...","[pickup, delivery]",$$,"{'address1': '2742 Rowena Ave', 'address2': ''...",13236604843.0,(323) 660-4843,3333.343328
3,ArwvKXMv027t4boQiSqvhQ,dal-milanese-los-angeles,Dal Milanese,https://s3-media2.fl.yelpcdn.com/bphoto/OZcLjI...,False,https://www.yelp.com/biz/dal-milanese-los-ange...,98,"[{'alias': 'italian', 'title': 'Italian'}]",4.5,"{'latitude': 34.109437, 'longitude': -118.287155}","[pickup, delivery]",$$$,"{'address1': '2064 Hillhurst Ave', 'address2':...",13233726002.0,(323) 372-6002,2108.104111
4,DU3r_rd4wKXxQfAaqHLRYg,farfalla-trattoria-los-angeles,Farfalla Trattoria,https://s3-media1.fl.yelpcdn.com/bphoto/MO0LHt...,False,https://www.yelp.com/biz/farfalla-trattoria-lo...,756,"[{'alias': 'italian', 'title': 'Italian'}]",4.0,"{'latitude': 34.1074686, 'longitude': -118.287...","[pickup, delivery]",$$,"{'address1': '1978 Hillhurst Ave', 'address2':...",13236617365.0,(323) 661-7365,2310.600183
5,WPe9r4_b_ByxM-c3OhbDew,il-capriccio-on-vermont-los-angeles,Il Capriccio on Vermont,https://s3-media2.fl.yelpcdn.com/bphoto/lXWn8R...,False,https://www.yelp.com/biz/il-capriccio-on-vermo...,1076,"[{'alias': 'italian', 'title': 'Italian'}, {'a...",4.0,"{'latitude': 34.103042, 'longitude': -118.291857}","[restaurant_reservation, pickup, delivery]",$$,"{'address1': '1757 N Vermont Ave', 'address2':...",13236625900.0,(323) 662-5900,2817.590529
6,T96uMQQ3xEnT0wmWxCfwww,la-bella-pinseria-romana-glendale,La Bella Pinseria Romana,https://s3-media3.fl.yelpcdn.com/bphoto/c38Rne...,False,https://www.yelp.com/biz/la-bella-pinseria-rom...,362,"[{'alias': 'italian', 'title': 'Italian'}, {'a...",4.5,"{'latitude': 34.1503167, 'longitude': -118.258...","[pickup, delivery]",$$,"{'address1': '309 N Central Ave', 'address2': ...",18186468599.0,(818) 646-8599,3780.399178
7,XN31yqCpt4_aOJDoceuCvw,allacqua-los-angeles-2,All'Acqua,https://s3-media3.fl.yelpcdn.com/bphoto/6ukUGH...,False,https://www.yelp.com/biz/allacqua-los-angeles-...,707,"[{'alias': 'italian', 'title': 'Italian'}, {'a...",4.0,"{'latitude': 34.1184299, 'longitude': -118.25971}","[pickup, delivery]",$$,"{'address1': '3280 Glendale Blvd', 'address2':...",13236633280.0,(323) 663-3280,2915.574007
8,88kri8FhXy8b3DQ_QjSMmQ,l-antica-pizzeria-da-michele-los-angeles-2,L'Antica Pizzeria Da Michele,https://s3-media2.fl.yelpcdn.com/bphoto/SCuuHj...,False,https://www.yelp.com/biz/l-antica-pizzeria-da-...,2111,"[{'alias': 'pizza', 'title': 'Pizza'}, {'alias...",4.5,"{'latitude': 34.09906082844746, 'longitude': -...","[pickup, delivery]",$$$,"{'address1': '1534 N Mccadden Pl', 'address2':...",13233662408.0,(323) 366-2408,5499.656056
9,D3VUJE1Ye4qFWiypiavtOw,mas-italian-kitchen-burbank-2,MA'S Italian Kitchen,https://s3-media2.fl.yelpcdn.com/bphoto/dK5n3_...,False,https://www.yelp.com/biz/mas-italian-kitchen-b...,1284,"[{'alias': 'italian', 'title': 'Italian'}, {'a...",4.5,"{'latitude': 34.1548299, 'longitude': -118.34559}","[pickup, delivery]",$$,"{'address1': '267 North Pass Ave', 'address2':...",18185672288.0,(818) 567-2288,5992.624151


In [20]:
results_per_page = len(results['businesses'])
results_per_page

20

In [21]:
(results['total'])/results_per_page

55.0

In [22]:
n_pages = math.ceil((results['total'])/results_per_page)
n_pages

55

In [24]:
for i in tqdm_notebook(range(1,n_pages+1)):
    try:
        time.sleep(.2)
        with open(JSON_FILE, 'r') as f:
            previous_results = json.load(f)
        n_results = len(previous_results)
        results = yelp.search_query(location=location, term=term, offset=n_results+1)
        previous_results.extend(results['businesses'])
        with open(JSON_FILE, 'w') as f:
            json.dump(previous_results, f)
    except Exception as e:
        print(' [!] ERROR', e)

  0%|          | 0/55 [00:00<?, ?it/s]

 [!] ERROR 400 Client Error: Bad Request for url: https://api.yelp.com/v3/businesses/search?location=Los+Angeles%2C+CA+90027&term=italian&offset=981
 [!] ERROR 400 Client Error: Bad Request for url: https://api.yelp.com/v3/businesses/search?location=Los+Angeles%2C+CA+90027&term=italian&offset=981
 [!] ERROR 400 Client Error: Bad Request for url: https://api.yelp.com/v3/businesses/search?location=Los+Angeles%2C+CA+90027&term=italian&offset=981
 [!] ERROR 400 Client Error: Bad Request for url: https://api.yelp.com/v3/businesses/search?location=Los+Angeles%2C+CA+90027&term=italian&offset=981
 [!] ERROR 400 Client Error: Bad Request for url: https://api.yelp.com/v3/businesses/search?location=Los+Angeles%2C+CA+90027&term=italian&offset=981
 [!] ERROR 400 Client Error: Bad Request for url: https://api.yelp.com/v3/businesses/search?location=Los+Angeles%2C+CA+90027&term=italian&offset=981
 [!] ERROR 400 Client Error: Bad Request for url: https://api.yelp.com/v3/businesses/search?location=Los+A

In [28]:
df = pd.read_json(JSON_FILE)
df.info()
df.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 980 entries, 0 to 979
Data columns (total 16 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   id             980 non-null    object 
 1   alias          980 non-null    object 
 2   name           980 non-null    object 
 3   image_url      980 non-null    object 
 4   is_closed      980 non-null    bool   
 5   url            980 non-null    object 
 6   review_count   980 non-null    int64  
 7   categories     980 non-null    object 
 8   rating         980 non-null    float64
 9   coordinates    980 non-null    object 
 10  transactions   980 non-null    object 
 11  location       980 non-null    object 
 12  phone          980 non-null    object 
 13  display_phone  980 non-null    object 
 14  distance       980 non-null    float64
 15  price          816 non-null    object 
dtypes: bool(1), float64(2), int64(1), object(12)
memory usage: 115.9+ KB


Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,location,phone,display_phone,distance,price
0,IMvn6wIq4YvJ9Mk5rqdX8g,spina-los-angeles,Spina,https://s3-media2.fl.yelpcdn.com/bphoto/GO39QC...,False,https://www.yelp.com/biz/spina-los-angeles?adj...,13,"[{'alias': 'italian', 'title': 'Italian'}, {'a...",4.5,"{'latitude': 34.11792, 'longitude': -118.26131}",[],"{'address1': '3193 Glendale Blvd', 'address2':...",,,2803.730389,
1,WylKsZv1qKCDqrHd-joiOA,michelangelo-ristorante-los-angeles-2,Michelangelo Ristorante,https://s3-media2.fl.yelpcdn.com/bphoto/ZRopJJ...,False,https://www.yelp.com/biz/michelangelo-ristoran...,961,"[{'alias': 'italian', 'title': 'Italian'}, {'a...",4.5,"{'latitude': 34.1069829190952, 'longitude': -1...","[pickup, delivery]","{'address1': '2742 Rowena Ave', 'address2': ''...",13236604843.0,(323) 660-4843,3333.343328,$$
2,ArwvKXMv027t4boQiSqvhQ,dal-milanese-los-angeles,Dal Milanese,https://s3-media2.fl.yelpcdn.com/bphoto/OZcLjI...,False,https://www.yelp.com/biz/dal-milanese-los-ange...,98,"[{'alias': 'italian', 'title': 'Italian'}]",4.5,"{'latitude': 34.109437, 'longitude': -118.287155}","[pickup, delivery]","{'address1': '2064 Hillhurst Ave', 'address2':...",13233726002.0,(323) 372-6002,2108.104111,$$$
3,DU3r_rd4wKXxQfAaqHLRYg,farfalla-trattoria-los-angeles,Farfalla Trattoria,https://s3-media1.fl.yelpcdn.com/bphoto/MO0LHt...,False,https://www.yelp.com/biz/farfalla-trattoria-lo...,756,"[{'alias': 'italian', 'title': 'Italian'}]",4.0,"{'latitude': 34.1074686, 'longitude': -118.287...","[pickup, delivery]","{'address1': '1978 Hillhurst Ave', 'address2':...",13236617365.0,(323) 661-7365,2310.600183,$$
4,WPe9r4_b_ByxM-c3OhbDew,il-capriccio-on-vermont-los-angeles,Il Capriccio on Vermont,https://s3-media2.fl.yelpcdn.com/bphoto/lXWn8R...,False,https://www.yelp.com/biz/il-capriccio-on-vermo...,1076,"[{'alias': 'italian', 'title': 'Italian'}, {'a...",4.0,"{'latitude': 34.103042, 'longitude': -118.291857}","[restaurant_reservation, pickup, delivery]","{'address1': '1757 N Vermont Ave', 'address2':...",13236625900.0,(323) 662-5900,2817.590529,$$


In [30]:
csv_file = JSON_FILE.replace('.json', '.csv.gz')
csv_file

'Data/Los Angeles-italian.csv.gz'

In [32]:
df.to_csv(csv_file, compression='gzip', index=False)