# Yelp API Webscraping

In [12]:
import pandas as pd 
import json
import requests

# Compiled Code
- Don't forget to comment so we can understand!

## known issues:
- function populates dataframe with NaN rows
- do we want business name?
- businesses have multiple categories and function only picks up one at a time
    - currently the 'alias' portion picks up the alias of the business (name), not the category alias

In [10]:
# NOTHING IN THIS CELL NEEDS TO GET CHANGED 
# IF YOU HAVE YOUR API KEY IN 'creds.json' IN ./Assets

# format your json file as a dictionary containing api key with DOUBLE QUOTES
# {"api": "your_super_long_api_key"}
creds_file = open('../Assets/creds.json')

# load credentials into variable
yelp_credentials = json.loads(creds_file.read())
api_key = yelp_credentials['api']
headers = {'Authorization': 'Bearer %s' % api_key}

# this is the url we use to make broad business searches
# https://www.yelp.com/developers/documentation/v3/business_search

url = 'https://api.yelp.com/v3/businesses/search'

In [11]:
# depending on what we want to search we can change values in this dictionary
params = {'term':'food', 'location': 'Los Angeles'}

In [13]:
# ONLY RUN THIS CELL IF YOU WANT TO MAKE A REQUEST
req = requests.get(url, params=params, headers=headers)
print(f'Status Code: {req.status_code}')

Status Code: 200


In [19]:
yelp = json.loads(req.text)

In [60]:
businesses_object = yelp['businesses']
businesses_object[15]

# id
# coordinates
# price
# review_count
# rating
# location > zip_code
# location > city
# categories > alias

{'id': 'ptxjMkVBFfGxOiSEGGNVZg',
 'alias': 'nadri-korean-tea-house-los-angeles',
 'name': 'Nadri Korean Tea House',
 'image_url': 'https://s3-media4.fl.yelpcdn.com/bphoto/-uioZ1kDD0aOgSI40Mi60Q/o.jpg',
 'is_closed': False,
 'url': 'https://www.yelp.com/biz/nadri-korean-tea-house-los-angeles?adjust_creative=hJX25nxjhHdw1ByNun2Ocw&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=hJX25nxjhHdw1ByNun2Ocw',
 'review_count': 9,
 'categories': [{'alias': 'desserts', 'title': 'Desserts'},
  {'alias': 'korean', 'title': 'Korean'},
  {'alias': 'coffee', 'title': 'Coffee & Tea'}],
 'rating': 5.0,
 'coordinates': {'latitude': 34.06369, 'longitude': -118.31087},
 'transactions': [],
 'location': {'address1': '4011 W 6th St',
  'address2': 'Ste 100',
  'address3': None,
  'city': 'Los Angeles',
  'zip_code': '90020',
  'country': 'US',
  'state': 'CA',
  'display_address': ['4011 W 6th St', 'Ste 100', 'Los Angeles, CA 90020']},
 'phone': '',
 'display_phone': '',
 'distance': 993

In [58]:
# stole this function from project 3 - jerry

def get_price(yelp_object):
    
    # empty list container containing dictionaries representing each unique business
    businesses = []
    businesses_object = yelp_object['businesses']
    
    # looking at each subreddit which is passed into our function
    for i, business in enumerate(list(businesses_object)):
        
        # each dictionary will contain all of the desired information from each post
        business_dict = {}

        if 'price' in businesses_object[i].keys():
            business_dict['id']           = business['id']
            business_dict['latitude']     = business['coordinates']['latitude']
            business_dict['longitude']    = business['coordinates']['longitude']
            business_dict['price']        = business['price']
            business_dict['review_count'] = business['review_count']
            business_dict['rating']       = business['rating']
            business_dict['zip_code']     = business['location']['zip_code']
            business_dict['city']         = business['location']['city']
            business_dict['alias']        = business['alias']

        # populate the posts list with each post dictionary
        businesses.append(business_dict)

        # just a little sanity check to see how far along our function is going
        print(f'{i+1} out of {len(businesses_object)}')
    
    businesses = pd.DataFrame(businesses)
    
    return businesses

In [59]:
get_price(yelp)

1 out of 20
2 out of 20
3 out of 20
4 out of 20
5 out of 20
6 out of 20
7 out of 20
8 out of 20
9 out of 20
10 out of 20
11 out of 20
12 out of 20
13 out of 20
14 out of 20
15 out of 20
16 out of 20
17 out of 20
18 out of 20
19 out of 20
20 out of 20


Unnamed: 0,id,latitude,longitude,price,review_count,rating,zip_code,city,alias
0,pjh40JY5YwWeV8aKhkXERg,34.06577,-118.30847,$,702.0,4.5,90020.0,Los Angeles,myungrang-hot-dog-california-market-la-los-ang...
1,6QeZEUhFk1_vZhWvktKnmA,34.094545,-118.338175,$$,2017.0,4.0,90028.0,Los Angeles,fat-sals-deli-hollywood-los-angeles
2,,,,,,,,,
3,kF_lSWagvBreXwEpCMdHhQ,34.050629,-118.248635,$$,2502.0,4.0,90013.0,Los Angeles,grand-central-market-los-angeles
4,DrToq9357afdpOyO5w0Y5w,34.064009,-118.300701,$$,1961.0,4.5,90020.0,Los Angeles,yup-dduk-la-los-angeles
5,CcqraT0cuGKYEcZ1ri_kxg,34.040403,-118.253512,$$,689.0,5.0,90015.0,Los Angeles,broken-mouth-lees-homestyle-los-angeles-5
6,omaJSGvnj2vaIJ_MBxeyBw,34.032251,-118.334644,$,96.0,4.5,90018.0,Los Angeles,l-a-birria-los-angeles
7,ohosmz6FXVAeoW5nUkYwng,34.090596,-118.277172,$$,2151.0,4.5,90026.0,Los Angeles,pine-and-crane-los-angeles
8,PQGsaaJ9YVL_Sgiy7aB70Q,34.06344,-118.29831,$,223.0,4.0,90020.0,Los Angeles,street-food-of-seoul-los-angeles
9,fxeuGYnoRWwm5aGDg1FRJA,34.050023,-118.239935,$$,3558.0,4.5,90012.0,Los Angeles,marugame-monzo-los-angeles-2


## aerika section

In [1]:
hi

NameError: name 'hi' is not defined

In [2]:
my

NameError: name 'my' is not defined

In [3]:
name

NameError: name 'name' is not defined

## echo section

added comment

In [None]:
added cell

## jerry section

In [None]:
# ds;alkfjds;lfkas;dghas'dlfjas;dlkfjas'dfj

In [1]:
# this is another change a;sdfja;dfkasd;fajsd;fasdkfjas;fwef

In [2]:
# yet another change ;asdlfkjas;dknva;djaweoifsd;laknfas;dfknasd

In [3]:
x = np.exp(52)
x

3.831008000716577e+22