<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Imports" data-toc-modified-id="Imports-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Imports</a></span></li></ul></div>

# Imports

In [336]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import requests
from bs4 import BeautifulSoup
from pandas.io.json import json_normalize
from YelpAPI import get_my_api_key

%matplotlib inline

In [331]:
# Yelp API key returned by the local `YelpAPI.get_my_api_key` helper. The search
# functions in this notebook read this global to build their
# `Authorization: Bearer <key>` request header.
api_key = get_my_api_key()

In [None]:
# NOTE: `scrape_urls` is defined in more than one cell of this notebook; keep the
# copies in sync (or delete one) because the cell that runs last wins.
def scrape_urls(url_list):
    '''Scrape restaurant names from a list of article pages.

    Parameters
    ----------
    url_list : iterable of dict
        Each entry provides a ``'url'`` key (page to download) and a
        ``'location'`` key (label copied onto every restaurant found there).

    Returns
    -------
    list of dict
        One ``{'restaurant': name, 'neighborhood': location}`` record per
        element matched by the CSS selector ``'strong [href]'``. In each name
        ``&`` is replaced by ``and`` and the curly apostrophe is removed from
        ``’s``, ``’N`` and ``’n``.

    Notes
    -----
    Scraping is best-effort: a page that fails to download or parse is
    reported on stdout and skipped; records gathered from the other pages are
    still returned.
    '''
    restaurant_list = []
    for url in url_list:
        try:
            # The download sits inside the ``try`` so that an unreachable host
            # is reported and skipped exactly like an HTTP error status,
            # instead of aborting the whole scrape.
            res = requests.get(url=url['url'])
            res.raise_for_status()
            soup = BeautifulSoup(markup=res.text, features='html.parser')
            for item in soup.body.select('strong [href]'):
                restaurant = item.get_text().replace('&', 'and')
                restaurant = restaurant.replace("’s", 's').replace("’N", 'N').replace("’n", 'n')
                restaurant_list.append({'restaurant': restaurant,
                                        'neighborhood': url['location']})
        except Exception as error:
            # Print the caught exception itself; interpolating the bare
            # ``Exception`` class always printed "<class 'Exception'>".
            print(f'Your request raised an error: {error}')
    return restaurant_list

In [None]:
# NOTE: `get_businesses` is defined in more than one cell of this notebook; keep
# the copies in sync (or delete one) because the cell that runs last wins.
def get_businesses(location, term):
    '''Returns businesses in the specified location(s) that match the specified term

    Parameters
    ----------
    location : str or iterable of str
        One place name, or several; each one is searched separately.
    term : str
        Free-text search term sent to Yelp.

    Returns
    -------
    list of dict
        The ``'businesses'`` entries of every successful response, in request
        order (cities in the given order, pages by increasing offset).

    Notes
    -----
    Pages of 50 results are requested at offsets 0, 50, ..., 950. Paging for a
    city stops at the first empty page or at the first non-200 response, which
    is reported on stdout.
    '''
    endpoint = 'https://api.yelp.com/v3/businesses/search'
    headers = dict(Authorization = 'Bearer %s' % api_key)
    business_data = []

    # A bare string would otherwise be iterated character by character.
    cities = [location] if isinstance(location, str) else location

    for city in cities:
        for offset in range(0, 1000, 50):
            parameters = dict(term = term,
                              limit = 50,
                              offset = offset,
                              radius = 40000,
                              # Search the current city only; sending the whole
                              # list repeated the same multi-location query
                              # once per city.
                              location = city)
            response = requests.get(url = endpoint, params = parameters, headers = headers)
            if response.status_code == 200:
                page = response.json()['businesses']
                if not page:
                    # Nothing left for this city: skip the remaining offsets.
                    break
                business_data += page
            elif response.status_code == 400:
                print('400 Error: Bad Request')
                break
            else:
                # Report any other failure instead of silently skipping the page.
                print('{} Error: request for {} at offset {} failed'.format(
                    response.status_code, city, offset))
                break
    return business_data

In [None]:
# NOTE: `search_businesses` is defined in more than one cell of this notebook;
# keep the copies in sync (or delete one) because the cell that runs last wins.
def search_businesses(list_of_businesses):
    '''Searches Yelp for each restaurant by name and keeps the top hit

    Parameters
    ----------
    list_of_businesses : iterable of dict
        Records with a ``'restaurant'`` key (search term) and a
        ``'neighborhood'`` key (sent to Yelp as the search location).

    Returns
    -------
    tuple of (list, list, list)
        ``business_data`` - the first Yelp result of every search that
        returned one; ``unmatched_businesses`` - ``{'restaurant',
        'neighborhood'}`` records for searches that returned nothing or whose
        request failed; ``data_history`` - one ``'Your search X returned Y'``
        string per match, for checking that the top hit is the right place.

    Notes
    -----
    A 400 response stops the whole run; the results gathered so far are
    returned.
    '''
    endpoint = 'https://api.yelp.com/v3/businesses/search'
    headers = dict(Authorization = 'Bearer %s' % api_key)
    business_data = []
    unmatched_businesses = []
    data_history = []

    for item in list_of_businesses:
        parameters = dict(term = item['restaurant'],
                          limit = 1,
                          radius = 40000,
                          location = item['neighborhood'])
        response = requests.get(url = endpoint, params = parameters, headers = headers)

        if response.status_code == 200:
            # Decode the body once and reuse it.
            matches = response.json()['businesses']
            if matches:
                business_data += matches
                data_history.append('Your search {} returned {}'.format(item['restaurant'], matches[0]['name']))
            else:
                print('{} in {} not found in Yelp'.format(item['restaurant'], item['neighborhood']))
                unmatched_businesses.append({'restaurant': item['restaurant'], 'neighborhood': item['neighborhood']})
        elif response.status_code == 400:
            print('400 Error: Bad Request')
            break
        else:
            # Any other status used to drop the restaurant without a trace;
            # report it and keep the record so a later pass can retry it.
            print('{} Error: search for {} in {} failed'.format(
                response.status_code, item['restaurant'], item['neighborhood']))
            unmatched_businesses.append({'restaurant': item['restaurant'], 'neighborhood': item['neighborhood']})
    return business_data, unmatched_businesses, data_history

In [None]:
# NOTE: `search_businesses2` is defined in more than one cell of this notebook;
# keep the copies in sync (or delete one) because the cell that runs last wins.
def search_businesses2(list_of_businesses, location = 'Los Angeles'):
    '''Searches Yelp for each restaurant by name within one fixed location

    Unlike ``search_businesses``, every search uses the same ``location``
    instead of the record's own ``'neighborhood'``.

    Parameters
    ----------
    list_of_businesses : iterable of dict
        Records with ``'restaurant'`` (search term) and ``'neighborhood'``
        (only echoed in messages and in the unmatched records).
    location : str, optional
        Search location sent to Yelp for every record. Defaults to
        ``'Los Angeles'``, the value that used to be hard-coded.

    Returns
    -------
    tuple of (list, list)
        ``business_data`` - the first Yelp result of every search that
        returned one; ``unmatched_businesses2`` - ``{'restaurant',
        'neighborhood'}`` records for searches that returned nothing or whose
        request failed.

    Notes
    -----
    A 400 response stops the whole run; the results gathered so far are
    returned.
    '''
    endpoint = 'https://api.yelp.com/v3/businesses/search'
    headers = dict(Authorization = 'Bearer %s' % api_key)
    business_data = []
    unmatched_businesses2 = []
    for item in list_of_businesses:
        parameters = dict(term = item['restaurant'],
                          limit = 1,
                          radius = 40000,
                          location = location)
        response = requests.get(url = endpoint, params = parameters, headers = headers)

        if response.status_code == 200:
            # Decode the body once and reuse it.
            matches = response.json()['businesses']
            if matches:
                business_data += matches
            else:
                print('{} in {} not found in Yelp'.format(item['restaurant'], item['neighborhood']))
                unmatched_businesses2.append({'restaurant': item['restaurant'], 'neighborhood': item['neighborhood']})
        elif response.status_code == 400:
            print('400 Error: Bad Request')
            break
        else:
            # Any other status used to drop the restaurant without a trace;
            # report it and keep the record so it can be retried.
            print('{} Error: search for {} in {} failed'.format(
                response.status_code, item['restaurant'], location))
            unmatched_businesses2.append({'restaurant': item['restaurant'], 'neighborhood': item['neighborhood']})
    return business_data, unmatched_businesses2

In [272]:
# Guide pages to scrape: each record pairs a location label with the URL of the
# article for that location. All articles live under the same /features/ path,
# so only the final path segment is spelled out per location.
infatuation_urls = [
    {'location': place, 'url': f'https://www.theinfatuation.com/features/{slug}'}
    for place, slug in (
        ('NYC', 'support-nyc-black-owned-restaurants'),
        ('Philadelphia', 'support-philadelphias-black-owned-restaurants'),
        ('Boston', 'boston-black-owned-restaurants'),
        ('Chicago', 'support-black-owned-restaurants-in-chicago'),
        ('Miami', 'miami-black-owned-restaurants'),
        ('Seattle', 'support-black-owned-restaurants-in-seattle'),
        ('Los Angeles', 'support-black-owned-restaurants-in-la'),
        ('San Francisco Bay Area', 'support-black-owned-restaurants-in-the-bay-area'),
        ('Austin', 'austin-black-owned-restaurants-food-trucks'),
    )
]

In [276]:
# Preview every guide except the last entry; the `[:-1]` slice leaves out Austin.
print(infatuation_urls[:-1])

[{'location': 'NYC', 'url': 'https://www.theinfatuation.com/features/support-nyc-black-owned-restaurants'}, {'location': 'Philadelphia', 'url': 'https://www.theinfatuation.com/features/support-philadelphias-black-owned-restaurants'}, {'location': 'Boston', 'url': 'https://www.theinfatuation.com/features/boston-black-owned-restaurants'}, {'location': 'Chicago', 'url': 'https://www.theinfatuation.com/features/support-black-owned-restaurants-in-chicago'}, {'location': 'Miami', 'url': 'https://www.theinfatuation.com/features/miami-black-owned-restaurants'}, {'location': 'Seattle', 'url': 'https://www.theinfatuation.com/features/support-black-owned-restaurants-in-seattle'}, {'location': 'Los Angeles', 'url': 'https://www.theinfatuation.com/features/support-black-owned-restaurants-in-la'}, {'location': 'San Francisco Bay Area', 'url': 'https://www.theinfatuation.com/features/support-black-owned-restaurants-in-the-bay-area'}]


In [284]:
# NOTE: `scrape_urls` is defined in more than one cell of this notebook; keep the
# copies in sync (or delete one) because the cell that runs last wins.
def scrape_urls(url_list):
    '''Scrape restaurant names from a list of article pages.

    Parameters
    ----------
    url_list : iterable of dict
        Each entry provides a ``'url'`` key (page to download) and a
        ``'location'`` key (label copied onto every restaurant found there).

    Returns
    -------
    list of dict
        One ``{'restaurant': name, 'neighborhood': location}`` record per
        element matched by the CSS selector ``'strong [href]'``. In each name
        ``&`` is replaced by ``and`` and the curly apostrophe is removed from
        ``’s``, ``’N`` and ``’n``.

    Notes
    -----
    Scraping is best-effort: a page that fails to download or parse is
    reported on stdout and skipped; records gathered from the other pages are
    still returned.
    '''
    restaurant_list = []
    for url in url_list:
        try:
            # The download sits inside the ``try`` so that an unreachable host
            # is reported and skipped exactly like an HTTP error status,
            # instead of aborting the whole scrape.
            res = requests.get(url=url['url'])
            res.raise_for_status()
            soup = BeautifulSoup(markup=res.text, features='html.parser')
            for item in soup.body.select('strong [href]'):
                restaurant = item.get_text().replace('&', 'and')
                restaurant = restaurant.replace("’s", 's').replace("’N", 'N').replace("’n", 'n')
                restaurant_list.append({'restaurant': restaurant,
                                        'neighborhood': url['location']})
        except Exception as error:
            # Print the caught exception itself; interpolating the bare
            # ``Exception`` class always printed "<class 'Exception'>".
            print(f'Your request raised an error: {error}')
    return restaurant_list

In [285]:
# Scrape every guide except the last entry of `infatuation_urls` (Austin).
# NOTE(review): the reason Austin is left out is not recorded here - confirm
# that the exclusion is intentional.
infatuation_restaurants = scrape_urls(infatuation_urls[:-1])

In [286]:
# Inspect the scraped {'restaurant', 'neighborhood'} records.
infatuation_restaurants

[{'restaurant': 'Tings Jamaican Jerk Chicken', 'neighborhood': 'NYC'},
 {'restaurant': 'Mikey Likes It East Village', 'neighborhood': 'NYC'},
 {'restaurant': 'Brooklyn Chop House', 'neighborhood': 'NYC'},
 {'restaurant': 'Alibi Lounge', 'neighborhood': 'NYC'},
 {'restaurant': 'Benyam', 'neighborhood': 'NYC'},
 {'restaurant': 'BLVD Bistro', 'neighborhood': 'NYC'},
 {'restaurant': 'Charles’ Pan Fried Chicken', 'neighborhood': 'NYC'},
 {'restaurant': 'Chocolat', 'neighborhood': 'NYC'},
 {'restaurant': 'The Edge Harlem', 'neighborhood': 'NYC'},
 {'restaurant': 'Famous Fish Market', 'neighborhood': 'NYC'},
 {'restaurant': 'Field Trip', 'neighborhood': 'NYC'},
 {'restaurant': 'Harlem Hops', 'neighborhood': 'NYC'},
 {'restaurant': 'Home Sweet Harlem', 'neighborhood': 'NYC'},
 {'restaurant': 'Kingston', 'neighborhood': 'NYC'},
 {'restaurant': 'Lee Lees Rugelach', 'neighborhood': 'NYC'},
 {'restaurant': 'Lolos Seafood Shack', 'neighborhood': 'NYC'},
 {'restaurant': 'Melbas', 'neighborhood': 'NY

In [None]:
# Restaurant names written with their apostrophes, one record per restaurant.
# The place label is stored under 'neighborhood' - the key the search_businesses*
# functions read - so the list can be passed to them directly.
# NOTE(review): the first eight names use a straight apostrophe (') and the rest
# a curly one (’); confirm which form the Yelp search matches better.
apostrophe_restaurants = [{'restaurant': "Freda's Carribean and Soul Cuisine", 'neighborhood': 'NYC'},
                          {'restaurant': "Percy's Jerk Hut", 'neighborhood': 'NYC'},
                          {'restaurant': "Yulonda's Edible Sensations", 'neighborhood': 'NYC'},
                          {'restaurant': "Koten's Restaurant", 'neighborhood': 'NYC'},
                          {'restaurant': "Careda's Caribbean Cuisine", 'neighborhood': 'Philadelphia'},
                          # Name only: the stray " in Philadelphia" suffix has been removed.
                          {'restaurant': "Mac'n By Mari", 'neighborhood': 'Philadelphia'},
                          {'restaurant': "Daaiyah's Delicious", 'neighborhood': 'Philadelphia'},
                          {'restaurant': "TyeMeka's Soul Food", 'neighborhood': 'Philadelphia'},
                          {'restaurant': 'Franny Lou’s', 'neighborhood': 'Philadelphia'},
                          {'restaurant': 'Gilben’s Bakery', 'neighborhood': 'Philadelphia'},
                          {'restaurant': 'Jordan Johnson’s Gourmet Seafood', 'neighborhood': 'Philadelphia'},
                          {'restaurant': 'Jr. Billy’s Smokehouse', 'neighborhood': 'Philadelphia'},
                          {'restaurant': 'Philly’s Platinum Grille', 'neighborhood': 'Philadelphia'},
                          {'restaurant': 'Ron’s Carribbean Cafe', 'neighborhood': 'Philadelphia'},
                          {'restaurant': 'Ummi Dee’s Burger Bistro', 'neighborhood': 'Philadelphia'},
                          {'restaurant': 'Victoria’s Kitchen', 'neighborhood': 'Philadelphia'},
                          {'restaurant': 'Zaneyah’s Delights', 'neighborhood': 'Philadelphia'},
                          {'restaurant': 'Atiya Ola’s', 'neighborhood': 'Philadelphia'},
                          {'restaurant': 'Chef Milly’s', 'neighborhood': 'Philadelphia'},
                          {'restaurant': 'Chef Reeky’s', 'neighborhood': 'Philadelphia'},
                          {'restaurant': 'Fred’s Water Ice', 'neighborhood': 'Philadelphia'},
                          {'restaurant': 'Keeboom’s Kitchen', 'neighborhood': 'Philadelphia'},
                          {'restaurant': 'Saddiq’s Water Ice', 'neighborhood': 'Philadelphia'},
                          {'restaurant': 'Sheba’s Soul Plate', 'neighborhood': 'Philadelphia'},
                          {'restaurant': 'Murl’s Kitchen', 'neighborhood': 'Boston'},
                          {'restaurant': 'Yelu’s', 'neighborhood': 'Boston'},
                          {'restaurant': 'Brother’s Kafe Kreyol', 'neighborhood': 'Boston'},
                          {'restaurant': 'Cleo’s Southern Cuisine', 'neighborhood': 'Chicago'},
                          {'restaurant': 'Choka’s Caribbean Restaurant and Lounge', 'neighborhood': 'Miami'},
                          {'restaurant': 'Hammond’s Bakery', 'neighborhood': 'Miami'},
                          {'restaurant': 'Lorna’s Restaurant and Caribbean Grille', 'neighborhood': 'Miami'},
                          {'restaurant': 'Mattie’s House Of Soul', 'neighborhood': 'Miami'},
                          {'restaurant': 'Serious Soul At Cubby’s', 'neighborhood': 'Seattle'},
                          {'restaurant': 'Jamil’s Big Easy', 'neighborhood': 'Seattle'},
                          {'restaurant': 'KJ’s Bakery Cakery', 'neighborhood': 'Seattle'},
                          {'restaurant': 'Drae’s Lake Route Eatery', 'neighborhood': 'Seattle'},
                          {'restaurant': 'Keith’s Kettle Corn', 'neighborhood': 'Los Angeles'},
                          {'restaurant': 'Smakk’D Refreshers and Teas', 'neighborhood': 'Los Angeles'},
                          # Listed once; the two repeated copies only caused repeated searches.
                          {'restaurant': 'Uncle Thurm’s', 'neighborhood': 'Los Angeles'},
                          ]

In [287]:
# Look up every scraped restaurant on Yelp. `matched_businesses` holds the top
# hit of each search that returned one, `unmatched_businesses` the searches that
# returned nothing, and `check` the "Your search X returned Y" audit strings.
matched_businesses, unmatched_businesses, check = search_businesses(infatuation_restaurants)

DaleView Biscuits and Beer in NYC not found in Yelp
Mac’n By Mari in Philadelphia not found in Yelp
Daaiyahs Delicious in Philadelphia not found in Yelp
Sahabah Cafe in Philadelphia not found in Yelp
Honeysuckle Popup in Philadelphia not found in Yelp
Jr. Billys Smokehouse in Philadelphia not found in Yelp
Rons Carribbean Cafe in Philadelphia not found in Yelp
Zaneyahs Delights in Philadelphia not found in Yelp
Pretzel Workz in Philadelphia not found in Yelp
Chef Reekys in Philadelphia not found in Yelp
Keebooms Kitchen in Philadelphia not found in Yelp
Saddiqs Water Ice in Philadelphia not found in Yelp
Natifnatal in Boston not found in Yelp
Raphael Carribean in Boston not found in Yelp
Vaughn Fish and Chips in Boston not found in Yelp
RandS Jamaican in Boston not found in Yelp
PandR Jamaican in Boston not found in Yelp
Shenger Cafe and Ethiopian in Boston not found in Yelp
PandR Jamaican in Boston not found in Yelp
BandB Ice Cream And Candy in Chicago not found in Yelp
Phlavz Bar And

In [202]:
# NOTE(review): no executed cell visible in this part of the notebook assigns
# `nyc_restuarants`; the only visible assignment sits in a cell that was never
# run and labels its rows 'Chicago'. This display presumably relies on leftover
# kernel state - confirm where the variable is defined.
nyc_restuarants

[{'restaurant': 'Tings Jamaican Jerk Chicken', 'neighborhood': 'NYC'},
 {'restaurant': 'Mikey Likes It East Village', 'neighborhood': 'NYC'},
 {'restaurant': 'Brooklyn Chop House', 'neighborhood': 'NYC'},
 {'restaurant': 'Alibi Lounge', 'neighborhood': 'NYC'},
 {'restaurant': 'Benyam', 'neighborhood': 'NYC'},
 {'restaurant': 'BLVD Bistro', 'neighborhood': 'NYC'},
 {'restaurant': 'Charles’ Pan Fried Chicken', 'neighborhood': 'NYC'},
 {'restaurant': 'Chocolat', 'neighborhood': 'NYC'},
 {'restaurant': 'The Edge Harlem', 'neighborhood': 'NYC'},
 {'restaurant': 'Famous Fish Market', 'neighborhood': 'NYC'},
 {'restaurant': 'Field Trip', 'neighborhood': 'NYC'},
 {'restaurant': 'Harlem Hops', 'neighborhood': 'NYC'},
 {'restaurant': 'Home Sweet Harlem', 'neighborhood': 'NYC'},
 {'restaurant': 'Kingston', 'neighborhood': 'NYC'},
 {'restaurant': 'Lee Lee’s Rugelach', 'neighborhood': 'NYC'},
 {'restaurant': 'Lolo’s Seafood Shack', 'neighborhood': 'NYC'},
 {'restaurant': 'Melba’s', 'neighborhood': 

In [None]:
# Download the Chicago guide and collect the text of every element matched by
# 'strong [href]', replacing '&' with 'and' and labelling each row 'Chicago'.
res = requests.get('https://www.theinfatuation.com/features/support-black-owned-restaurants-in-chicago')
try:
    res.raise_for_status()
    chicago_list = BeautifulSoup(markup = res.text, features = "html.parser")
    # NOTE(review): the result keeps the name `nyc_restuarants` so existing
    # references still resolve, but it holds Chicago rows - consider renaming.
    nyc_restuarants = []
    # Iterate the soup parsed just above; the loop previously read the
    # undefined name `nyc_list`, and the resulting NameError was swallowed below.
    for item in chicago_list.body.select('strong [href]'):
        nyc_restuarants.append({'restaurant':item.get_text().replace('&', 'and'), 'neighborhood': 'Chicago'})
except Exception as error:
    # Print the caught exception itself, not the `Exception` class.
    print(f'Your request raised an error: {error}')

In [288]:
# Number of Yelp records collected (at most one top hit per search).
len(matched_businesses)

1144

In [303]:
# Number of searches for which Yelp returned no result.
len(unmatched_businesses)

55

In [289]:
# Audit trail pairing each search term with the name Yelp returned. The top hit
# is not always the same restaurant (the recorded output pairs
# 'Mikey Likes It East Village' with 'Snowdays'), so review it before trusting a match.
check

['Your search Tings Jamaican Jerk Chicken returned Tings Jamaican Jerk Chicken',
 'Your search Mikey Likes It East Village returned Snowdays',
 'Your search Brooklyn Chop House returned Brooklyn Chop House',
 'Your search Alibi Lounge returned Alibi',
 'Your search Benyam returned Benyam Cuisine',
 'Your search BLVD Bistro returned BLVD Bistro',
 'Your search Charles’ Pan Fried Chicken returned Charles Pan Fried Chicken',
 'Your search Chocolat returned Chocolat Restaurant & Bar',
 'Your search The Edge Harlem returned The Edge Harlem',
 'Your search Famous Fish Market returned Famous Fish Market',
 'Your search Field Trip returned Fieldtrip',
 'Your search Harlem Hops returned Harlem Hops',
 'Your search Home Sweet Harlem returned Home Sweet Harlem',
 'Your search Kingston returned Kingston',
 "Your search Lee Lees Rugelach returned Lee Lee's Baked Goods",
 "Your search Lolos Seafood Shack returned Lolo's Seafood Shack",
 "Your search Melbas returned Melba's",
 'Your search 67 Orange 

In [330]:
# endpoint = 'https://api.yelp.com/v3/businesses/search'
# headers = dict(Authorization = 'Bearer %s' % api_key)
# manually_added = []

# parameters = dict(term = 'Groovy Deliciousness',
#                  limit = 5,
#                  radius = 40000,
#                  location = 'Philadelphia')
# response = requests.get(url = endpoint, params = parameters, headers = headers)
# response.json()['businesses']

[{'id': 'OAWa1WML2V1ZLJGD6V3nBQ',
  'alias': 'middle-child-philadelphia',
  'name': 'Middle Child',
  'image_url': 'https://s3-media3.fl.yelpcdn.com/bphoto/69qRShdPNcjYexIM9qk4cg/o.jpg',
  'is_closed': False,
  'url': 'https://www.yelp.com/biz/middle-child-philadelphia?adjust_creative=TbeWL7dBz_EmPK89bHN42A&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=TbeWL7dBz_EmPK89bHN42A',
  'review_count': 322,
  'categories': [{'alias': 'breakfast_brunch', 'title': 'Breakfast & Brunch'},
   {'alias': 'sandwiches', 'title': 'Sandwiches'},
   {'alias': 'coffee', 'title': 'Coffee & Tea'}],
  'rating': 4.5,
  'coordinates': {'latitude': 39.94717, 'longitude': -75.15947},
  'transactions': ['delivery'],
  'price': '$',
  'location': {'address1': '248 S 11th St',
   'address2': None,
   'address3': '',
   'city': 'Philadelphia',
   'zip_code': '19107',
   'country': 'US',
   'state': 'PA',
   'display_address': ['248 S 11th St', 'Philadelphia, PA 19107']},
  'phone': '+126793083

In [327]:
# matched_businesses.append(response.json()['businesses'][0])

In [304]:
# Restaurants for which the Yelp search returned no result.
unmatched_businesses

[{'restaurant': 'DaleView Biscuits and Beer', 'neighborhood': 'NYC'},
 {'restaurant': 'Mac’n By Mari', 'neighborhood': 'Philadelphia'},
 {'restaurant': 'Daaiyahs Delicious', 'neighborhood': 'Philadelphia'},
 {'restaurant': 'Sahabah Cafe', 'neighborhood': 'Philadelphia'},
 {'restaurant': 'Honeysuckle Popup', 'neighborhood': 'Philadelphia'},
 {'restaurant': 'Jr. Billys Smokehouse', 'neighborhood': 'Philadelphia'},
 {'restaurant': 'Rons Carribbean Cafe', 'neighborhood': 'Philadelphia'},
 {'restaurant': 'Zaneyahs Delights', 'neighborhood': 'Philadelphia'},
 {'restaurant': 'Pretzel Workz', 'neighborhood': 'Philadelphia'},
 {'restaurant': 'Chef Reekys', 'neighborhood': 'Philadelphia'},
 {'restaurant': 'Keebooms Kitchen', 'neighborhood': 'Philadelphia'},
 {'restaurant': 'Saddiqs Water Ice', 'neighborhood': 'Philadelphia'},
 {'restaurant': 'Natifnatal', 'neighborhood': 'Boston'},
 {'restaurant': 'Raphael Carribean', 'neighborhood': 'Boston'},
 {'restaurant': 'Vaughn Fish and Chips', 'neighborh

In [12]:
# NOTE: `get_businesses` is defined in more than one cell of this notebook; keep
# the copies in sync (or delete one) because the cell that runs last wins.
def get_businesses(location, term):
    '''Returns businesses in the specified location(s) that match the specified term

    Parameters
    ----------
    location : str or iterable of str
        One place name, or several; each one is searched separately.
    term : str
        Free-text search term sent to Yelp.

    Returns
    -------
    list of dict
        The ``'businesses'`` entries of every successful response, in request
        order (cities in the given order, pages by increasing offset).

    Notes
    -----
    Pages of 50 results are requested at offsets 0, 50, ..., 950. Paging for a
    city stops at the first empty page or at the first non-200 response, which
    is reported on stdout.
    '''
    endpoint = 'https://api.yelp.com/v3/businesses/search'
    headers = dict(Authorization = 'Bearer %s' % api_key)
    business_data = []

    # A bare string would otherwise be iterated character by character.
    cities = [location] if isinstance(location, str) else location

    for city in cities:
        for offset in range(0, 1000, 50):
            parameters = dict(term = term,
                              limit = 50,
                              offset = offset,
                              radius = 40000,
                              # Search the current city only; sending the whole
                              # list repeated the same multi-location query
                              # once per city.
                              location = city)
            response = requests.get(url = endpoint, params = parameters, headers = headers)
            if response.status_code == 200:
                page = response.json()['businesses']
                if not page:
                    # Nothing left for this city: skip the remaining offsets.
                    break
                business_data += page
            elif response.status_code == 400:
                print('400 Error: Bad Request')
                break
            else:
                # Report any other failure instead of silently skipping the page.
                print('{} Error: request for {} at offset {} failed'.format(
                    response.status_code, city, offset))
                break
    return business_data

In [13]:
# Page through Yelp's results for the free-text term around Los Angeles.
# `location` is passed as a list because get_businesses iterates over it.
returned_businesses = get_businesses(location = ['Los Angeles',], term = 'Black Owned Restaurants' )

In [274]:
# NOTE: `search_businesses` is defined in more than one cell of this notebook;
# keep the copies in sync (or delete one) because the cell that runs last wins.
def search_businesses(list_of_businesses):
    '''Searches Yelp for each restaurant by name and keeps the top hit

    Parameters
    ----------
    list_of_businesses : iterable of dict
        Records with a ``'restaurant'`` key (search term) and a
        ``'neighborhood'`` key (sent to Yelp as the search location).

    Returns
    -------
    tuple of (list, list, list)
        ``business_data`` - the first Yelp result of every search that
        returned one; ``unmatched_businesses`` - ``{'restaurant',
        'neighborhood'}`` records for searches that returned nothing or whose
        request failed; ``data_history`` - one ``'Your search X returned Y'``
        string per match, for checking that the top hit is the right place.

    Notes
    -----
    A 400 response stops the whole run; the results gathered so far are
    returned.
    '''
    endpoint = 'https://api.yelp.com/v3/businesses/search'
    headers = dict(Authorization = 'Bearer %s' % api_key)
    business_data = []
    unmatched_businesses = []
    data_history = []

    for item in list_of_businesses:
        parameters = dict(term = item['restaurant'],
                          limit = 1,
                          radius = 40000,
                          location = item['neighborhood'])
        response = requests.get(url = endpoint, params = parameters, headers = headers)

        if response.status_code == 200:
            # Decode the body once and reuse it.
            matches = response.json()['businesses']
            if matches:
                business_data += matches
                data_history.append('Your search {} returned {}'.format(item['restaurant'], matches[0]['name']))
            else:
                print('{} in {} not found in Yelp'.format(item['restaurant'], item['neighborhood']))
                unmatched_businesses.append({'restaurant': item['restaurant'], 'neighborhood': item['neighborhood']})
        elif response.status_code == 400:
            print('400 Error: Bad Request')
            break
        else:
            # Any other status used to drop the restaurant without a trace;
            # report it and keep the record so a later pass can retry it.
            print('{} Error: search for {} in {} failed'.format(
                response.status_code, item['restaurant'], item['neighborhood']))
            unmatched_businesses.append({'restaurant': item['restaurant'], 'neighborhood': item['neighborhood']})
    return business_data, unmatched_businesses, data_history

In [15]:
# NOTE: `search_businesses2` is defined in more than one cell of this notebook;
# keep the copies in sync (or delete one) because the cell that runs last wins.
def search_businesses2(list_of_businesses, location = 'Los Angeles'):
    '''Searches Yelp for each restaurant by name within one fixed location

    Unlike ``search_businesses``, every search uses the same ``location``
    instead of the record's own ``'neighborhood'``.

    Parameters
    ----------
    list_of_businesses : iterable of dict
        Records with ``'restaurant'`` (search term) and ``'neighborhood'``
        (only echoed in messages and in the unmatched records).
    location : str, optional
        Search location sent to Yelp for every record. Defaults to
        ``'Los Angeles'``, the value that used to be hard-coded.

    Returns
    -------
    tuple of (list, list)
        ``business_data`` - the first Yelp result of every search that
        returned one; ``unmatched_businesses2`` - ``{'restaurant',
        'neighborhood'}`` records for searches that returned nothing or whose
        request failed.

    Notes
    -----
    A 400 response stops the whole run; the results gathered so far are
    returned.
    '''
    endpoint = 'https://api.yelp.com/v3/businesses/search'
    headers = dict(Authorization = 'Bearer %s' % api_key)
    business_data = []
    unmatched_businesses2 = []
    for item in list_of_businesses:
        parameters = dict(term = item['restaurant'],
                          limit = 1,
                          radius = 40000,
                          location = location)
        response = requests.get(url = endpoint, params = parameters, headers = headers)

        if response.status_code == 200:
            # Decode the body once and reuse it.
            matches = response.json()['businesses']
            if matches:
                business_data += matches
            else:
                print('{} in {} not found in Yelp'.format(item['restaurant'], item['neighborhood']))
                unmatched_businesses2.append({'restaurant': item['restaurant'], 'neighborhood': item['neighborhood']})
        elif response.status_code == 400:
            print('400 Error: Bad Request')
            break
        else:
            # Any other status used to drop the restaurant without a trace;
            # report it and keep the record so it can be retried.
            print('{} Error: search for {} in {} failed'.format(
                response.status_code, item['restaurant'], location))
            unmatched_businesses2.append({'restaurant': item['restaurant'], 'neighborhood': item['neighborhood']})
    return business_data, unmatched_businesses2

In [23]:
# Pool the term-search results with the per-restaurant lookups into one list of
# Yelp records (duplicates are removed later, once the data is in a DataFrame).
# NOTE(review): `matched_businesses2` is not assigned in any cell visible in this
# part of the notebook - presumably it is the first value returned by
# search_businesses2(...); confirm that cell exists so Restart & Run All works.
all_returned_businesses = returned_businesses + matched_businesses + matched_businesses2

In [24]:
# Total number of pooled records, before de-duplication.
len(all_returned_businesses)

513

In [25]:
# Turn the list of dictionaries into a pandas DataFrame.
# The columns are the union of the keys found across all records. Taking them
# from the first record only would drop, for every row, any field that one record
# lacks (Yelp omits `price` for some businesses) and would raise IndexError when
# `returned_businesses` is empty.
businesses = pd.DataFrame(all_returned_businesses)

In [26]:
# Check dtypes and null counts of the raw frame (in the recorded output only
# `price` has missing values).
businesses.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 513 entries, 0 to 512
Data columns (total 16 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   id             513 non-null    object 
 1   alias          513 non-null    object 
 2   name           513 non-null    object 
 3   image_url      513 non-null    object 
 4   is_closed      513 non-null    bool   
 5   url            513 non-null    object 
 6   review_count   513 non-null    int64  
 7   categories     513 non-null    object 
 8   rating         513 non-null    float64
 9   coordinates    513 non-null    object 
 10  transactions   513 non-null    object 
 11  price          424 non-null    object 
 12  location       513 non-null    object 
 13  phone          513 non-null    object 
 14  display_phone  513 non-null    object 
 15  distance       513 non-null    float64
dtypes: bool(1), float64(2), int64(1), object(12)
memory usage: 60.7+ KB


In [27]:
# The pooled sources can return the same business more than once, so keep one
# row per Yelp business id. The index is not reset here, so dropped rows leave
# gaps in it.
businesses.drop_duplicates(subset=['id'], inplace = True)

In [28]:
# Re-check row count and null counts after de-duplication.
businesses.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 401 entries, 0 to 511
Data columns (total 16 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   id             401 non-null    object 
 1   alias          401 non-null    object 
 2   name           401 non-null    object 
 3   image_url      401 non-null    object 
 4   is_closed      401 non-null    bool   
 5   url            401 non-null    object 
 6   review_count   401 non-null    int64  
 7   categories     401 non-null    object 
 8   rating         401 non-null    float64
 9   coordinates    401 non-null    object 
 10  transactions   401 non-null    object 
 11  price          324 non-null    object 
 12  location       401 non-null    object 
 13  phone          401 non-null    object 
 14  display_phone  401 non-null    object 
 15  distance       401 non-null    float64
dtypes: bool(1), float64(2), int64(1), object(12)
memory usage: 50.5+ KB


In [29]:
# Column dtypes before any conversion.
businesses.dtypes

id                object
alias             object
name              object
image_url         object
is_closed           bool
url               object
review_count       int64
categories        object
rating           float64
coordinates       object
transactions      object
price             object
location          object
phone             object
display_phone     object
distance         float64
dtype: object

In [30]:
# Store the two low-cardinality columns as pandas categoricals, converting both
# in a single `astype` call.
businesses = businesses.astype({'is_closed': 'category', 'price': 'category'})

In [31]:
# Peek at the first rows of the de-duplicated frame.
businesses.head()

Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
0,30jrTz8vh1xSXdtXMvt-mA,my-two-cents-los-angeles-3,My Two Cents,https://s3-media3.fl.yelpcdn.com/bphoto/EMZrhy...,False,https://www.yelp.com/biz/my-two-cents-los-ange...,661,"[{'alias': 'southern', 'title': 'Southern'}, {...",4.5,"{'latitude': 34.0498186, 'longitude': -118.359...","[delivery, pickup]",$$,"{'address1': '5583 W Pico Blvd', 'address2': '...",13238799881,(323) 879-9881,3752.555689
1,QWtzUp4zLqzjA_jxLbvDvA,grannys-kitchen-los-angeles,Granny's Kitchen,https://s3-media2.fl.yelpcdn.com/bphoto/iadhZ0...,False,https://www.yelp.com/biz/grannys-kitchen-los-a...,337,"[{'alias': 'soulfood', 'title': 'Soul Food'}, ...",4.0,"{'latitude': 33.9930975470943, 'longitude': -1...","[delivery, pickup]",$$,"{'address1': '5440 S Central Ave', 'address2':...",13232312141,(323) 231-2141,9693.70949
2,Sb_hPko42AEdCbX5Nut5hA,reds-flavor-table-los-angeles,Red's Flavor Table,https://s3-media2.fl.yelpcdn.com/bphoto/qYZbz0...,False,https://www.yelp.com/biz/reds-flavor-table-los...,634,"[{'alias': 'cajun', 'title': 'Cajun/Creole'}, ...",4.0,"{'latitude': 33.9743356, 'longitude': -118.324...",[delivery],$$,"{'address1': '2812 W Florence Ave', 'address2'...",13237516000,(323) 751-6000,9705.595002
3,4ZmXcttDdx9KRGUi6tMWFQ,dulans-on-crenshaw-los-angeles,Dulan's On Crenshaw,https://s3-media2.fl.yelpcdn.com/bphoto/_EXOLc...,False,https://www.yelp.com/biz/dulans-on-crenshaw-lo...,614,"[{'alias': 'soulfood', 'title': 'Soul Food'}]",4.0,"{'latitude': 33.9985785, 'longitude': -118.330...","[delivery, pickup]",$$,"{'address1': '4859 Crenshaw Blvd', 'address2':...",13232963034,(323) 296-3034,7059.773076
4,6DEuqc82zAMe-C4MOLqHcQ,simply-wholesome-los-angeles,Simply Wholesome,https://s3-media4.fl.yelpcdn.com/bphoto/7a4DSy...,False,https://www.yelp.com/biz/simply-wholesome-los-...,792,"[{'alias': 'soulfood', 'title': 'Soul Food'}, ...",4.0,"{'latitude': 33.9886372, 'longitude': -118.354...",[delivery],$$,"{'address1': '4508 W Slauson Ave', 'address2':...",13232942144,(323) 294-2144,8666.093649


In [32]:
# Pull the display title of the first and of the second Yelp category of each
# business. `.str.get` indexes into each row's list and then into the category
# dict, yielding a missing value when a business has no category at that
# position. That avoids building one Series per row with `apply(pd.Series)` and
# avoids a KeyError on `[1]` when no business has a second category.
businesses['category'] = businesses['categories'].str.get(0).str.get('title')
businesses['sub_category'] = businesses['categories'].str.get(1).str.get('title')

In [33]:
# Split the `coordinates` dict into numeric columns. Each value is selected by
# key rather than by position, so the result does not depend on the order of
# the keys inside the dict.
businesses['latitude'] = businesses['coordinates'].str.get('latitude').astype('float64')
businesses['longitude'] = businesses['coordinates'].str.get('longitude').astype('float64')

In [34]:
# Expand each `location` dict into its own columns (one per dict key). The row
# index of `businesses` is preserved, so the result can be joined back by index.
business_locations = businesses['location'].apply(pd.Series)

In [35]:
# Preview the expanded address columns.
business_locations

Unnamed: 0,address1,address2,address3,city,zip_code,country,state,display_address
0,5583 W Pico Blvd,,,Los Angeles,90019,US,CA,"[5583 W Pico Blvd, Los Angeles, CA 90019]"
1,5440 S Central Ave,,,Los Angeles,90011,US,CA,"[5440 S Central Ave, Los Angeles, CA 90011]"
2,2812 W Florence Ave,,,Los Angeles,90043,US,CA,"[2812 W Florence Ave, Los Angeles, CA 90043]"
3,4859 Crenshaw Blvd,,,Los Angeles,90043,US,CA,"[4859 Crenshaw Blvd, Los Angeles, CA 90043]"
4,4508 W Slauson Ave,,,Los Angeles,90043,US,CA,"[4508 W Slauson Ave, Los Angeles, CA 90043]"
...,...,...,...,...,...,...,...,...
507,13545 Ventura Blvd,,,Los Angeles,91423,US,CA,"[13545 Ventura Blvd, Los Angeles, CA 91423]"
508,10581 1/2 W Pico Blvd,,,Los Angeles,90064,US,CA,"[10581 1/2 W Pico Blvd, Los Angeles, CA 90064]"
509,,,,Los Angeles,90047,US,CA,"[Los Angeles, CA 90047]"
510,4729 Eagle Rock Blvd,,,Los Angeles,90041,US,CA,"[4729 Eagle Rock Blvd, Los Angeles, CA 90041]"


In [36]:
# Append the expanded address columns to the main frame, matching rows on their
# shared index.
# NOTE: running this cell twice merges the address columns a second time and
# pandas then suffixes the clashing names with _x / _y.
businesses = pd.merge(
    businesses,
    business_locations,
    left_index=True,
    right_index=True,
)

In [37]:
# Collapse each list of address lines into one comma-separated string.
# Only list/tuple values are joined: a value that is already a string (the cell
# was re-run) or is missing passes through unchanged, instead of having its
# characters joined or raising a TypeError.
businesses['display_address'] = businesses['display_address'].apply(
    lambda parts: ', '.join(map(str, parts)) if isinstance(parts, (list, tuple)) else parts)

In [38]:
# Confirm that the coordinate and address columns were appended.
businesses.head()

Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,...,latitude,longitude,address1,address2,address3,city,zip_code,country,state,display_address
0,30jrTz8vh1xSXdtXMvt-mA,my-two-cents-los-angeles-3,My Two Cents,https://s3-media3.fl.yelpcdn.com/bphoto/EMZrhy...,False,https://www.yelp.com/biz/my-two-cents-los-ange...,661,"[{'alias': 'southern', 'title': 'Southern'}, {...",4.5,"{'latitude': 34.0498186, 'longitude': -118.359...",...,34.049819,-118.359612,5583 W Pico Blvd,,,Los Angeles,90019,US,CA,"5583 W Pico Blvd, Los Angeles, CA 90019"
1,QWtzUp4zLqzjA_jxLbvDvA,grannys-kitchen-los-angeles,Granny's Kitchen,https://s3-media2.fl.yelpcdn.com/bphoto/iadhZ0...,False,https://www.yelp.com/biz/grannys-kitchen-los-a...,337,"[{'alias': 'soulfood', 'title': 'Soul Food'}, ...",4.0,"{'latitude': 33.9930975470943, 'longitude': -1...",...,33.993098,-118.256304,5440 S Central Ave,,,Los Angeles,90011,US,CA,"5440 S Central Ave, Los Angeles, CA 90011"
2,Sb_hPko42AEdCbX5Nut5hA,reds-flavor-table-los-angeles,Red's Flavor Table,https://s3-media2.fl.yelpcdn.com/bphoto/qYZbz0...,False,https://www.yelp.com/biz/reds-flavor-table-los...,634,"[{'alias': 'cajun', 'title': 'Cajun/Creole'}, ...",4.0,"{'latitude': 33.9743356, 'longitude': -118.324...",...,33.974336,-118.324172,2812 W Florence Ave,,,Los Angeles,90043,US,CA,"2812 W Florence Ave, Los Angeles, CA 90043"
3,4ZmXcttDdx9KRGUi6tMWFQ,dulans-on-crenshaw-los-angeles,Dulan's On Crenshaw,https://s3-media2.fl.yelpcdn.com/bphoto/_EXOLc...,False,https://www.yelp.com/biz/dulans-on-crenshaw-lo...,614,"[{'alias': 'soulfood', 'title': 'Soul Food'}]",4.0,"{'latitude': 33.9985785, 'longitude': -118.330...",...,33.998579,-118.330774,4859 Crenshaw Blvd,,,Los Angeles,90043,US,CA,"4859 Crenshaw Blvd, Los Angeles, CA 90043"
4,6DEuqc82zAMe-C4MOLqHcQ,simply-wholesome-los-angeles,Simply Wholesome,https://s3-media4.fl.yelpcdn.com/bphoto/7a4DSy...,False,https://www.yelp.com/biz/simply-wholesome-los-...,792,"[{'alias': 'soulfood', 'title': 'Soul Food'}, ...",4.0,"{'latitude': 33.9886372, 'longitude': -118.354...",...,33.988637,-118.354468,4508 W Slauson Ave,,,Los Angeles,90043,US,CA,"4508 W Slauson Ave, Los Angeles, CA 90043"


In [39]:
# List the rows whose Yelp address is outside California.
businesses.query('state != "CA"')

Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,...,latitude,longitude,address1,address2,address3,city,zip_code,country,state,display_address
190,UM4Y060CK3gXZfW9bboXRg,battle-born-social-carson-city,Battle Born Social,https://s3-media4.fl.yelpcdn.com/bphoto/6Mh3oH...,False,https://www.yelp.com/biz/battle-born-social-ca...,102,"[{'alias': 'tradamerican', 'title': 'American ...",3.5,"{'latitude': 39.16559, 'longitude': -119.76738}",...,39.16559,-119.76738,318 N Carson St,,,Carson City,89701,US,NV,"318 N Carson St, Carson City, NV 89701"
228,JFmmdCCt3RDplwXF6BwCHQ,kfc-coffeyville,KFC,https://s3-media2.fl.yelpcdn.com/bphoto/llZ7dL...,False,https://www.yelp.com/biz/kfc-coffeyville?adjus...,16,"[{'alias': 'hotdogs', 'title': 'Fast Food'}, {...",2.0,"{'latitude': 37.032663, 'longitude': -95.618804}",...,37.032663,-95.618804,215 W 11th St,,,Coffeyville,67337,US,KS,"215 W 11th St, Coffeyville, KS 67337"
248,QZE2wj1T3pHm2NEx64KDGQ,happy-yogurt-garden-new-york,Happy Yogurt Garden,https://s3-media3.fl.yelpcdn.com/bphoto/H1PDbV...,False,https://www.yelp.com/biz/happy-yogurt-garden-n...,17,"[{'alias': 'icecream', 'title': 'Ice Cream & F...",4.5,"{'latitude': 40.6220729293888, 'longitude': -7...",...,40.622073,-74.005983,7102 13th Ave,,,New York,11228,US,NY,"7102 13th Ave, New York, NY 11228"
255,9fMyUeRTEblVqJ083QEsWw,harolds-shrimp-and-chicken-miami-beach,Harolds Shrimp and Chicken,https://s3-media1.fl.yelpcdn.com/bphoto/DhQzjq...,False,https://www.yelp.com/biz/harolds-shrimp-and-ch...,3,"[{'alias': 'newamerican', 'title': 'American (...",3.5,"{'latitude': 25.784511, 'longitude': -80.1318113}",...,25.784511,-80.131811,1311 Washington Ave,,,Miami Beach,33139,US,FL,"1311 Washington Ave, Miami Beach, FL 33139"
256,rcP_maQ5-gr_i3tQ6J2oIg,yardbird-southern-table-and-bar-miami-beach,Yardbird Southern Table & Bar,https://s3-media2.fl.yelpcdn.com/bphoto/sftazX...,False,https://www.yelp.com/biz/yardbird-southern-tab...,5412,"[{'alias': 'southern', 'title': 'Southern'}, {...",4.5,"{'latitude': 25.7890458685996, 'longitude': -8...",...,25.789046,-80.140084,1600 Lenox Ave,,,Miami Beach,33139,US,FL,"1600 Lenox Ave, Miami Beach, FL 33139"
259,695wcjiDU0kJX2R6mtvEsA,billy-joes-ribworks-newburgh,Billy Joe's Ribworks,https://s3-media3.fl.yelpcdn.com/bphoto/zVK3ta...,False,https://www.yelp.com/biz/billy-joes-ribworks-n...,576,"[{'alias': 'bbq', 'title': 'Barbeque'}, {'alia...",3.5,"{'latitude': 41.50216, 'longitude': -74.00518}",...,41.50216,-74.00518,26 Front St,,,Newburgh,12550,US,NY,"26 Front St, Newburgh, NY 12550"
260,Bl2B6GDV1lz2uzfSo8yotA,decicco-and-sons-brewster-brewster,DeCicco & Sons - Brewster,https://s3-media2.fl.yelpcdn.com/bphoto/ObxA5T...,False,https://www.yelp.com/biz/decicco-and-sons-brew...,70,"[{'alias': 'grocery', 'title': 'Grocery'}, {'a...",4.0,"{'latitude': 41.4197599, 'longitude': -73.62678}",...,41.41976,-73.62678,50 Independent Way,,,Brewster,10509,US,NY,"50 Independent Way, Brewster, NY 10509"
261,FEm55B90BuR66QyXmL2clA,joe-cristianos-pizza-wappingers-falls,Joe Cristiano's Pizza,https://s3-media3.fl.yelpcdn.com/bphoto/EgTtAH...,False,https://www.yelp.com/biz/joe-cristianos-pizza-...,47,"[{'alias': 'pizza', 'title': 'Pizza'}, {'alias...",4.0,"{'latitude': 41.576131, 'longitude': -73.909311}",...,41.576131,-73.909311,1289 Rte 9,,,Wappingers Falls,12590,US,NY,"1289 Rte 9, Wappingers Falls, NY 12590"
262,6Ih4wtGf9tEONL8K293T1g,the-parish-new-paltz,The Parish,https://s3-media3.fl.yelpcdn.com/bphoto/1Y7cB2...,False,https://www.yelp.com/biz/the-parish-new-paltz?...,59,"[{'alias': 'cocktailbars', 'title': 'Cocktail ...",3.5,"{'latitude': 41.7462369082461, 'longitude': -7...",...,41.746237,-74.089372,10 Main St,,,New Paltz,12561,US,NY,"10 Main St, New Paltz, NY 12561"
263,99rUs4E8GWvWnY1bduAnUg,fireside-bbq-and-grill-salt-point,Fireside BBQ & Grill,https://s3-media2.fl.yelpcdn.com/bphoto/B6goT6...,False,https://www.yelp.com/biz/fireside-bbq-and-gril...,63,"[{'alias': 'bbq', 'title': 'Barbeque'}, {'alia...",4.0,"{'latitude': 41.80739, 'longitude': -73.79312}",...,41.80739,-73.79312,1920 Salt Point Tpke,,,Salt Point,12578,US,NY,"1920 Salt Point Tpke, Salt Point, NY 12578"


In [40]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
businesses.describe()

Unnamed: 0,review_count,rating,distance,latitude,longitude
count,401.0,401.0,401.0,401.0,401.0
mean,424.461347,4.148379,10747.522977,34.397695,-115.661272
std,690.58006,0.635259,10514.902282,1.747058,10.830174
min,0.0,0.0,17.637273,25.784511,-119.76738
25%,58.0,4.0,2556.31404,33.94526,-118.353867
50%,178.0,4.0,7051.206878,34.016994,-118.308937
75%,452.0,4.5,16226.063093,34.10375,-118.20011
max,5412.0,5.0,69610.718619,45.438352,12.339039


In [41]:
# Index the frame by the business `id` column.
# Reassign instead of mutating in place, and skip the step when `id` is already
# the index, so re-running this cell does not raise KeyError.
if businesses.index.name != 'id':
    businesses = businesses.set_index('id')

In [42]:
# Print the index range, column names, non-null counts and dtypes of the frame.
businesses.info()

<class 'pandas.core.frame.DataFrame'>
Index: 401 entries, 30jrTz8vh1xSXdtXMvt-mA to OCkQdKMHU-LwWTWNe5leBA
Data columns (total 27 columns):
 #   Column           Non-Null Count  Dtype   
---  ------           --------------  -----   
 0   alias            401 non-null    object  
 1   name             401 non-null    object  
 2   image_url        401 non-null    object  
 3   is_closed        401 non-null    category
 4   url              401 non-null    object  
 5   review_count     401 non-null    int64   
 6   categories       401 non-null    object  
 7   rating           401 non-null    float64 
 8   coordinates      401 non-null    object  
 9   transactions     401 non-null    object  
 10  price            324 non-null    category
 11  location         401 non-null    object  
 12  phone            401 non-null    object  
 13  display_phone    401 non-null    object  
 14  distance         401 non-null    float64 
 15  category         401 non-null    object  
 16  sub_categ

In [57]:
# Let long outputs (e.g. value_counts) show up to 80 rows before pandas truncates them.
# Use the fully qualified option name: the bare 'max_rows' pattern is ambiguous on
# newer pandas (it also matches 'styler.render.max_rows') and raises OptionError.
pd.set_option('display.max_rows', 80)

In [58]:
# Count how many businesses fall under each primary category, most common first.
businesses['category'].value_counts()

Barbeque                     38
Soul Food                    27
Coffee & Tea                 25
Caribbean                    22
Desserts                     17
Southern                     17
Cajun/Creole                 16
Ethiopian                    15
Burgers                      13
Bakeries                     12
Seafood                      11
American (Traditional)       11
Breakfast & Brunch           10
Caterers                      9
Seafood Markets               9
Vegan                         9
African                       8
Food Trucks                   8
Chicken Wings                 7
Chicken Shop                  7
American (New)                7
Ice Cream & Frozen Yogurt     6
Mexican                       5
Tacos                         5
Juice Bars & Smoothies        5
Delis                         4
Coffee Roasteries             4
Donuts                        4
Pizza                         4
Food Delivery Services        3
Comfort Food                  3
Fast Foo

In [44]:
# List the column labels currently present in the frame.
businesses.columns

Index(['alias', 'name', 'image_url', 'is_closed', 'url', 'review_count',
       'categories', 'rating', 'coordinates', 'transactions', 'price',
       'location', 'phone', 'display_phone', 'distance', 'category',
       'sub_category', 'latitude', 'longitude', 'address1', 'address2',
       'address3', 'city', 'zip_code', 'country', 'state', 'display_address'],
      dtype='object')

In [60]:
# Boolean mask flagging businesses whose name contains "Crossroads",
# then display only the matching rows.
mask = businesses['name'].str.contains('Crossroads')
businesses[mask]

Unnamed: 0_level_0,name,image_url,is_closed,url,review_count,rating,transactions,price,display_phone,category,sub_category,latitude,longitude,address1,address2,address3,city,zip_code,country,state,display_address
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
9xyjvSkidMTngtyZ6-Upaw,Crossroads,https://s3-media3.fl.yelpcdn.com/bphoto/SAr3gZ...,False,https://www.yelp.com/biz/crossroads-los-angele...,2147,4.0,"[delivery, restaurant_reservation, pickup]",$$$,(323) 782-9245,Vegan,Mediterranean,34.08342,-118.37011,8284 Melrose Ave,,,Los Angeles,90046,US,CA,"8284 Melrose Ave, Los Angeles, CA 90046"


In [46]:
# Remove the columns that are not kept in the cleaned dataset.
# Reassign rather than mutate in place, and ignore labels that are already gone
# so the cell can be re-run without raising KeyError.
businesses = businesses.drop(
    columns=['coordinates', 'location', 'alias', 'categories', 'phone', 'distance'],
    errors='ignore',
)

In [47]:
# Persist the cleaned frame as CSV in the working directory.
# With to_csv's defaults the index is written as the first column.
businesses.to_csv('cleaned_businesses.csv')

In [68]:
# Download the NYC Black-owned restaurants feature page and parse its HTML.
res = requests.get('https://www.theinfatuation.com/features/support-nyc-black-owned-restaurants#staten')
res.raise_for_status()  # fail on a 4xx/5xx response instead of parsing an error page
# The notebook imports only `BeautifulSoup` (not the `bs4` module), and `bs4.Beu`
# is not a real attribute, so the original line could only raise. Parse with the
# imported class, using the same arguments scrape_urls passes.
nyc_list = BeautifulSoup(markup=res.text, features='html.parser')

In [None]:
# FIXME(review): unfinished line. `bs4` is not imported as a module in this notebook
# (only `BeautifulSoup` is), and `bs4.Beu` looks like a partially typed
# `bs4.BeautifulSoup(...)`. Confirm the intent, or delete this cell.
nyc_list = bs4.Beu