# Extract Yelp Data by Zipcode
### Description
Extract Yelp rating data for restaurants, zip code by zip code, in five metropolitan cities.

In [1]:
!pip install yelpapi

[33mYou are using pip version 10.0.1, however version 18.1 is available.
You should consider upgrading via the 'pip install --upgrade pip' command.[0m


In [2]:
# Dependencies
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import requests
import json
from pprint import pprint
from yelpapi import YelpAPI
import gmaps
from ipywidgets.embed import embed_minimal_html

from config import Yelp_API
from config import Google_API

In [3]:
# Cities covered by the analysis, one {"city", "state"} record per entry
city_list = [
    {"city": city_label, "state": state_code}
    for city_label, state_code in (
        ("San Francisco", "CA"),
        ("Los Angeles", "CA"),
        ("Chicago", "IL"),
        ("New York", "NY"),
        ("Washington", "DC"),
    )
]

In [4]:
# Smoke-test the Yelp client: run one restaurant search around a fixed
# coordinate and pretty-print the business at position 10 of the response
yelp_api = YelpAPI(Yelp_API)
search_results = yelp_api.search_query(
    latitude=37.7474,
    longitude=-122.4392,
    categories='restaurants',
    limit=20,
)

pprint(search_results["businesses"][10])

{'alias': 'lovejoys-tea-room-san-francisco',
 'categories': [{'alias': 'british', 'title': 'British'},
                {'alias': 'tea', 'title': 'Tea Rooms'}],
 'coordinates': {'latitude': 37.74919, 'longitude': -122.42689},
 'display_phone': '(415) 648-5895',
 'distance': 1086.1986600413134,
 'id': 's-ihKanDRDHTD8U49HFA5Q',
 'image_url': 'https://s3-media1.fl.yelpcdn.com/bphoto/0rQRbWv1-Rz-h3LOtubcvw/o.jpg',
 'is_closed': False,
 'location': {'address1': '1351 Church St',
              'address2': '',
              'address3': '',
              'city': 'San Francisco',
              'country': 'US',
              'display_address': ['1351 Church St', 'San Francisco, CA 94114'],
              'state': 'CA',
              'zip_code': '94114'},
 'name': "Lovejoy's Tea Room",
 'phone': '+14156485895',
 'price': '$$',
 'rating': 4.5,
 'review_count': 1138,
 'transactions': [],
 'url': 'https://www.yelp.com/biz/lovejoys-tea-room-san-francisco?adjust_creative=zrsPLbTT9FZOAsEKCx1hjQ&utm_campa

In [9]:
# Cuisine display name -> Yelp category alias used in the search requests
# Reference: https://www.yelp.com/developers/documentation/v3/all_category_list/categories.json
#
# An earlier, longer draft of this mapping also listed the aliases below;
# they are not part of the dictionary and are therefore never queried:
#   newamerican, arabian, asianfusion, austrian, australian, newcanadian,
#   czechslovakian, himalayan, latin, mediterranean
Yelp_Type_Dict = {
    "Argentine": "argentine",
    "Brazilian": "brazilian",
    "British": "british",
    "Bulgarian": "bulgarian",
    "Cajun/Creole": "cajun",
    "Caribbean": "caribbean",
    "Chilean": "chilean",
    "Chinese": "chinese",
    "Cuban": "cuban",
    "Filipino": "filipino",
    "French": "french",
    "German": "german",
    "Greek": "greek",
    "Hawaiian": "hawaiian",
    "Indian": "indpak",
    "Italian": "italian",
    "Japanese": "japanese",
    "Korean": "korean",
    "Malaysian": "malaysian",
    "Mexican": "mexican",
    "Polish": "polish",
    "Portuguese": "portuguese",
    "Romanian": "romanian",
    "Russian": "russian",
    "Thai": "thai",
    "Vietnamese": "vietnamese",
    "Pakistani": "pakistani",
}


In [6]:
# Get City ZipCode Data
# Loads the zip-code reference table from a CSV file given by a relative path
# and previews its first rows.
# NOTE(review): the preview shows zip values such as 501 and 601, i.e. the
# column is parsed as numbers and leading zeros are not kept - confirm this is
# acceptable for every city that will be analyzed.
city_zipcode_df = pd.read_csv("zip_code_database.csv")
city_zipcode_df.head()

Unnamed: 0,zip,type,decommissioned,primary_city,acceptable_cities,unacceptable_cities,state,county,timezone,area_codes,world_region,country,latitude,longitude,irs_estimated_population_2015
0,501,UNIQUE,0,Holtsville,,I R S Service Center,NY,Suffolk County,America/New_York,631,,US,40.81,-73.04,562
1,544,UNIQUE,0,Holtsville,,Irs Service Center,NY,Suffolk County,America/New_York,631,,US,40.81,-73.04,0
2,601,STANDARD,0,Adjuntas,,"Colinas Del Gigante, Jard De Adjuntas, Urb San...",PR,Adjuntas Municipio,America/Puerto_Rico,787939,,US,18.16,-66.72,0
3,602,STANDARD,0,Aguada,,"Alts De Aguada, Bo Guaniquilla, Comunidad Las ...",PR,Aguada Municipio,America/Puerto_Rico,787939,,US,18.38,-67.18,0
4,603,STANDARD,0,Aguadilla,Ramey,"Bda Caban, Bda Esteves, Bo Borinquen, Bo Ceiba...",PR,Aguadilla Municipio,America/Puerto_Rico,787,,US,18.43,-67.15,0


In [7]:
# Extract zip code data for all the cities that will be analyzed
column_output = ["zip","primary_city","state","latitude","longitude"]

# DataFrame.append was removed in pandas 2.0 and copied the whole frame on
# every iteration, so the per-city slices are collected first and
# concatenated once. The original row labels are kept (no ignore_index).
city_frames = []

for city in city_list:
    city_name = city["city"]
    state_name = city["state"]
    # Match on city AND state so same-named cities in other states are excluded
    is_target_city = (city_zipcode_df["primary_city"]==city_name)&(city_zipcode_df["state"]==state_name)
    city_frames.append(city_zipcode_df.loc[is_target_city,column_output])

# pd.concat raises on an empty list, so fall back to an empty, correctly-shaped frame
if city_frames:
    city_zipcode_filter_df = pd.concat(city_frames)
else:
    city_zipcode_filter_df = pd.DataFrame(columns=column_output)

print(city_zipcode_filter_df["primary_city"].unique())
city_zipcode_filter_df.head()

['San Francisco' 'Los Angeles' 'Chicago' 'New York' 'Washington']


Unnamed: 0,zip,primary_city,state,latitude,longitude
39679,94101,San Francisco,CA,37.77,-122.41
39680,94102,San Francisco,CA,37.78,-122.42
39681,94103,San Francisco,CA,37.77,-122.41
39682,94104,San Francisco,CA,37.79,-122.4
39683,94105,San Francisco,CA,37.79,-122.39


In [8]:
# Find restaurants of one Yelp category for every zip code of a given city
def Restaurants_By_City_and_Type (city_zipcode_df,city_name,restaurant_identifier,
                                  api_client=None,limit=30,radius=8000):
    """Collect Yelp restaurants of one category for each zip code of a city.

    One search is issued per zip-code row whose "primary_city" equals
    ``city_name``, centred on that row's latitude/longitude. A business is
    kept only when one of its category aliases equals
    ``restaurant_identifier`` AND its zip code equals the zip code of the row
    being searched.

    Parameters
    ----------
    city_zipcode_df : pandas.DataFrame
        Table with the columns "zip", "primary_city", "state", "latitude"
        and "longitude".
    city_name : str
        Value compared against the "primary_city" column.
    restaurant_identifier : str
        Yelp category alias (e.g. "chinese"); used both as the search filter
        and to pick the matching category title of each business.
    api_client : object, optional
        Object exposing ``search_query(**kwargs)`` that returns a dict with a
        "businesses" list. When omitted, the notebook-level ``yelp_api``
        client is used.
    limit : int, optional
        Forwarded unchanged to ``search_query`` (default 30).
    radius : int, optional
        Forwarded unchanged to ``search_query`` (default 8000).

    Returns
    -------
    pandas.DataFrame
        One row per kept restaurant with the columns "zip", "city", "state",
        "name", "price", "rating", "review_count", "type", "latitude",
        "longitude". Empty (but with those columns) when nothing matched.

    Notes
    -----
    A failure while querying or parsing one zip code is reported on stdout
    and the loop continues with the next zip code; restaurants already
    collected are kept.
    """

    column_output = ["zip","city","state","name","price","rating","review_count","type","latitude","longitude"]

    # Resolved lazily so the global client is only required when none is passed in
    client = yelp_api if api_client is None else api_client

    zipcode_By_City = city_zipcode_df.loc[city_zipcode_df["primary_city"]==city_name]

    # Rows are gathered in a plain list and converted once at the end:
    # DataFrame.append was removed in pandas 2.0 and was quadratic in a loop.
    restaurant_records = []

    for _, row in zipcode_By_City.iterrows():

        lat = float(row["latitude"])
        lng = float(row["longitude"])
        # Zips read as numbers lose their leading zeros (2108 vs "02108");
        # pad back to five digits so they can match the zip strings Yelp returns.
        row_zip = str(row["zip"]).zfill(5)
        restaurants_count = 0

        # Yelp Request
        try:
            search_results = client.search_query(categories=restaurant_identifier,longitude=lng,latitude=lat,
                                                 limit=limit,radius=radius)

            for restaurant in search_results["businesses"]:

                # Title of the first category whose alias is the searched one;
                # None means the business does not really belong to that category.
                restaurant_type = next(
                    (category["title"] for category in restaurant["categories"]
                     if category["alias"] == restaurant_identifier),
                    None)
                if restaurant_type is None:
                    continue

                # The search is radius-based, so drop hits located in other zip codes
                if str(restaurant["location"]["zip_code"]) != row_zip:
                    continue

                restaurants_count = restaurants_count + 1

                restaurant_records.append({"zip":row["zip"],
                                           "city":row["primary_city"],
                                           "state":row["state"],
                                           "name":restaurant["name"],
                                           # Businesses without a price tier are recorded as "$$"
                                           "price":restaurant.get("price","$$"),
                                           "rating":restaurant["rating"],
                                           "review_count":restaurant["review_count"],
                                           "type":restaurant_type,
                                           "latitude":restaurant["coordinates"]["latitude"],
                                           "longitude":restaurant["coordinates"]["longitude"]
                                          })

            if restaurants_count > 0:
                print(f"Found {restaurants_count} {restaurant_identifier} restaurants in location {row['zip']}")

        except Exception as exc:
            # Best effort: report which zip failed and why, then move on.
            # Catching Exception (not a bare except) lets Ctrl-C interrupt the run.
            print(f"Search Fail for zip {row['zip']}: {exc!r}")

    print("------------Finish-------------")

    return pd.DataFrame(restaurant_records,columns=column_output)
    


In [26]:
# Sanity check: Chinese restaurants across the Chicago zip codes
city_restaurants_by_zipcode_Chicago_Chinese = Restaurants_By_City_and_Type(
    city_zipcode_df=city_zipcode_filter_df,
    city_name="Chicago",
    restaurant_identifier="chinese",
)
city_restaurants_by_zipcode_Chicago_Chinese.head()

Found 1 chinese restaurants in location 60601
Found 3 chinese restaurants in location 60607
Found 2 chinese restaurants in location 60608
Found 1 chinese restaurants in location 60610
Found 4 chinese restaurants in location 60611
Found 2 chinese restaurants in location 60613
Found 3 chinese restaurants in location 60614
Found 14 chinese restaurants in location 60616
Found 5 chinese restaurants in location 60617
Found 4 chinese restaurants in location 60618
Found 4 chinese restaurants in location 60619
Found 1 chinese restaurants in location 60620
Found 3 chinese restaurants in location 60622
Found 2 chinese restaurants in location 60623
Found 6 chinese restaurants in location 60625
Found 2 chinese restaurants in location 60626
Found 5 chinese restaurants in location 60628
Found 2 chinese restaurants in location 60629
Found 5 chinese restaurants in location 60630
Found 1 chinese restaurants in location 60631
Found 5 chinese restaurants in location 60632
Found 2 chinese restaurants in lo

Unnamed: 0,zip,city,state,name,price,rating,review_count,type,latitude,longitude
0,60601,Chicago,IL,Wow Bao,$,3.5,543,Chinese,41.885915,-87.628267
1,60607,Chicago,IL,Duck Duck Goat,$$$,4.0,1093,Chinese,41.886656,-87.649717
2,60607,Chicago,IL,WJ Noodles,$$,4.5,158,Chinese,41.878071,-87.647721
3,60607,Chicago,IL,ML Kitchen,$$,4.0,113,Chinese,41.869543,-87.655786
4,60608,Chicago,IL,Joy Yee's Noodle Shop,$$,3.5,615,Chinese,41.86433,-87.6467


In [10]:
# Collect New York restaurants for every cuisine type in Yelp_Type_Dict
column_output = ["zip","city","state","name","price","rating","review_count","type","latitude","longitude"]

# DataFrame.append was removed in pandas 2.0 and re-copied the accumulated
# frame on every iteration; gather the per-type results and concatenate once.
restaurant_frames = []

for key in Yelp_Type_Dict:

    print(f"Processing {key} type of restaurants")
    print("--------------------------------------")
    restaurant_to_update = Restaurants_By_City_and_Type (city_zipcode_filter_df,"New York",Yelp_Type_Dict[key])
    # Types without any hit contribute nothing, so their empty frames are skipped
    if not restaurant_to_update.empty:
        restaurant_frames.append(restaurant_to_update)

# pd.concat raises on an empty list, so fall back to an empty, correctly-shaped frame
if restaurant_frames:
    city_restaurants_by_zipcode_allType = pd.concat(restaurant_frames,ignore_index=True)[column_output]
else:
    city_restaurants_by_zipcode_allType = pd.DataFrame(columns=column_output)
    

Processing Argentine type of restaurants
--------------------------------------
Found 3 argentine restaurants in location 10001
Found 1 argentine restaurants in location 10002
Found 1 argentine restaurants in location 10009
Found 1 argentine restaurants in location 10013
Found 3 argentine restaurants in location 10036
Found 1 argentine restaurants in location 10128
------------Finish-------------
Processing Brazilian type of restaurants
--------------------------------------
Found 2 brazilian restaurants in location 10009
Found 1 brazilian restaurants in location 10011
Found 1 brazilian restaurants in location 10012
Found 2 brazilian restaurants in location 10014
Found 3 brazilian restaurants in location 10019
Found 2 brazilian restaurants in location 10029
Found 5 brazilian restaurants in location 10036
Found 1 brazilian restaurants in location 10038
------------Finish-------------
Processing British type of restaurants
--------------------------------------
Found 1 british restaurant

Found 1 greek restaurants in location 10011
Found 6 greek restaurants in location 10013
Found 4 greek restaurants in location 10014
Found 1 greek restaurants in location 10016
Found 1 greek restaurants in location 10017
Found 1 greek restaurants in location 10018
Found 7 greek restaurants in location 10019
Found 6 greek restaurants in location 10022
Found 2 greek restaurants in location 10023
Found 2 greek restaurants in location 10025
Found 1 greek restaurants in location 10033
Found 2 greek restaurants in location 10036
Found 2 greek restaurants in location 10038
Found 1 greek restaurants in location 10040
Found 1 greek restaurants in location 10065
Found 1 greek restaurants in location 10075
Found 1 greek restaurants in location 10128
Found 1 greek restaurants in location 10154
------------Finish-------------
Processing Hawaiian type of restaurants
--------------------------------------
Found 1 hawaiian restaurants in location 10001
Found 1 hawaiian restaurants in location 10003
Fou

Found 2 mexican restaurants in location 10024
Found 3 mexican restaurants in location 10025
Found 2 mexican restaurants in location 10026
Found 2 mexican restaurants in location 10028
Found 7 mexican restaurants in location 10029
Found 5 mexican restaurants in location 10031
Found 3 mexican restaurants in location 10032
Found 2 mexican restaurants in location 10033
Found 6 mexican restaurants in location 10034
Found 1 mexican restaurants in location 10035
Found 3 mexican restaurants in location 10036
Found 3 mexican restaurants in location 10040
Found 1 mexican restaurants in location 10065
Found 2 mexican restaurants in location 10075
Found 2 mexican restaurants in location 10128
Found 1 mexican restaurants in location 10282
------------Finish-------------
Processing Polish type of restaurants
--------------------------------------
Found 2 polish restaurants in location 10003
Found 1 polish restaurants in location 10020
------------Finish-------------
Processing Portuguese type of res

In [13]:
# Save the collected restaurant table to a CSV file; index=False leaves the
# DataFrame row index out of the file.
city_restaurants_by_zipcode_allType.to_csv("Yelp_Restaurants_New_York.csv",index=False)

In [12]:
gmaps.configure(api_key=Google_API)

# Marker positions: one (latitude, longitude) pair per restaurant
locations = city_restaurants_by_zipcode_allType[["latitude", "longitude"]].astype(float)

# Per-restaurant values shown in each marker's info box
review_count_list = city_restaurants_by_zipcode_allType["review_count"].astype(float)
name_list = city_restaurants_by_zipcode_allType["name"]
rating_list = city_restaurants_by_zipcode_allType["rating"]
type_list = city_restaurants_by_zipcode_allType["type"]
list_len = len(name_list)

fig = gmaps.figure()

# The columns are walked in parallel with zip() rather than looked up as
# name_list[i]: Series[i] is a label lookup and raises KeyError as soon as the
# frame's index is not exactly 0..n-1 (e.g. after filtering rows).
review_count_layer = gmaps.symbol_layer(
    locations, fill_color='rgba(0, 150, 0, 0.4)',
    stroke_color='rgba(0, 0, 150, 0.4)', scale=3,
    info_box_content=[
        f"{name}: (Review Count: {review_count},Rating: {rating},Type: {restaurant_type})"
        for name, review_count, rating, restaurant_type
        in zip(name_list, review_count_list, rating_list, type_list)
    ]
)

fig.add_layer(review_count_layer)

fig



Figure(layout=FigureLayout(height='420px'))