# Extract Yelp Data by Zipcode
### Description
Extract yelp rating data for different restaurants by zipcodes based on 5 metropoliton cities

In [1]:
!pip install yelpapi

[33mYou are using pip version 10.0.1, however version 18.1 is available.
You should consider upgrading via the 'pip install --upgrade pip' command.[0m


In [33]:
# Dependencies
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import requests
import json
from pprint import pprint
from yelpapi import YelpAPI
import gmaps

from config import Yelp_API
from config import Google_API

In [9]:
# City will be analyzed
city_list = [{"city":"San Francisco","state":"CA"},
             {"city":"Los Angeles","state":"CA"},
             {"city":"Chicago","state":"IL"},
             {"city":"New York","state":"NY"},
             {"city":"Washington","state":"DC"}]

In [29]:
# Test Yelp API Call
yelp_api = YelpAPI(Yelp_API)
search_results = yelp_api.search_query(categories='restaurants', longitude=-122.4392, latitude=37.7474, limit=5)

pprint(search_results["businesses"][1])

{'alias': 'el-farolito-san-francisco-2',
 'categories': [{'alias': 'mexican', 'title': 'Mexican'}],
 'coordinates': {'latitude': 37.75265, 'longitude': -122.41812},
 'display_phone': '(415) 824-7877',
 'distance': 1934.6533607382898,
 'id': 'SGRmnarrNuVEsAjYdEoA0w',
 'image_url': 'https://s3-media1.fl.yelpcdn.com/bphoto/OPnKCvgBR2-lQ1-yahrpiA/o.jpg',
 'is_closed': False,
 'location': {'address1': '2779 Mission St',
              'address2': '',
              'address3': '',
              'city': 'San Francisco',
              'country': 'US',
              'display_address': ['2779 Mission St', 'San Francisco, CA 94110'],
              'state': 'CA',
              'zip_code': '94110'},
 'name': 'El Farolito',
 'phone': '+14158247877',
 'price': '$',
 'rating': 4.0,
 'review_count': 4576,
 'transactions': [],
 'url': 'https://www.yelp.com/biz/el-farolito-san-francisco-2?adjust_creative=eTwFkPcZYVd9p98CCvdsOA&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=eTwFkPcZY

In [17]:
# Get City ZipCode Data
city_zipcode_df = pd.read_csv("zip_code_database.csv")
city_zipcode_df.head()

Unnamed: 0,zip,type,decommissioned,primary_city,acceptable_cities,unacceptable_cities,state,county,timezone,area_codes,world_region,country,latitude,longitude,irs_estimated_population_2015
0,501,UNIQUE,0,Holtsville,,I R S Service Center,NY,Suffolk County,America/New_York,631,,US,40.81,-73.04,562
1,544,UNIQUE,0,Holtsville,,Irs Service Center,NY,Suffolk County,America/New_York,631,,US,40.81,-73.04,0
2,601,STANDARD,0,Adjuntas,,"Colinas Del Gigante, Jard De Adjuntas, Urb San...",PR,Adjuntas Municipio,America/Puerto_Rico,787939,,US,18.16,-66.72,0
3,602,STANDARD,0,Aguada,,"Alts De Aguada, Bo Guaniquilla, Comunidad Las ...",PR,Aguada Municipio,America/Puerto_Rico,787939,,US,18.38,-67.18,0
4,603,STANDARD,0,Aguadilla,Ramey,"Bda Caban, Bda Esteves, Bo Borinquen, Bo Ceiba...",PR,Aguadilla Municipio,America/Puerto_Rico,787,,US,18.43,-67.15,0


In [18]:
# Extract zip code data for all the cities that will be analyzed
column_output = ["zip","primary_city","state","latitude","longitude","type"]
city_zipcode_filter_df = pd.DataFrame(columns=column_output)

for city in city_list:
    city_name = city["city"]
    state_name = city["state"]
    city_zipcode_filter_df = city_zipcode_filter_df.append(city_zipcode_df.loc[(city_zipcode_df["primary_city"]==city_name)&(city_zipcode_df["state"]==state_name),column_output])

print(city_zipcode_filter_df["primary_city"].unique())
city_zipcode_filter_df.head()

['San Francisco' 'Los Angeles' 'Chicago' 'New York' 'Washington']


Unnamed: 0,zip,primary_city,state,latitude,longitude
39679,94101,San Francisco,CA,37.77,-122.41
39680,94102,San Francisco,CA,37.78,-122.42
39681,94103,San Francisco,CA,37.77,-122.41
39682,94104,San Francisco,CA,37.79,-122.4
39683,94105,San Francisco,CA,37.79,-122.39


In [39]:
# Find Restaurant by Zip Code in Chicago
column_output = ["zip","city","state","name","price","rating","review_count","latitude","longitude"]

city_zipcode_Chicago = city_zipcode_filter_df.loc[(city_zipcode_df["primary_city"]=="Chicago")].reset_index()
city_restaurants_by_zipcode_Chicago = pd.DataFrame(columns=column_output)
for index, row in city_zipcode_Chicago.iterrows():
    lat = float(row["latitude"])
    lng = float(row["longitude"])
    search_results = yelp_api.search_query(categories='restaurants',longitude=lng,latitude=lat,limit=50,radius = 15000)
    for restaurant in search_results["businesses"]:
        if restaurant["location"]["zip_code"] == str(row["zip"]):
            restaurant_dict = {"zip":row["zip"],
                               "city":row["primary_city"],
                               "state":row["state"],
                               "name":restaurant["name"],
                               "price":restaurant["price"],
                               "rating":restaurant["rating"],
                               "review_count":restaurant["review_count"],
                               "latitude":restaurant["coordinates"]["latitude"],
                               "longitude":restaurant["coordinates"]["longitude"]
                              }
            city_restaurants_by_zipcode_Chicago = city_restaurants_by_zipcode_Chicago.append(restaurant_dict,ignore_index=True)
            
city_restaurants_by_zipcode_Chicago.head(20)

Unnamed: 0,zip,city,state,name,price,rating,review_count,latitude,longitude
0,60601,Chicago,IL,Wildberry Pancakes and Cafe,$$,4.5,5855,41.884668,-87.62288
1,60601,Chicago,IL,Giordano's,$$,3.5,2194,41.885165,-87.623753
2,60601,Chicago,IL,Prime & Provisions,$$$$,4.5,893,41.886632,-87.632829
3,60602,Chicago,IL,The Dearborn,$$,4.5,993,41.884253,-87.629315
4,60603,Chicago,IL,The Gage,$$,4.0,2628,41.881048,-87.624533
5,60603,Chicago,IL,Cindy's,$$,4.0,1387,41.881689,-87.625006
6,60603,Chicago,IL,Brightwok Kitchen,$$,4.5,533,41.879458,-87.627003
7,60604,Chicago,IL,Native Foods Cafe,$$,4.0,729,41.87887,-87.63117
8,60605,Chicago,IL,Lou Malnati's Pizzeria,$$,4.0,2187,41.871487,-87.627337
9,60605,Chicago,IL,Cafecito,$,4.5,1469,41.875725,-87.626566


In [41]:
gmaps.configure(api_key=Google_API)
 # Store 'Lat' and 'Lng' into  locations 
locations = city_restaurants_by_zipcode_Chicago[["latitude", "longitude"]].astype(float)

# Convert Poverty Rate to float and store
# HINT: be sure to handle NaN values
review_count_list = city_restaurants_by_zipcode_Chicago["review_count"].astype(float)
name_list = city_restaurants_by_zipcode_Chicago["name"]
rating_list = city_restaurants_by_zipcode_Chicago["rating"]
list_len = len(name_list)

fig = gmaps.figure()

review_count_layer = gmaps.symbol_layer(
    locations, fill_color='rgba(0, 150, 0, 0.4)',
    stroke_color='rgba(0, 0, 150, 0.4)', scale=3,
    info_box_content=[f"{name_list[i]}: (Review Count: {review_count_list[i]},Rating: {rating_list[i]})" for i in range(list_len)]
)

fig.add_layer(review_count_layer)

fig

Figure(layout=FigureLayout(height='420px'))