# Step 1. Import Libraries

In [4]:
import pandas as pd
import geopandas as gpd
import numpy as np
import cartopy.crs as ccrs
import altair as alt
import hvplot.pandas
from matplotlib import pyplot as plt

In [5]:
import requests
import json

In [6]:
import os
import warnings

# Read the Yelp Fusion API key from the environment so the secret never lives
# in the notebook. Any key that was ever committed in this file must be treated
# as leaked and revoked in the Yelp developer console.
KEY = os.environ.get("YELP_API_KEY", "")
if not KEY:
    warnings.warn("YELP_API_KEY is not set; Yelp API requests will be rejected.")

# Bearer-token header sent with every Yelp request.
headers = {'Authorization' : 'Bearer %s' % KEY}

In [7]:
from geopandas import GeoDataFrame 

# Step 2. Get Data

## Step 2.1 Neighborhood Boundaries 

In [8]:
# Read the Neighborhood Tabulation Area boundaries from the local data folder
# and reproject them to Web Mercator (EPSG:3857) in one chained expression.
neighborhood = gpd.read_file(
    "./data/Neighborhood Tabulation Areas.geojson"
).to_crs(epsg=3857)

## Step 2.2 Census Data 

In [9]:
# Read the 2010 census tract geometries from the local data folder.
census_tract = gpd.read_file("./data/2010 Census Tracts.geojson")

In [10]:
# Read the tabular census attributes that are later merged onto the tracts.
census_data = pd.read_csv("./data/census_data.csv")

In [11]:
# Cast the tract identifier to text; it is the merge key used against the
# census tract layer.
# NOTE(review): presumably the GeoJSON stores 'boro_ct2010' as strings — confirm.
census_data['boro_ct2010']=census_data['boro_ct2010'].astype(str)

In [12]:
# Attach the census attributes to the tract geometries. merge() defaults to an
# inner join, so tracts without a matching 'boro_ct2010' row are dropped.
joined_data = census_tract.merge(census_data, on='boro_ct2010')

## Step 2.3 Join Data 

In [13]:
# Reproject the tracts to Web Mercator (EPSG:3857), the same CRS the
# neighborhood layer was converted to, so the spatial join compares like with like.
joined_data = joined_data.to_crs(epsg=3857)

In [14]:
# Spatially join every census tract to the neighborhood polygons it intersects,
# keeping all tracts (left join).
# NOTE(review): 'intersects' also matches polygons that merely share a boundary,
# so one tract may be paired with several neighborhoods and then be counted more
# than once in the per-neighborhood sums built from this table — confirm this is
# intended, or join on a point inside each tract instead.
# NOTE(review): newer GeoPandas releases rename `op=` to `predicate=` — confirm
# against the installed version.
gdata = gpd.sjoin(joined_data, neighborhood, op='intersects', how='left')

In [15]:
# Sum the tract-level columns per neighborhood name ('ntaname_right' is the
# neighborhood-side column produced by the spatial join).
gdata_group = gdata.groupby(['ntaname_right']).sum()

In [16]:
# Mark every aggregated column with '_sum', then turn the group key back into a
# regular column (it is not suffixed because it is still the index at that point).
gdata_group =gdata_group.add_suffix('_sum').reset_index()

In [17]:
# Keep the neighborhood name, the summed B01001_001E column and one summed
# count per national group.
_ORIGIN_GROUPS = ['ukrainian', 'turkish', 'romanian', 'russian', 'polish',
                  'german', 'italian', 'french', 'arabian']
_kept_columns = ['ntaname_right', 'B01001_001E_sum'] + [
    group + '_sum' for group in _ORIGIN_GROUPS
]
gdata_group = gdata_group[_kept_columns]

In [18]:
# Rename the join key so it matches the 'ntaname' column of the neighborhood
# layer. Rebinding the result (instead of inplace=True) avoids mutating a frame
# that was produced by column selection and keeps the cell safe to re-run.
gdata_group = gdata_group.rename(columns={'ntaname_right': 'ntaname'})

In [19]:
# Attach the per-neighborhood sums to the neighborhood polygons. merge()
# defaults to an inner join on 'ntaname'.
neighborhood_data = neighborhood.merge(gdata_group, on='ntaname')

## Step 2.4 Yelp API

In [152]:
from pandas.io.json import json_normalize
import json
from pprint import pprint
import io

In [153]:
# Yelp business search endpoint queried once per cuisine below.
url = "https://api.yelp.com/v3/businesses/search"

In [154]:
def _restaurant_query(category):
    """Return the Yelp search parameters for NYC restaurants in one category."""
    return {'term': 'restaurants', 'location': 'NYC', 'categories': category}


# One query per cuisine; only the category alias differs.
rus = _restaurant_query('russian')
ukr = _restaurant_query('ukrainian')
rom = _restaurant_query('romanian')
pol = _restaurant_query('polish')
ger = _restaurant_query('german')
ita = _restaurant_query('italian')
fra = _restaurant_query('french')
ara = _restaurant_query('arabian')

In [155]:
def _yelp_search(params):
    """Run one Yelp business search and return the HTTP response.

    A timeout stops the notebook from hanging on a stalled connection, and
    raise_for_status() surfaces authentication or quota errors here instead of
    as a confusing missing-key error when the payload is parsed later.
    """
    response = requests.get(url, params=params, headers=headers, timeout=30)
    response.raise_for_status()
    return response


rus_req = _yelp_search(rus)
ukr_req = _yelp_search(ukr)
rom_req = _yelp_search(rom)
pol_req = _yelp_search(pol)
ger_req = _yelp_search(ger)
ita_req = _yelp_search(ita)
fra_req = _yelp_search(fra)
ara_req = _yelp_search(ara)

In [157]:
def _businesses_frame(response):
    """Flatten the 'businesses' list of a Yelp response into a DataFrame."""
    payload = json.loads(response.text)
    flattened = json_normalize(payload['businesses'])
    return pd.DataFrame.from_dict(flattened, orient='columns')


# One table of search results per cuisine.
rus_res = _businesses_frame(rus_req)
ukr_res = _businesses_frame(ukr_req)
pol_res = _businesses_frame(pol_req)
ger_res = _businesses_frame(ger_req)
ita_res = _businesses_frame(ita_req)
fra_res = _businesses_frame(fra_req)
ara_res = _businesses_frame(ara_req)

In [29]:
rus_res.head()

Unnamed: 0,alias,categories,coordinates.latitude,coordinates.longitude,display_phone,distance,id,image_url,is_closed,location.address1,...,location.display_address,location.state,location.zip_code,name,phone,price,rating,review_count,transactions,url
0,russian-samovar-restaurant-and-piano-bar-new-y...,"[{'alias': 'russian', 'title': 'Russian'}, {'a...",40.7632,-73.98469,(212) 757-0168,6468.137461,lSJD2Gcf9LjJqpLE7pUlWQ,https://s3-media2.fl.yelpcdn.com/bphoto/o8_HWE...,False,256 W 52nd St,...,"[256 W 52nd St, New York, NY 10019]",NY,10019,Russian Samovar Restaurant and Piano Bar,12127570168,$$,4.0,400,"[restaurant_reservation, pickup, delivery]",https://www.yelp.com/biz/russian-samovar-resta...
1,farida-central-asian-cuisine-and-grill-new-york-2,"[{'alias': 'uzbek', 'title': 'Uzbek'}, {'alias...",40.75568,-73.99441,(646) 863-2020,5576.668196,tXZOJhUUknjTRRo9j4ckZg,https://s3-media1.fl.yelpcdn.com/bphoto/8PDT9g...,False,498 9th Ave,...,"[498 9th Ave, New York, NY 10018]",NY,10018,Farida Central Asian Cuisine & Grill,16468632020,$$,4.5,165,"[delivery, pickup]",https://www.yelp.com/biz/farida-central-asian-...
2,russian-vodka-room-new-york,"[{'alias': 'lounges', 'title': 'Lounges'}, {'a...",40.76349,-73.9848,(212) 307-5835,6503.730872,wJ9VnRlrJJP5QnVohBDjoA,https://s3-media3.fl.yelpcdn.com/bphoto/EJsBbP...,False,265 W 52nd St,...,"[265 W 52nd St, New York, NY 10019]",NY,10019,Russian Vodka Room,12123075835,$$,4.0,524,[],https://www.yelp.com/biz/russian-vodka-room-ne...
3,mari-vanna-new-york-2,"[{'alias': 'russian', 'title': 'Russian'}]",40.73878,-73.98858,(212) 777-1955,3739.97565,btLgjl8CEgW2wvB0ZI-Aqg,https://s3-media3.fl.yelpcdn.com/bphoto/Fp_3cL...,False,41 E 20th St,...,"[41 E 20th St, New York, NY 10003]",NY,10003,Mari Vanna,12127771955,$$$,4.0,619,"[pickup, delivery]",https://www.yelp.com/biz/mari-vanna-new-york-2...
4,tzarevna-new-york,"[{'alias': 'russian', 'title': 'Russian'}, {'a...",40.72082,-73.98874,(917) 388-2814,1768.212605,6Lz2pAw1LqIa7nKsRUT52A,https://s3-media4.fl.yelpcdn.com/bphoto/msxiBu...,False,154 Orchard St,...,"[154 Orchard St, New York, NY 10002]",NY,10002,Tzarevna,19173882814,,5.0,9,"[pickup, delivery]",https://www.yelp.com/biz/tzarevna-new-york?adj...


In [158]:
def _with_point_geometry(restaurants):
    """Wrap a restaurant table in a GeoDataFrame with one point per row.

    Points are built as (x=longitude, y=latitude) from the flattened Yelp
    coordinate columns.
    """
    longitudes = restaurants['coordinates.longitude']
    latitudes = restaurants['coordinates.latitude']
    return gpd.GeoDataFrame(
        restaurants, geometry=gpd.points_from_xy(longitudes, latitudes)
    )


rus_gpd = _with_point_geometry(rus_res)
ukr_gpd = _with_point_geometry(ukr_res)
pol_gpd = _with_point_geometry(pol_res)
ger_gpd = _with_point_geometry(ger_res)
ita_gpd = _with_point_geometry(ita_res)
fra_gpd = _with_point_geometry(fra_res)
ara_gpd = _with_point_geometry(ara_res)

# Step 3. Plot Pct Population

In [33]:
import folium
import osmnx as ox
import networkx as nx
import matplotlib.colors as mcolors
from folium.plugins import HeatMap
from IPython.display import IFrame
import requests
import json
import matplotlib.colors as mcolors
from xml.etree import ElementTree
from folium.plugins import MarkerCluster
from folium.map import *
from folium.plugins import MarkerCluster

## Step 3.1 Normalize the data columns to the range 0 to 1

In [34]:
# Share of each neighborhood's summed B01001_001E total that belongs to each
# national group: pct_<group> = <group>_sum / B01001_001E_sum.
_total_population = neighborhood_data['B01001_001E_sum']
for _group in ('russian', 'ukrainian', 'turkish', 'romanian', 'polish',
               'german', 'italian', 'french', 'arabian'):
    neighborhood_data['pct_' + _group] = (
        neighborhood_data[_group + '_sum'] / _total_population
    )

In [35]:
# Min-max scale every share column to [0, 1]:
# pct_<group>_normalized = (value - min) / (max - min).
for _group in ('russian', 'ukrainian', 'turkish', 'romanian', 'polish',
               'german', 'italian', 'french', 'arabian'):
    _share = neighborhood_data['pct_' + _group]
    _lowest = _share.min()
    _highest = _share.max()
    neighborhood_data['pct_' + _group + '_normalized'] = (
        (_share - _lowest) / (_highest - _lowest)
    )


In [36]:
# use a red-purple colorbrewer color scheme
def get_highlighted_style(feature):
    """Return the outline style used as the map's highlight function.

    The feature itself is ignored: every feature gets the same red outline
    of weight 2.
    """
    highlight = dict(weight=2, color='red')
    return highlight

# Red-purple ColorBrewer colormap shared by every population layer.
colorbar = plt.get_cmap('RdPu')

# Fill used when a neighborhood has no usable value (null / NaN).
_MISSING_COLOR = '#d3d3d3'


def _make_style_function(column):
    """Build a folium style function that colors features by `column`.

    Parameters
    ----------
    column : str
        Name of a feature property holding a value already scaled to [0, 1].

    Returns
    -------
    callable
        Function taking a GeoJSON feature dict and returning the style dict
        (thin outline, fill color taken from `colorbar`, 75% fill opacity).
    """
    def style_function(feature):
        value = feature['properties'][column]
        # `value != value` is true only for NaN. Missing values cannot be
        # mapped through the colormap, so they get a neutral fill instead.
        if value is None or value != value:
            color = _MISSING_COLOR
        else:
            color = mcolors.rgb2hex(colorbar(value))
        return {'weight': 0.25, 'color': color, 'fillColor': color, "fillOpacity": 0.75}

    return style_function


# One style function per population layer; only the property name differs.
get_style_russian = _make_style_function('pct_russian_normalized')
get_style_ukrainian = _make_style_function('pct_ukrainian_normalized')
get_style_romanian = _make_style_function('pct_romanian_normalized')
get_style_polish = _make_style_function('pct_polish_normalized')
get_style_german = _make_style_function('pct_german_normalized')
get_style_italian = _make_style_function('pct_italian_normalized')
get_style_french = _make_style_function('pct_french_normalized')
get_style_arabian = _make_style_function('pct_arabian_normalized')

In [37]:
# Marker color for each rating value; the two half-steps of a star share a color.
_RATING_COLORS = {
    1.0: 'beige', 1.5: 'beige',
    2.0: 'orange', 2.5: 'orange',
    3.0: 'lightred', 3.5: 'lightred',
    4.0: 'red', 4.5: 'red',
    5.0: 'darkred',
}


def get_colors(feature):
    """Return the folium marker color for a restaurant's rating.

    Parameters
    ----------
    feature : mapping
        Row-like object with a 'rating' entry.

    Returns
    -------
    str
        Color name for the rating, or 'lightgray' when the rating is not one
        of the listed values (including NaN).

    Ratings of 3 and 3.5 map to 'lightred'; the previous value 'lighted' is
    not a folium icon color.
    """
    return _RATING_COLORS.get(feature['rating'], 'lightgray')

In [39]:
# Derive each restaurant's marker color from its rating, row by row.
for _restaurants in (rus_gpd, ukr_gpd, pol_gpd, ger_gpd, ita_gpd):
    _restaurants["color"] = _restaurants.apply(get_colors, axis=1)


In [40]:
# Marker positions are [latitude, longitude] pairs.
_COORDINATE_COLUMNS = ['coordinates.latitude', 'coordinates.longitude']


def _marker_locations(restaurants):
    """Return the restaurants' coordinates as a list of [lat, lon] lists."""
    return restaurants[_COORDINATE_COLUMNS].values.tolist()


rus_locs = _marker_locations(rus_gpd)
ukr_locs = _marker_locations(ukr_gpd)
pol_locs = _marker_locations(pol_gpd)
ger_locs = _marker_locations(ger_gpd)
ita_locs = _marker_locations(ita_gpd)
fra_locs = _marker_locations(fra_gpd)
ara_locs = _marker_locations(ara_gpd)

NameError: name 'fra_gpd' is not defined

In [44]:
# initialize the map
m = folium.Map(location=[40.733189, -73.932258], zoom_start=11, tiles='cartodbpositron')


def _add_population_layer(label, share_column, style_function):
    """Add one choropleth layer of neighborhood population shares to `m`.

    `label` is the name shown in the layer control, `share_column` the column
    displayed in the tooltip next to the neighborhood name, and
    `style_function` the per-feature fill style.
    """
    folium.GeoJson(
        neighborhood_data,
        name=label,
        style_function=style_function,
        highlight_function=get_highlighted_style,
        tooltip=folium.GeoJsonTooltip(['ntaname', share_column])
    ).add_to(m)


def _add_restaurant_layer(label, locations, restaurants):
    """Add one toggleable group of restaurant markers to `m`.

    `locations` holds one [lat, lon] pair per row of `restaurants`; the marker
    color, name and street address are read from the matching row.
    """
    group = folium.FeatureGroup(name=label)
    rows = zip(locations, restaurants['color'], restaurants['name'],
               restaurants['location.address1'])
    for location, color, name, address in rows:
        # A missing address (None/NaN) would make the string concatenation
        # below raise TypeError, so fall back to a placeholder.
        address_text = address if isinstance(address, str) else 'n/a'
        group.add_child(folium.Marker(
            location,
            icon=folium.Icon(color=color),
            popup='Name: ' + name + '<br>Address: ' + address_text))
    m.add_child(group)


# Layers are added in the same order as before: population, then restaurants.
_add_population_layer('Russian Population', 'pct_russian', get_style_russian)
_add_restaurant_layer('Russian Restaurants', rus_locs, rus_gpd)

_add_population_layer('Ukrainian Population', 'pct_ukrainian', get_style_ukrainian)
_add_restaurant_layer('Ukrainian Restaurants', ukr_locs, ukr_gpd)

_add_population_layer('Polish Population', 'pct_polish', get_style_polish)
_add_restaurant_layer('Polish Restaurants', pol_locs, pol_gpd)

_add_population_layer('German Population', 'pct_german', get_style_german)
_add_restaurant_layer('German Restaurants', ger_locs, ger_gpd)

_add_population_layer('Italian Population', 'pct_italian', get_style_italian)
_add_restaurant_layer('Italian Restaurants', ita_locs, ita_gpd)

# The French layer has a population choropleth only; no marker group is added.
_add_population_layer('French Population', 'pct_french', get_style_french)

folium.LayerControl().add_to(m)


# avoid a notebook rendering bug by saving the map as HTML; open the saved
# file to view it
m.save('./data/map.html')

#  Interactive plots

In [45]:
import pandas as pd
import geopandas as gpd
import numpy as np
import cartopy.crs as ccrs
from shapely.geometry import Point
from matplotlib import pyplot as plt
%matplotlib inline
import altair as alt
import hvplot.pandas

## Restaurant Plots

In [159]:
# Label every result table with its cuisine so the tables can be stacked and
# still be told apart.
for _results, _cuisine in (
    (rus_res, 'Russian'),
    (ukr_res, 'Ukrainian'),
    (pol_res, 'Polish'),
    (ger_res, 'German'),
    (ita_res, 'Italian'),
    (fra_res, 'French'),
    (ara_res, 'Arabian'),
):
    _results['type'] = _cuisine

In [160]:
# Keep only the columns the plots need. These names previously held the Yelp
# query dictionaries and are rebound to DataFrames from here on.
_PLOT_COLUMNS = ['type', 'price', 'rating']

rus = rus_res[_PLOT_COLUMNS]
ukr = ukr_res[_PLOT_COLUMNS]
pol = pol_res[_PLOT_COLUMNS]
ger = ger_res[_PLOT_COLUMNS]
ita = ita_res[_PLOT_COLUMNS]
fra = fra_res[_PLOT_COLUMNS]
ara = ara_res[_PLOT_COLUMNS]

In [162]:
# Stack all cuisines into one table and add a constant column so that summing
# 'Count' inside a group gives the number of restaurants in it.
frames = [rus, ukr, pol, ger, ita, fra, ara]
allRes = pd.concat(frames).assign(Count=1)
len(allRes)

112

In [163]:
# Mean rating for every (price, cuisine) combination.
_by_price_and_type = allRes.groupby(['price', 'type'])
Res = _by_price_and_type['rating'].mean()
Res.head()

price  type     
$      Arabian      2.0
       Polish       4.5
       Russian      4.5
       Ukrainian    4.0
$$     Arabian      3.4
Name: rating, dtype: float64

In [164]:
# Grouped bar chart of the mean rating per price level, one bar per cuisine,
# written to a standalone HTML file.
Average_Rating = Res.hvplot.bar(x='price', y='rating', by='type', rot=90)
hvplot.save(Average_Rating, 'Average_Rating.html')

In [165]:
Average_Rating

In [166]:
allRes.head()

Unnamed: 0,type,price,rating,Count
0,Russian,$$,4.5,1
1,Russian,,5.0,1
2,Russian,$$$,4.0,1
3,Russian,$$,4.0,1
4,Russian,$$,4.0,1


In [167]:
# Number of restaurants for every (rating, cuisine) combination, as a flat table.
_count_by_rating = allRes.groupby(['rating', 'type'])['Count'].sum()
Res2 = _count_by_rating.reset_index()
Res2.head()

Unnamed: 0,rating,type,Count
0,2.0,Arabian,3
1,2.5,Arabian,1
2,2.5,German,1
3,3.0,Russian,2
4,3.5,German,6


In [168]:
# Heatmap of restaurant counts by cuisine (x) and rating (y).
heatmap = Res2.hvplot.heatmap(x='type', y='rating', C='Count',
                height=300, width=500, flip_yaxis=False, rot=90)
# redim() returns a new object rather than modifying the plot, so keep the
# result; otherwise the plot saved later lacks the relabelled 'Rating' axis.
heatmap = heatmap.redim(rating='Rating')
heatmap

In [169]:
# Write the heatmap to a standalone HTML file in the working directory.
hvplot.save(heatmap, 'Res_Rating_heatmap.html')

In [170]:
# Number of restaurants for every (price, cuisine) combination, as a flat table.
_count_by_price = allRes.groupby(['price', 'type'])['Count'].sum()
Res3 = _count_by_price.reset_index()
Res3.head()

Unnamed: 0,price,type,Count
0,$,Arabian,1
1,$,Polish,5
2,$,Russian,2
3,$,Ukrainian,2
4,$$,Arabian,5


In [171]:
# NOTE(review): `hv` is not referenced anywhere in the visible notebook —
# confirm whether this import is still needed.
import holoviews as hv
# Small-multiple line charts: one panel per cuisine (col='type') with the
# price level on the x axis.
PriceChart = Res3.hvplot(x='price', col='type', 
                  kind='line', width=200, 
                  height=150, rot=90)

In [172]:
# Write the price chart to a standalone HTML file in the working directory.
hvplot.save(PriceChart, 'PriceChart.html')



In [173]:
PriceChart



#  Neighborhood Plot

In [175]:
# Combined share of the seven groups that also have a restaurant table
# (Turkish and Romanian are not included), added left to right.
_combined_share = neighborhood_data['pct_russian']
for _column in ('pct_ukrainian', 'pct_polish', 'pct_german',
                'pct_italian', 'pct_french', 'pct_arabian'):
    _combined_share = _combined_share + neighborhood_data[_column]
neighborhood_data['pct_countries'] = _combined_share

In [176]:
# Keep only the three columns used by the neighborhood chart.
neighborhood_clean = neighborhood_data[['B01001_001E_sum','pct_countries','boro_name']]

In [177]:
# Plain DataFrame with reader-friendly column names; these names are used
# verbatim as field names in the chart.
neighborhood_rename = pd.DataFrame({
    'Total Population': neighborhood_clean['B01001_001E_sum'],
    'Borough': neighborhood_clean['boro_name'],
    'Percent Population From Selected Countries': neighborhood_clean['pct_countries'],
})
neighborhood_rename.head()

Unnamed: 0,Total Population,Borough,Percent Population From Selected Countries
0,196146,Brooklyn,0.142236
1,120532,Queens,0.13647
2,65546,Queens,0.019605
3,57052,Queens,0.027922
4,61926,Manhattan,0.092481


In [178]:
# Interval (drag-to-select) selection shared by both charts.
# NOTE(review): alt.selection(type=...), properties(selection=...) and
# brush.ref() are the older Altair selection API — confirm against the
# installed Altair version.
brush = alt.selection(type='interval')


# Scatter plot of neighborhoods: total population vs. combined share. Points
# inside the brush are colored by borough; points outside it turn light gray.
points = alt.Chart().mark_point().encode(
    x='Total Population:Q',
    y='Percent Population From Selected Countries:Q',
    color=alt.condition(brush, 'Borough:N', alt.value('lightgray'))
).properties(
    selection=brush,
    width=800
)


# Bar chart counting the neighborhoods per borough, restricted to the points
# currently inside the brush.
bars = alt.Chart().mark_bar().encode(
    y='Borough:N',
    color='Borough:N',
    x='count(Borough):Q'
).transform_filter(
    brush.ref() 
                
).properties(
width=800
)

# Stack the two charts vertically; both read from the same data table.
chart = alt.vconcat(points, bars, data=neighborhood_rename) 


In [179]:
# Write the linked charts to a standalone HTML file in the working directory.
chart.save("neighborhood_chart.html")