In [69]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [70]:
import json
import folium
from folium import plugins
from folium.plugins import MarkerCluster, Search

In [71]:
# Load the inspection records; the first CSV column is used as the row index
data = pd.read_csv(
    'data/food-inspections.csv',
    index_col=0,
)
data.head()

Unnamed: 0_level_0,DBA Name,AKA Name,License #,Facility Type,Risk,Address,City,State,Zip,Inspection Date,Inspection Type,Results,Violations,Latitude,Longitude,Location
Inspection ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2213460,"WARREN PARK HEALTH & LIVING CENTER, LLC",WARREN PARK HEALTH & LIVING CENTER,2208054.0,Long Term Care,Risk 1 (High),6700 N DAMEN AVE,CHICAGO,IL,60645.0,2018-08-24T00:00:00,Canvass Re-Inspection,Fail,"3. MANAGEMENT, FOOD EMPLOYEE AND CONDITIONAL E...",42.003389,-87.680461,"{'longitude': '-87.68046070936003', 'needs_rec..."
2213467,"LITTLE GEMS INTERNATIONAL (BELDEN),INC",LITTLE GEMS INTERNATIONAL,2216177.0,Daycare Above and Under 2 Years,Risk 1 (High),2301 N CLARK ST,CHICAGO,IL,60614.0,2018-08-24T00:00:00,License,Pass w/ Conditions,"3. MANAGEMENT, FOOD EMPLOYEE AND CONDITIONAL E...",41.923906,-87.639287,"{'longitude': '-87.63928669557026', 'needs_rec..."
2213434,J.P.E. DAY CARE CENTER,J.P.E. DAY CARE CENTER,2215626.0,Daycare (2 - 6 Years),Risk 1 (High),8625 S COTTAGE GROVE AVE,CHICAGO,IL,60619.0,2018-08-24T00:00:00,License Re-Inspection,Pass w/ Conditions,"3. MANAGEMENT, FOOD EMPLOYEE AND CONDITIONAL E...",41.737699,-87.604707,"{'longitude': '-87.60470700986774', 'needs_rec..."
2213426,ROYAL NUTRITON,ROYAL NUTRITION,2616205.0,Restaurant,Risk 2 (Medium),2624 W 59TH ST,CHICAGO,IL,60629.0,2018-08-24T00:00:00,License,Fail,"3. MANAGEMENT, FOOD EMPLOYEE AND CONDITIONAL E...",41.786578,-87.68988,"{'longitude': '-87.68987967619805', 'needs_rec..."
2213418,JIMMYS BEST,JIMMY'S BEST,2617464.0,,Risk 2 (Medium),6201 S ASHLAND AVE,CHICAGO,IL,60636.0,2018-08-24T00:00:00,License,No Entry,,41.781204,-87.664113,"{'longitude': '-87.66411327951789', 'needs_rec..."


## Preprocessing

### Facility type

In [72]:
# Keep only the rows whose facility type is known
data = data[data['Facility Type'].notna()]

In [73]:
# Select every facility whose type mentions "restaurant", ignoring case
is_restaurant = data['Facility Type'].str.contains('restaurant', case=False)
restaurants = data[is_restaurant]

In [74]:
# Distinct facility-type labels that matched the restaurant filter
pd.unique(restaurants['Facility Type'])

array(['Restaurant', 'RESTAURANT.BANQUET HALLS', 'GROCERY/RESTAURANT',
       'ROOFTOP/RESTAURANT', 'RESTAURANT/GAS STATION',
       'Theater & Restaurant', 'RESTAURANT/GROCERY STORE',
       'Grocery & Restaurant', 'RESTAURANT/BAR', 'GROCERY& RESTAURANT',
       'Restaurant(protein shake bar)', 'RESTAURANT/BAR/THEATER',
       'RESTAURANT AND LIQUOR', 'GAS STATION/RESTAURANT',
       'RESTAURANT/HOSPITAL', 'GROCERY/ RESTAURANT',
       'GROCERY STORE/ RESTAURANT', 'RESTAURANT/BAKERY',
       'RESTAURANT/GROCERY', 'TAVERN/RESTAURANT', 'grocery & restaurant',
       'BAKERY/ RESTAURANT', 'RESTAURANT/LIQUOR', 'bakery/restaurant',
       'GROCERY & RESTAURANT', 'tavern/restaurant'], dtype=object)

### Latitude, Longitude, Zip

In [75]:
# Keep only rows where both Longitude and Latitude are present (necessary to do before folium map)
# TODO: Populate those values from Google API results
has_coordinates = restaurants['Longitude'].notna() & restaurants['Latitude'].notna()
restaurants = restaurants[has_coordinates]

Next, we fill in the missing Zip values by locating each restaurant's Latitude and Longitude inside the Chicago Zip-code areas.

In [76]:
# Count the rows that lack a Zip or a City value
missing_zip_mask = restaurants['Zip'].isna()
restaurants_zip_na = restaurants[missing_zip_mask]
missing_city_count = len(restaurants[restaurants['City'].isna()])
print('There are {0} missing values for Zip column'.format(len(restaurants_zip_na)))
print('There are {0} missing values for City column'.format(missing_city_count))

There are 29 missing values for Zip column
There are 76 missing values for City column


In [77]:
import shapely
from shapely.geometry import shape, Point

In [78]:
def create_points(df):
    """Build one shapely Point per row of ``df``.

    Each point is created from the row's (Longitude, Latitude) pair, and the
    returned list follows the row order of ``df``.
    """
    # Fully qualified shapely name on purpose: the geojson package also defines a Point
    return [
        shapely.geometry.Point(lon_lat)
        for lon_lat in zip(df['Longitude'], df['Latitude'])
    ]

In [79]:
# Shapely points for the restaurants that have coordinates but no Zip
points = create_points(df=restaurants_zip_na)

In [80]:
# Checks which points lie in an area described in the geojson file and returns the zip value for each point found
def populate_missing_zip(points, geojson_filename):
    """Look up the Zip code of every point that lies inside a GeoJSON sector.

    Parameters
    ----------
    points : iterable
        Shapely points to locate; each exposes ``x`` (stored as Longitude)
        and ``y`` (stored as Latitude).
    geojson_filename : str
        Path to a GeoJSON file whose features carry a ``zip`` property.

    Returns
    -------
    list of dict
        One ``{'Longitude', 'Latitude', 'Zip'}`` record per (sector, point)
        match, ordered by sector and then by point. Points that fall outside
        every sector produce no record.
    """
    # Context manager so the file handle is closed once the JSON is parsed
    with open(geojson_filename) as geojson_file:
        geo_json_data = json.load(geojson_file)

    # Materialise once: a one-shot iterator would be exhausted after the first sector
    points = list(points)

    zip_found = []
    # check each polygon to see if it contains the point
    for feature in geo_json_data['features']:
        polygon = shape(feature['geometry'])
        zip_code = (feature.get('properties') or {}).get('zip')
        for point in points:
            if polygon.contains(point):
                zip_found.append({'Longitude': point.x, 'Latitude': point.y, 'Zip': zip_code})
    return zip_found

In [81]:
# Look up the sector of every point that lacks a Zip and report how many matched
zip_found = populate_missing_zip(points, geojson_filename='chicago-zip.json')
matched_count = len(zip_found)
print('Total {0} point found matching sector.'.format(matched_count))

Total 29 point found matching sector.


In [82]:
# Tabulate the matches. Naming the columns keeps the frame mergeable on
# Latitude/Longitude even when no point matched (an empty list would otherwise
# produce a frame with no columns at all).
zip_found = pd.DataFrame(zip_found, columns=['Longitude', 'Latitude', 'Zip'])
zip_found.head()

Unnamed: 0,Longitude,Latitude,Zip
0,-87.673459,42.019032,60626
1,-87.673459,42.019032,60626
2,-87.673459,42.019032,60626
3,-87.659816,41.968491,60640
4,-87.659816,41.968491,60640


In [83]:
# Collapse repeated points before merging (TODO: maybe drop duplicates before creating points)
zip_found = zip_found.drop_duplicates()

In [84]:
# Attach the looked-up Zip to every restaurant that shares the same coordinates
restaurants = restaurants.merge(zip_found, on=['Latitude', 'Longitude'], how='left', suffixes=('', '_notnull'))
# Assign the filled column back: an in-place fillna on `restaurants.Zip` is chained
# assignment, which pandas warns about and which has no effect under Copy-on-Write
restaurants['Zip'] = restaurants['Zip'].fillna(restaurants['Zip_notnull'])
restaurants = restaurants.drop(columns=['Zip_notnull'])

In [85]:
# Sanity check: every restaurant should now have a Zip
remaining_missing_zip = len(restaurants[restaurants.Zip.isnull()])
print('There are {0} missing Zip left in the restaurant dataset.'.format(remaining_missing_zip))

There are 0 missing Zip left in the restaurant dataset.


In [86]:
# Fix city values, check if they have Zip number which corresponds to Chicago Zips

In [87]:
# Get all Chicago zips
def create_chicago_zip_list(geojson_filename='chicago-zip.json'):
    """Return the set of Zip codes, as strings, listed in a GeoJSON file.

    Parameters
    ----------
    geojson_filename : str, optional
        Path to a GeoJSON file whose features carry a ``zip`` property.
        Defaults to 'chicago-zip.json'.

    Returns
    -------
    set of str
        The distinct ``zip`` values found. Features without a ``zip``
        property are skipped.
    """
    # Context manager so the file handle is closed once the JSON is parsed
    with open(geojson_filename) as geojson_file:
        geo_json_data = json.load(geojson_file)

    zips = set()
    for feature in geo_json_data['features']:
        zip_code = (feature.get('properties') or {}).get('zip')
        # Skip features without a zip; str(None) would add a bogus 'None' entry
        if zip_code is not None:
            zips.add(str(zip_code))
    return zips

In [88]:
# Set of Zip codes (as strings) read from the Chicago GeoJSON file
chicago_zip = create_chicago_zip_list()
# Display how many distinct Zip codes were found
len(chicago_zip)

59

<div style="border:1px solid black; padding:10px 10px; background-color: LightPink;">
    <strong>In order to compare Zip codes, their values should be strings!</strong>
</div>

In [89]:
def _zip_as_text(value):
    """Render a Zip value as its integer digits, e.g. 60645.0 -> '60645'."""
    return str(int(value))

# Normalise every Zip to a string so it can be compared with the GeoJSON zips
restaurants['Zip'] = restaurants['Zip'].apply(_zip_as_text)

In [90]:
# Check if there is any restaurant without a City whose Zip is not a Chicago Zip
city_missing = restaurants.City.isna()
zip_outside_chicago = ~restaurants.Zip.isin(chicago_zip)
not_in_chicago = len(restaurants[city_missing & zip_outside_chicago])
print('There are {0} Zip values which are not in Chicago.'.format(not_in_chicago))

There are 0 Zip values which are not in Chicago.


In [91]:
# Replace all City missing values with Chicago.
# Assign the result back: an in-place fillna on `restaurants.City` is chained
# assignment, which pandas warns about and which has no effect under Copy-on-Write
restaurants['City'] = restaurants['City'].fillna('Chicago')

In [92]:
# Sanity check: every restaurant should now have a City
remaining_missing_city = len(restaurants[restaurants.City.isnull()])
print('There are {0} missing City values left in the restaurant dataset.'.format(remaining_missing_city))

There are 0 missing City values left in the restaurant dataset.


## Visualising the restaurants

In [93]:
# Empty base map at the coordinates used for every map in this notebook
map_center = [41.8781, -87.6298]
m = folium.Map(location=map_center, zoom_start=15)
m

In [94]:
# Visualize unique restaurants only: one row per distinct (name, latitude, longitude)
map_columns = ['DBA Name', 'Latitude', 'Longitude']
restaurants_unique = restaurants[map_columns].drop_duplicates()

In [95]:
# Preview the first five unique restaurants
restaurants_unique.head(n=5)

Unnamed: 0,DBA Name,Latitude,Longitude
0,ROYAL NUTRITON,41.786578,-87.68988
1,OMAKASE YUME,41.883053,-87.644689
2,LEGEND TASTY HOUSE,41.938784,-87.644209
3,HERITAGE CAFE,41.75146,-87.578908
4,MYRON MIXON'S SMOKE SHOW BARBECUE,41.951176,-87.659542


In [96]:
# All restaurants in one map
# MarkerCluster groups the markers added below into a single cluster layer
from folium.plugins import MarkerCluster

# Base map that the marker cluster is added to
map1 = folium.Map(location=[41.8781, -87.6298], zoom_start=10)

#TODO: Try to change color based on pass/fail
def color(magnitude):
    """Return 'green' when ``magnitude`` is below 41.9 and 'red' otherwise."""
    return 'green' if magnitude < 41.9 else 'red'

cluster = MarkerCluster()

# restaurants_unique holds exactly (DBA Name, Latitude, Longitude); unpacking by
# name avoids the fragile row[1]/row[2]/row[3] offsets
for name, latitude, longitude in restaurants_unique.itertuples(index=False, name=None):
    folium.Marker(
        [latitude, longitude],
        # parse_html=True escapes the name, so characters such as & < > ' in the
        # data are shown literally instead of being interpreted as popup markup
        popup=folium.Popup(str(name), parse_html=True),
        icon=folium.Icon(color='red', icon='info-sign'),
    ).add_to(cluster)
map1.add_child(cluster)
map1.save("all_restaurants_map.html")

Links for adding further interaction to the maps:
https://blog.dominodatalab.com/creating-interactive-crime-maps-with-folium/
https://nbviewer.jupyter.org/github/python-visualization/folium/blob/master/examples/plugin-Search.ipynb

In [97]:
#ICONS
#I keep finding this result in my Google searches for list of icons and always ending running help(folium.Icon) to find the following urls:

#https://github.com/lvoogdt/Leaflet.awesome-markers
#https://fontawesome.com/icons?from=io
#So this is a service to future me.

#You can find a full list here:
#https://fontawesome.com/icons?d=gallery
#use with: prefix='fa'

#or the glyphicon icons of Bootstrap that are built-in and standard, thus without prefix needed. https://getbootstrap.com/docs/3.3/components/

In [None]:
# Search with marker cluster is not implemented; it can work with a smaller number of restaurants
# MarkerCluster groups the markers; Search adds the search box used further down
from folium.plugins import MarkerCluster, Search

# Base map for the search example
map2 = folium.Map(location=[41.8781, -87.6298], zoom_start=10)

#Changing color works!!
def get_color(magnitude):
    """Return 'green' when ``magnitude`` is below 41.9 and 'red' otherwise."""
    return 'green' if magnitude < 41.9 else 'red'

cluster2 = MarkerCluster()

# restaurants_unique holds exactly (DBA Name, Latitude, Longitude); unpacking by
# name avoids the fragile row[1]/row[2]/row[3] offsets
for name, latitude, longitude in restaurants_unique.itertuples(index=False, name=None):
    folium.Marker(
        [latitude, longitude],
        # parse_html=True escapes the name, so characters such as & < > ' in the
        # data are shown literally instead of being interpreted as popup markup
        popup=folium.Popup(str(name), parse_html=True),
        # The marker colour is chosen from the latitude by get_color
        icon=folium.Icon(color=get_color(latitude), icon='info-sign'),
    ).add_to(cluster2)
    
def _named_point_feature(name, longitude, latitude):
    """Build a GeoJSON Point feature carrying a searchable ``name`` property."""
    return {
        "type": "Feature",
        "properties": {"name": name},
        "geometry": {"type": "Point", "coordinates": [longitude, latitude]},
    }

# Sample features for the search box. Kept under its own name so the list of
# shapely points stored in `points` (used for the Zip lookup) is not overwritten.
search_points = {
    "type": "FeatureCollection",
    "features": [
        _named_point_feature("ROYAL NUTRITON", -87.689880, 41.786578),
        _named_point_feature("two", -74.78991444, 42.82995815),
        _named_point_feature("three", -78.56603306, 39.17929819),
    ],
}
# Search with marker cluster is not implemented, so the search box is bound to
# a plain GeoJson layer instead
geojson_obj = folium.GeoJson(search_points).add_to(map2)
Search(layer=geojson_obj,
       geom_type='Point',
       placeholder="Search",
       collapsed=True,
       search_label='name',
       search_zoom=14,
       position='topright'
       ).add_to(map2)

#Search(cluster2, search_zoom=6, geom_type="Polygon").add_to(map2)
map2.add_child(cluster2)
map2.save("search_example.html")

In [None]:
state_geo_path = r'chicago-zip.json'
# Context manager so the file handle is closed once the JSON is parsed
with open(state_geo_path) as geojson_file:
    geo_json_data = json.load(geojson_file)

In [None]:
# Code for safe to eat vs dangerous to eat places
# TODO: replace the coloring function (checking from dataset)
state_geo_path = r'chicago-zip.json'
# Context manager so the file handle is closed once the JSON is parsed
with open(state_geo_path) as geojson_file:
    geo_json_data = json.load(geojson_file)
def color_zip(data):
    """Pick the fill colour for one GeoJSON feature from its ``zip`` property.

    Placeholder rule: 'green' when the zip contains the digit '3', 'red'
    otherwise, including when the feature has no zip at all.
    """
    # Named `zip_code`, not `zip`, so the builtin zip() is not shadowed
    zip_code = (data.get('properties') or {}).get('zip')
    # Guard against a missing zip: `'3' in None` would raise TypeError
    if zip_code is not None and '3' in str(zip_code):
        return 'green'
    return 'red'

def _zip_area_style(feature):
    """Style for one zip area: fill colour from color_zip, dashed black outline."""
    return {
        'fillColor': color_zip(feature),
        'color' : 'black',
        'weight' : 2,
        'dashArray' : '5, 5'
    }

results_map = folium.Map(location=[41.8781, -87.6298], zoom_start=10)
# The GeoJSON layer is built from the file path
zip_layer = folium.GeoJson(state_geo_path, style_function=_zip_area_style)
zip_layer.add_to(results_map)

results_map.save('neighborhood_viz_two_colors_example.html')

In [None]:
# Transform restaurants data into geojson (convert all restaurants to points)
# I tried this because search map works only with Geojson
# geojson's Point is aliased on import: a bare `Point` would replace the shapely
# Point imported earlier in the notebook
from geojson import Point as GeoJsonPoint, Feature, FeatureCollection, dump

features = []
# restaurants_unique holds exactly (DBA Name, Latitude, Longitude)
for name, latitude, longitude in restaurants_unique.itertuples(index=False, name=None):
    # The point is built as (longitude, latitude), the reverse of the column order
    features.append(
        Feature(geometry=GeoJsonPoint((longitude, latitude)), properties={"name": name})
    )

feature_collection = FeatureCollection(features)

with open('restaurants.geojson', 'w') as f:
    dump(feature_collection, f)


In [None]:
# Works but the map cannot load completely, there are too many markers -> Any idea how to reduce number of restaurants we show?
map2 = folium.Map(location=[41.8781, -87.6298], zoom_start=16)
state_geo_path = r'restaurants.geojson'
# Context manager so the file handle is closed once the JSON is parsed
with open(state_geo_path) as geojson_file:
    geo_json_data = json.load(geojson_file)
geojson_obj = folium.GeoJson(geo_json_data).add_to(map2)
# Search box over the `name` property of every restaurant point
Search(layer=geojson_obj,
       geom_type='Point',
       placeholder="Search",
       collapsed=True,
       search_label='name',
       search_zoom=14,
       position='topright'
       ).add_to(map2)
map2.save("map5.html")

## Assessing how "dangerous" is the neighborhood? 

In [None]:
# Distinct inspection outcomes present in the restaurant data
pd.unique(restaurants['Results'])

In [None]:
# Get contingency table: one row per Zip, one column per inspection result
result_counts = restaurants.groupby(['Zip', 'Results']).size()
inspections_scores_by_zip = result_counts.unstack('Results', fill_value=0)
inspections_scores_by_zip.head()

If an inspection finds the restaurant suspended, which means Out of Business or Business Not Located (according to the docs), it counts for -2 points. A Fail counts for -1 point, a Pass w/ Conditions for 0.5 points, and a Pass for 1 point. We then divide the sum by the total number of inspections in that Zip code. This formula is made up, so we can change it at any time.

In [None]:
def get_safety_score(df):
    """Compute a per-row safety score from inspection-result counts.

    Each row of ``df`` holds the number of inspections per result. The score
    is the weighted sum of those counts divided by the row's total number of
    inspections. Weights: Business Not Located -2, Out of Business -2,
    Fail -1, Pass w/ Conditions 0.5, Pass 1. Any other numeric column only
    adds to the total.

    Parameters
    ----------
    df : pandas.DataFrame
        Counts per result, one column per result name.

    Returns
    -------
    pandas.Series
        The score of each row, indexed like ``df``.
    """
    weights = {
        'Business Not Located': -2,
        'Out of Business': -2,
        'Fail': -1,
        'Pass w/ Conditions': 0.5,
        'Pass': 1,
    }
    # A result that never occurs has no column in the table; treat it as zero
    # instead of failing with a KeyError
    score = sum(
        weight * df[result]
        for result, weight in weights.items()
        if result in df.columns
    )
    # Total over the count columns only: skip a previously added 'Safety score'
    # and any non-numeric column so that re-running the cell gives the same result
    counts = df.drop(columns=['Safety score'], errors='ignore').select_dtypes(include='number')
    number_of_inspections = counts.sum(axis=1)
    return score / number_of_inspections

In [None]:
# Add safety score to DF
safety_score = get_safety_score(inspections_scores_by_zip)
inspections_scores_by_zip['Safety score'] = safety_score
inspections_scores_by_zip.head()

Now, let's visualize.

In [None]:
# We need Zip to be a column in order to visualize, and Zip must be string
inspections_scores_by_zip = inspections_scores_by_zip.reset_index(level=0)
inspections_scores_by_zip['Zip'] = inspections_scores_by_zip['Zip'].apply(str)

In [None]:
state_geo_path = r'chicago-zip.json'
# Context manager so the file handle is closed once the JSON is parsed
with open(state_geo_path) as geojson_file:
    geo_json_data = json.load(geojson_file)

# Color palettes
# 'BuGn', 'BuPu', 'GnBu', 'OrRd', 'PuBu', 'PuBuGn', 'PuRd', 'RdPu','YlGn', 'YlGnBu', 'YlOrBr', and 'YlOrRd'

map3 = folium.Map(location=[41.8781, -87.6298], zoom_start=10)
# folium.Choropleth replaces the deprecated Map.choropleth method, which newer
# folium releases no longer provide
folium.Choropleth(
    geo_data=geo_json_data,
    data=inspections_scores_by_zip,
    columns=['Zip', 'Safety score'],
    # Join each row to the GeoJSON feature whose `zip` property equals its Zip
    key_on='feature.properties.zip',
    fill_color='YlGn', fill_opacity=0.7, line_opacity=0.2,
    # The plotted value is the weighted safety score, not a percentage
    legend_name='Safety score',
).add_to(map3)
map3.save('safe-vs-dangerous-map.html')

You know how people always say that the best food on your trip will be in some restaurant recommended by locals? They know good places that are far from the tourist crowds and have cheaper prices as well. If you are lucky enough to know someone from Chicago, they can show you one such hidden gem. However, if you decide to risk it on your own and choose a district away from the main tourist spots, then in general you risk ending up at a low-quality place.


<div style="border:1px solid black; padding:10px 10px; background-color: LightGreen;">
    According to the safety scores we calculated, it seems that the safest places to eat are <strong>the two Airports and the City Center.</strong> That's right, you may need to pay extra, but you can be quite sure that the restaurant you are eating at is fulfilling the regulations. As you go further from these places, the risk increases. 
</div>