In [77]:
import ast
import json
from pathlib import Path

import folium
import folium.plugins
import numpy as np
import pandas as pd
import seaborn as sns
from area import area
from folium import plugins
from folium.plugins import HeatMap
from folium.plugins import MiniMap
from folium.plugins import Search
from folium.plugins import TimestampedGeoJson
from matplotlib import rcParams

# figure size in inches
rcParams['figure.figsize'] = 11.7,8.27

# [latitude, longitude] pair passed as the initial `location` of the folium maps below.
coords_chicago = [41.8781, -87.6298]

In [83]:
# Scraped inspection records enriched with Yelp information.
cfi_recent = pd.read_csv('data/chicago-food-inspections/scrapped_with_categories.csv')

# Discard the rows whose 'Latitude' starts with '[' instead of holding a single
# coordinate. The vectorised string test replaces a per-row `.iloc` loop; going
# through `astype(str)` means missing or empty latitudes are simply kept instead
# of raising while the mask is built.
has_bracketed_latitude = cfi_recent['Latitude'].astype(str).str.startswith('[')
cfi_recent = cfi_recent[~has_bracketed_latitude]

cfi_insp = pd.read_csv('data/cfi_insp.csv')
crimes = pd.read_csv('data/crimes_clean.csv')

# Attach the inspection scores; the default inner merge keeps only the licenses
# present in both tables.
cfi_score = pd.read_csv('data/cfi_score.csv')
cfi_recent = cfi_recent.merge(cfi_score, on='License #')

# Keep a single row per restaurant name and location.
cfi_recent.drop_duplicates(['DBA Name', 'Longitude', 'Latitude'], inplace=True)

### Heatmap

First, we plot a density map of Chicago restaurants to get an idea of which parts of the city host most of the restaurants.

In [109]:
# GeoJSON drawn as a grey outline on the maps below.
area_geo_path = r'city.geojson'
# A context manager closes the file handle as soon as the GeoJSON is parsed;
# `json.load(open(...))` left it open until garbage collection.
with open(area_geo_path) as geojson_file:
    geo_json_chicago_area = json.load(geojson_file)

In [111]:
# Restaurants for which the coordinates and the Yelp rating are all known.
data = cfi_recent[['Latitude', 'Longitude', 'Yelp rating']].dropna()
restaurant_density = folium.Map(location=coords_chicago, zoom_start=10)

# add the boundaries to the map
folium.GeoJson(geo_json_chicago_area, style_function=lambda x: {'color': 'grey', 'fillOpacity':0.2}).add_to(restaurant_density)

# One [latitude, longitude] pair per restaurant, built in a single vectorised
# call instead of walking the frame row by row with `iterrows`.
heat_data = data[['Latitude', 'Longitude']].values.tolist()

# Plot it on the map
HeatMap(heat_data, min_opacity=0.1, radius=15, blur=5).add_to(restaurant_density)

# Minimap
minimap = MiniMap(toggle_display=True, position='bottomleft')
restaurant_density.add_child(minimap)

# NOTE: a plugins.Fullscreen button was tried here but does not work on the HTML website.
restaurant_density.save('density.html')

### Evolution map

Below is the code used to plot the time-evolving map of the inspection results of Chicago restaurants. In this map, each point represents the result of one inspection of a restaurant (either fail, pass or pass w/ conditions).

In [101]:
# Function used to add a 'color' column to the dataset: each color tells whether
# the restaurant passed, passed with conditions or failed the inspection.
def results_to_color(s):
    '''
    Translate an inspection result into the color of its marker.

    Input:
            s: string containing the result of the inspection
    Output:
            'darkred' for 'Fail', 'orange' for 'Pass', 'red' for
            'Pass w/ Conditions'; None for any other value
    '''
    color_by_result = {
        'Fail': 'darkred',
        'Pass': 'orange',
        'Pass w/ Conditions': 'red',
    }
    # dict.get falls back to None, like the implicit return of an if-chain.
    return color_by_result.get(s)

In [102]:
# Marker color of every inspection, derived element-wise from its 'Results' value.
inspection_colors = cfi_insp['Results'].map(results_to_color)
cfi_insp['color'] = inspection_colors

In [103]:
#Note: this code has been inspired by an article written on the website TowardDataScience.

def create_geojson_features(df):
    '''
    Build the list of GeoJSON point features later used to create the dynamic map.

    Input:
            df: pandas DataFrame providing the columns 'Longitude', 'Latitude',
                'Inspection Date', 'DBA Name', 'Inspection Type' and 'color'
    Output:
            features: list containing one GeoJSON 'Feature' dict per row of df
                      (an empty list when df has no rows)
    '''
    features = []
    for _, row in df.iterrows():
        inspection_date = str(row['Inspection Date'])
        marker_color = row['color']
        feature = {
            'type': 'Feature',
            'geometry': {
                'type': 'Point',
                # GeoJSON positions are ordered longitude first, then latitude.
                'coordinates': [row['Longitude'], row['Latitude']]
            },
            'properties': {
                # Characters 0-3 and 5-6 of the date string, joined as 'YYYY/MM'
                # (assumes the date is rendered as 'YYYY-MM-...' - TODO confirm).
                'time': inspection_date[0:4] + '/' + inspection_date[5:7],
                'popup': '<b>Name: </b>' + str(row['DBA Name']) + '\n' + '<br><b>Reason of inspection: </b>' + str(row['Inspection Type']),
                'style': {'color': marker_color},
                'icon': 'circle',
                'iconstyle': {
                    'fillColor': marker_color,
                    'fillOpacity': 1,
                    # A real boolean: the string 'False' used previously is a
                    # non-empty string, hence truthy once it reaches JavaScript,
                    # so it did not switch the outline off.
                    'stroke': False,
                    'radius': 5
                }
            }
        }
        features.append(feature)
    return features

In [104]:
def make_map(features, duration=None):
    '''
    Create the time-slider map from the features created before.

    Input:
            features: list of GeoJSON features, wrapped here in a FeatureCollection
            duration: forwarded as `duration` to TimestampedGeoJson; None by default,
            meaning that a point stays on the map once it has been drawn (another
            value could be 'P1M' for instance, meaning that the point only stays
            for one period of time)
    Output:
            Folium map containing the timestamped points and a color legend
    '''
    chicago_map = folium.Map(location=[41.8781, -87.6298], control_scale=True, zoom_start=10)

    # Time-aware layer: one step per month, driven by the 'time' property of each feature.
    feature_collection = {'type': 'FeatureCollection', 'features': features}
    timeline_layer = TimestampedGeoJson(
        feature_collection,
        period='P1M',
        duration=duration,
        add_last_point=True,
        auto_play=False,
        loop=False,
        max_speed=1,
        loop_button=True,
        date_options='YYYY/MM',
        time_slider_drag_update=True,
    )
    timeline_layer.add_to(chicago_map)

    # Legend entries: every entry after the first starts on a new line.
    first_entry = """<i class="fa fa-circle fa-1x" style="color:{col}"></i>  &nbsp;{item} """
    following_entry = """<br> &nbsp; <i class="fa fa-circle fa-1x" style="color:{col}"></i>  &nbsp;{item} """
    legend_entries = (
        first_entry.format(item="Pass", col="orange")
        + following_entry.format(item="Pass w/ conditions", col="red")
        + following_entry.format(item="Fail", col="darkred")
    )

    legend_template = """
         <div style="
         position: fixed; 
         top: 20px; right: 20px; width: 170px; height: 65px; 
         border:2px solid grey; z-index:9999; 

         background-color:white;
         opacity: .85;

         font-size:14px;
         font-weight: bold;
         ">&nbsp; {itm_txt}
          </div> """
    legend_html = legend_template.format(itm_txt=legend_entries)
    chicago_map.get_root().html.add_child(folium.Element(legend_html))

    return chicago_map

In [105]:
def create_the_map(df, N, duration, random_state=None):
    '''
    Sample the dataset, build the GeoJSON features of the sample and return the
    map drawn from them. A subsample is needed when duration is None, otherwise
    the map becomes too crowded to be readable.

    Input:
            df: Data set as a Pandas DataFrame
            N: Number of samples to take from the dataset; values larger than
            the number of rows are clamped to len(df) instead of raising
            duration: Time that a point stays on the map; None means that the
            point stays forever once drawn (another value could be 'P1M' for
            instance, meaning that the point only stays for one period of time)
            random_state: optional seed forwarded to DataFrame.sample so that the
            same subsample can be drawn again; None (default) keeps the draw random
    Output:
            Folium map containing the restaurants
    '''
    # DataFrame.sample raises when asked for more rows than exist (without
    # replacement), so never request more than the frame holds. N=None is passed
    # through untouched to keep pandas' own default.
    n_rows = N if N is None else min(N, len(df))
    data = df.sample(n_rows, random_state=random_state)
    features = create_geojson_features(data)
    return make_map(features, duration)

In [114]:
# Evolution map built from a sample of 68000 inspections, each point being
# given the duration 'P1M'.
N_SAMPLED_INSPECTIONS = 68000
map_1 = create_the_map(cfi_insp, N_SAMPLED_INSPECTIONS, 'P1M')

# Overlay the grey city outline, then export the map as a standalone HTML page.
city_outline = folium.GeoJson(
    geo_json_chicago_area,
    style_function=lambda feature: {'color': 'grey', 'fillOpacity':0.2},
)
city_outline.add_to(map_1)
map_1.save('map_evolution_inspection_full.html')

## Yelp rating

Plot the restaurants on a map together with their Yelp rating. On this plot, the color of each point represents the rating of the restaurant. Note that, to avoid a very crowded map, we only plot some of the points and not the whole dataset.

In [96]:
def yelp_to_color(num):
    '''
    Pick the marker color that encodes a Yelp rating.

    Input:
            num: Yelp rating as a number
    Output:
            'beige' for [1, 2), 'orange' for [2, 3), 'red' for [3, 4),
            'darkred' for [4, 5] and 'black' for any other value
    '''
    # Half-open bands [lower, upper) tried in order.
    half_open_bands = ((1, 2, 'beige'), (2, 3, 'orange'), (3, 4, 'red'))
    for lower, upper, color in half_open_bands:
        if lower <= num < upper:
            return color
    # The top band also includes its upper bound.
    return 'darkred' if 4 <= num <= 5 else 'black'

In [97]:
# Marker color of every restaurant, derived element-wise from its Yelp rating.
yelp_colors = cfi_recent['Yelp rating'].map(yelp_to_color)
cfi_recent['Yelp color'] = yelp_colors

#### Simple map

In [416]:
N_top = 250  # Display the 250 restaurants with the most Yelp reviews
chicago_map = folium.Map(location=coords_chicago, control_scale=True, zoom_start=10)
folium.GeoJson(geo_json_chicago_area, style_function=lambda x: {'color': 'grey', 'fillOpacity':0.2}).add_to(chicago_map)

# Restaurants with complete information, most reviewed first.
data = (
    cfi_recent[['Latitude', 'Longitude', 'Yelp name', 'Yelp color', 'Yelp review count']]
    .dropna()
    .sort_values(by='Yelp review count', ascending=False)
)

# `head` stops at the number of available rows, so the loop cannot run past the
# end of the frame the way `data.iloc[i]` over `range(N_top)` could.
for _, row in data.head(N_top).iterrows():
    folium.Marker(
        [row['Latitude'], row['Longitude']],
        tooltip=row['Yelp name'],
        icon=folium.Icon(color=row['Yelp color'], icon='cutlery'),
    ).add_to(chicago_map)

chicago_map.save('Yelp_rating.html')

#### Dual map

This part plots a dual map: two side-by-side maps in the same spirit as the previous one. Plotting two maps lets us see the difference between two groups of restaurants. Here we chose to highlight the difference between the restaurants with the highest number of reviews on Yelp and the ones with the smallest number of reviews.

In [116]:
chicago_dual_map = plugins.DualMap(location=coords_chicago, tiles=None, zoom_start=10)
folium.GeoJson(geo_json_chicago_area, style_function=lambda x: {'color': 'grey', 'fillOpacity':0.2}).add_to(chicago_dual_map)

folium.TileLayer('openstreetmap').add_to(chicago_dual_map)

N_top = 100  # Number of restaurants displayed on each of the two maps

# Restaurants with complete information, most reviewed first.
data = (
    cfi_recent[['Latitude', 'Longitude', 'Yelp name', 'Yelp color', 'Yelp review count']]
    .dropna()
    .sort_values(by='Yelp review count', ascending=False)
)

# `head`/`tail` stop at the number of available rows, so neither selection can
# index past the end of the frame. The tail is reversed to keep the previous
# insertion order (least reviewed first).
most_reviewed = data.head(N_top)
least_reviewed = data.tail(N_top).iloc[::-1]

# m1 shows the most reviewed restaurants, m2 the least reviewed ones.
for target_map, restaurants in ((chicago_dual_map.m1, most_reviewed), (chicago_dual_map.m2, least_reviewed)):
    for _, row in restaurants.iterrows():
        folium.Marker(
            [row['Latitude'], row['Longitude']],
            tooltip=row['Yelp name'],
            icon=folium.Icon(color=row['Yelp color'], icon='cutlery'),
        ).add_to(target_map)

# Legend: one colored dot per rating band.
item_txt = """<i class="fa fa-circle fa-1x" style="color:{col}"></i>  &nbsp;{item} """
html_itms_1 = item_txt.format(item="Yelp rating < 2", col="#fbf3d4")
item_txt = """<br> &nbsp; <i class="fa fa-circle fa-1x" style="color:{col}"></i>  &nbsp;{item} """
html_itms_2 = item_txt.format(item="2 "+"\u2264"+" Yelp rating < 3", col="orange")
html_itms_3 = item_txt.format(item="3 "+"\u2264"+" Yelp rating < 4", col="red")
html_itms_4 = item_txt.format(item="4 "+"\u2264"+" Yelp rating", col="darkred")

legend_html = """
             <div style="
             position: fixed; 
             top: 20px; right: 20px; width: 170px; height: 85px; 
             border:2px solid grey; z-index:9999; 

             background-color:white;
             opacity: .85;

             font-size:14px;
             font-weight: bold;
             ">&nbsp; {itm_txt}
              </div> """.format(itm_txt=html_itms_1+html_itms_2+html_itms_3+html_itms_4)
chicago_dual_map.get_root().html.add_child(folium.Element(legend_html))

chicago_dual_map.save('dual.html')

## Final plot

The code below generates the final plot, which summarizes most of the information contained in the previous plots. This plot is also highly interactive: the user can choose the food category, search for a specific area, and view the inspection score of any restaurant as well as its Yelp rating.

In [84]:
# GeoJSON of the community areas, used for the crime choropleth and the search box.
area_geo_path = r'./data/boundaries.geojson'
# A context manager closes the file handle as soon as the GeoJSON is parsed;
# `json.load(open(...))` left it open until garbage collection.
with open(area_geo_path) as geojson_file:
    geo_json_chicago_area_crime = json.load(geojson_file)

In [85]:
# For the crime zones: number of crimes recorded in 2019 per community area
recent_crimes = crimes[crimes['Year'] == 2019]
cts = recent_crimes['Community Area'].value_counts()
n_areas = pd.DataFrame(cts.values, cts.index, columns=["crimes"])
n_areas['code'] = n_areas.index

# surface of each community area, keyed by its integer area number
surface_by_area = {
    int(feature['properties']['area_num_1']): area(feature['geometry'])
    for feature in geo_json_chicago_area_crime['features']
}

# Look the surface up for every counted area. Unlike assigning through
# `n_areas.loc[idx, 'surface']`, this never appends a new, otherwise empty row
# when the GeoJSON holds an area number absent from the crime counts; areas
# missing from the GeoJSON simply keep a NaN surface.
n_areas['surface'] = [surface_by_area.get(int(code), np.nan) for code in n_areas.index]

# calculate the number of crimes per surface area
n_areas['crimes density'] = n_areas['crimes']/n_areas['surface']

# reconvert the area code to string for binding with geojson data
n_areas['code'] = n_areas['code'].apply(str)

In [86]:
# Remove, in place, the restaurants whose 'Yelp category' is the literal text '[]',
# i.e. the ones that belong to none of the food categories.
has_no_category = cfi_recent['Yelp category'].eq('[]')
cfi_recent.drop(index=cfi_recent.index[has_no_category], inplace=True)

In [87]:
# Obtain the list of the different categories.
# The category lists are stored as text in the CSV. `ast.literal_eval` only
# accepts Python literals, so - unlike `eval` - it cannot execute code coming
# from the data file. Values that are no longer strings are left untouched,
# which makes re-running this cell harmless.
cfi_recent['Yelp category'] = cfi_recent['Yelp category'].apply(
    lambda raw: ast.literal_eval(raw) if isinstance(raw, str) else raw
)
# One entry per distinct category, as a NumPy array.
list_category = pd.unique(cfi_recent.explode(column='Yelp category')['Yelp category'])

In [88]:
# Columns needed by the final map; rows missing any of them are discarded.
final_map_columns = [
    'DBA Name',
    'Longitude',
    'Latitude',
    'Yelp rating',
    'Yelp category',
    'Violations',
    'Facility Type',
    'Smoothed Food Inspection Score',
]
data = cfi_recent[final_map_columns].dropna()

In [89]:
# Quartiles of the smoothed inspection score over cfi_recent; they are the
# thresholds of the four color classes below.
q_25, q_50, q_75 = (
    cfi_recent['Smoothed Food Inspection Score'].quantile([0.25, 0.5, 0.75]).values
)


def cfi_score_to_color(num):
    '''
    Pick the marker color matching the quartile a score falls in.

    Input:
            num: smoothed food inspection score
    Output:
            'beige' up to q_25, 'orange' up to q_50, 'red' up to q_75 and
            'darkred' for anything else
    '''
    # Upper bounds are inclusive and tried from the lowest quartile upwards.
    for upper_bound, color in ((q_25, 'beige'), (q_50, 'orange'), (q_75, 'red')):
        if num <= upper_bound:
            return color
    return 'darkred'

In [90]:
# Marker color of every restaurant, derived element-wise from its inspection score.
cfi_colors = data['Smoothed Food Inspection Score'].map(cfi_score_to_color)
data['CFI color'] = cfi_colors

In [98]:
# Rewrite every community name with only its first letter capitalised; the
# names feed the tooltips and the search box below.
for feature in geo_json_chicago_area_crime['features']:
    properties = feature['properties']
    properties['community'] = properties['community'].lower().capitalize()

# The legend is identical on every map, so it is built once, before the loop.
item_txt = """<br> &nbsp; <i class="fa fa-circle fa-1x" style="color:{col}"></i>  &nbsp;{item} """
html_itms_1 = item_txt.format(item="Poor", col="#fbf3d4")
html_itms_2 = item_txt.format(item="Fair", col="orange")
html_itms_3 = item_txt.format(item="Good", col="red")
html_itms_4 = item_txt.format(item="Excellent", col="darkred")

legend_html = """
                 <div style="
                 position: fixed; 
                 bottom: 20px; right: 20px; width: 170px; height: 105px; 
                 border:2px solid grey; z-index:9999; 

                 background-color:white;
                 opacity: .85;

                 font-size:14px;
                 font-weight: bold;
                 ">&nbsp; {title} 
                 {itm_txt}
                  </div> """.format(title="Sanitation Standard",itm_txt=html_itms_1+html_itms_2+html_itms_3+html_itms_4)

# One HTML file per minimum Yelp rating (0 to 4).
for k in range(1, 6):
    min_rating = k - 1
    m = folium.Map(location=[41.8781, -87.6298], zoom_start=10)
    mcg = folium.plugins.MarkerCluster(name=str(k-1) +' <= Yelp rating < ' + str(k), control=False, overlay=True, show=False)
    m.add_child(mcg)
    # NOTE(review): only the lower bound is applied here, whereas the cluster
    # name above reads "k-1 <= rating < k" - confirm which one is intended.
    parent_group = data[data['Yelp rating'] >= min_rating]

    # One toggleable sub-group of markers per food category.
    for category in list_category:
        category_label = category.replace('_', ' ').capitalize()
        # Membership test applied through Series.map instead of one `.iloc`
        # lookup per row; `astype(bool)` keeps the mask boolean even when
        # parent_group is empty.
        in_category = parent_group['Yelp category'].map(
            lambda categories, wanted=category: wanted in categories
        ).astype(bool)
        group = parent_group[in_category]
        g = folium.plugins.FeatureGroupSubGroup(
                mcg,
                category_label,
                show=False
            )

        restaurants = zip(
            group['Latitude'],
            group['Longitude'],
            group['DBA Name'],
            group['CFI color'],
            group['Yelp rating'],
        )
        for latitude, longitude, name, cfi_color, rating in restaurants:
            folium.Marker(
                (latitude, longitude),
                tooltip=name.capitalize(),
                icon=folium.Icon(color=cfi_color, icon='cutlery'),
                popup='<b>Yelp rating: </b>' + str(rating) + '<br><b>Food category: </b>'
                + category_label
            ).add_to(g)
        m.add_child(g)

    # Minimap
    minimap = MiniMap(toggle_display=True, position='bottomleft')
    m.add_child(minimap)

    # NOTE: a plugins.Fullscreen button was tried here but does not work on the website.

    # Community areas colored by their crime density.
    folium.Choropleth(
        geo_data=geo_json_chicago_area_crime,
        name='Community areas (crime density)',
        data=n_areas,
        columns=['code', 'crimes density'],
        key_on='feature.properties.area_num_1',
        fill_color='YlGn',
        fill_opacity=0.7,
        line_opacity=0.5,
        line_color='black',
        legend_name='Density of crimes in 2019 per surface area',
        tooltip=folium.GeoJsonTooltip(
            fields=['community'],
            aliases=['Community:'],
            localize=True)
    ).add_to(m)

    # Same polygons again, hidden by default; this is the layer the search box looks in.
    citygeo = folium.GeoJson(
        geo_json_chicago_area_crime,
        show=False,
        name='Community areas names',
        tooltip=folium.GeoJsonTooltip(
            fields=['community'],
            aliases=['Community:'],
            localize=True),
        style_function = lambda x: {
        'fillColor': 'grey',
        'color': 'grey',
        'weight':2,
        'fillOpacity':0.1,
    }
    ).add_to(m)

    citysearch = Search(
        layer=citygeo,
        geom_type='Polygon',
        placeholder='Search for a Community',
        collapsed=True,
        search_label='community'
    ).add_to(m)

    m.get_root().html.add_child(folium.Element(legend_html))

    folium.LayerControl(collapsed=True).add_to(m)
    m.save('final_' + str(min_rating) + '.html')

## Interactive crimes map

In [47]:
# Rewrite every community name with only its first letter capitalised; the
# names feed the tooltips and the search box below.
for feature in geo_json_chicago_area_crime['features']:
    properties = feature['properties']
    properties['community'] = properties['community'].lower().capitalize()

m = folium.Map(location=[41.8781, -87.6298], zoom_start=10)

# Community areas colored by their crime density.
folium.Choropleth(
    geo_data=geo_json_chicago_area_crime,
    name='Crimes area',
    data=n_areas,
    columns=['code', 'crimes density'],
    key_on='feature.properties.area_num_1',
    fill_color='YlGn',
    fill_opacity=0.7,
    line_opacity=0.5,
    line_color='black',
    # n_areas is computed from the crimes whose 'Year' is 2019, so the legend
    # says so (it previously read "from 2010 to present").
    legend_name='Density of crimes in 2019 per surface area',
    tooltip=folium.GeoJsonTooltip(
        fields=['community'],
        aliases=['Community:'],
        localize=True)
).add_to(m)

# Same polygons again; this is the layer the search box looks in.
citygeo = folium.GeoJson(
    geo_json_chicago_area_crime,
    # Layer name passed to the layer control; 'US Cities' did not describe
    # these Chicago community areas.
    name='Community areas names',
    tooltip=folium.GeoJsonTooltip(
        fields=['community'],
        aliases=['Community:'],
        localize=True),
    style_function = lambda x: {
    'fillColor': 'grey',
    'color': 'grey',
    'weight':2,
    'fillOpacity':0.1,
}
).add_to(m)

citysearch = Search(
    layer=citygeo,
    geom_type='Polygon',
    placeholder='Search for a Community',
    collapsed=True,
    search_label='community'
).add_to(m)

folium.LayerControl().add_to(m)
m.save('crimes_area.html')