## Choropleth Map - San Francisco

In [1]:
#pip install geopandas

from pathlib import Path

import folium
import geopandas as gpd
import matplotlib as mpl
import numpy as np
import pandas as pd


In [220]:
# Read the San Francisco crime dataset (a CSV served over HTTP) into `sf`
# and preview the first two rows to check the column layout.

path = 'https://cocl.us/sanfran_crime_dataset'
sf = pd.read_csv(filepath_or_buffer=path)
sf.iloc[:2]

Unnamed: 0,IncidntNum,Category,Descript,DayOfWeek,Date,Time,PdDistrict,Resolution,Address,X,Y,Location,PdId
0,120058272,WEAPON LAWS,POSS OF PROHIBITED WEAPON,Friday,01/29/2016 12:00:00 AM,11:00,SOUTHERN,"ARREST, BOOKED",800 Block of BRYANT ST,-122.403405,37.775421,"(37.775420706711, -122.403404791479)",12005827212120
1,120058272,WEAPON LAWS,"FIREARM, LOADED, IN VEHICLE, POSSESSION OR USE",Friday,01/29/2016 12:00:00 AM,11:00,SOUTHERN,"ARREST, BOOKED",800 Block of BRYANT ST,-122.403405,37.775421,"(37.775420706711, -122.403404791479)",12005827212168


In [230]:
# Rename the police-district column to 'Neighborhood'; every other column is
# left unchanged. Rebinding `sf` (rather than inplace=True) keeps the cell
# safe to re-run and avoids silently mutating a frame shown by an earlier cell.
sf = sf.rename(columns={'PdDistrict': 'Neighborhood'})

# Last expression of the cell, so the (rows, columns) shape is actually displayed.
sf.shape


In [219]:
# way1-compact-prepare data

# value_counts() only exists on a Series and returns a Series indexed by
# neighborhood. Naming that index and calling reset_index(name=...) converts
# it into a two-column DataFrame with a fresh 0..n-1 integer index.
# reset_index(inplace=True) must not be used here: on a Series it raises
# TypeError unless drop=True, because a Series cannot turn into a DataFrame
# in place (and with inplace=True the call would return None anyway).
sf1 = (
    sf['Neighborhood']
    .value_counts()
    .rename_axis('Neighborhood')
    .reset_index(name='Counts')
)
sf1

Unnamed: 0,Neighborhood,Counts
0,SOUTHERN,28445
1,NORTHERN,20100
2,MISSION,19503
3,CENTRAL,17666
4,BAYVIEW,14303
5,INGLESIDE,11594
6,TARAVAL,11325
7,TENDERLOIN,9942
8,RICHMOND,8922
9,PARK,8699


In [231]:
# way2 - Original (step by step)

# 1. value_counts() works on a Series only; to_frame() turns the resulting
#    Series into a one-column DataFrame indexed by neighborhood.
count = sf['Neighborhood'].value_counts().to_frame()

# 2. reset_index() moves the neighborhood labels into a regular column and
#    adds a new 0..n-1 integer index.
count = count.reset_index()

# 3. Assign both column names positionally. The names produced by steps 1-2
#    differ between pandas versions, so a rename() keyed on the old names is
#    fragile; setting all names at once is not.
count.columns = ['Neighborhood', 'Counts']
count


Unnamed: 0,Neighborhood,Counts
0,SOUTHERN,28445
1,NORTHERN,20100
2,MISSION,19503
3,CENTRAL,17666
4,BAYVIEW,14303
5,INGLESIDE,11594
6,TARAVAL,11325
7,TENDERLOIN,9942
8,RICHMOND,8922
9,PARK,8699


In [209]:
# map boundaries

# Directory holding the downloaded GeoJSON file. Built from the current
# user's home directory instead of a hard-coded /Users/<name>/... path so the
# notebook is not tied to one machine; adjust DATA_DIR if the file lives elsewhere.
DATA_DIR = Path.home() / 'Downloads'

# Use geopandas to read the boundary file into a GeoDataFrame.
geo = gpd.read_file(DATA_DIR / 'san-francisco.geojson')

# Inspect the property names: the column holding the district name is what
# the choropleth's key_on must point at (feature.properties.<column>).
geo.head()

   OBJECTID  DISTRICT COMPANY  \
0         1   CENTRAL       A   
1         2  SOUTHERN       B   
2         3   BAYVIEW       C   
3         4   MISSION       D   
4         5      PARK       F   

                                            geometry  
0  POLYGON ((-122.40532 37.80687, -122.40440 37.8...  
1  MULTIPOLYGON (((-122.38822 37.79000, -122.3880...  
2  POLYGON ((-122.39342 37.70841, -122.39349 37.7...  
3  POLYGON ((-122.40802 37.76940, -122.40767 37.7...  
4  POLYGON ((-122.44665 37.77588, -122.44683 37.7...  


In [221]:
# Centre the map on the coordinates of the first incident in the crime data.
loc_sf = [37.775420706711, -122.403404791479]

# Named sf_map rather than `map` so the builtin map() is not shadowed.
sf_map = folium.Map(location=loc_sf, zoom_start=12)

# folium.Choropleth replaces the deprecated Map.choropleth() method.
# `columns` is [key column, value column] of `count`; the value column is
# 'Counts' (not 'Count'), and the key column is matched against the DISTRICT
# property of each feature in `geo`.
folium.Choropleth(
    geo_data=geo,
    data=count,
    columns=['Neighborhood', 'Counts'],
    key_on='feature.properties.DISTRICT',
    fill_color='YlOrRd',
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name='SanF',
).add_to(sf_map)

folium.LayerControl().add_to(sf_map)

# Last expression of the cell: renders the interactive map.
sf_map