In [1]:
import numpy as np  # useful for many scientific computing in Python
import pandas as pd # primary data structure library
import matplotlib as mpl
import matplotlib.pyplot as plt

In [2]:
#!conda install -c conda-forge folium=0.5.0 --yes
#import folium
!pip install folium
import folium

print('Folium installed and imported!')

Folium installed and imported!


In [3]:
df_incidents = pd.read_csv('https://s3-api.us-geo.objectstorage.softlayer.net/cf-courses-data/CognitiveClass/DV0101EN/labs/Data_Files/Police_Department_Incidents_-_Previous_Year__2016_.csv')
print('Dataset has been loaded')

Dataset has been loaded


In [4]:
df_incidents.head()

Unnamed: 0,IncidntNum,Category,Descript,DayOfWeek,Date,Time,PdDistrict,Resolution,Address,X,Y,Location,PdId
0,120058272,WEAPON LAWS,POSS OF PROHIBITED WEAPON,Friday,01/29/2016 12:00:00 AM,11:00,SOUTHERN,"ARREST, BOOKED",800 Block of BRYANT ST,-122.403405,37.775421,"(37.775420706711, -122.403404791479)",12005827212120
1,120058272,WEAPON LAWS,"FIREARM, LOADED, IN VEHICLE, POSSESSION OR USE",Friday,01/29/2016 12:00:00 AM,11:00,SOUTHERN,"ARREST, BOOKED",800 Block of BRYANT ST,-122.403405,37.775421,"(37.775420706711, -122.403404791479)",12005827212168
2,141059263,WARRANTS,WARRANT ARREST,Monday,04/25/2016 12:00:00 AM,14:59,BAYVIEW,"ARREST, BOOKED",KEITH ST / SHAFTER AV,-122.388856,37.729981,"(37.7299809672996, -122.388856204292)",14105926363010
3,160013662,NON-CRIMINAL,LOST PROPERTY,Tuesday,01/05/2016 12:00:00 AM,23:50,TENDERLOIN,NONE,JONES ST / OFARRELL ST,-122.412971,37.785788,"(37.7857883766888, -122.412970537591)",16001366271000
4,160002740,NON-CRIMINAL,LOST PROPERTY,Friday,01/01/2016 12:00:00 AM,00:30,MISSION,NONE,16TH ST / MISSION ST,-122.419672,37.76505,"(37.7650501214668, -122.419671780296)",16000274071000


In [5]:
df_incidents.PdDistrict.unique()

array(['SOUTHERN', 'BAYVIEW', 'TENDERLOIN', 'MISSION', 'NORTHERN',
       'TARAVAL', 'INGLESIDE', 'CENTRAL', 'RICHMOND', 'PARK', nan],
      dtype=object)

In [6]:
df_incidents.PdDistrict.isna().sum()

1

In [7]:
df_incidents.PdDistrict.dropna(axis=0, inplace=True)
df_incidents.PdDistrict.isna().sum()

1

In [8]:
df_incidents.PdDistrict.unique()

array(['SOUTHERN', 'BAYVIEW', 'TENDERLOIN', 'MISSION', 'NORTHERN',
       'TARAVAL', 'INGLESIDE', 'CENTRAL', 'RICHMOND', 'PARK', nan],
      dtype=object)

In [9]:
df_incidents.head()

Unnamed: 0,IncidntNum,Category,Descript,DayOfWeek,Date,Time,PdDistrict,Resolution,Address,X,Y,Location,PdId
0,120058272,WEAPON LAWS,POSS OF PROHIBITED WEAPON,Friday,01/29/2016 12:00:00 AM,11:00,SOUTHERN,"ARREST, BOOKED",800 Block of BRYANT ST,-122.403405,37.775421,"(37.775420706711, -122.403404791479)",12005827212120
1,120058272,WEAPON LAWS,"FIREARM, LOADED, IN VEHICLE, POSSESSION OR USE",Friday,01/29/2016 12:00:00 AM,11:00,SOUTHERN,"ARREST, BOOKED",800 Block of BRYANT ST,-122.403405,37.775421,"(37.775420706711, -122.403404791479)",12005827212168
2,141059263,WARRANTS,WARRANT ARREST,Monday,04/25/2016 12:00:00 AM,14:59,BAYVIEW,"ARREST, BOOKED",KEITH ST / SHAFTER AV,-122.388856,37.729981,"(37.7299809672996, -122.388856204292)",14105926363010
3,160013662,NON-CRIMINAL,LOST PROPERTY,Tuesday,01/05/2016 12:00:00 AM,23:50,TENDERLOIN,NONE,JONES ST / OFARRELL ST,-122.412971,37.785788,"(37.7857883766888, -122.412970537591)",16001366271000
4,160002740,NON-CRIMINAL,LOST PROPERTY,Friday,01/01/2016 12:00:00 AM,00:30,MISSION,NONE,16TH ST / MISSION ST,-122.419672,37.76505,"(37.7650501214668, -122.419671780296)",16000274071000


In [10]:
# clean up the dataset to remove unnecessary columns (eg. REG) 
df_incidents.drop(['IncidntNum','Category','Descript','DayOfWeek','Date', 'Time', 'Resolution','Address', 'X', 'Y', 'Location', 'PdId'], axis=1, inplace=True)

# let's rename the columns so that they make sense
df_incidents.rename(columns={'PdDistrict':'Neighborhood'}, inplace=True)

# for sake of consistency, let's also make all column labels of type string
# df_can.columns = list(map(str, df_can.columns))

df_incidents.head()

Unnamed: 0,Neighborhood
0,SOUTHERN
1,SOUTHERN
2,BAYVIEW
3,TENDERLOIN
4,MISSION


In [11]:
df_hoods = df_incidents.Neighborhood.value_counts()
df_hoods.head(11)

SOUTHERN      28445
NORTHERN      20100
MISSION       19503
CENTRAL       17666
BAYVIEW       14303
INGLESIDE     11594
TARAVAL       11325
TENDERLOIN     9942
RICHMOND       8922
PARK           8699
Name: Neighborhood, dtype: int64

In [12]:
df_hoods = pd.DataFrame(df_hoods)
df_hoods.head()

Unnamed: 0,Neighborhood
SOUTHERN,28445
NORTHERN,20100
MISSION,19503
CENTRAL,17666
BAYVIEW,14303


In [13]:
df_hoods = df_hoods.reset_index()
df_hoods.head()

Unnamed: 0,index,Neighborhood
0,SOUTHERN,28445
1,NORTHERN,20100
2,MISSION,19503
3,CENTRAL,17666
4,BAYVIEW,14303


In [14]:
df_hoods.columns = ['Neighborhood', 'Count']
df_hoods.head(10)

Unnamed: 0,Neighborhood,Count
0,SOUTHERN,28445
1,NORTHERN,20100
2,MISSION,19503
3,CENTRAL,17666
4,BAYVIEW,14303
5,INGLESIDE,11594
6,TARAVAL,11325
7,TENDERLOIN,9942
8,RICHMOND,8922
9,PARK,8699


In [15]:
# download countries geojson file
#!wget --quiet https://cocl.us/sanfran_geojson -O san_fran_hoods.json
    
#print('GeoJSON file downloaded!')

In [16]:
san_fran_hoods = r'https://cocl.us/sanfran_geojson'

In [17]:
# create a numpy array of length 10 and has linear spacing from the minium incidents to the maximum incidents
threshold_scale = np.linspace(df_hoods['Count'].min(),
                              df_hoods['Count'].max(),
                              6, dtype=int)
threshold_scale

array([ 8699, 12648, 16597, 20546, 24495, 28445])

In [18]:
san_fran_hoods

'https://cocl.us/sanfran_geojson'

In [19]:
threshold_scale = threshold_scale.tolist() # change the numpy array to a list
threshold_scale[-1] = threshold_scale[-1] + 1 # make sure that the last value of the list is greater than the maximum



In [None]:
# let Folium determine the scale.
world_map = folium.Map(location=[37.759308, -122.438632], zoom_start=12)
world_map.choropleth(
    geo_data = san_fran_hoods,
    data = df_hoods,
    columns=['Neighborhood', 'Count'],
    key_on='features.properties.DISTRICT',
    threshold_scale=threshold_scale,
    fill_color='YlOrRd', 
    fill_opacity=0.7, 
    line_opacity=0.2,
    legend_name='Crime Rate in San Francisco')
world_map


