## Importing libraries

In [106]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.plotly as py
import cufflinks as cf
import folium
import statsmodels.api as sm
from pylab import rcParams



# Put cufflinks into offline mode (presumably so Plotly figures render locally
# inside the notebook rather than through the online service — TODO confirm).
cf.go_offline()

In [128]:
# Default (width, height) for figures created after this cell.
rcParams['figure.figsize'] = (20, 10)

## Importing dataset

In [2]:
# Load the crime records; the path is relative to the notebook's working directory.
data_path = 'chicago_clean.csv'
chicago_cmr = pd.read_csv(data_path)

In [3]:
chicago_cmr.head()

Unnamed: 0,Date,IUCR,Primary Type,Description,Location Description,Arrest,Domestic,Beat,District,Year,Latitude,Longitude,Location
0,2001-01-01 00:00:00,840,THEFT,FINANCIAL ID THEFT: OVER $300,RESIDENCE,0,0,631,6.0,2001,41.748623,-87.608654,"(41.748623027, -87.608654109)"
1,2001-01-01 00:00:00,1752,OFFENSE INVOLVING CHILDREN,AGG CRIM SEX ABUSE FAM MEMBER,RESIDENCE,0,0,624,6.0,2001,41.754802,-87.598147,"(41.754802305, -87.598146873)"
2,2001-01-01 00:00:00,810,THEFT,OVER $500,APARTMENT,0,0,2332,19.0,2001,41.938952,-87.638164,"(41.938952432, -87.638163582)"
3,2001-01-01 00:00:00,840,THEFT,FINANCIAL ID THEFT: OVER $300,RESIDENCE,0,0,624,6.0,2001,41.752062,-87.607828,"(41.752061911, -87.607827503)"
4,2001-01-01 00:00:00,1754,OFFENSE INVOLVING CHILDREN,AGG SEX ASSLT OF CHILD FAM MBR,RESIDENCE,0,0,1013,10.0,2001,41.850386,-87.715108,"(41.850385805, -87.715107802)"


In [4]:
# Convert the Date column to pandas datetimes (the dtypes listing below shows
# the resulting datetime64[ns] column).
parsed_dates = pd.to_datetime(chicago_cmr['Date'])
chicago_cmr['Date'] = parsed_dates

In [5]:
chicago_cmr.dtypes

Date                    datetime64[ns]
IUCR                            object
Primary Type                    object
Description                     object
Location Description            object
Arrest                           int64
Domestic                         int64
Beat                             int64
District                       float64
Year                             int64
Latitude                       float64
Longitude                      float64
Location                        object
dtype: object

## Creating a circle marker map

In [12]:
# Count how many records share each Location string, then move the locations
# out of the index into an ordinary column.
location_counts = chicago_cmr['Location'].value_counts()
chicago_crime_counts = location_counts.to_frame().reset_index()

In [13]:
# Label the two columns by position instead of by their current names.
# value_counts().reset_index() names its columns differently across pandas
# versions ('index'/'Location' in older releases, 'Location'/'count' from 2.0),
# so a name-based mapping mislabels the frame on newer versions. A positional
# mapping also makes this cell safe to re-run: once the columns are already
# named correctly the mapping is the identity.
chicago_crime_counts = chicago_crime_counts.rename(
    columns=dict(zip(chicago_crime_counts.columns, ['Location', 'Crime_Counts']))
)
chicago_crime_counts.head()

Unnamed: 0,Location,Crime_Counts
0,"(41.976290414, -87.905227221)",13154
1,"(41.754592961, -87.741528537)",9451
2,"(41.883500187, -87.627876698)",7048
3,"(41.897895128, -87.624096605)",4348
4,"(41.896888586, -87.628203192)",3139


In [28]:
# Same per-location count as above, restricted to records where Arrest == 1.
arrest_records = chicago_cmr[chicago_cmr['Arrest'] == 1]
arrest_location_counts = arrest_records['Location'].value_counts()
chicago_arrest_counts = arrest_location_counts.to_frame().reset_index()

In [29]:
# Label the two columns by position instead of by their current names.
# value_counts().reset_index() names its columns differently across pandas
# versions ('index'/'Location' in older releases, 'Location'/'count' from 2.0),
# so a name-based mapping mislabels the frame on newer versions. A positional
# mapping also makes this cell safe to re-run: once the columns are already
# named correctly the mapping is the identity.
chicago_arrest_counts = chicago_arrest_counts.rename(
    columns=dict(zip(chicago_arrest_counts.columns, ['Location', 'Arrest_Count']))
)
chicago_arrest_counts.head()

Unnamed: 0,Location,Arrest_Count
0,"(41.883500187, -87.627876698)",4876
1,"(41.754592961, -87.741528537)",4547
2,"(41.976290414, -87.905227221)",3422
3,"(41.885487535, -87.726422045)",2179
4,"(41.909664252, -87.742728815)",2137


In [30]:
# Put crime and arrest counts side by side, matched on Location. merge()
# defaults to an inner join, so only locations present in both tables remain.
chicago_map = pd.merge(chicago_crime_counts, chicago_arrest_counts, on='Location')

In [32]:
# Keep only locations with more than 499 recorded crimes.
has_enough_crimes = chicago_map['Crime_Counts'] > 499
chicago_map = chicago_map[has_enough_crimes]

In [55]:
chicago_map.head()

Unnamed: 0,Location,Crime_Counts,Arrest_Count,Coordinates
0,"(41.976290414, -87.905227221)",13154,3422,"(41.976290414, -87.905227221)"
1,"(41.754592961, -87.741528537)",9451,4547,"(41.754592961, -87.741528537)"
2,"(41.883500187, -87.627876698)",7048,4876,"(41.883500187, -87.627876698)"
3,"(41.897895128, -87.624096605)",4348,2109,"(41.897895128, -87.624096605)"
4,"(41.896888586, -87.628203192)",3139,1999,"(41.896888586, -87.628203192)"


In [70]:
# Location -> District lookup with exactly one row per Location.
# Dropping duplicate (Location, District) pairs is not enough: a location that
# appears with two different District values (or with a value and a missing
# one) would keep several rows and multiply the matching rows when merged into
# chicago_map. GroupBy.first() takes the first non-null District per location;
# sort=False keeps locations in order of first appearance.
chicago_district = (
    chicago_cmr
    .groupby('Location', sort=False)['District']
    .first()
    .reset_index()
)

In [71]:
chicago_district.head()

Unnamed: 0,Location,District
0,"(41.748623027, -87.608654109)",6.0
1,"(41.754802305, -87.598146873)",6.0
2,"(41.938952432, -87.638163582)",19.0
3,"(41.752061911, -87.607827503)",6.0
4,"(41.850385805, -87.715107802)",10.0


In [72]:
# Attach the District of each location.
# Any District column left over from a previous run of this cell is dropped
# first; otherwise a re-run would produce District_x / District_y and the
# later chicago_map['District'] lookups would fail.
chicago_map = (
    chicago_map
    .drop('District', axis=1, errors='ignore')
    .merge(chicago_district, on='Location')
)

In [73]:
chicago_map.head()

Unnamed: 0,Location,Crime_Counts,Arrest_Count,Coordinates,District
0,"(41.976290414, -87.905227221)",13154,3422,"(41.976290414, -87.905227221)",16.0
1,"(41.754592961, -87.741528537)",9451,4547,"(41.754592961, -87.741528537)",8.0
2,"(41.883500187, -87.627876698)",7048,4876,"(41.883500187, -87.627876698)",1.0
3,"(41.897895128, -87.624096605)",4348,2109,"(41.897895128, -87.624096605)",18.0
4,"(41.896888586, -87.628203192)",3139,1999,"(41.896888586, -87.628203192)",18.0


In [44]:
def split_location(location):
    """
    Parse a coordinate string such as "(41.97, -87.90)" into two numbers.

    Input: string of the form "(<latitude>, <longitude>)"
    Output: 2-tuple of floats, (latitude, longitude)

    The first and last characters (the parentheses) are discarded, the
    remainder is split at every comma, and the first two pieces are
    converted to float.
    """
    pieces = location[1:-1].split(',')
    return (float(pieces[0]), float(pieces[1]))


In [45]:
# Parse every Location string into a (latitude, longitude) tuple of floats
# and store the result in a new Coordinates column.
coordinates = chicago_map['Location'].apply(split_location)
chicago_map['Coordinates'] = coordinates

In [125]:
# Base map centred on the [latitude, longitude] pair below
# (presumably central Chicago — the notebook does not say).
map_center = [41.881832, -87.623177]
chicago_map_crime = folium.Map(location=map_center)

In [126]:
# Add one circle marker per row of chicago_map to the Folium map.
HIGH_CRIME_THRESHOLD = 1000  # counts above this are drawn in red, the rest in yellow
RADIUS_DIVISOR = 300         # marker radius = crime count / this value

# Popup text attached to each marker (HTML; <br> forces a line break).
popup_template = """
            Crimes: {}<br>
            District: {}<br>
            """

# Walk the three needed columns in lockstep instead of doing a separate
# .iloc lookup for every field of every row.
for crime_count, district, coordinates in zip(chicago_map['Crime_Counts'],
                                              chicago_map['District'],
                                              chicago_map['Coordinates']):
    marker_color = '#ff0000' if crime_count > HIGH_CRIME_THRESHOLD else '#e9fc65'
    popup = popup_template.format(crime_count, district)
    latitude, longitude = coordinates[0], coordinates[1]

    folium.CircleMarker([latitude, longitude], popup=popup,
                        radius=crime_count / RADIUS_DIVISOR,
                        color=marker_color, fill=True).add_to(chicago_map_crime)
    
    
    

In [127]:
chicago_map_crime