In [1]:
import json
import os

import folium
import matplotlib.cm as cm
import matplotlib.colors as colors
import numpy as np
import pandas as pd
import requests
from geopy.geocoders import Nominatim
from pandas.io.json import json_normalize
from sklearn.cluster import KMeans

In [2]:
# Open the crime CSV lazily: iterating the returned reader yields DataFrames of up to
# one million rows each, so the whole file is never held in memory at once.
chicago_chunk = pd.read_csv(
    "Chicago_Crime.csv",
    chunksize=10**6,
)

In [3]:
# Columns kept from the raw file, and the earliest crime date retained.
features = ['ID', 'Date', 'Primary Type', 'District', 'Ward', 'Community Area', 'Latitude', 'Longitude']
start_date = pd.Timestamp('2015-01-01')

chunk_list = []
for chunk in chicago_chunk:
    # Build each filtered chunk as a new frame (no in-place edits on a column slice):
    # keep the selected columns, drop incomplete rows, parse the timestamp.
    chunk_filter = (
        chunk[features]
        .dropna()
        .assign(Date=lambda df: pd.to_datetime(df['Date'], format='%m/%d/%Y %I:%M:%S %p'))
    )
    chunk_filter = chunk_filter.loc[chunk_filter['Date'] >= start_date]
    chunk_list.append(chunk_filter)

# The chunk reader can only be iterated once; a second run of this cell would see no chunks.
if not chunk_list:
    raise RuntimeError(
        'No chunks were read: the CSV reader is exhausted or the file is empty. '
        'Re-create `chicago_chunk` with pd.read_csv before running this cell again.'
    )

chicago_data = pd.concat(chunk_list)

  interactivity=interactivity, compiler=compiler, result=result)


In [4]:
# Report (rows, columns) of the filtered data, then preview the first five rows.
row_count, column_count = chicago_data.shape
print((row_count, column_count))
chicago_data.head(5)

(1338577, 8)


Unnamed: 0,ID,Date,Primary Type,District,Ward,Community Area,Latitude,Longitude
60329,11556037,2019-01-03 19:20:00,PUBLIC PEACE VIOLATION,16.0,41.0,76.0,42.002816,-87.906094
62255,11626027,2019-03-16 17:58:00,BATTERY,1.0,42.0,32.0,41.883369,-87.63386
62597,11622422,2019-03-12 22:00:00,THEFT,2.0,4.0,36.0,41.825347,-87.606781
62630,11625922,2019-03-14 18:42:00,BATTERY,24.0,49.0,1.0,42.016542,-87.672499
62631,11622907,2019-03-14 16:03:00,OTHER OFFENSE,2.0,4.0,36.0,41.825299,-87.606961


In [5]:
# Renumber the rows 0..n-1 and store the three area identifiers as 32-bit integers.
chicago_data = (
    chicago_data
    .reset_index(drop=True)
    .astype({'District': 'int32', 'Ward': 'int32', 'Community Area': 'int32'})
)
chicago_data.head()

Unnamed: 0,ID,Date,Primary Type,District,Ward,Community Area,Latitude,Longitude
0,11556037,2019-01-03 19:20:00,PUBLIC PEACE VIOLATION,16,41,76,42.002816,-87.906094
1,11626027,2019-03-16 17:58:00,BATTERY,1,42,32,41.883369,-87.63386
2,11622422,2019-03-12 22:00:00,THEFT,2,4,36,41.825347,-87.606781
3,11625922,2019-03-14 18:42:00,BATTERY,24,49,1,42.016542,-87.672499
4,11622907,2019-03-14 16:03:00,OTHER OFFENSE,2,4,36,41.825299,-87.606961


In [6]:
# Print how many distinct values each categorical column contains.
for column, description in [
    ('District', 'districts in Chicago'),
    ('Ward', 'wards in Chicago'),
    ('Community Area', 'community areas in Chicago'),
    ('Primary Type', 'types of crime in Chicago'),
]:
    print('There are', len(chicago_data[column].unique()), description)

There are 23 districts in Chicago
There are 50 wards in Chicago
There are 77 community areas in Chicago
There are 33 types of crime in Chicago


In [7]:
chicago_grouped = chicago_data.groupby('Community Area')

# Share of each crime type within its community area:
# (crimes of that type in the area) / (all crimes in the area).
type_counts = chicago_grouped['Primary Type'].value_counts()
area_totals = chicago_grouped['Primary Type'].count()
crime_proportion = type_counts / area_totals

# One row per community area, one column per crime type; a type that never
# occurs in an area gets proportion 0.
chicago_crime_prop = (
    crime_proportion
    .unstack('Primary Type')
    .fillna(0)
    .reset_index()
)

print(chicago_crime_prop.shape)
chicago_crime_prop.head(10)

(77, 34)


Primary Type,Community Area,ARSON,ASSAULT,BATTERY,BURGLARY,CONCEALED CARRY LICENSE VIOLATION,CRIM SEXUAL ASSAULT,CRIMINAL DAMAGE,CRIMINAL TRESPASS,DECEPTIVE PRACTICE,...,OTHER NARCOTIC VIOLATION,OTHER OFFENSE,PROSTITUTION,PUBLIC INDECENCY,PUBLIC PEACE VIOLATION,ROBBERY,SEX OFFENSE,STALKING,THEFT,WEAPONS VIOLATION
0,1,0.000566,0.068413,0.188459,0.046175,0.0,0.007825,0.125399,0.037424,0.062699,...,5.1e-05,0.059456,0.001441,5.1e-05,0.004479,0.036343,0.006486,0.000927,0.27386,0.008391
1,2,0.001109,0.065912,0.172573,0.065386,0.000175,0.005429,0.132349,0.027264,0.078055,...,0.0,0.070057,0.000292,5.8e-05,0.004846,0.033452,0.006188,0.000525,0.240119,0.008348
2,3,0.000617,0.073823,0.193834,0.039798,5.6e-05,0.010146,0.086435,0.035762,0.10213,...,5.6e-05,0.057567,0.000168,0.0,0.007119,0.031222,0.006054,0.000168,0.269058,0.007623
3,4,0.000627,0.058996,0.162786,0.05816,0.000209,0.009293,0.115067,0.029863,0.102851,...,0.000104,0.058996,0.000313,0.0,0.00355,0.023598,0.005743,0.001149,0.289235,0.005325
4,5,0.001464,0.045248,0.093279,0.077317,0.0,0.004979,0.110119,0.028555,0.118026,...,0.0,0.04437,0.000293,0.000146,0.0041,0.029873,0.004393,0.001318,0.367989,0.002636
5,6,0.000918,0.041417,0.135779,0.055495,0.00017,0.008773,0.082427,0.033358,0.117077,...,6.8e-05,0.036181,0.00017,3.4e-05,0.006019,0.036283,0.004387,0.000612,0.385473,0.002346
6,7,0.001005,0.031331,0.087098,0.054213,0.000137,0.005526,0.100388,0.024389,0.102215,...,0.0,0.026307,4.6e-05,9.1e-05,0.003288,0.031971,0.004933,0.000639,0.479242,0.001918
7,8,0.000449,0.041657,0.120675,0.017584,0.000155,0.007369,0.052546,0.030389,0.142487,...,1.7e-05,0.02837,0.001415,8.6e-05,0.00516,0.030389,0.003227,0.000777,0.466289,0.003779
8,9,0.002946,0.07732,0.183358,0.043446,0.0,0.005155,0.134757,0.037555,0.134021,...,0.0,0.092784,0.000736,0.0,0.007364,0.007364,0.004418,0.000736,0.205449,0.001473
9,10,0.001831,0.07106,0.152272,0.065568,0.0,0.004993,0.132967,0.037777,0.110501,...,0.0,0.076552,0.000999,0.0,0.004826,0.009985,0.00649,0.001165,0.252954,0.001831


In [312]:
# Locate each community area at the mean latitude/longitude of the crimes recorded in it.
# The columns are selected with a list: indexing a GroupBy with a bare tuple of keys
# ('Latitude','Longitude') is deprecated and raises an error in recent pandas versions.
community_area_loc = chicago_grouped[['Latitude', 'Longitude']].mean().reset_index()
community_area_loc.head(10)

Unnamed: 0,Community Area,Latitude,Longitude
0,1,42.012195,-87.670609
1,2,41.999862,-87.693029
2,3,41.966217,-87.656876
3,4,41.972274,-87.688504
4,5,41.947509,-87.682949
5,6,41.943282,-87.653597
6,7,41.922531,-87.649465
7,8,41.897549,-87.630728
8,9,42.006081,-87.813256
9,10,41.984702,-87.801423


In [392]:
kclusters = 5

# Cluster on the crime-type proportions only. 'Cluster Labels' is dropped as well
# (when present) so that re-running this cell after the labels have been attached to
# chicago_crime_prop does not feed the previous labels back in as a feature.
chicago_crime_cluster = chicago_crime_prop.drop(
    columns=['Community Area', 'Cluster Labels'], errors='ignore'
)

kmeans_crime = KMeans(n_clusters = kclusters, random_state = 0).fit(chicago_crime_cluster)

# One label per row of chicago_crime_prop, in the same row order.
crime_labels = kmeans_crime.labels_
crime_labels[0:10]

array([1, 1, 1, 2, 2, 2, 4, 4, 1, 1], dtype=int32)

In [393]:
# Attach each area's crime cluster to its proportion row (labels are in row order).
chicago_crime_prop['Cluster Labels'] = crime_labels

# Combine area coordinates with the crime profile, matching rows on 'Community Area'.
crime_profile_by_area = chicago_crime_prop.set_index('Community Area')
chicago_merged = community_area_loc.join(crime_profile_by_area, on = 'Community Area')
chicago_merged.head(10)

Unnamed: 0,Community Area,Latitude,Longitude,Cluster Labels,ARSON,ASSAULT,BATTERY,BURGLARY,CONCEALED CARRY LICENSE VIOLATION,CRIM SEXUAL ASSAULT,...,OTHER NARCOTIC VIOLATION,OTHER OFFENSE,PROSTITUTION,PUBLIC INDECENCY,PUBLIC PEACE VIOLATION,ROBBERY,SEX OFFENSE,STALKING,THEFT,WEAPONS VIOLATION
0,1,42.012195,-87.670609,1,0.000566,0.068413,0.188459,0.046175,0.0,0.007825,...,5.1e-05,0.059456,0.001441,5.1e-05,0.004479,0.036343,0.006486,0.000927,0.27386,0.008391
1,2,41.999862,-87.693029,1,0.001109,0.065912,0.172573,0.065386,0.000175,0.005429,...,0.0,0.070057,0.000292,5.8e-05,0.004846,0.033452,0.006188,0.000525,0.240119,0.008348
2,3,41.966217,-87.656876,1,0.000617,0.073823,0.193834,0.039798,5.6e-05,0.010146,...,5.6e-05,0.057567,0.000168,0.0,0.007119,0.031222,0.006054,0.000168,0.269058,0.007623
3,4,41.972274,-87.688504,2,0.000627,0.058996,0.162786,0.05816,0.000209,0.009293,...,0.000104,0.058996,0.000313,0.0,0.00355,0.023598,0.005743,0.001149,0.289235,0.005325
4,5,41.947509,-87.682949,2,0.001464,0.045248,0.093279,0.077317,0.0,0.004979,...,0.0,0.04437,0.000293,0.000146,0.0041,0.029873,0.004393,0.001318,0.367989,0.002636
5,6,41.943282,-87.653597,2,0.000918,0.041417,0.135779,0.055495,0.00017,0.008773,...,6.8e-05,0.036181,0.00017,3.4e-05,0.006019,0.036283,0.004387,0.000612,0.385473,0.002346
6,7,41.922531,-87.649465,4,0.001005,0.031331,0.087098,0.054213,0.000137,0.005526,...,0.0,0.026307,4.6e-05,9.1e-05,0.003288,0.031971,0.004933,0.000639,0.479242,0.001918
7,8,41.897549,-87.630728,4,0.000449,0.041657,0.120675,0.017584,0.000155,0.007369,...,1.7e-05,0.02837,0.001415,8.6e-05,0.00516,0.030389,0.003227,0.000777,0.466289,0.003779
8,9,42.006081,-87.813256,1,0.002946,0.07732,0.183358,0.043446,0.0,0.005155,...,0.0,0.092784,0.000736,0.0,0.007364,0.007364,0.004418,0.000736,0.205449,0.001473
9,10,41.984702,-87.801423,1,0.001831,0.07106,0.152272,0.065568,0.0,0.004993,...,0.0,0.076552,0.000999,0.0,0.004826,0.009985,0.00649,0.001165,0.252954,0.001831


In [394]:
address = 'Chicago, IL'

# Look up the coordinates used to centre the maps.
geolocator = Nominatim(user_agent = "chicago_explorer")
location = geolocator.geocode(address)

# geocode() returns None when the address cannot be resolved; fail with a clear
# message instead of an AttributeError on the attribute access below.
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))

latitude = location.latitude
longitude = location.longitude

print('The geograpical coordinates of Chicago are {}, {}.'.format(latitude, longitude))

The geograpical coordinates of Chicago are 41.8755616, -87.6244212.


In [395]:
# Map of community areas coloured by crime cluster.
map_clusters = folium.Map(location = [latitude, longitude], zoom_start = 10)

# One evenly spaced rainbow colour per cluster label 0..kclusters-1.
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(rgba) for rgba in colors_array]

for lat, lon, com, cluster in zip(chicago_merged['Latitude'], chicago_merged['Longitude'],
                                  chicago_merged['Community Area'], chicago_merged['Cluster Labels']):
    # Labels start at 0, so index the palette directly; `cluster - 1` only worked
    # because index -1 wraps around to the last colour.
    cluster_color = rainbow[int(cluster)]
    label = folium.Popup(str(com) + ' Cluster ' + str(cluster), parse_html = True)
    folium.CircleMarker(
        [lat, lon],
        radius = 5,
        popup = label,
        color = cluster_color,
        fill = True,
        fill_color = cluster_color,
        fill_opacity = 0.7).add_to(map_clusters)

map_clusters

In [357]:
# Foursquare API credentials are read from the environment so that no secret is stored
# in the notebook or echoed into its output.
# NOTE(review): the key pair that was previously hard-coded here should be treated as
# compromised and rotated.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID')
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET')
VERSION = '20180605'

if not CLIENT_ID or not CLIENT_SECRET:
    raise RuntimeError(
        'Set the FOURSQUARE_CLIENT_ID and FOURSQUARE_CLIENT_SECRET environment '
        'variables before running this cell.'
    )

print('Foursquare credentials loaded from the environment.')

Your credentails:
CLIENT_ID: <redacted>
CLIENT_SECRET:<redacted>


In [423]:
LIMIT = 50

def getNearbyVenues(areas, latitudes, longitudes, radius = 500):
    """Query the Foursquare "explore" endpoint around each area and tabulate the venues.

    Parameters
    ----------
    areas, latitudes, longitudes : iterables of equal length
        Area identifier and the coordinates to search around, iterated in parallel.
    radius : number, default 500
        Passed to the API as the ``radius`` query parameter.

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with columns 'Community Area', 'Area Latitude',
        'Area Longitude', 'Venue', 'Venue Latitude', 'Venue Longitude' and
        'Venue Category'. The frame is empty (with these columns) when no venues
        are returned. 'Venue Category' is None for a venue listed without categories.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.
    """
    venue_columns = ['Community Area',
                     'Area Latitude',
                     'Area Longitude',
                     'Venue',
                     'Venue Latitude',
                     'Venue Longitude',
                     'Venue Category']
    endpoint = 'https://api.foursquare.com/v2/venues/explore'

    venue_rows = []
    for area, lat, lng in zip(areas, latitudes, longitudes):
        # Let requests build and encode the query string rather than formatting
        # the credentials into the URL by hand.
        params = {
            'client_id': CLIENT_ID,
            'client_secret': CLIENT_SECRET,
            'v': VERSION,
            'll': '{},{}'.format(lat, lng),
            'radius': radius,
            'limit': LIMIT,
        }
        # The timeout keeps a stalled connection from hanging the notebook.
        response = requests.get(endpoint, params = params, timeout = 30)
        response.raise_for_status()

        groups = response.json()['response']['groups']
        items = groups[0]['items'] if groups else []

        for item in items:
            venue = item['venue']
            categories = venue.get('categories') or []
            venue_rows.append((
                area,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                # Use the first listed category; tolerate venues that have none.
                categories[0]['name'] if categories else None))

    # Passing the columns to the constructor also works when venue_rows is empty.
    nearby_venues = pd.DataFrame(venue_rows, columns = venue_columns)

    return(nearby_venues)

In [424]:
# Fetch the venues around every community area's coordinates (default search radius).
chicago_venues = getNearbyVenues(
    community_area_loc['Community Area'],
    community_area_loc['Latitude'],
    community_area_loc['Longitude'],
)

In [481]:
# Report (rows, columns) of the venue table, then preview its first ten rows.
venue_row_count, venue_column_count = chicago_venues.shape
print((venue_row_count, venue_column_count))
chicago_venues.head(n = 10)

(1428, 7)


Unnamed: 0,Community Area,Area Latitude,Area Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,1,42.012195,-87.670609,El Famous Burrito,42.010421,-87.674204,Mexican Restaurant
1,1,42.012195,-87.670609,Taqueria & Restaurant Cd. Hidalgo,42.011634,-87.674484,Mexican Restaurant
2,1,42.012195,-87.670609,Taste Food & Wine,42.016086,-87.668488,Wine Shop
3,1,42.012195,-87.670609,R Public House,42.016032,-87.668571,Sports Bar
4,1,42.012195,-87.670609,Bark Place,42.01008,-87.675223,Pet Store
5,1,42.012195,-87.670609,Romanian Kosher Sausage Co.,42.012765,-87.674692,Deli / Bodega
6,1,42.012195,-87.670609,Luzzat,42.015952,-87.668774,Indian Restaurant
7,1,42.012195,-87.670609,Smack Dab,42.009291,-87.666201,Bakery
8,1,42.012195,-87.670609,Nueva Italy Pizzeria,42.011629,-87.674205,Pizza Place
9,1,42.012195,-87.670609,Charmers Cafe,42.016164,-87.66825,Café


In [482]:
# One indicator column per venue category (no prefix on the column names),
# with the community area of each venue placed in front.
venue_category_flags = pd.get_dummies(
    chicago_venues[['Venue Category']],
    prefix = "",
    prefix_sep = "",
)
chicago_dummy = pd.concat(
    [chicago_venues['Community Area'], venue_category_flags],
    axis = 1,
)
print(chicago_dummy.shape)
chicago_dummy.head(100)

(1428, 226)


Unnamed: 0,Community Area,ATM,Accessories Store,African Restaurant,Airport Lounge,Airport Service,American Restaurant,Arcade,Arepa Restaurant,Argentinian Restaurant,...,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Weight Loss Center,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
3,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,3,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
96,3,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
97,3,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
98,3,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [522]:
# Venue counts per community area and category.
chicago_dummy_grouped = chicago_dummy.groupby('Community Area').sum().reset_index()

# A category is "popular" when it has more than 10 venues across all areas
# (the first column, 'Community Area', is excluded from the totals).
category_totals = chicago_dummy_grouped.iloc[:, 1:].sum(axis = 0)
total_venues = (category_totals > 10).reset_index()
total_venues.columns = ['Venue','Popular']
unpopular_venues = total_venues.loc[~total_venues['Popular'], 'Venue'].to_list()

# Keep only the popular categories.
chicago_dummy_grouped = chicago_dummy_grouped.drop(columns = unpopular_venues)
print(chicago_dummy_grouped.shape)
chicago_dummy_grouped.head(10)

(77, 41)


Unnamed: 0,Community Area,American Restaurant,Asian Restaurant,Bakery,Bar,Boutique,Breakfast Spot,Bus Station,Café,Chinese Restaurant,...,Pharmacy,Pizza Place,Sandwich Place,Seafood Restaurant,Spa,Sushi Restaurant,Thai Restaurant,Theater,Train Station,Video Store
0,1,1,0,2,0,0,1,0,1,2,...,0,3,1,0,0,0,0,2,0,0
1,2,1,0,0,0,0,0,1,0,0,...,0,0,0,0,1,0,0,0,0,0
2,3,0,1,0,3,0,0,0,1,1,...,0,2,2,0,0,3,1,0,0,0
3,4,0,0,0,2,0,0,2,1,1,...,0,1,0,0,0,1,1,0,0,1
4,5,2,0,0,2,1,1,0,0,0,...,0,3,3,0,0,0,0,0,0,0
5,6,0,0,1,2,0,1,0,2,0,...,0,3,4,0,1,1,1,1,0,0
6,7,1,0,1,1,1,2,0,1,0,...,0,3,2,0,1,0,1,2,0,0
7,8,3,0,1,3,2,1,0,0,0,...,0,1,0,0,1,0,0,0,0,0
8,9,1,0,0,2,0,1,0,0,0,...,0,1,0,1,1,0,0,2,0,0
9,10,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [523]:
def most_common(row, num_top_venues):
    """Return the labels of the largest entries of `row`, ignoring its first entry.

    The first element of `row` is skipped (it is treated as an identifier, not a
    count); the remaining values are sorted in descending order and the index
    labels of the first `num_top_venues` of them are returned as an array.
    """
    return (
        row.iloc[1:]
        .sort_values(ascending=False)
        .index
        .values[:num_top_venues]
    )

In [524]:
num_top_venues = 10
indicators = ['st', 'nd', 'rd']


def ordinal_label(rank):
    """Return `rank` with its English ordinal suffix: 1 -> '1st', 11 -> '11th', 22 -> '22nd'."""
    last_digit, last_two = rank % 10, rank % 100
    # 11, 12 and 13 take 'th' even though they end in 1, 2 and 3.
    if 1 <= last_digit <= 3 and not 11 <= last_two <= 13:
        return '{}{}'.format(rank, indicators[last_digit - 1])
    return '{}th'.format(rank)


columns = ['Community Area'] + [
    '{} Most Common Venue'.format(ordinal_label(rank))
    for rank in range(1, num_top_venues + 1)
]

# Rank on the venue-count columns only: 'Cluster Labels' is excluded (when present)
# so that re-running this cell after clustering does not rank the label as a category.
venue_counts = chicago_dummy_grouped.drop(columns = ['Cluster Labels'], errors = 'ignore')

# Build all rows first and create the frame once instead of filling it row by row.
top_venues = [most_common(row, num_top_venues) for _, row in venue_counts.iterrows()]

area_venues_sorted = pd.DataFrame(top_venues, columns = columns[1:], index = venue_counts.index)
area_venues_sorted.insert(0, 'Community Area', venue_counts['Community Area'])

area_venues_sorted.head(10)

Unnamed: 0,Community Area,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,1,Mexican Restaurant,Pizza Place,Theater,Bakery,Chinese Restaurant,Park,Diner,Gym,Indian Restaurant,Café
1,2,Indian Restaurant,Grocery Store,Spa,Bus Station,Fast Food Restaurant,American Restaurant,Seafood Restaurant,Chinese Restaurant,Discount Store,Diner
2,3,Coffee Shop,Mexican Restaurant,Sushi Restaurant,Bar,Diner,Sandwich Place,Pizza Place,Korean Restaurant,Fast Food Restaurant,Fried Chicken Joint
3,4,Convenience Store,Bar,Italian Restaurant,Bus Station,Video Store,Mobile Phone Shop,Coffee Shop,Gym / Fitness Center,Chinese Restaurant,Korean Restaurant
4,5,Pizza Place,Sandwich Place,American Restaurant,Bar,Mexican Restaurant,Boutique,Breakfast Spot,Fast Food Restaurant,Food,Fried Chicken Joint
5,6,Sandwich Place,Pizza Place,Bar,Café,Coffee Shop,Mexican Restaurant,Fried Chicken Joint,Italian Restaurant,Korean Restaurant,Diner
6,7,Coffee Shop,Hot Dog Joint,Pizza Place,Theater,Breakfast Spot,Sandwich Place,Gym / Fitness Center,Grocery Store,Bakery,Bar
7,8,American Restaurant,Bar,Gym / Fitness Center,Boutique,Mexican Restaurant,Bakery,Breakfast Spot,Coffee Shop,Fast Food Restaurant,Gym
8,9,Mexican Restaurant,Theater,Bar,Italian Restaurant,Grocery Store,Breakfast Spot,Cosmetics Shop,Hot Dog Joint,Liquor Store,American Restaurant
9,10,Park,Fried Chicken Joint,Fast Food Restaurant,Donut Shop,Discount Store,Diner,Cosmetics Shop,Convenience Store,Coffee Shop,Cocktail Bar


In [525]:
kclusters = 5

# Cluster on the venue-count columns only. 'Cluster Labels' is dropped as well
# (when present) so that re-running this cell after the labels have been attached to
# chicago_dummy_grouped does not feed the previous labels back in as a feature.
chicago_venue_cluster = chicago_dummy_grouped.drop(
    columns=['Community Area', 'Cluster Labels'], errors='ignore'
)

kmeans_venue = KMeans(n_clusters = kclusters, random_state = 0).fit(chicago_venue_cluster)

# One label per row of chicago_dummy_grouped, in the same row order.
venue_labels = kmeans_venue.labels_
venue_labels[0:10]

array([1, 3, 4, 2, 2, 4, 4, 2, 2, 0], dtype=int32)

In [526]:
# Attach each area's venue cluster to its venue-count row (labels are in row order).
chicago_dummy_grouped['Cluster Labels'] = venue_labels

# Combine area coordinates with the venue profile, matching rows on 'Community Area'.
venue_profile_by_area = chicago_dummy_grouped.set_index('Community Area')
chicago_venue_merged = community_area_loc.join(venue_profile_by_area, on = 'Community Area')
chicago_venue_merged.head(10)

Unnamed: 0,Community Area,Latitude,Longitude,American Restaurant,Asian Restaurant,Bakery,Bar,Boutique,Breakfast Spot,Bus Station,...,Pizza Place,Sandwich Place,Seafood Restaurant,Spa,Sushi Restaurant,Thai Restaurant,Theater,Train Station,Video Store,Cluster Labels
0,1,42.012195,-87.670609,1,0,2,0,0,1,0,...,3,1,0,0,0,0,2,0,0,1
1,2,41.999862,-87.693029,1,0,0,0,0,0,1,...,0,0,0,1,0,0,0,0,0,3
2,3,41.966217,-87.656876,0,1,0,3,0,0,0,...,2,2,0,0,3,1,0,0,0,4
3,4,41.972274,-87.688504,0,0,0,2,0,0,2,...,1,0,0,0,1,1,0,0,1,2
4,5,41.947509,-87.682949,2,0,0,2,1,1,0,...,3,3,0,0,0,0,0,0,0,2
5,6,41.943282,-87.653597,0,0,1,2,0,1,0,...,3,4,0,1,1,1,1,0,0,4
6,7,41.922531,-87.649465,1,0,1,1,1,2,0,...,3,2,0,1,0,1,2,0,0,4
7,8,41.897549,-87.630728,3,0,1,3,2,1,0,...,1,0,0,1,0,0,0,0,0,2
8,9,42.006081,-87.813256,1,0,0,2,0,1,0,...,1,0,1,1,0,0,2,0,0,2
9,10,41.984702,-87.801423,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [527]:
# Map of community areas coloured by venue cluster.
map_venue_clusters = folium.Map(location = [latitude, longitude], zoom_start = 10)

# One evenly spaced rainbow colour per cluster label 0..kclusters-1.
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(rgba) for rgba in colors_array]

for lat, lon, com, cluster in zip(chicago_venue_merged['Latitude'], chicago_venue_merged['Longitude'],
                                  chicago_venue_merged['Community Area'], chicago_venue_merged['Cluster Labels']):
    # chicago_venue_merged is a left join on the area list, so an area without any
    # venue row has a missing label; it cannot be coloured and is skipped.
    if pd.isna(cluster):
        continue
    # The join turns the labels into floats whenever a label is missing; cast back.
    cluster = int(cluster)
    # Labels start at 0, so index the palette directly; `cluster - 1` only worked
    # because index -1 wraps around to the last colour.
    cluster_color = rainbow[cluster]
    label = folium.Popup(str(com) + ' Cluster ' + str(cluster), parse_html = True)
    folium.CircleMarker(
        [lat, lon],
        radius = 5,
        popup = label,
        color = cluster_color,
        fill = True,
        fill_color = cluster_color,
        fill_opacity = 0.7).add_to(map_venue_clusters)

map_venue_clusters

In [545]:
# Agreement between the crime clustering and the venue clustering: the fraction of
# ordered pairs of distinct community areas that are either grouped together in both
# clusterings or separated in both.

# Pair the two label arrays by community area rather than by position, so that the
# comparison stays correct even if one table lacks an area (e.g. an area with no venues).
crime_by_area = pd.Series(crime_labels, index = chicago_crime_prop['Community Area'].values)
venue_by_area = pd.Series(venue_labels, index = chicago_dummy_grouped['Community Area'].values)
shared_areas = crime_by_area.index.intersection(venue_by_area.index)
crime_aligned = crime_by_area.loc[shared_areas].values
venue_aligned = venue_by_area.loc[shared_areas].values

n = len(shared_areas)

# same_*[i, j] is True when areas i and j carry the same label in that clustering.
same_crime = crime_aligned[:, None] == crime_aligned[None, :]
same_venue = venue_aligned[:, None] == venue_aligned[None, :]

# sim[i, j] is 1 where both clusterings agree about the pair (i, j).
sim = (same_crime == same_venue).astype(float)
np.fill_diagonal(sim, 0)  # an area is not compared with itself
np.sum(sim)/(n*(n-1))

0.5618591934381408