In [2]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

Solving environment: done

## Package Plan ##

  environment location: /home/jupyterlab/conda

  added / updated specs: 
    - geopy


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    geographiclib-1.49         |             py_0          32 KB  conda-forge
    cryptography-2.4.2         |   py36h1ba5d50_0         618 KB
    openssl-1.1.1a             |    h14c3975_1000         4.0 MB  conda-forge
    libarchive-3.3.3           |       h5d8350f_5         1.5 MB
    grpcio-1.16.1              |   py36hf8bcb03_1         1.1 MB
    geopy-1.18.1               |             py_0          51 KB  conda-forge
    conda-4.6.2                |           py36_0         869 KB  conda-forge
    libssh2-1.8.0              |                1         239 KB  conda-forge
    python-3.6.8               |       h0371630_0        34.4 MB
    ------------------------------------------------------------
      

Get Orlando Crime data

In [3]:
fdf = pd.read_csv('https://data.cityoforlando.net/resource/6qd7-sr7g.csv')
fdf.shape
fdf.head(5)

Unnamed: 0,:@computed_region_bgqw_styj,:@computed_region_gsfg_ku74,:@computed_region_u8wz_9eai,case_date_time,case_deposition,case_location,case_number,case_offense_category,case_offense_charge_type,case_offense_location,case_offense_type,location,location_address,location_city,location_state,location_zip,status
0,,,,2010-01-28T12:47:00.000,Closed,4100 Block of N ORANGE BLOSSOM TL,2010-00800219,Theft,Committed,Construction Site,All other larceny,,,,,,Unmapped
1,,,,2010-01-28T12:54:00.000,Closed,4100 Block of N ORANGE BLOSSOM TL,2010-00800220,Theft,Committed,Industrial/Mfg,All other larceny,,,,,,Unmapped
2,,,,2010-01-28T13:02:00.000,Closed,4100 Block of N ORANGE BLOSSOM TL,2010-00800221,Theft,Committed,Industrial/Mfg,All other larceny,,,,,,Unmapped
3,,,,2010-04-16T12:50:00.000,Closed,4000 Block of N ORANGE BLOSSOM TL,2010-00178297,Robbery,Committed,Bus/Rail Terminal,Robbery,,,,,,Unmapped
4,,,,2010-04-25T18:41:00.000,Arrest,4800 Block of PHEASANT RUN DR,2010-00193916,Assault,Committed,Residence/Single,Simple assault,,,,,,Unmapped


In [4]:
# Extract Longitude and Latitude information from location column
fdf['long'] = (fdf.location.str.slice(7,19)).astype(float)
fdf['lat'] = (fdf.location.str.slice(20,30)).astype(float)
fdf['OffenseCategory']=fdf['case_offense_category']
fdf['OffenseLocationType']=fdf['case_offense_location']
fdf['CaseNumber'] = fdf['case_number']

In [5]:
mfdf = fdf[['CaseNumber','OffenseCategory','OffenseLocationType','long','lat']]   # does NOT depend on the column order
mfdf.head()

Unnamed: 0,CaseNumber,OffenseCategory,OffenseLocationType,long,lat
0,2010-00800219,Theft,Construction Site,,
1,2010-00800220,Theft,Industrial/Mfg,,
2,2010-00800221,Theft,Industrial/Mfg,,
3,2010-00178297,Robbery,Bus/Rail Terminal,,
4,2010-00193916,Assault,Residence/Single,,


#### Drop rows without loaction data

In [6]:
mfdf=mfdf.dropna().reset_index(drop=True)
mfdf.shape
mfdf.head()

Unnamed: 0,CaseNumber,OffenseCategory,OffenseLocationType,long,lat
0,2014-00109569,Assault,Residence/Single,-81.389827,28.545702
1,2016-00475579,Theft,Department/Discount Store,-81.4591,28.514787
2,2013-00308749,Theft,Department/Discount Store,-81.4591,28.514787
3,2012-00308325,Theft,Department/Discount Store,-81.343495,28.553342
4,2015-00251741,Theft,Department/Discount Store,-81.4591,28.514787


In [7]:
print('The dataframe has {} OffenseCategory and {} OffenseLocationType.'.format(
        len(mfdf['OffenseCategory'].unique()),
        len(mfdf['OffenseLocationType'].unique())
    )
)

The dataframe has 8 OffenseCategory and 12 OffenseLocationType.


In [8]:
address = 'Orlando, FL'

geolocator = Nominatim()
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Orlando are {}, {}.'.format(latitude, longitude))

  This is separate from the ipykernel package so we can avoid doing imports until


The geograpical coordinate of Orlando are 28.5421097, -81.3790388.


In [9]:
# create map of Orlando using latitude and longitude values
map_orlando = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, category,loctype in zip(mfdf['lat'], mfdf['long'], mfdf['CaseNumber'], mfdf['OffenseCategory']):
    label = '{}, {}'.format(category, loctype)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=1,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_orlando)  
    
map_orlando

In [None]:
# Foursquare Credentials

In [10]:
CLIENT_ID = 'F1DOA3B4YYBKS3MYJAO4J5LEAKTZDN25IZG3GKJ45SJT2JPD' # your Foursquare ID
CLIENT_SECRET = '3NBMWHUXBPC4R1U4SG3BLX42DV3TRRVRUI52BCBVOS0NG1AH' # your Foursquare Secret
VERSION = '20180604'
LIMIT = 30
print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: F1DOA3B4YYBKS3MYJAO4J5LEAKTZDN25IZG3GKJ45SJT2JPD
CLIENT_SECRET:3NBMWHUXBPC4R1U4SG3BLX42DV3TRRVRUI52BCBVOS0NG1AH


In [11]:
neighborhood_latitude = mfdf['lat'] # neighborhood latitude value
neighborhood_longitude = mfdf['long'] # neighborhood longitude value


#Create the GET request URL 
# Explore venues near crime location

In [12]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Category', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [13]:

orlando_venues = getNearbyVenues(names=mfdf['CaseNumber'],
                                   latitudes=mfdf['lat'],
                                   longitudes=mfdf['long']
                                  )

2014-00109569
2016-00475579
2013-00308749
2012-00308325
2015-00251741
2011-00523474
2015-00191414
2012-00330148
2012-00802658
2016-00045906
2014-00227581
2011-00027777
2012-00340971
2010-00443675
2011-00173066
2016-00349990
2014-00217106
2015-00412278
2016-00525068
2014-00218505
2013-00801176
2014-00411749
2011-00197436
2014-00081569
2015-00476173
2014-00120628
2014-00167876
2016-00485264
2013-00470504
2011-00802990
2015-00270431
2013-00532022
2013-00430792
2014-00213490
2015-00367893
2012-00407104
2014-00484553
2016-00334211
2015-00802377
2010-00437888
2015-00277806
2010-00022092
2010-00496360
2014-00803830
2012-00536675
2014-00267156
2016-00247113
2012-00801605
2013-00349804
2014-00243199
2010-00248062
2014-00171167
2016-00441163
2012-00197095
2012-00534542
2012-00800554
2010-00800766
2010-00037154
2014-00434577
2016-00161903
2011-00295885
2015-00481137
2015-00401072
2012-00212365
2011-00323603
2014-00255768
2016-00169292
2013-00405777
2015-00488837
2010-00505794
2016-00182646
2011-0

In [14]:
print(orlando_venues.shape)
orlando_venues.head()

(2771, 7)


Unnamed: 0,Category,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,2014-00109569,28.545702,-81.389827,Orlando Tennis Center,28.548173,-81.387839,Tennis Court
1,2014-00109569,28.545702,-81.389827,FDLE Gym,28.545172,-81.384758,Gym / Fitness Center
2,2016-00475579,28.514787,-81.4591,Tijuana Flats,28.516545,-81.459408,Tex-Mex Restaurant
3,2016-00475579,28.514787,-81.4591,Stonington's Fried Shrimp,28.516541,-81.459666,Seafood Restaurant
4,2016-00475579,28.514787,-81.4591,Starbucks,28.51519,-81.458346,Coffee Shop


# Get unique cases for which venues were gathered

In [15]:
O_u =orlando_venues['Category'].unique()
Q2 = pd.DataFrame({'Category':O_u}) # put the array into Dataframe

In [16]:
result = pd.merge(mfdf,Q2, 
                  left_on='CaseNumber',
                  right_on='Category',
                  how='right')
result.shape

(113, 6)

In [17]:
orlando_venues.groupby('Category').count() # Count for each crime location

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Category,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2010-00022092,30,30,30,30,30,30
2010-00037154,30,30,30,30,30,30
2010-00061142,13,13,13,13,13,13
2010-00134851,4,4,4,4,4,4
2010-00248062,30,30,30,30,30,30
2010-00434010,30,30,30,30,30,30
2010-00434499,30,30,30,30,30,30
2010-00437888,30,30,30,30,30,30
2010-00443675,30,30,30,30,30,30
2010-00482147,30,30,30,30,30,30


In [19]:
print('There are {} uniques categories.'.format(len(orlando_venues['Venue Category'].unique())))


There are 96 uniques categories.


# Analyze each crime location

In [21]:
# one hot encoding
orlando_onehot = pd.get_dummies(orlando_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
orlando_onehot['Category'] = orlando_venues['Category'] 

# move neighborhood column to the first column
fixed_columns = [orlando_onehot.columns[-1]] + list(orlando_onehot.columns[:-1])
orlando_onehot = orlando_onehot[fixed_columns]

orlando_onehot.head()

Unnamed: 0,Category,American Restaurant,Art Gallery,Asian Restaurant,Athletics & Sports,Auto Workshop,Automotive Shop,BBQ Joint,Baby Store,Bagel Shop,Bakery,Bank,Bar,Basketball Stadium,Bed & Breakfast,Beer Bar,Big Box Store,Bookstore,Brazilian Restaurant,Brewery,Burger Joint,Business Service,Cajun / Creole Restaurant,Chinese Restaurant,Cocktail Bar,Coffee Shop,Comedy Club,Construction & Landscaping,Convenience Store,Cosmetics Shop,Deli / Bodega,Design Studio,Dessert Shop,Discount Store,Dive Bar,Donut Shop,Dry Cleaner,Event Space,Fast Food Restaurant,Food,Football Stadium,French Restaurant,Fried Chicken Joint,Furniture / Home Store,Gas Station,Gastropub,Gift Shop,Grocery Store,Gym,Gym / Fitness Center,Home Service,Ice Cream Shop,Intersection,Italian Restaurant,Juice Bar,Latin American Restaurant,Liquor Store,Lounge,Mexican Restaurant,Miscellaneous Shop,Mobile Phone Shop,Moving Target,Music Festival,Music Venue,Park,Pharmacy,Pizza Place,Platform,Pool,Pub,Rental Service,Restaurant,Rock Club,Salon / Barbershop,Sandwich Place,Seafood Restaurant,Shipping Store,Smoke Shop,Smoothie Shop,Speakeasy,Sporting Goods Shop,Sports Club,Stadium,Steakhouse,Sushi Restaurant,Taco Place,Tapas Restaurant,Tattoo Parlor,Tennis Court,Tex-Mex Restaurant,Thai Restaurant,Theater,Video Game Store,Video Store,Vietnamese Restaurant,Wings Joint,Yoga Studio
0,2014-00109569,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0
1,2014-00109569,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,2016-00475579,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0
3,2016-00475579,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,2016-00475579,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


And let's examine the new dataframe size.

In [22]:
orlando_onehot.shape

(2771, 97)

#### Next, let's group rows by case and by taking the mean of the frequency of occurrence of each category

In [25]:
orlando_grouped = orlando_onehot.groupby('Category').mean().reset_index()


#### Let's print each offense along with the top 5 most common venues

In [26]:
num_top_venues = 5

for hood in orlando_grouped['Category']:
    print("----"+hood+"----")
    temp = orlando_grouped[orlando_grouped['Category'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----2010-00022092----
                       venue  freq
0                     Bakery  0.10
1      Vietnamese Restaurant  0.07
2              Deli / Bodega  0.07
3         Italian Restaurant  0.07
4  Latin American Restaurant  0.03


----2010-00037154----
           venue  freq
0            Bar  0.13
1   Cocktail Bar  0.10
2      Gastropub  0.07
3  Smoothie Shop  0.03
4    Music Venue  0.03


----2010-00061142----
                        venue  freq
0            Football Stadium  0.15
1                     Stadium  0.15
2                Intersection  0.08
3  Construction & Landscaping  0.08
4              Sandwich Place  0.08


----2010-00134851----
                 venue  freq
0        Grocery Store  0.50
1       Rental Service  0.25
2       Discount Store  0.25
3  American Restaurant  0.00
4       Music Festival  0.00


----2010-00248062----
                  venue  freq
0        Ice Cream Shop  0.10
1  Brazilian Restaurant  0.07
2    Chinese Restaurant  0.07
3           Coffee Shop 

#### Let's put that into a *pandas* dataframe

Function to sort the venues in descending order.

In [27]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [28]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Category']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Category'] = orlando_grouped['Category']

for ind in np.arange(orlando_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(orlando_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted

Unnamed: 0,Category,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,2010-00022092,Bakery,Vietnamese Restaurant,Italian Restaurant,Deli / Bodega,Yoga Studio,Cosmetics Shop,Mexican Restaurant,Liquor Store,Latin American Restaurant,Gym / Fitness Center
1,2010-00037154,Bar,Cocktail Bar,Gastropub,Sporting Goods Shop,Comedy Club,Music Venue,Mexican Restaurant,Burger Joint,Brewery,Convenience Store
2,2010-00061142,Stadium,Football Stadium,Construction & Landscaping,Park,Convenience Store,BBQ Joint,Intersection,Sandwich Place,Seafood Restaurant,Music Festival
3,2010-00134851,Grocery Store,Rental Service,Discount Store,Yoga Studio,Comedy Club,Construction & Landscaping,Convenience Store,Cosmetics Shop,Deli / Bodega,Design Studio
4,2010-00248062,Ice Cream Shop,Brazilian Restaurant,Pizza Place,Chinese Restaurant,Coffee Shop,Fast Food Restaurant,Gym,Gas Station,Salon / Barbershop,Mobile Phone Shop
5,2010-00434010,Ice Cream Shop,Brazilian Restaurant,Pizza Place,Chinese Restaurant,Coffee Shop,Fast Food Restaurant,Gym,Gas Station,Salon / Barbershop,Mobile Phone Shop
6,2010-00434499,Ice Cream Shop,Brazilian Restaurant,Pizza Place,Chinese Restaurant,Coffee Shop,Fast Food Restaurant,Gym,Gas Station,Salon / Barbershop,Mobile Phone Shop
7,2010-00437888,Bakery,Vietnamese Restaurant,Italian Restaurant,Deli / Bodega,Yoga Studio,Cosmetics Shop,Mexican Restaurant,Liquor Store,Latin American Restaurant,Gym / Fitness Center
8,2010-00443675,Ice Cream Shop,Brazilian Restaurant,Pizza Place,Chinese Restaurant,Coffee Shop,Fast Food Restaurant,Gym,Gas Station,Salon / Barbershop,Mobile Phone Shop
9,2010-00482147,Bakery,Vietnamese Restaurant,Italian Restaurant,Deli / Bodega,Yoga Studio,Cosmetics Shop,Mexican Restaurant,Liquor Store,Latin American Restaurant,Gym / Fitness Center


##  Cluster Neighborhoods

Run *k*-means to cluster the neighborhood into 5 clusters.

In [29]:
# set number of clusters
kclusters = 5

orlando_grouped_clustering = orlando_grouped.drop('Category', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(orlando_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([3, 3, 3, 3, 0, 0, 0, 3, 0, 3], dtype=int32)

In [30]:
orlando_merged = result

# add clustering labels
orlando_merged['Cluster Labels'] = kmeans.labels_

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
orlando_merged = orlando_merged.join(neighborhoods_venues_sorted.set_index('Category'), on='Category')

orlando_merged.head() 

Unnamed: 0,CaseNumber,OffenseCategory,OffenseLocationType,long,lat,Category,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,2014-00109569,Assault,Residence/Single,-81.389827,28.545702,2014-00109569,3,Gym / Fitness Center,Tennis Court,Yoga Studio,Donut Shop,Comedy Club,Construction & Landscaping,Convenience Store,Cosmetics Shop,Deli / Bodega,Design Studio
1,2016-00475579,Theft,Department/Discount Store,-81.4591,28.514787,2016-00475579,3,Ice Cream Shop,Brazilian Restaurant,Pizza Place,Chinese Restaurant,Coffee Shop,Fast Food Restaurant,Gym,Gas Station,Salon / Barbershop,Mobile Phone Shop
2,2013-00308749,Theft,Department/Discount Store,-81.4591,28.514787,2013-00308749,3,Ice Cream Shop,Brazilian Restaurant,Pizza Place,Chinese Restaurant,Coffee Shop,Fast Food Restaurant,Gym,Gas Station,Salon / Barbershop,Mobile Phone Shop
3,2012-00308325,Theft,Department/Discount Store,-81.343495,28.553342,2012-00308325,3,Bakery,Vietnamese Restaurant,Italian Restaurant,Deli / Bodega,Yoga Studio,Cosmetics Shop,Mexican Restaurant,Liquor Store,Latin American Restaurant,Gym / Fitness Center
4,2015-00251741,Theft,Department/Discount Store,-81.4591,28.514787,2015-00251741,0,Ice Cream Shop,Brazilian Restaurant,Pizza Place,Chinese Restaurant,Coffee Shop,Fast Food Restaurant,Gym,Gas Station,Salon / Barbershop,Mobile Phone Shop


In [31]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(orlando_merged['lat'], orlando_merged['long'], orlando_merged['CaseNumber'], orlando_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

### Examine Clusters

#Cluster 1

In [34]:
orlando_merged.loc[orlando_merged['Cluster Labels'] == 0, orlando_merged.columns[[1] + list(range(5, orlando_merged.shape[1]))]]

Unnamed: 0,OffenseCategory,Category,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
4,Theft,2015-00251741,0,Ice Cream Shop,Brazilian Restaurant,Pizza Place,Chinese Restaurant,Coffee Shop,Fast Food Restaurant,Gym,Gas Station,Salon / Barbershop,Mobile Phone Shop
5,Theft,2011-00523474,0,Ice Cream Shop,Brazilian Restaurant,Pizza Place,Chinese Restaurant,Coffee Shop,Fast Food Restaurant,Gym,Gas Station,Salon / Barbershop,Mobile Phone Shop
6,Arson,2015-00191414,0,Shipping Store,Moving Target,Yoga Studio,Dive Bar,Comedy Club,Construction & Landscaping,Convenience Store,Cosmetics Shop,Deli / Bodega,Design Studio
8,Fraud,2012-00802658,0,Ice Cream Shop,Brazilian Restaurant,Pizza Place,Chinese Restaurant,Coffee Shop,Fast Food Restaurant,Gym,Gas Station,Salon / Barbershop,Mobile Phone Shop
10,Theft,2014-00227581,0,Bakery,Vietnamese Restaurant,Italian Restaurant,Deli / Bodega,Yoga Studio,Cosmetics Shop,Mexican Restaurant,Liquor Store,Latin American Restaurant,Gym / Fitness Center
11,Theft,2011-00027777,0,Bakery,Vietnamese Restaurant,Italian Restaurant,Deli / Bodega,Yoga Studio,Cosmetics Shop,Mexican Restaurant,Liquor Store,Latin American Restaurant,Gym / Fitness Center
12,Narcotics,2012-00340971,0,Bed & Breakfast,Convenience Store,Platform,Intersection,Food,Park,Yoga Studio,Construction & Landscaping,Cosmetics Shop,Deli / Bodega
13,Theft,2010-00443675,0,Ice Cream Shop,Brazilian Restaurant,Pizza Place,Chinese Restaurant,Coffee Shop,Fast Food Restaurant,Gym,Gas Station,Salon / Barbershop,Mobile Phone Shop
14,Fraud,2011-00173066,0,Ice Cream Shop,Brazilian Restaurant,Pizza Place,Chinese Restaurant,Coffee Shop,Fast Food Restaurant,Gym,Gas Station,Salon / Barbershop,Mobile Phone Shop
16,Fraud,2014-00217106,0,Ice Cream Shop,Brazilian Restaurant,Pizza Place,Chinese Restaurant,Coffee Shop,Fast Food Restaurant,Gym,Gas Station,Salon / Barbershop,Mobile Phone Shop


In [35]:
#orlando_venues.groupby('Category').count() # Count for each crime location
orlando_merged.loc[orlando_merged['Cluster Labels']== 0].groupby('1st Most Common Venue').count()

Unnamed: 0_level_0,CaseNumber,OffenseCategory,OffenseLocationType,long,lat,Category,Cluster Labels,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1st Most Common Venue,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
Asian Restaurant,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
Athletics & Sports,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
Bakery,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9
Bar,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3
Bed & Breakfast,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
Construction & Landscaping,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6
Donut Shop,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
Fried Chicken Joint,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
Grocery Store,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
Gym,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1


# Cluster 2

In [44]:
orlando_merged.loc[orlando_merged['Cluster Labels'] == 1, orlando_merged.columns[[1] + list(range(5, orlando_merged.shape[1]))]]

Unnamed: 0,OffenseCategory,Category,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
41,Theft,2010-00496360,1,Ice Cream Shop,Brazilian Restaurant,Pizza Place,Chinese Restaurant,Coffee Shop,Fast Food Restaurant,Gym,Gas Station,Salon / Barbershop,Mobile Phone Shop
42,Theft,2014-00803830,1,Ice Cream Shop,Brazilian Restaurant,Pizza Place,Chinese Restaurant,Coffee Shop,Fast Food Restaurant,Gym,Gas Station,Salon / Barbershop,Mobile Phone Shop


# Cluster 3

In [45]:
orlando_merged.loc[orlando_merged['Cluster Labels'] == 2, orlando_merged.columns[[1] + list(range(5, orlando_merged.shape[1]))]]

Unnamed: 0,OffenseCategory,Category,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
98,Narcotics,2013-00543306,2,Bar,Cocktail Bar,Gastropub,Sporting Goods Shop,Comedy Club,Music Venue,Mexican Restaurant,Burger Joint,Brewery,Convenience Store
100,Theft,2010-00434499,2,Ice Cream Shop,Brazilian Restaurant,Pizza Place,Chinese Restaurant,Coffee Shop,Fast Food Restaurant,Gym,Gas Station,Salon / Barbershop,Mobile Phone Shop


# Cluster 4

In [46]:
orlando_merged.loc[orlando_merged['Cluster Labels'] == 3, orlando_merged.columns[[1] + list(range(5, orlando_merged.shape[1]))]]

Unnamed: 0,OffenseCategory,Category,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Assault,2014-00109569,3,Gym / Fitness Center,Tennis Court,Yoga Studio,Donut Shop,Comedy Club,Construction & Landscaping,Convenience Store,Cosmetics Shop,Deli / Bodega,Design Studio
1,Theft,2016-00475579,3,Ice Cream Shop,Brazilian Restaurant,Pizza Place,Chinese Restaurant,Coffee Shop,Fast Food Restaurant,Gym,Gas Station,Salon / Barbershop,Mobile Phone Shop
2,Theft,2013-00308749,3,Ice Cream Shop,Brazilian Restaurant,Pizza Place,Chinese Restaurant,Coffee Shop,Fast Food Restaurant,Gym,Gas Station,Salon / Barbershop,Mobile Phone Shop
3,Theft,2012-00308325,3,Bakery,Vietnamese Restaurant,Italian Restaurant,Deli / Bodega,Yoga Studio,Cosmetics Shop,Mexican Restaurant,Liquor Store,Latin American Restaurant,Gym / Fitness Center
7,Theft,2012-00330148,3,Bakery,Vietnamese Restaurant,Italian Restaurant,Deli / Bodega,Yoga Studio,Cosmetics Shop,Mexican Restaurant,Liquor Store,Latin American Restaurant,Gym / Fitness Center
9,Theft,2016-00045906,3,Ice Cream Shop,Brazilian Restaurant,Pizza Place,Chinese Restaurant,Coffee Shop,Fast Food Restaurant,Gym,Gas Station,Salon / Barbershop,Mobile Phone Shop
15,Theft,2016-00349990,3,Ice Cream Shop,Brazilian Restaurant,Pizza Place,Chinese Restaurant,Coffee Shop,Fast Food Restaurant,Gym,Gas Station,Salon / Barbershop,Mobile Phone Shop
18,Assault,2016-00525068,3,Ice Cream Shop,Brazilian Restaurant,Pizza Place,Chinese Restaurant,Coffee Shop,Fast Food Restaurant,Gym,Gas Station,Salon / Barbershop,Mobile Phone Shop
19,Robbery,2014-00218505,3,Ice Cream Shop,Brazilian Restaurant,Pizza Place,Chinese Restaurant,Coffee Shop,Fast Food Restaurant,Gym,Gas Station,Salon / Barbershop,Mobile Phone Shop
21,Theft,2014-00411749,3,Bakery,Vietnamese Restaurant,Italian Restaurant,Deli / Bodega,Yoga Studio,Cosmetics Shop,Mexican Restaurant,Liquor Store,Latin American Restaurant,Gym / Fitness Center


# Cluster 5

In [47]:
orlando_merged.loc[orlando_merged['Cluster Labels'] == 4, orlando_merged.columns[[1] + list(range(5, orlando_merged.shape[1]))]]

Unnamed: 0,OffenseCategory,Category,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
20,Theft,2013-00801176,4,Ice Cream Shop,Brazilian Restaurant,Pizza Place,Chinese Restaurant,Coffee Shop,Fast Food Restaurant,Gym,Gas Station,Salon / Barbershop,Mobile Phone Shop
22,Burglary,2011-00197436,4,Chinese Restaurant,Convenience Store,Wings Joint,Gym / Fitness Center,Pool,Gym,Sandwich Place,Bank,Cosmetics Shop,Dry Cleaner
49,Theft,2010-00248062,4,Ice Cream Shop,Brazilian Restaurant,Pizza Place,Chinese Restaurant,Coffee Shop,Fast Food Restaurant,Gym,Gas Station,Salon / Barbershop,Mobile Phone Shop
69,Assault,2011-00149231,4,Ice Cream Shop,Brazilian Restaurant,Pizza Place,Chinese Restaurant,Coffee Shop,Fast Food Restaurant,Gym,Gas Station,Salon / Barbershop,Mobile Phone Shop
85,Theft,2010-00482147,4,Bakery,Vietnamese Restaurant,Italian Restaurant,Deli / Bodega,Yoga Studio,Cosmetics Shop,Mexican Restaurant,Liquor Store,Latin American Restaurant,Gym / Fitness Center
97,Assault,2016-00507006,4,Bar,Cocktail Bar,Gastropub,Sporting Goods Shop,Comedy Club,Music Venue,Mexican Restaurant,Burger Joint,Brewery,Convenience Store
108,Theft,2012-00478901,4,Bakery,Vietnamese Restaurant,Italian Restaurant,Deli / Bodega,Yoga Studio,Cosmetics Shop,Mexican Restaurant,Liquor Store,Latin American Restaurant,Gym / Fitness Center


In [48]:
## orlando_merged.loc[orlando_merged['OffenseCategory']== 'Burglary'].groupby('1st Most Common Venue').count()
orlando_merged.groupby(['Cluster Labels','OffenseCategory','1st Most Common Venue']).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,CaseNumber,OffenseLocationType,long,lat,Category,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
Cluster Labels,OffenseCategory,1st Most Common Venue,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
0,Arson,Shipping Store,1,1,1,1,1,1,1,1,1,1,1,1,1,1
0,Assault,Stadium,1,1,1,1,1,1,1,1,1,1,1,1,1,1
0,Burglary,Athletics & Sports,1,1,1,1,1,1,1,1,1,1,1,1,1,1
0,Burglary,Construction & Landscaping,1,1,1,1,1,1,1,1,1,1,1,1,1,1
0,Burglary,Gym,1,1,1,1,1,1,1,1,1,1,1,1,1,1
0,Burglary,Stadium,1,1,1,1,1,1,1,1,1,1,1,1,1,1
0,Fraud,Asian Restaurant,1,1,1,1,1,1,1,1,1,1,1,1,1,1
0,Fraud,Construction & Landscaping,2,2,2,2,2,2,2,2,2,2,2,2,2,2
0,Fraud,Ice Cream Shop,5,5,5,5,5,5,5,5,5,5,5,5,5,5
0,Narcotics,Bar,3,3,3,3,3,3,3,3,3,3,3,3,3,3
