In [1]:
import pandas as pd
import numpy as np
import requests

In [2]:
neighborhoods = pd.read_csv("dublin_neighborhoods.csv")

In [3]:
pd.set_option('display.max_columns', 100)
pd.set_option('display.max_rows', 200)

In [4]:
neighborhoods.head()

Unnamed: 0,Neighborhood,Latitude,Longitude
0,Aderrig,53.340213,-6.474632
1,Artaine,53.384701,-6.217102
2,Baldongan,53.553805,-6.12648
3,Baldoyle,53.403104,-6.139341
4,Balgriffin,53.412817,-6.183087


In [5]:
from geopy.geocoders import Nominatim
from sklearn.cluster import KMeans
import folium
import matplotlib.cm as cm
import matplotlib.colors as colors

In [6]:
address = 'Dublin, Ireland'

geolocator = Nominatim(user_agent="dublin_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Dublin are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Dublin are 53.3497645, -6.2602732.


In [7]:
# create map of Dublin using latitude and longitude values
map_dublin = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, neighborhood in zip(neighborhoods['Latitude'], neighborhoods['Longitude'], neighborhoods['Neighborhood']):
    label = '{}'.format(neighborhood)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_dublin)  
    
map_dublin

In [8]:
dublin_venues = pd.read_csv("dublin_venues.csv")

In [9]:
print(dublin_venues.shape)
dublin_venues.head()

(2173, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Aderrig,53.340213,-6.474632,Finnstown Castle Hotel,53.339602,-6.460888,Hotel
1,Aderrig,53.340213,-6.474632,Adamstown Railway Station,53.337099,-6.462584,Train Station
2,Aderrig,53.340213,-6.474632,Peacock Restaurant,53.339547,-6.461501,Gastropub
3,Aderrig,53.340213,-6.474632,Londis,53.336752,-6.457966,Convenience Store
4,Aderrig,53.340213,-6.474632,PDM Construction,53.350451,-6.46992,Building


In [10]:
dublin_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Aderrig,7,7,7,7,7,7
Artaine,35,35,35,35,35,35
Baldongan,1,1,1,1,1,1
Baldoyle,16,16,16,16,16,16
Balgriffin,19,19,19,19,19,19
Ballyboghil,3,3,3,3,3,3
Ballyfermot,10,10,10,10,10,10
Ballymadun,3,3,3,3,3,3
Balrothery,5,5,5,5,5,5
Balscaddan,1,1,1,1,1,1


In [11]:
print('There are {} uniques categories.'.format(len(dublin_venues['Venue Category'].unique())))

There are 217 uniques categories.


In [12]:
filter_list = ['Bakery', 'Food & Drink Shop', 'Pharmacy','Betting Shop','Scenic Lookout', 'Ice Cream Shop', 'Gourmet Shop', 'Health Food Store', 'Gift Shop', 'Bookstore']
dublin_venues = dublin_venues[dublin_venues['Venue Category'].isin(filter_list)].reset_index(drop=True)
print('There are {} uniques categories.'.format(len(dublin_venues['Venue Category'].unique())))

There are 10 uniques categories.


In [13]:
dublin_venues['Venue Category'].unique()

array(['Food & Drink Shop', 'Pharmacy', 'Health Food Store', 'Bakery',
       'Gift Shop', 'Scenic Lookout', 'Bookstore', 'Betting Shop',
       'Ice Cream Shop', 'Gourmet Shop'], dtype=object)

In [14]:
# one hot encoding
dublin_onehot = pd.get_dummies(dublin_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
dublin_onehot['Neighborhood'] = dublin_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [dublin_onehot.columns[-1]] + list(dublin_onehot.columns[:-1])
dublin_onehot = dublin_onehot[fixed_columns]

dublin_onehot.head()

Unnamed: 0,Neighborhood,Bakery,Betting Shop,Bookstore,Food & Drink Shop,Gift Shop,Gourmet Shop,Health Food Store,Ice Cream Shop,Pharmacy,Scenic Lookout
0,Baldoyle,0,0,0,1,0,0,0,0,0,0
1,Balgriffin,0,0,0,0,0,0,0,0,1,0
2,Balgriffin,0,0,0,0,0,0,1,0,0,0
3,Castleknock,1,0,0,0,0,0,0,0,0,0
4,Cloghran,0,0,0,0,1,0,0,0,0,0


In [15]:
dublin_onehot.shape

(77, 11)

In [16]:
dublin_onehot

Unnamed: 0,Neighborhood,Bakery,Betting Shop,Bookstore,Food & Drink Shop,Gift Shop,Gourmet Shop,Health Food Store,Ice Cream Shop,Pharmacy,Scenic Lookout
0,Baldoyle,0,0,0,1,0,0,0,0,0,0
1,Balgriffin,0,0,0,0,0,0,0,0,1,0
2,Balgriffin,0,0,0,0,0,0,1,0,0,0
3,Castleknock,1,0,0,0,0,0,0,0,0,0
4,Cloghran,0,0,0,0,1,0,0,0,0,0
5,Cloghran,0,0,0,0,0,0,0,0,0,1
6,Cloghran,0,0,0,1,0,0,0,0,0,0
7,Cloghran,0,0,1,0,0,0,0,0,0,0
8,Clontarf,0,0,0,0,0,0,0,0,0,1
9,Clonturk,0,1,0,0,0,0,0,0,0,0


In [17]:
dublin_grouped = dublin_onehot.groupby('Neighborhood').sum().reset_index()
dublin_grouped

Unnamed: 0,Neighborhood,Bakery,Betting Shop,Bookstore,Food & Drink Shop,Gift Shop,Gourmet Shop,Health Food Store,Ice Cream Shop,Pharmacy,Scenic Lookout
0,Baldoyle,0,0,0,1,0,0,0,0,0,0
1,Balgriffin,0,0,0,0,0,0,1,0,1,0
2,Castleknock,1,0,0,0,0,0,0,0,0,0
3,Cloghran,0,0,1,1,1,0,0,0,0,1
4,Clontarf,0,0,0,0,0,0,0,0,0,1
5,Clonturk,0,1,0,0,0,0,0,0,0,0
6,Cruagh,0,0,0,0,0,0,0,0,0,1
7,Dalkey,1,0,0,0,0,0,1,0,0,2
8,Donabate,0,0,0,0,0,0,0,1,0,0
9,Donnybrook,0,0,0,1,0,1,0,1,1,0


In [18]:
dublin_grouped.shape

(37, 11)

In [19]:
dublin_grouped["Count_Venues"] = dublin_grouped[dublin_grouped.iloc[:,1:11] > 0].iloc[:,1:10].count(axis=1)
dublin_grouped["Total"] = dublin_grouped.iloc[:,1:11].sum(axis=1)
dublin_grouped

Unnamed: 0,Neighborhood,Bakery,Betting Shop,Bookstore,Food & Drink Shop,Gift Shop,Gourmet Shop,Health Food Store,Ice Cream Shop,Pharmacy,Scenic Lookout,Count_Venues,Total
0,Baldoyle,0,0,0,1,0,0,0,0,0,0,1,1
1,Balgriffin,0,0,0,0,0,0,1,0,1,0,2,2
2,Castleknock,1,0,0,0,0,0,0,0,0,0,1,1
3,Cloghran,0,0,1,1,1,0,0,0,0,1,3,4
4,Clontarf,0,0,0,0,0,0,0,0,0,1,0,1
5,Clonturk,0,1,0,0,0,0,0,0,0,0,1,1
6,Cruagh,0,0,0,0,0,0,0,0,0,1,0,1
7,Dalkey,1,0,0,0,0,0,1,0,0,2,2,4
8,Donabate,0,0,0,0,0,0,0,1,0,0,1,1
9,Donnybrook,0,0,0,1,0,1,0,1,1,0,4,4


In [20]:
neighborhoods_venues_sorted = dublin_grouped.sort_values(by=['Total'], ascending=False).reset_index(drop=True)
neighborhoods_venues_sorted_with_total = neighborhoods_venues_sorted.append(neighborhoods_venues_sorted.sum(numeric_only=True), ignore_index=True)
neighborhoods_venues_sorted_with_total.iloc[-1:,:]

Unnamed: 0,Neighborhood,Bakery,Betting Shop,Bookstore,Food & Drink Shop,Gift Shop,Gourmet Shop,Health Food Store,Ice Cream Shop,Pharmacy,Scenic Lookout,Count_Venues,Total
37,,15.0,2.0,6.0,15.0,4.0,6.0,4.0,6.0,12.0,7.0,59.0,77.0


In [21]:
# set number of clusters
kclusters = 3

dublin_grouped_clustering = neighborhoods_venues_sorted.drop(['Neighborhood','Count_Venues','Total'], 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(dublin_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:30] 

array([2, 1, 1, 0, 0, 0, 1, 1, 1, 2, 1, 1, 1, 1, 2, 2, 2, 0, 2, 1, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 2], dtype=int32)

In [22]:
dublin_grouped_clustering

Unnamed: 0,Bakery,Betting Shop,Bookstore,Food & Drink Shop,Gift Shop,Gourmet Shop,Health Food Store,Ice Cream Shop,Pharmacy,Scenic Lookout
0,0,0,2,0,0,0,1,1,2,0
1,0,0,0,3,0,0,1,1,0,0
2,1,0,1,1,0,1,0,1,0,0
3,0,0,1,1,1,0,0,0,0,1
4,0,0,0,0,2,2,0,0,0,0
5,1,0,0,0,0,0,1,0,0,2
6,0,0,0,1,0,1,0,1,1,0
7,2,0,0,1,0,1,0,0,0,0
8,2,0,0,1,0,0,0,0,0,0
9,2,0,0,0,0,0,0,0,1,0


In [23]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

dublin_merged = neighborhoods

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
dublin_merged = dublin_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

dublin_merged.dropna(inplace=True)

dublin_merged['Cluster Labels'] = dublin_merged['Cluster Labels'].astype(int)

dublin_merged.head() # check the last columns!

Unnamed: 0,Neighborhood,Latitude,Longitude,Cluster Labels,Bakery,Betting Shop,Bookstore,Food & Drink Shop,Gift Shop,Gourmet Shop,Health Food Store,Ice Cream Shop,Pharmacy,Scenic Lookout,Count_Venues,Total
3,Baldoyle,53.403104,-6.139341,1,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0
4,Balgriffin,53.412817,-6.183087,2,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,2.0,2.0
11,Castleknock,53.38515,-6.356383,0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0
13,Cloghran,53.419194,-6.247615,0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,3.0,4.0
17,Clontarf,53.367679,-6.19265,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0


In [24]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=10)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(dublin_merged['Latitude'], dublin_merged['Longitude'], dublin_merged['Neighborhood'], dublin_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [25]:
dublin_merged.loc[dublin_merged['Cluster Labels'] == 0, dublin_merged.columns[[0] + list(range(4, dublin_merged.shape[1]))]]

Unnamed: 0,Neighborhood,Bakery,Betting Shop,Bookstore,Food & Drink Shop,Gift Shop,Gourmet Shop,Health Food Store,Ice Cream Shop,Pharmacy,Scenic Lookout,Count_Venues,Total
11,Castleknock,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0
13,Cloghran,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,3.0,4.0
17,Clontarf,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0
18,Clonturk,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0
20,Cruagh,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0
22,Dalkey,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,2.0,2.0,4.0
23,Donabate,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0
33,Holmpatrick,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0
34,Howth,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0
43,Kilmactalway,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0


In [26]:
dublin_merged.loc[dublin_merged['Cluster Labels'] == 1, dublin_merged.columns[[0] + list(range(4, dublin_merged.shape[1]))]]

Unnamed: 0,Neighborhood,Bakery,Betting Shop,Bookstore,Food & Drink Shop,Gift Shop,Gourmet Shop,Health Food Store,Ice Cream Shop,Pharmacy,Scenic Lookout,Count_Venues,Total
3,Baldoyle,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0
24,Donnybrook,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0,0.0,4.0,4.0
31,Grangegorman,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,3.0,3.0
53,Monkstown,1.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,5.0,5.0
64,Rathfarnham,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0
68,St Mark's,2.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,3.0,4.0
69,St. Catherine's,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2.0
71,St. James',2.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,3.0
74,St. Nicholas Without,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0
75,St. Peter's,0.0,0.0,0.0,3.0,0.0,0.0,1.0,1.0,0.0,0.0,3.0,5.0


In [27]:
dublin_merged.loc[dublin_merged['Cluster Labels'] == 2, dublin_merged.columns[[0] + list(range(4, dublin_merged.shape[1]))]]

Unnamed: 0,Neighborhood,Bakery,Betting Shop,Bookstore,Food & Drink Shop,Gift Shop,Gourmet Shop,Health Food Store,Ice Cream Shop,Pharmacy,Scenic Lookout,Count_Venues,Total
4,Balgriffin,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,2.0,2.0
27,Finglas,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0
38,Kill,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,1.0,2.0
54,Mulhuddart,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0
58,Palmerston,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,2.0,2.0
62,Raheny,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0
66,Saggart,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0
70,St. George's,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,2.0,3.0
79,Taney,0.0,0.0,2.0,0.0,0.0,0.0,1.0,1.0,2.0,0.0,4.0,6.0


In [28]:
dublin_merged.sort_values(by=['Total','Count_Venues'], ascending=False)

Unnamed: 0,Neighborhood,Latitude,Longitude,Cluster Labels,Bakery,Betting Shop,Bookstore,Food & Drink Shop,Gift Shop,Gourmet Shop,Health Food Store,Ice Cream Shop,Pharmacy,Scenic Lookout,Count_Venues,Total
79,Taney,53.290457,-6.237441,2,0.0,0.0,2.0,0.0,0.0,0.0,1.0,1.0,2.0,0.0,4.0,6.0
53,Monkstown,53.288286,-6.151282,1,1.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,5.0,5.0
75,St. Peter's,53.323399,-6.260344,1,0.0,0.0,0.0,3.0,0.0,0.0,1.0,1.0,0.0,0.0,3.0,5.0
24,Donnybrook,53.324877,-6.22871,1,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0,0.0,4.0,4.0
13,Cloghran,53.419194,-6.247615,0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,3.0,4.0
68,St Mark's,53.33984,-6.234471,1,2.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,3.0,4.0
22,Dalkey,53.274833,-6.091553,0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,2.0,2.0,4.0
52,Malahide,53.447919,-6.163629,0,0.0,0.0,0.0,0.0,2.0,2.0,0.0,0.0,0.0,0.0,2.0,4.0
31,Grangegorman,53.35976,-6.290792,1,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,3.0,3.0
70,St. George's,53.36654,-6.263466,2,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,2.0,3.0
