# Identifying clusters in UK for a phased relaxation of the COVID-19 lockdown

### Installing required libraries

In [1]:
# Installing required libraries

import numpy as np # library to handle data in a vectorized manner
import pandas as pd # library for data analsysis
import json # library to handle JSON files

!pip install geopy
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

!pip install folium
import folium # map rendering library

Collecting folium
[?25l  Downloading https://files.pythonhosted.org/packages/a4/f0/44e69d50519880287cc41e7c8a6acc58daa9a9acf5f6afc52bcc70f69a6d/folium-0.11.0-py2.py3-none-any.whl (93kB)
[K     |████████████████████████████████| 102kB 7.5MB/s ta 0:00:011
[?25hCollecting branca>=0.3.0 (from folium)
  Downloading https://files.pythonhosted.org/packages/13/fb/9eacc24ba3216510c6b59a4ea1cd53d87f25ba76237d7f4393abeaf4c94e/branca-0.4.1-py3-none-any.whl
Installing collected packages: branca, folium
Successfully installed branca-0.4.1 folium-0.11.0


### Importing a CSV files with UK Borough wise COVID case data. The below code is generated from the file upload widget in IBM Watson

In [2]:
import types
from botocore.client import Config
import ibm_boto3

def __iter__(self): return 0

# @hidden_cell
# The following code accesses a file in your IBM Cloud Object Storage. It includes your credentials.
# You might want to remove those credentials before you share the notebook.
client_d3e6f1a094734626bdae121f72c60060 = ibm_boto3.client(service_name='s3',
    ibm_api_key_id='PYNGCTkR88-wT4dzzYP8LGbMoPAq9ovVcxdWMg2i5vTn',
    ibm_auth_endpoint="https://iam.cloud.ibm.com/oidc/token",
    config=Config(signature_version='oauth'),
    endpoint_url='https://s3.eu-geo.objectstorage.service.networklayer.com')

body = client_d3e6f1a094734626bdae121f72c60060.get_object(Bucket='course9notebooks-donotdelete-pr-f0q5muesi2twnw',Key='uk covid.csv')['Body']
# add missing __iter__ method, so pandas accepts body as file-like object
if not hasattr(body, "__iter__"): body.__iter__ = types.MethodType( __iter__, body )

# If you are reading an Excel file into a pandas DataFrame, replace `read_csv` by `read_excel` in the next statement.
df = pd.read_csv(body)
df.head(5)

Unnamed: 0,Area,Cases,Per 100k
0,Rhondda Cynon Taf,1617,673.4
1,Merthyr Tydfil,396,658.0
2,Denbighshire,595,624.1
3,Dundee City,890,596.0
4,Cardiff,2065,566.9


In [3]:
df.columns =['Neighborhood', 'Cases','Per_100k'] 
df.shape

(217, 3)

In [4]:
df.describe()

Unnamed: 0,Per_100k
count,215.0
mean,283.625581
std,114.69329
min,22.5
25%,210.5
50%,275.8
75%,358.25
max,673.4


In [5]:
# Adding extra columns for longitude and latitude
df['Latitude'] = ""
df['Longitude'] = ""
df.head(10)

Unnamed: 0,Neighborhood,Cases,Per_100k,Latitude,Longitude
0,Rhondda Cynon Taf,1617,673.4,,
1,Merthyr Tydfil,396,658.0,,
2,Denbighshire,595,624.1,,
3,Dundee City,890,596.0,,
4,Cardiff,2065,566.9,,
5,Newport,841,548.6,,
6,Swansea,1265,513.3,,
7,Midlothian,461,498.6,,
8,Sunderland,1379,497.1,,
9,Gateshead,1003,495.3,,


### Gathering the Latitude and Longitude data for all the UK boroughs using Geopy

In [6]:
for index, row in df.iterrows():
    geolocator = Nominatim(user_agent="london_explorer")
    address = row['Neighborhood']
    a = address + ", United kingdom"
    print(index)
    print(a)
    location = geolocator.geocode(a)
    if location is None:
        print("Lets move on")
    else:
        print("Latitude = {}, Longitude = {}".format(location.latitude, location.longitude))
        df.loc[index,'Latitude'] = location.latitude
        df.loc[index,'Longitude'] = location.longitude

0
Rhondda Cynon Taf, United kingdom
Latitude = 51.66516365, Longitude = -3.4577557723845347
1
Merthyr Tydfil, United kingdom
Latitude = 51.7490624, Longitude = -3.3786183
2
Denbighshire, United kingdom
Latitude = 53.1097895, Longitude = -3.367126163895732
3
Dundee City, United kingdom
Latitude = 56.473845999999995, Longitude = -2.96641895351409
4
Cardiff, United kingdom
Latitude = 51.4816546, Longitude = -3.1791934
5
Newport, United kingdom
Latitude = 51.5882332, Longitude = -2.9974967
6
Swansea, United kingdom
Latitude = 51.6195955, Longitude = -3.9459248
7
Midlothian, United kingdom
Latitude = 55.833333, Longitude = -3.083333
8
Sunderland, United kingdom
Latitude = 54.9058512, Longitude = -1.3828727
9
Gateshead, United kingdom
Latitude = 54.9625789, Longitude = -1.6019294
10
South Tyneside, United kingdom
Latitude = 54.969874250000004, Longitude = -1.4476805465645368
11
Vale of Glamorgan, United kingdom
Latitude = 51.4470646, Longitude = -3.395207949001259
12
Middlesbrough, United ki

Latitude = 55.5, Longitude = -4.25
104
Kingston upon Hull, City of, United kingdom
Latitude = 53.7435722, Longitude = -0.3394758
105
Scottish Borders, United kingdom
Latitude = 55.583333, Longitude = -2.833333
106
Waltham Forest, United kingdom
Latitude = 51.1981586, Longitude = 1.0165175
107
Havering, United kingdom
Latitude = 51.5443687, Longitude = -0.1443031798919795
108
Blackburn with Darwen, United kingdom
Latitude = 53.699176949999995, Longitude = -2.4709000953138327
109
South Ayrshire, United kingdom
Latitude = 55.299818, Longitude = -4.645988367016132
110
Medway, United kingdom
Latitude = 51.4157386, Longitude = 0.5687308512408333
111
North Lanarkshire, United kingdom
Latitude = 55.88305065, Longitude = -3.946155104761104
112
Dudley, United kingdom
Latitude = 52.5110832, Longitude = -2.0816813
113
Hounslow, United kingdom
Latitude = 51.4686132, Longitude = -0.3613471
114
East Riding of Yorkshire, United kingdom
Latitude = 53.873596500000005, Longitude = -0.5347787525091569
115

Latitude = 51.35632375, Longitude = -2.486661424502094
202
Wiltshire, United kingdom
Latitude = 51.324162, Longitude = -1.9032486699002247
203
Derry City and Strabane, United kingdom
Latitude = 54.8351485, Longitude = -7.457994790640386
204
Devon, United kingdom
Latitude = 50.724165, Longitude = -3.660795843955193
205
Cornwall and Isles of Scilly, United kingdom
Latitude = 49.87163645, Longitude = -6.4034597229910695
206
North East Lincolnshire, United kingdom
Latitude = 53.53680335, Longitude = -0.09368101348534888
207
Dorset, United kingdom
Latitude = 50.79683685, Longitude = -2.34473226124306
208
Rutland, United kingdom
Latitude = 52.6423036, Longitude = -0.6632643077026672
209
Highland, United kingdom
Latitude = 57.5066357, Longitude = -5.0038367
210
Moray, United kingdom
Latitude = 57.416667, Longitude = -3.25
211
Fermanagh and Omagh, United kingdom
Latitude = 54.4413243, Longitude = -7.7567616226758105
212
Ceredigion, United kingdom
Latitude = 52.2945938, Longitude = -3.952472505

In [7]:
df_ready = df[df['Longitude']!=""]
df_missing = df[df['Longitude']==""]
print(df_ready.shape)
print(df_missing.shape)
# Below numbers show that expect for 2 boroughs, the location data is gathered for 215 boroughs

(215, 5)
(2, 5)


In [8]:
nan_value = float("NaN") #Convert NaN values to empty string
df.replace("", nan_value, inplace=True)
df.dropna(subset = ["Latitude"], inplace=True)

In [9]:
df.shape

(215, 5)

In [10]:
df.head()

Unnamed: 0,Neighborhood,Cases,Per_100k,Latitude,Longitude
0,Rhondda Cynon Taf,1617,673.4,51.665164,-3.457756
1,Merthyr Tydfil,396,658.0,51.749062,-3.378618
2,Denbighshire,595,624.1,53.109789,-3.367126
3,Dundee City,890,596.0,56.473846,-2.966419
4,Cardiff,2065,566.9,51.481655,-3.179193


### Plotting a map of the boroughs

In [11]:
#import folium # map rendering library

address = 'United Kingdom'

geolocator = Nominatim(user_agent="london_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
map_London = folium.Map(location=[latitude, longitude], zoom_start=10)
counter = 0
# add markers to map
for lat, lng, area in zip(df['Latitude'], df['Longitude'], df['Neighborhood']):
    label = '{}'.format(area)
    label = folium.Popup(label, parse_html=True)
    counter = counter + 1
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_London)   
print (counter)
map_London

215


### Preparing Foursquare credentials

In [12]:
CLIENT_ID = '0IGO0GF0MHBANY0JGUWRYXQHVVZZHZ23OZKSL5NA1HOQBNW1' # your Foursquare ID
CLIENT_SECRET = 'FIDY1VPGAN55RLWQXJQHN02UDGUDTU3VGWVXFHA2Q5N5QJA5' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: 0IGO0GF0MHBANY0JGUWRYXQHVVZZHZ23OZKSL5NA1HOQBNW1
CLIENT_SECRET:FIDY1VPGAN55RLWQXJQHN02UDGUDTU3VGWVXFHA2Q5N5QJA5


### Preparing getNearbyVenues function to query Foursquare for 100 venues per borough within a 10 km radius

In [128]:
def getNearbyVenues(names, latitudes, longitudes, radius=10000):
    Counter = 0
    LIMIT = 100
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(Counter, name)
        Counter = Counter + 1
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    print(nearby_venues.shape)
    return(nearby_venues)

In [129]:
uk_venues = getNearbyVenues(names=df['Neighborhood'],
                                   latitudes=df['Latitude'],
                                   longitudes=df['Longitude']
                                  )

0 Rhondda Cynon Taf
1 Merthyr Tydfil
2 Denbighshire
3 Dundee City
4 Cardiff
5 Newport
6 Swansea
7 Midlothian
8 Sunderland
9 Gateshead
10 South Tyneside
11 Vale of Glamorgan
12 Middlesbrough
13 Blaenau Gwent
14 Blackpool
15 Oldham
16 Conwy
17 Knowsley
18 Brent
19 Cumbria
20 Sheffield
21 Neath Port Talbot
22 St. Helens
23 Harrow
24 Wrexham
25 Southwark
26 Bury
27 Wirral
28 Inverclyde
29 Barnsley
30 Croydon
31 Carmarthenshire
32 Bromley
33 Walsall
34 Caerphilly
35 Bedford
36 Belfast
37 Wolverhampton
38 County Durham
39 Torfaen
40 Lambeth
41 Wigan
42 West Dunbartonshire
43 Warrington
44 East Dunbartonshire
45 Rotherham
46 Salford
47 Darlington
48 Sutton
49 Monmouthshire
50 Stockport
51 Rochdale
52 Reading
53 Bridgend
54 Hammersmith and Fulham
55 Bolton
56 Angus
57 Hartlepool
58 Newcastle upon Tyne
59 Luton
60 Ealing
61 Trafford
62 Sefton
63 Gwynedd
64 Merton
65 Cheshire West and Chester
66 Falkirk
67 Barnet
68 Liverpool
69 Tameside
70 Lewisham
71 Cheshire East
72 Clackmannanshire
73 Kensin

In [130]:
print(uk_venues.shape)

(15425, 7)


### Total of 15425 venues are gathered across 215 boroughs

In [131]:
uk_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Aberdeen City,100,100,100,100,100,100
Aberdeenshire,7,7,7,7,7,7
Angus,14,14,14,14,14,14
Antrim and Newtownabbey,100,100,100,100,100,100
Ards and North Down,68,68,68,68,68,68
Argyll and Bute,7,7,7,7,7,7
Barking and Dagenham,100,100,100,100,100,100
Barnet,100,100,100,100,100,100
Barnsley,100,100,100,100,100,100
Bath and North East Somerset,100,100,100,100,100,100


In [132]:
print('There are {} unique boroughs.'.format(len(uk_venues['Neighborhood'].unique())))

There are 215 unique boroughs.


### One hot encoding to covert the venue details into categorical variables to use in the clustering

In [133]:
# one hot encoding
uk_onehot = pd.get_dummies(uk_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
uk_onehot['Neighborhood'] = uk_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [uk_onehot.columns[-1]] + list(uk_onehot.columns[:-1])
uk_onehot = uk_onehot[fixed_columns]

uk_onehot.head()

Unnamed: 0,Zoo Exhibit,Accessories Store,Afghan Restaurant,African Restaurant,Airport,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Amphitheater,...,Waterfront,Whisky Bar,Windmill,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yakitori Restaurant,Yoga Studio,Zoo
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [138]:
df3 = uk_onehot

In [141]:
df3.head(5)

Unnamed: 0,Zoo Exhibit,Accessories Store,Afghan Restaurant,African Restaurant,Airport,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Amphitheater,...,Whisky Bar,Windmill,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yakitori Restaurant,Yoga Studio,Zoo,Restaurants
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,


In [142]:
df3.shape 

(15425, 360)

In [143]:
uk_grouped = df3.groupby('Neighborhood').sum().reset_index()
uk_grouped

Unnamed: 0,Neighborhood,Zoo Exhibit,Accessories Store,Afghan Restaurant,African Restaurant,Airport,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Waterfront,Whisky Bar,Windmill,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yakitori Restaurant,Yoga Studio,Zoo
0,Aberdeen City,0,0,0,0,0,1,0,0,0,...,0,1,0,0,0,0,0,0,0,0
1,Aberdeenshire,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Angus,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Antrim and Newtownabbey,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
4,Ards and North Down,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5,Argyll and Bute,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
6,Barking and Dagenham,0,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0
7,Barnet,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
8,Barnsley,0,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
9,Bath and North East Somerset,0,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0


In [144]:
uk_grouped.shape

(215, 359)

## We now have the uk_grouped dataframe with all the venues converted to categorical variables and listed in coloums for the 215 boroughs

In [145]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

### The below table displays the 20 most frequent venues in each borough

In [146]:
num_top_venues = 20

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = uk_grouped['Neighborhood']

for ind in np.arange(uk_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(uk_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,...,11th Most Common Venue,12th Most Common Venue,13th Most Common Venue,14th Most Common Venue,15th Most Common Venue,16th Most Common Venue,17th Most Common Venue,18th Most Common Venue,19th Most Common Venue,20th Most Common Venue
0,Aberdeen City,Hotel,Bar,Grocery Store,Café,Supermarket,Beer Bar,Park,Restaurant,Coffee Shop,...,Pub,Hotel Bar,Beach,Sandwich Place,Gym / Fitness Center,Italian Restaurant,Theater,Tea Room,Sushi Restaurant,Hostel
1,Aberdeenshire,Gas Station,Grocery Store,Construction & Landscaping,History Museum,Platform,Castle,Zoo,Field,Factory,...,Farm,Farmers Market,Fast Food Restaurant,Fish & Chips Shop,Film Studio,Fish Market,Flea Market,Flower Shop,Food,Food & Drink Shop
2,Angus,Supermarket,Gas Station,Fast Food Restaurant,Gastropub,Grocery Store,Café,Sandwich Place,Discount Store,Castle,...,Flea Market,Field,Food Truck,Factory,Falafel Restaurant,Farm,Farmers Market,Food Court,Food & Drink Shop,Fish Market
3,Antrim and Newtownabbey,Restaurant,Coffee Shop,Park,Pizza Place,Café,Bar,Pub,Gym / Fitness Center,Theater,...,Supermarket,Shopping Mall,Sandwich Place,Clothing Store,Trail,Pet Store,Bistro,Italian Restaurant,Mexican Restaurant,Grocery Store
4,Ards and North Down,Coffee Shop,Supermarket,Clothing Store,Bar,Café,Sandwich Place,Pizza Place,Restaurant,Hotel,...,Department Store,Discount Store,Garden Center,Pharmacy,Gastropub,Beach,Breakfast Spot,Gas Station,Park,Shopping Mall


In [147]:
neighborhoods_venues_sorted.shape

(215, 21)

## K- means clustering for a K value of 10

In [148]:
from sklearn.cluster import KMeans
# set number of clusters
kclusters = 10

uk_clustering = uk_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(uk_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:145] 

array([7, 0, 0, 7, 5, 0, 3, 6, 8, 8, 7, 7, 2, 7, 8, 5, 5, 1, 9, 1, 1, 2,
       5, 6, 6, 3, 1, 5, 5, 8, 5, 4, 7, 0, 0, 8, 0, 8, 1, 6, 4, 5, 0, 0,
       5, 2, 3, 0, 5, 0, 1, 0, 0, 2, 8, 0, 1, 0, 8, 6, 0, 7, 0, 5, 5, 0,
       6, 1, 8, 0, 0, 5, 7, 7, 8, 6, 0, 6, 8, 4, 1, 6, 2, 5, 4, 5, 2, 0,
       2, 3, 5, 0, 1, 4, 4, 5, 2, 3, 5, 2, 4, 0, 2, 2, 7, 6, 0, 5, 2, 8,
       7, 8, 5, 3, 0, 8, 5, 2, 0, 0, 0, 5, 7, 6, 5, 0, 0, 0, 8, 0, 5, 5,
       9, 0, 2, 0, 2, 0, 2, 0, 7, 2, 0, 0, 8], dtype=int32)

In [149]:
neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,...,11th Most Common Venue,12th Most Common Venue,13th Most Common Venue,14th Most Common Venue,15th Most Common Venue,16th Most Common Venue,17th Most Common Venue,18th Most Common Venue,19th Most Common Venue,20th Most Common Venue
0,Aberdeen City,Hotel,Bar,Grocery Store,Café,Supermarket,Beer Bar,Park,Restaurant,Coffee Shop,...,Pub,Hotel Bar,Beach,Sandwich Place,Gym / Fitness Center,Italian Restaurant,Theater,Tea Room,Sushi Restaurant,Hostel
1,Aberdeenshire,Gas Station,Grocery Store,Construction & Landscaping,History Museum,Platform,Castle,Zoo,Field,Factory,...,Farm,Farmers Market,Fast Food Restaurant,Fish & Chips Shop,Film Studio,Fish Market,Flea Market,Flower Shop,Food,Food & Drink Shop
2,Angus,Supermarket,Gas Station,Fast Food Restaurant,Gastropub,Grocery Store,Café,Sandwich Place,Discount Store,Castle,...,Flea Market,Field,Food Truck,Factory,Falafel Restaurant,Farm,Farmers Market,Food Court,Food & Drink Shop,Fish Market
3,Antrim and Newtownabbey,Restaurant,Coffee Shop,Park,Pizza Place,Café,Bar,Pub,Gym / Fitness Center,Theater,...,Supermarket,Shopping Mall,Sandwich Place,Clothing Store,Trail,Pet Store,Bistro,Italian Restaurant,Mexican Restaurant,Grocery Store
4,Ards and North Down,Coffee Shop,Supermarket,Clothing Store,Bar,Café,Sandwich Place,Pizza Place,Restaurant,Hotel,...,Department Store,Discount Store,Garden Center,Pharmacy,Gastropub,Beach,Breakfast Spot,Gas Station,Park,Shopping Mall


In [150]:
# add clustering labels

neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)
#neighborhoods_venues_sorted.head()
uk_merged = df

# merge toronto_grouped with df to add latitude/longitude for each neighborhood
uk_merged = uk_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on ='Neighborhood')
#uk_merged = uk_merged(neighborhoods_venues_sorted.merge, on ='Neighborhood')
#result = df1.merge(df2, on=['Column1'])

uk_merged.head() 

Unnamed: 0,Neighborhood,Cases,Per_100k,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,...,11th Most Common Venue,12th Most Common Venue,13th Most Common Venue,14th Most Common Venue,15th Most Common Venue,16th Most Common Venue,17th Most Common Venue,18th Most Common Venue,19th Most Common Venue,20th Most Common Venue
0,Rhondda Cynon Taf,1617,673.4,51.665164,-3.457756,5,Supermarket,Grocery Store,Train Station,Pub,...,Coffee Shop,Chinese Restaurant,Bike Trail,Furniture / Home Store,Theater,Movie Theater,Restaurant,Warehouse Store,Portuguese Restaurant,American Restaurant
1,Merthyr Tydfil,396,658.0,51.749062,-3.378618,5,Supermarket,Hotel,Pub,Grocery Store,...,Sandwich Place,Pharmacy,Furniture / Home Store,Bike Trail,Shopping Plaza,Bookstore,Light Rail Station,English Restaurant,Trail,Train Station
2,Denbighshire,595,624.1,53.109789,-3.367126,0,Grocery Store,Scenic Lookout,Supermarket,Hotel,...,Pub,Flea Market,Food Court,Event Space,Exhibit,Factory,Falafel Restaurant,Farm,Farmers Market,Field
3,Dundee City,890,596.0,56.473846,-2.966419,8,Hotel,Coffee Shop,Grocery Store,Pub,...,Fast Food Restaurant,Art Museum,Restaurant,Chinese Restaurant,Sandwich Place,Cheese Shop,Scenic Lookout,Science Museum,Skating Rink,College Gym
4,Cardiff,2065,566.9,51.481655,-3.179193,7,Coffee Shop,Pub,Park,Café,...,Rugby Stadium,Vegetarian / Vegan Restaurant,Shopping Mall,Bakery,Soccer Field,Theater,Steakhouse,Beer Bar,Deli / Bodega,Castle


In [151]:
uk_merged.shape

(215, 26)

In [152]:
uk_merged.dropna(subset = ["Cluster Labels"], inplace=True)


In [153]:
uk_merged['Cluster Labels'] = uk_merged['Cluster Labels'].astype('int32')

### Preparing a cluster map

In [154]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(uk_merged['Latitude'], uk_merged['Longitude'], uk_merged['Neighborhood'], uk_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

## Comparing the average number of infections per borough with the cluster data

In [155]:
uk_merged.groupby('Cluster Labels').mean().sort_values(by=['Per_100k'],ascending=True)

Unnamed: 0_level_0,Per_100k,Latitude,Longitude
Cluster Labels,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,220.619565,54.516099,-3.500875
9,226.114286,51.986596,-1.836494
1,255.215,52.090073,-1.390103
6,258.118182,51.875844,-0.613702
4,289.9,51.515259,-0.138245
8,297.104,53.294072,-1.74417
3,304.27,51.444859,-0.141936
2,309.184848,52.56752,-1.560627
5,335.325714,53.196993,-2.603724
7,341.352941,53.80746,-2.624473


### Below list displays the number of boroughs per cluster

In [156]:
uk_merged.groupby('Cluster Labels').size()

Cluster Labels
0    46
1    20
2    34
3    10
4     9
5    35
6    11
7    18
8    25
9     7
dtype: int64

### Details and cluster map of Cluster 0 - the cluster with the lowest infection rate

In [172]:
cluster0Neighborhoods = uk_merged.loc[uk_merged['Cluster Labels'] == 0]
cluster0Neighborhoods.head(5)

Unnamed: 0,Neighborhood,Cases,Per_100k,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,...,11th Most Common Venue,12th Most Common Venue,13th Most Common Venue,14th Most Common Venue,15th Most Common Venue,16th Most Common Venue,17th Most Common Venue,18th Most Common Venue,19th Most Common Venue,20th Most Common Venue
2,Denbighshire,595,624.1,53.109789,-3.367126,0,Grocery Store,Scenic Lookout,Supermarket,Hotel,...,Pub,Flea Market,Food Court,Event Space,Exhibit,Factory,Falafel Restaurant,Farm,Farmers Market,Field
16,Conwy,523,446.3,53.146425,-3.759135,0,Café,Hotel,Waterfall,Train Station,...,Restaurant,Gas Station,Athletics & Sports,Trail,Tapas Restaurant,Tea Room,Bed & Breakfast,Grocery Store,Falafel Restaurant,Factory
19,Cumbria,2207,442.4,54.614314,-2.942098,0,Pub,Hotel,Boat or Ferry,Lake,...,Hostel,Tea Room,Resort,Field,Fast Food Restaurant,Farmers Market,Farm,Film Studio,Zoo,Fish & Chips Shop
31,Carmarthenshire,726,387.1,51.89367,-4.217283,0,Clothing Store,Coffee Shop,Pub,Department Store,...,Botanical Garden,Golf Course,Gastropub,Supermarket,Bistro,Castle,Soccer Field,Fast Food Restaurant,Grocery Store,Electronics Store
49,Monmouthshire,340,361.2,51.750106,-2.833333,0,Hotel,Garden Center,Coffee Shop,Gastropub,...,Factory,Falafel Restaurant,Farm,Farmers Market,Fast Food Restaurant,Field,Food Court,Food Truck,Food,Fish & Chips Shop


In [158]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(uk_merged['Latitude'], uk_merged['Longitude'], uk_merged['Neighborhood'], cluster0Neighborhoods['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

### Details and cluster map of Cluster 7 - the cluster with the higest infection rate

In [171]:
cluster7Neighborhoods = uk_merged.loc[uk_merged['Cluster Labels'] == 7]
cluster7Neighborhoods.head(5)

Unnamed: 0,Neighborhood,Cases,Per_100k,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,...,11th Most Common Venue,12th Most Common Venue,13th Most Common Venue,14th Most Common Venue,15th Most Common Venue,16th Most Common Venue,17th Most Common Venue,18th Most Common Venue,19th Most Common Venue,20th Most Common Venue
4,Cardiff,2065,566.9,51.481655,-3.179193,7,Coffee Shop,Pub,Park,Café,...,Rugby Stadium,Vegetarian / Vegan Restaurant,Shopping Mall,Bakery,Soccer Field,Theater,Steakhouse,Beer Bar,Deli / Bodega,Castle
9,Gateshead,1003,495.3,54.962579,-1.601929,7,Pub,Café,Indian Restaurant,Park,...,Italian Restaurant,Burger Joint,Supermarket,Soccer Stadium,Theater,Indie Movie Theater,Science Museum,Brewery,Beer Bar,Student Center
35,Bedford,654,381.1,52.136381,-0.467504,7,Pub,Coffee Shop,Café,Park,...,Rock Club,Restaurant,Event Space,Garden Center,Museum,Supermarket,General Entertainment,Golf Course,Lounge,Lake
36,Belfast,1303,381.1,54.596441,-5.930276,7,Coffee Shop,Park,Café,Restaurant,...,Bistro,Trail,Shopping Mall,Mexican Restaurant,Indian Restaurant,Theater,Hotel,Asian Restaurant,Cuban Restaurant,Pool Hall
44,East Dunbartonshire,399,367.3,55.933333,-4.2,7,Coffee Shop,Bar,Italian Restaurant,Restaurant,...,Cocktail Bar,Tea Room,Brewery,Pizza Place,Beer Bar,Seafood Restaurant,Park,Bookstore,Gastropub,English Restaurant


In [162]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(uk_merged['Latitude'], uk_merged['Longitude'], uk_merged['Neighborhood'], cluster7Neighborhoods['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster + 1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters