# Segmenting and Clustering Neighborhoods in Toronto(Project-1 )

# Part-1

In [1]:
#importing required libraries
import pandas as pd
import numpy as np
import folium
import geopy
print('libraries imported')

libraries imported


### Scraping data from wikipedia

#### Gathering data from table on wikipedia

In [2]:
url='https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
dfs=pd.read_html(url,header=0)
for df in dfs:
    print(df)

    Postcode           Borough  \
0        M1A      Not assigned   
1        M2A      Not assigned   
2        M3A        North York   
3        M4A        North York   
4        M5A  Downtown Toronto   
5        M5A  Downtown Toronto   
6        M6A        North York   
7        M6A        North York   
8        M7A      Queen's Park   
9        M8A      Not assigned   
10       M9A         Etobicoke   
11       M1B       Scarborough   
12       M1B       Scarborough   
13       M2B      Not assigned   
14       M3B        North York   
15       M4B         East York   
16       M4B         East York   
17       M5B  Downtown Toronto   
18       M5B  Downtown Toronto   
19       M6B        North York   
20       M7B      Not assigned   
21       M8B      Not assigned   
22       M9B         Etobicoke   
23       M9B         Etobicoke   
24       M9B         Etobicoke   
25       M9B         Etobicoke   
26       M9B         Etobicoke   
27       M1C       Scarborough   
28       M1C  

In [3]:
df=dfs[0]
df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


### Cleaning the data fetched

#### Renaming columns

In [4]:
df.columns=['PostalCode','Borough','Neighborhood']
df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


#### Handling rows with not assigned borough

In [5]:
df=df[df.Borough != 'Not assigned'].reset_index(drop=True)
df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Harbourfront
3,M5A,Downtown Toronto,Regent Park
4,M6A,North York,Lawrence Heights


#### Combining postal code with multiple neighborhood

In [6]:
df_grouped=df.groupby(['PostalCode','Borough'],as_index=False).agg(lambda x: ','.join(x))
df_grouped.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1B,Scarborough,"Rouge,Malvern"
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union"
2,M1E,Scarborough,"Guildwood,Morningside,West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


#### Handling rows with not assigned neighborhood with assigned borough

In [7]:
na_neigh_rows = df_grouped.Neighborhood == 'Not assigned'
df_grouped.loc[na_neigh_rows, 'Neighborhood'] = df_grouped.loc[na_neigh_rows, 'Borough']
df_grouped[na_neigh_rows]

Unnamed: 0,PostalCode,Borough,Neighborhood
85,M7A,Queen's Park,Queen's Park


In [8]:
df_grouped.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1B,Scarborough,"Rouge,Malvern"
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union"
2,M1E,Scarborough,"Guildwood,Morningside,West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


In [9]:
df_grouped.shape

(103, 3)

# Part-2

In [10]:
%matplotlib inline
import geocoder
import requests
from pandas.io.json import json_normalize
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import matplotlib.colors as colors
import folium
print('Libraries imported')

Libraries imported


### Getting Latitude and Longitude of the neighborhoods

#### Downloading dataset from link provided in the course

In [11]:
!wget -O 'toronto_coordinates.csv' https://cocl.us/Geospatial_data
print('Coordinates downloaded!')
coors = pd.read_csv('toronto_coordinates.csv')

--2019-10-17 20:54:21--  https://cocl.us/Geospatial_data
Resolving cocl.us (cocl.us)... 161.202.50.39
Connecting to cocl.us (cocl.us)|161.202.50.39|:443... connected.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: https://ibm.box.com/shared/static/9afzr83pps4pwf2smjjcf1y5mvgb18rr.csv [following]
--2019-10-17 20:54:23--  https://ibm.box.com/shared/static/9afzr83pps4pwf2smjjcf1y5mvgb18rr.csv
Resolving ibm.box.com (ibm.box.com)... 103.116.4.197
Connecting to ibm.box.com (ibm.box.com)|103.116.4.197|:443... connected.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: /public/static/9afzr83pps4pwf2smjjcf1y5mvgb18rr.csv [following]
--2019-10-17 20:54:24--  https://ibm.box.com/public/static/9afzr83pps4pwf2smjjcf1y5mvgb18rr.csv
Reusing existing connection to ibm.box.com:443.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: https://ibm.ent.box.com/public/static/9afzr83pps4pwf2smjjcf1y5mvgb18rr.csv [following]
--2019-10-17 20:5

In [12]:
coors.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


#### Renaming column names

In [13]:
coors.columns=['PostalCode','Latitude','Longitude']
coors.head()

Unnamed: 0,PostalCode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


#### Merging two dataframes into one dataframe

In [14]:
toronto_df_cord=pd.merge(df_grouped,coors,on='PostalCode')
toronto_df_cord.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge,Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood,Morningside,West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


In [15]:
toronto_df_cord.shape

(103, 5)

# Part-3

## Exploring and cluster the neighborhoods in Toronto

#### Import kMeans for clustering

In [16]:
from sklearn.cluster import KMeans

In [17]:
print('The dataframe has {} neighborhoods and {} boroughs.'.format(len(df['Neighborhood'].unique()),toronto_df_cord['Borough'].unique().shape[0]))

The dataframe has 209 neighborhoods and 11 boroughs.


#### Creating map of Toronto with boroughs on top

In [18]:
map_toronto=folium.Map(location=[43.651070, -79.347015],zoom_start=10)

#adding markers to map
for lat,lng,brh,neigh in zip(toronto_df_cord['Latitude'],toronto_df_cord['Longitude'],toronto_df_cord['Borough'],toronto_df_cord['Neighborhood']) :
    label='{},{}'.format(neigh,brh)
    label=folium.Popup(label,parse_html=True)
    folium.CircleMarker(
    [lat,lng],
    radius=5,
    popup=label,
    color='blue',
    fill=True,
    fill_color='#3186cc',
    fill_opacity=0.7).add_to(map_toronto)

map_toronto

#### Taking only those boroughs for analysis that has toronto in its name

In [19]:
toronto_boroughs=['East Toronto', 'Central Toronto', 'Downtown Toronto', 'West Toronto']
toronto_boroughs_df=toronto_df_cord[toronto_df_cord['Borough'].isin(toronto_boroughs)].reset_index(drop=True)
print('Shape:',toronto_boroughs_df.shape)
toronto_boroughs_df.head()

Shape: (38, 5)


Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M4E,East Toronto,The Beaches,43.676357,-79.293031
1,M4K,East Toronto,"The Danforth West,Riverdale",43.679557,-79.352188
2,M4L,East Toronto,"The Beaches West,India Bazaar",43.668999,-79.315572
3,M4M,East Toronto,Studio District,43.659526,-79.340923
4,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879


#### Creating map of boroughs with toronto in its name

In [20]:
map_toronto_boroughs=folium.Map(location=[43.651070, -79.347015],zoom_start=12)

#adding markers to map
for lat,lng,brh,neigh in zip(toronto_boroughs_df['Latitude'],toronto_boroughs_df['Longitude'],toronto_boroughs_df['Borough'],toronto_boroughs_df['Neighborhood']) :
    label='{},{}'.format(neigh,brh)
    label=folium.Popup(label,parse_html=True)
    folium.CircleMarker(
    [lat,lng],
    radius=5,
    popup=label,
    color='blue',
    fill=True,
    fill_color='#3186cc',
    fill_opacity=0.7).add_to(map_toronto_boroughs)

map_toronto_boroughs

In [21]:
CLIENT_ID = '0DWHJ05IBVSSZLFJBNYMJ0VRVZXSDG15UWQWJXMVUL1CRC4X' # your Foursquare ID
CLIENT_SECRET = '3S4WZFTO204JYQX0Z1F4SW02WBZBAETWT5ULBMBA143FDH14' # your Foursquare Secret
VERSION = '20191017' # Foursquare API version

#### Finding maximum 100 venues in boroughs with toronto as name

In [22]:
radius = 500
LIMIT = 100

venues = []

for lat, long, post, borough, neighborhood in zip(toronto_boroughs_df['Latitude'], toronto_boroughs_df['Longitude'], toronto_boroughs_df['PostalCode'], toronto_boroughs_df['Borough'], toronto_boroughs_df['Neighborhood']):
    url = "https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}".format(
        CLIENT_ID,
        CLIENT_SECRET,
        VERSION,
        lat,
        long,
        radius, 
        LIMIT)
    
    results = requests.get(url).json()["response"]['groups'][0]['items']
    
    for venue in results:
        venues.append((
            post, 
            borough,
            neighborhood,
            lat, 
            long, 
            venue['venue']['name'], 
            venue['venue']['location']['lat'], 
            venue['venue']['location']['lng'],  
            venue['venue']['categories'][0]['name']))

#### Converting venues array into pandas dataframe

In [23]:
venues_df=pd.DataFrame(venues)
venues_df.columns=['PostalCode','Borough','Neighborhood','BoroughLatitude','BoroughLongitude','Venue','VenueLatitude','VenueLongitude','VenueCategory']
print('Shape:',venues_df.shape)
venues_df.head()

Shape: (1709, 9)


Unnamed: 0,PostalCode,Borough,Neighborhood,BoroughLatitude,BoroughLongitude,Venue,VenueLatitude,VenueLongitude,VenueCategory
0,M4E,East Toronto,The Beaches,43.676357,-79.293031,Glen Manor Ravine,43.676821,-79.293942,Trail
1,M4E,East Toronto,The Beaches,43.676357,-79.293031,The Big Carrot Natural Food Market,43.678879,-79.297734,Health Food Store
2,M4E,East Toronto,The Beaches,43.676357,-79.293031,Grover Pub and Grub,43.679181,-79.297215,Pub
3,M4E,East Toronto,The Beaches,43.676357,-79.293031,Upper Beaches,43.680563,-79.292869,Neighborhood
4,M4K,East Toronto,"The Danforth West,Riverdale",43.679557,-79.352188,Pantheon,43.677621,-79.351434,Greek Restaurant


#### Checking number of venues for each postal code

In [24]:
venues_df.groupby(['PostalCode'])['Venue'].count()

PostalCode
M4E      4
M4K     43
M4L     19
M4M     39
M4N      3
M4P      7
M4R     22
M4S     36
M4T      2
M4V     15
M4W      5
M4X     46
M4Y     87
M5A     52
M5B    100
M5C    100
M5E     55
M5G     86
M5H    100
M5J    100
M5K    100
M5L    100
M5N      2
M5P      4
M5R     21
M5S     34
M5T    100
M5V     17
M5W     98
M5X    100
M6G     15
M6H     17
M6J     66
M6K     27
M6P     23
M6R     15
M6S     33
M7Y     16
Name: Venue, dtype: int64

#### Checking number of unique venues

In [25]:
print('No. of unique venues :',len(venues_df['VenueCategory'].unique()))

No. of unique venues : 241


#### Analyzing venues in each area

In [26]:
#one hot encoding
toronto_venue_onehot=pd.get_dummies(venues_df[['VenueCategory']], prefix="", prefix_sep="")

#adding postal code,borough, neighborhood and venue columns
toronto_venue_onehot['PostalCode']=venues_df['PostalCode']
toronto_venue_onehot['Borough']=venues_df['Borough']
toronto_venue_onehot['Neighborhood']=venues_df['Neighborhood']

#moving postal code,borough, neighborhood and venue columns to starting
cols = toronto_venue_onehot.columns.tolist()
cols.insert(0, cols.pop(cols.index('PostalCode')))
cols.insert(1, cols.pop(cols.index('Borough')))
cols.insert(2, cols.pop(cols.index('Neighborhood')))

toronto_venue_onehot=toronto_venue_onehot.reindex(columns=cols)

print('Shape:',toronto_venue_onehot.shape)
toronto_venue_onehot.head()

Shape: (1709, 243)


Unnamed: 0,PostalCode,Borough,Neighborhood,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,...,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,M4E,East Toronto,The Beaches,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
1,M4E,East Toronto,The Beaches,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,M4E,East Toronto,The Beaches,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,M4E,East Toronto,The Beaches,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,M4K,East Toronto,"The Danforth West,Riverdale",0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


#### Finding frequecy of each type of venue in each borough

In [27]:
toronto_venue_freq=toronto_venue_onehot.groupby(['PostalCode','Borough','Neighborhood']).mean().reset_index()
print('Shape:',toronto_venue_freq.shape)
toronto_venue_freq.head()

Shape: (38, 243)


Unnamed: 0,PostalCode,Borough,Neighborhood,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,...,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,M4E,East Toronto,The Beaches,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,M4K,East Toronto,"The Danforth West,Riverdale",0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.023256,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.023256
2,M4L,East Toronto,"The Beaches West,India Bazaar",0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,M4M,East Toronto,Studio District,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025641
4,M4N,Central Toronto,Lawrence Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


#### Getting 10 most occuring venues in each borough

In [28]:
num_top_venues=10

#creating columns
indicator=['st','nd','rd']
areaCol=['PostalCode','Borough','Neighborhood']
freqCol=[]
for n in range(1,num_top_venues+1):
    try:
        freqCol.append('{}{} most common venue'.format(n,indicator[n-1]))
    except:
        freqCol.append('{}{} most common venue'.format(n,'th'))
cols=areaCol+freqCol

#creating new dataframe
toronto_venue_top=pd.DataFrame(columns=cols)
toronto_venue_top['PostalCode']=toronto_venue_freq['PostalCode']
toronto_venue_top['Borough']=toronto_venue_freq['Borough']
toronto_venue_top['Neighborhood']=toronto_venue_freq['Neighborhood']
for n in range(toronto_venue_freq.shape[0]):
    row_categories = toronto_venue_freq.iloc[n, :].iloc[3:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    toronto_venue_top.iloc[n, 3:] = row_categories_sorted.index.values[0:num_top_venues]

toronto_venue_top.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,1st most common venue,2nd most common venue,3rd most common venue,4th most common venue,5th most common venue,6th most common venue,7th most common venue,8th most common venue,9th most common venue,10th most common venue
0,M4E,East Toronto,The Beaches,Health Food Store,Pub,Trail,Discount Store,Filipino Restaurant,Fast Food Restaurant,Farmers Market,Falafel Restaurant,Event Space,Ethiopian Restaurant
1,M4K,East Toronto,"The Danforth West,Riverdale",Greek Restaurant,Coffee Shop,Ice Cream Shop,Italian Restaurant,Furniture / Home Store,Yoga Studio,Bookstore,Brewery,Bubble Tea Shop,Café
2,M4L,East Toronto,"The Beaches West,India Bazaar",Park,Italian Restaurant,Pet Store,Pizza Place,Gym,Pub,Movie Theater,Sandwich Place,Burrito Place,Burger Joint
3,M4M,East Toronto,Studio District,Café,Coffee Shop,Bakery,Italian Restaurant,American Restaurant,Chinese Restaurant,Bar,Stationery Store,Clothing Store,Middle Eastern Restaurant
4,M4N,Central Toronto,Lawrence Park,Park,Swim School,Bus Line,Yoga Studio,Dog Run,Filipino Restaurant,Fast Food Restaurant,Farmers Market,Falafel Restaurant,Event Space


### Clustering Areas

In [29]:
k_clusters=5

toronto_venue_top_clustering=toronto_venue_freq.drop(['PostalCode','Borough','Neighborhood'],axis=1)

k_means=KMeans(n_clusters=k_clusters,random_state=0)
k_means.fit(toronto_venue_top_clustering)

toronto_df_clustered=toronto_boroughs_df
toronto_df_clustered['Cluster']=k_means.labels_

toronto_df_clustered=toronto_df_clustered.join(toronto_venue_top.drop(['Borough','Neighborhood'],1).set_index('PostalCode'),on='PostalCode')
#sorting freqColumns and Cluster column collectively

toronto_df_clustered=toronto_df_clustered.sort_values(['Cluster']+freqCol).reset_index(drop=True)
toronto_df_clustered

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster,1st most common venue,2nd most common venue,3rd most common venue,4th most common venue,5th most common venue,6th most common venue,7th most common venue,8th most common venue,9th most common venue,10th most common venue
0,M5V,Downtown Toronto,"CN Tower,Bathurst Quay,Island airport,Harbourf...",43.628947,-79.39442,0,Airport Service,Airport Lounge,Airport Terminal,Boutique,Harbor / Marina,Sculpture Garden,Boat or Ferry,Bar,Plane,Coffee Shop
1,M6J,West Toronto,"Little Portugal,Trinity",43.647927,-79.41975,0,Bar,Coffee Shop,Men's Store,Asian Restaurant,Café,Vietnamese Restaurant,French Restaurant,Pizza Place,Cocktail Bar,Restaurant
2,M6R,West Toronto,"Parkdale,Roncesvalles",43.64896,-79.456325,0,Breakfast Spot,Gift Shop,Dessert Shop,Bookstore,Eastern European Restaurant,Bar,Bank,Movie Theater,Restaurant,Italian Restaurant
3,M5S,Downtown Toronto,"Harbord,University of Toronto",43.662696,-79.400049,0,Café,Bookstore,Bakery,Italian Restaurant,Bar,Japanese Restaurant,Restaurant,Beer Bar,Beer Store,Sandwich Place
4,M4M,East Toronto,Studio District,43.659526,-79.340923,0,Café,Coffee Shop,Bakery,Italian Restaurant,American Restaurant,Chinese Restaurant,Bar,Stationery Store,Clothing Store,Middle Eastern Restaurant
5,M6K,West Toronto,"Brockton,Exhibition Place,Parkdale Village",43.636847,-79.428191,0,Café,Yoga Studio,Breakfast Spot,Performing Arts Venue,Coffee Shop,Intersection,Burrito Place,Sandwich Place,Caribbean Restaurant,Restaurant
6,M5H,Downtown Toronto,"Adelaide,King,Richmond",43.650571,-79.384568,0,Coffee Shop,Café,Bar,Steakhouse,Thai Restaurant,Sushi Restaurant,Hotel,Restaurant,Asian Restaurant,American Restaurant
7,M5K,Downtown Toronto,"Design Exchange,Toronto Dominion Centre",43.647177,-79.381576,0,Coffee Shop,Café,Hotel,Bar,Restaurant,Steakhouse,Seafood Restaurant,Italian Restaurant,Gastropub,Deli / Bodega
8,M5L,Downtown Toronto,"Commerce Court,Victoria Hotel",43.648198,-79.379817,0,Coffee Shop,Café,Hotel,Restaurant,American Restaurant,Gastropub,Seafood Restaurant,Gym,Steakhouse,Deli / Bodega
9,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418,0,Coffee Shop,Café,Hotel,Restaurant,Clothing Store,Bakery,Italian Restaurant,Cosmetics Shop,Gastropub,Breakfast Spot


#### Visualizing these clusters on map

In [30]:
map_clusters=folium.Map(location=[43.651070, -79.347015],zoom_start=12)

# set color scheme for the clusters
x = np.arange(k_clusters)
ys = [i+x+(i*x)**2 for i in range(k_clusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, post, bor, poi, cluster in zip(toronto_df_clustered['Latitude'], toronto_df_clustered['Longitude'], toronto_df_clustered['PostalCode'], toronto_df_clustered['Borough'], toronto_df_clustered['Neighborhood'], toronto_df_clustered['Cluster']):
    label = folium.Popup('{} ({}): {} - Cluster {}'.format(bor, post, poi, cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

## Clusters Formed:
### 1.  0-cluster with Airports, Shopping Complexes and Cafes
### 2.  1-cluster with mostly parks and playgrounds
### 3.  2-cluster with swimming schools and markets
### 4.  3-cluster with home service, garden and small businesses
### 5.  4-cluster with businesses mostly