## IBM DataScience Capstone Project: web_scraping

In [19]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import csv
import json

import os
from dotenv import load_dotenv
load_dotenv()

CLIENT_ID = os.environ["CLIENT_ID"]
CLIENT_SECRET = os.environ["CLIENT_SECRET"]
VERSION = os.environ["VERSION"]

VERSION_2 = os.environ["VERSION_2"]

In the cell above, we have imported all libraries necessary for this project. Additionally, I created an .env file to store my Api credentials and keep them secured.

Getting the source webpage and assigining the variable source to it and iniatilizing the beautifulsoup object to soup

In [2]:
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

src = requests.get(url).text 
soup = BeautifulSoup(src, 'lxml')

On this function, we scrap the given url in order to extract and parse to obtain the data that is in the table of postal codes and to transform the data into a pandas dataframe 

In [3]:
def url_par(url):
    response = requests.get(url)
    soup = BeautifulSoup(response.text, 'lxml')
    
    for table in soup.find_all('table', class_="wikitable sortable"):
    # We search for the table that stores the info we want inside the class "wikitable_..."
        n_columns = 0
        n_rows=0
        column_names = []
        
        for row in table.find_all('tr'):
            td_tags = row.find_all('td')
            if len(td_tags) > 0:
                n_rows+=1
                if n_columns == 0:
                    n_columns = len(td_tags)
                        
            th_tags = row.find_all('th') 
            if len(th_tags) > 0 and len(column_names) == 0:
                for th in th_tags:
                    column_names.append(th.get_text())
            columns = row.find_all('td')
    
        if len(column_names) > 0 and len(column_names) != n_columns:
            raise Exception("Column titles != number columns")
    
        columns = column_names if len(column_names) > 0 else range(0,n_columns)
        
        df = pd.DataFrame(columns = columns, index= range(0,n_rows))
        row_marker = 0
       
        for row in table.find_all('tr'):
            column_marker = 0
            columns = row.find_all('td')
            for column in columns:
                df.iat[row_marker,column_marker] = column.get_text()
                column_marker += 1
            if len(columns) > 0:
                row_marker += 1
                    
        for col in df:
            try:
                df[col] = df[col].astype(float)
                
            except ValueError:
                pass
            
        return df

def cleanup(df):
    df = df[df.Borough != 'Not assigned']
    df = df[df['Neighbourhood\n'] != 'Not assigned']

    df = df.replace('\n',' ', regex=True)
    return df

In [4]:
table_init = url_par(url)
df_fin = cleanup(table_init)
df_fin.head()

Unnamed: 0,Postcode,Borough,Neighbourhood\n
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M6A,North York,Lawrence Heights
6,M6A,North York,Lawrence Manor


In [5]:
df = df_fin.groupby(['Postcode','Borough'])['Neighbourhood\n'].apply(lambda x: ", ".join(x.astype(str))).reset_index()
df_final = df.sample(frac=1).reset_index(drop=True)

print("The dataframe shape is: ",df_final.shape)
display(df_final.head(10))

The dataframe shape is:  (103, 3)


Unnamed: 0,Postcode,Borough,Neighbourhood\n
0,M1M,Scarborough,"Cliffcrest , Cliffside , Scarborough Village W..."
1,M4W,Downtown Toronto,Rosedale
2,M4M,East Toronto,Studio District
3,M4H,East York,Thorncliffe Park
4,M6B,North York,Glencairn
5,M4L,East Toronto,"The Beaches West , India Bazaar"
6,M4G,East York,Leaside
7,M2L,North York,"Silver Hills , York Mills"
8,M8W,Etobicoke,"Alderwood , Long Branch"
9,M4X,Downtown Toronto,"Cabbagetown , St. James Town"


In the cell above we finally get the table requested for the exercise. Therefore, we have the dataframe with the following requirements matched:
- The df consist of three columns: PostalCode, Borough, and Neighborhood
- We have ignored the cells with a borough that is Not assigned. Same with the neighborhood one.
- As more than one neighborhood can exist in one postal code area, these two rows will be combined into one row with the neighborhoods separated with a comma.

## PART 2

In order to utilize the Foursquare location data, we need to get the latitude and the longitude coordinates of each neighborhood. We can use a link to a csv file that has the geographical coordinates of each postal code: http://cocl.us/Geospatial_data

Therefore, we created the requested dataframe:

In [6]:
postal_codes = df_final['Postcode'].values

url_geo ="http://cocl.us/Geospatial_data"

geo_data=pd.read_csv(url_geo)
geo_data.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [7]:
df_geo = pd.merge(left=df_final, right=geo_data, left_on='Postcode', right_on='Postal Code')
df_geo.head()

Unnamed: 0,Postcode,Borough,Neighbourhood\n,Postal Code,Latitude,Longitude
0,M1M,Scarborough,"Cliffcrest , Cliffside , Scarborough Village W...",M1M,43.716316,-79.239476
1,M4W,Downtown Toronto,Rosedale,M4W,43.679563,-79.377529
2,M4M,East Toronto,Studio District,M4M,43.659526,-79.340923
3,M4H,East York,Thorncliffe Park,M4H,43.705369,-79.349372
4,M6B,North York,Glencairn,M6B,43.709577,-79.445073


## Part 3

Finally, we are going to reproduce the NY City analysis here, at Toronto.

First, we import all the libraries we would like to use:

In [53]:
from geopy.geocoders import Nominatim 
import numpy as np
from pandas.io.json import json_normalize # Tranform JSON file into a pandas dataframe

# Visualisation
import matplotlib.cm as cm
import matplotlib.colors as colores
import folium 


#Modeling
from sklearn.cluster import KMeans

Then, we get the geographical coordinates of Toronto to start the clusterization

In [9]:
address = 'Toronto'

geolocator = Nominatim()
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

  This is separate from the ipykernel package so we can avoid doing imports until


The geograpical coordinate of Toronto are 43.653963, -79.387207.


In [10]:
map_geo = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, label in zip(df_geo['Latitude'], df_geo['Longitude'], df_geo['Neighbourhood\n']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='green',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_geo)  
    
map_geo

In [29]:
# Here, we define a function to obtain the categories
def get_category(row):
    try:
        cat_list = row['Category']
    except:
        cat_list = row['venue.categories']
        
    if len(cat_list) == 0:
        return None
    else:
        return cat_list[0]['name']
    

### Here we call the Foursquare API to get the data 

explore_df_list = []

for i, neigh_name in enumerate(df_geo['Neighbourhood\n']):  
    #try :         
        ### Getting the data of neighbourhood
    neigh_name = df_geo.loc[i, 'Neighbourhood\n'] #neigh_name
    neigh_lat = df_geo.loc[i, 'Latitude'] #Latitude
    neigh_lng = df_geo.loc[i, 'Longitude'] #Longitude
    radius = 500 
    LIMIT = 100 
    # LIMIT up to 100 venues

    url = f'https://api.foursquare.com/v2/venues/explore?client_id={CLIENT_ID}&client_secret={CLIENT_SECRET}&ll={neigh_lat},{neigh_lng}&v={VERSION}&radius={radius}&limit={LIMIT}'



            # Here we call the API

    
    results = json.loads(requests.get(url).text)
    results = results['response']['groups'][0]['items']

    near = pd.json_normalize(results) # Flattens JSON normalisation
    
    # Filtering the columns
    filtered_columns = ["venue.name","venue.categories","venue.location.lat","venue.location.lng"]
    near = near.filter(items=filtered_columns)

    # Renaming the columns
    columns = ['Name', 'Category', 'Latitude', 'Longitude']
    near.columns = columns

    # Gets the categories
    near['Category'] = near.apply(get_category, axis=1)
    # Gets the data required
    for i, name in enumerate(near['Name']):
        explore_df_list.append([neigh_name, neigh_lat, neigh_lng] + near.loc[i, :].values.tolist())
    
#except Exception as e:
#print(e)

Unnamed: 0,Name,Category,Latitude,Longitude
0,Have A Nap Motel,Motel,43.718256,-79.240135
1,Vincent's Spot,American Restaurant,43.717002,-79.242353


Unnamed: 0,Name,Category,Latitude,Longitude
0,Rosedale Park,Playground,43.682328,-79.378934
1,Whitney Park,Park,43.682036,-79.373788
2,Alex Murray Parkette,Park,43.6783,-79.382773
3,Milkman's Lane,Trail,43.676352,-79.373842


Unnamed: 0,Name,Category,Latitude,Longitude
0,Ed's Real Scoop,Ice Cream Shop,43.660656,-79.342019
1,Leslieville Pumps,Sandwich Place,43.660892,-79.340626
2,Queen Books,Bookstore,43.660651,-79.342267
3,Te Aro,Coffee Shop,43.661373,-79.338577
4,The Bone House,Pet Store,43.660894,-79.341097
5,Hooked,Fish Market,43.660407,-79.343257
6,Purple Penguin Cafe,Café,43.660501,-79.342565
7,Mercury Espresso Bar,Coffee Shop,43.660806,-79.341241
8,Leslieville,Neighborhood,43.66207,-79.337856
9,Brick Street Breads,Bakery,43.660685,-79.342501


Unnamed: 0,Name,Category,Latitude,Longitude
0,Costco,Warehouse Store,43.707051,-79.348093
1,Iqbal Kebab & Sweet Centre,Indian Restaurant,43.705923,-79.351521
2,Fit4Less,Gym,43.705689,-79.346018
3,Bikram Yoga East York,Yoga Studio,43.70545,-79.351448
4,Shoppers Drug Mart,Pharmacy,43.70581,-79.347044
5,Hero Certified Burgers,Burger Joint,43.705511,-79.347064
6,Iqbal foods,Grocery Store,43.705751,-79.352054
7,Subway,Sandwich Place,43.704596,-79.34967
8,Hakka Garden,Indian Restaurant,43.704578,-79.34977
9,Tim Hortons,Coffee Shop,43.70509,-79.350545


Unnamed: 0,Name,Category,Latitude,Longitude
0,Miyako Sushi Restaurant,Japanese Restaurant,43.709111,-79.44393
1,Domino's Pizza,Pizza Place,43.70717,-79.442658
2,Glencairn Subway Station,Metro Station,43.708872,-79.440801
3,"Chalker's Pub, Billiards and Bistro",Pub,43.705747,-79.442378
4,Fraserwood Park,Park,43.71355,-79.442482


Unnamed: 0,Name,Category,Latitude,Longitude
0,System Fitness,Gym,43.667171,-79.312733
1,British Style Fish & Chips,Fish & Chips Shop,43.668723,-79.317139
2,The Burger's Priest,Fast Food Restaurant,43.666731,-79.315556
3,Brett's Ice Cream,Ice Cream Shop,43.667222,-79.312831
4,Woodbine Park,Park,43.66486,-79.315109
5,Casa di Giorgio,Italian Restaurant,43.666645,-79.315204
6,Godspeed Brewery,Brewery,43.67262,-79.319228
7,O Sushi,Sushi Restaurant,43.666684,-79.316614
8,LCBO,Liquor Store,43.666732,-79.314966
9,Murphy's Law,Pub,43.667319,-79.312656


Unnamed: 0,Name,Category,Latitude,Longitude
0,Local Leaside,Sports Bar,43.710012,-79.363514
1,Olde Yorke Fish & Chips,Fish & Chips Shop,43.706141,-79.361829
2,Rack Attack,Sporting Goods Shop,43.706934,-79.362261
3,LCBO,Liquor Store,43.710571,-79.360287
4,CrossFit Toronto,Gym,43.7081,-79.35906
5,The Leaside Pub,Restaurant,43.710429,-79.363547
6,Enduro Sport,Bike Shop,43.706059,-79.361835
7,Aroma Espresso Bar,Coffee Shop,43.705611,-79.360775
8,Longo's,Supermarket,43.706433,-79.359753
9,Bulk Barn,Grocery Store,43.706116,-79.360541


Unnamed: 0,Name,Category,Latitude,Longitude
0,Mind Over Matter Karate School,Martial Arts Dojo,43.756101,-79.371296
1,Windfields Cafeteria,Cafeteria,43.755862,-79.370649
2,Vyner Greenbelt,Park,43.759642,-79.36959


Unnamed: 0,Name,Category,Latitude,Longitude
0,Il Paesano Pizzeria & Restaurant,Pizza Place,43.60128,-79.545028
1,Toronto Gymnastics International,Gym,43.599832,-79.542924
2,Timothy's Pub,Pub,43.600165,-79.544699
3,Tim Hortons,Coffee Shop,43.602396,-79.545048
4,Pizza Pizza,Pizza Place,43.60534,-79.547252
5,Subway,Sandwich Place,43.599152,-79.544395
6,Rexall,Pharmacy,43.601951,-79.545694
7,Alderwood Pool,Pool,43.601802,-79.547247
8,Sir Adam Beck Rink,Skating Rink,43.602526,-79.547455


Unnamed: 0,Name,Category,Latitude,Longitude
0,Butter Chicken Factory,Indian Restaurant,43.667072,-79.369184
1,Cranberries,Diner,43.667843,-79.369407
2,Kingyo Toronto,Japanese Restaurant,43.665895,-79.368415
3,Merryberry Cafe + Bistro,Café,43.66663,-79.368792
4,F'Amelia,Italian Restaurant,43.667536,-79.368613
5,Murgatroid,Restaurant,43.667381,-79.369311
6,Cabbagetown Brew,Café,43.666923,-79.369289
7,Absolute Bakery & Café,Bakery,43.667469,-79.369277
8,Fair Trade Jewellery Co.,Jewelry Store,43.665348,-79.368362
9,Toronto Dance Theatre,General Entertainment,43.666232,-79.367075


ValueError: Length mismatch: Expected axis has 0 elements, new values have 4 elements

#### We now turn our results to a DF

In [31]:
tor_df = pd.DataFrame([item for item in explore_df_list])
tor_df.columns = ['Neighbourhood', 'Neighbourhood Latitude', 'Neighbourhood Longitude', 'Venue Name', 'Venue Category', 'Venue Latitude', 'Venue Longitude']
tor_df.head()

Unnamed: 0,Neighbourhood,Neighbourhood Latitude,Neighbourhood Longitude,Venue Name,Venue Category,Venue Latitude,Venue Longitude
0,"Cliffcrest , Cliffside , Scarborough Village W...",43.716316,-79.239476,Have A Nap Motel,Motel,43.718256,-79.240135
1,"Cliffcrest , Cliffside , Scarborough Village W...",43.716316,-79.239476,Vincent's Spot,American Restaurant,43.717002,-79.242353
2,Rosedale,43.679563,-79.377529,Rosedale Park,Playground,43.682328,-79.378934
3,Rosedale,43.679563,-79.377529,Whitney Park,Park,43.682036,-79.373788
4,Rosedale,43.679563,-79.377529,Alex Murray Parkette,Park,43.6783,-79.382773


### One hot encoding
Now, I added neighborhood column to the df and moved it to the first column.

In [33]:
toronto_onehot = pd.get_dummies(tor_df[['Venue Category']], prefix="", prefix_sep="")

toronto_onehot['Neighbourhood'] = tor_df['Neighbourhood'] 

fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

toronto_onehot.head()

Unnamed: 0,Neighbourhood,American Restaurant,Arts & Crafts Store,Bagel Shop,Bakery,Bank,Bar,Beer Store,Bike Shop,Bookstore,...,Steakhouse,Supermarket,Sushi Restaurant,Taiwanese Restaurant,Thai Restaurant,Thrift / Vintage Store,Trail,Warehouse Store,Wine Bar,Yoga Studio
0,"Cliffcrest , Cliffside , Scarborough Village W...",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,"Cliffcrest , Cliffside , Scarborough Village W...",1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Rosedale,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Rosedale,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Rosedale,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [34]:
toronto_grouped = toronto_onehot.groupby('Neighbourhood').mean().reset_index()
toronto_grouped.head()

Unnamed: 0,Neighbourhood,American Restaurant,Arts & Crafts Store,Bagel Shop,Bakery,Bank,Bar,Beer Store,Bike Shop,Bookstore,...,Steakhouse,Supermarket,Sushi Restaurant,Taiwanese Restaurant,Thai Restaurant,Thrift / Vintage Store,Trail,Warehouse Store,Wine Bar,Yoga Studio
0,"Alderwood , Long Branch",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,"Cabbagetown , St. James Town",0.0,0.0,0.0,0.045455,0.022727,0.0,0.022727,0.0,0.0,...,0.0,0.0,0.0,0.022727,0.022727,0.0,0.0,0.0,0.0,0.0
2,"Cliffcrest , Cliffside , Scarborough Village W...",0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Glencairn,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Leaside,0.0,0.0,0.03125,0.0,0.03125,0.0,0.03125,0.03125,0.0,...,0.0,0.03125,0.03125,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [35]:
# With this function, we get the most common venues in our df. This way, we can create columns according 
# to number of top venues
def common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [36]:
num_class_venues = 11
indicators = ['st', 'nd', 'rd']

# Columns as number of class venues
columns = ['Neighbourhood']
for ind in np.arange(num_class_venues):
    columns.append(f'{ind+1} Most-common Type Venue')

# Create a new dataframe
venues_sorted = pd.DataFrame(columns=columns)
venues_sorted['Neighbourhood'] = toronto_grouped['Neighbourhood']

for ind in np.arange(toronto_grouped.shape[0]):
    venues_sorted.iloc[ind, 1:] = common_venues(toronto_grouped.iloc[ind, :], num_class_venues)

venues_sorted.head()

Unnamed: 0,Neighbourhood,1 Most-common Type Venue,2 Most-common Type Venue,3 Most-common Type Venue,4 Most-common Type Venue,5 Most-common Type Venue,6 Most-common Type Venue,7 Most-common Type Venue,8 Most-common Type Venue,9 Most-common Type Venue,10 Most-common Type Venue,11 Most-common Type Venue
0,"Alderwood , Long Branch",Pizza Place,Pharmacy,Skating Rink,Pool,Pub,Sandwich Place,Coffee Shop,Gym,Fish Market,Fish & Chips Shop,Comfort Food Restaurant
1,"Cabbagetown , St. James Town",Coffee Shop,Pub,Market,Restaurant,Pizza Place,Café,Italian Restaurant,Bakery,Liquor Store,Indian Restaurant,Chinese Restaurant
2,"Cliffcrest , Cliffside , Scarborough Village W...",American Restaurant,Motel,Fast Food Restaurant,Coworking Space,Deli / Bodega,Department Store,Dessert Shop,Diner,Electronics Store,Farmers Market,Fish & Chips Shop
3,Glencairn,Pizza Place,Park,Pub,Japanese Restaurant,Metro Station,Electronics Store,Convenience Store,Coworking Space,Deli / Bodega,Department Store,Dessert Shop
4,Leaside,Coffee Shop,Sporting Goods Shop,Furniture / Home Store,Burger Joint,Brewery,Fish & Chips Shop,Grocery Store,Gym,Electronics Store,Dessert Shop,Department Store


### K-Means

In [43]:
k = 5
tor_clusters = toronto_grouped.drop('Neighbourhood', 1)

# Run k-means clustering
kmeans = KMeans(n_clusters = k, random_state = 0).fit(tor_clusters)

# Check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

# Add clustering labels
venues_sorted.insert(0, 'K-Labels', kmeans.labels_)

In [45]:
df_final = df_final.rename(columns = {'Neighbourhood\n':'Neighbourhood'})
df_final = pd.merge(left=df_final, right=geo_data, left_on='Postcode', right_on='Postal Code')
tor_merged = df_final

tor_merged = tor_merged.join(venues_sorted.set_index('Neighbourhood'), on='Neighbourhood')
tor_merged.dropna(inplace = True)
tor_merged['K-Labels'] = tor_merged['K-Labels'].astype(int)
tor_merged.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Postal Code,Latitude,Longitude,K-Labels,Cluster Labels,1 Most-common Type Venue,2 Most-common Type Venue,3 Most-common Type Venue,4 Most-common Type Venue,5 Most-common Type Venue,6 Most-common Type Venue,7 Most-common Type Venue,8 Most-common Type Venue,9 Most-common Type Venue,10 Most-common Type Venue,11 Most-common Type Venue
0,M1M,Scarborough,"Cliffcrest , Cliffside , Scarborough Village W...",M1M,43.716316,-79.239476,2,2.0,American Restaurant,Motel,Fast Food Restaurant,Coworking Space,Deli / Bodega,Department Store,Dessert Shop,Diner,Electronics Store,Farmers Market,Fish & Chips Shop
1,M4W,Downtown Toronto,Rosedale,M4W,43.679563,-79.377529,4,4.0,Park,Playground,Trail,Yoga Studio,Farmers Market,Convenience Store,Coworking Space,Deli / Bodega,Department Store,Dessert Shop,Diner
2,M4M,East Toronto,Studio District,M4M,43.659526,-79.340923,1,1.0,Café,Coffee Shop,American Restaurant,Bakery,Italian Restaurant,Brewery,Gastropub,Diner,Middle Eastern Restaurant,Latin American Restaurant,Ice Cream Shop
3,M4H,East York,Thorncliffe Park,M4H,43.705369,-79.349372,1,1.0,Indian Restaurant,Sandwich Place,Yoga Studio,Coffee Shop,Park,Gas Station,Pharmacy,Pizza Place,Liquor Store,Restaurant,Burger Joint
4,M6B,North York,Glencairn,M6B,43.709577,-79.445073,0,0.0,Pizza Place,Park,Pub,Japanese Restaurant,Metro Station,Electronics Store,Convenience Store,Coworking Space,Deli / Bodega,Department Store,Dessert Shop


### Visualisation

In [59]:
map_Kmeans = folium.Map(location=[latitude, longitude], zoom_start=11)


# Color for the clusters
x = np.arange(k)
y = [i + x + (i*x)**2 for i in range(k)]

colors_list = cm.rainbow(np.linspace(0, 1, len(y)))
rainbow = [colores.rgb2hex(i) for i in colors_list]

# Markers to the map
markers_colors = []
for lat, lon, i, cluster in zip(tor_merged['Latitude'], tor_merged['Longitude'], tor_merged['Neighbourhood'], tor_merged['K-Labels']):
    label = folium.Popup(str(i) + ' (Cluster ' + str(cluster) + ')', parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_Kmeans)
       
map_Kmeans