In [1]:
import requests
import numpy as np
import pandas as pd
from bs4 import BeautifulSoup

<b> Getting plain html from wiki page and viewing as the nested html version </b>

In [2]:
# get plain html of wiki needs scraping
scrape_url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
html_url = requests.get(scrape_url).text

# view as nested version 
soup = BeautifulSoup(html_url,'lxml')
# print(soup.prettify())

<b> trying to find a tag to separate table </b>

In [3]:
# separate table
table = soup.find('table',{'class':'wikitable sortable'})

<b> trying to find a tag to separate fields </b>

In [4]:
# separate fields
table_parts = table.find_all('td')

<b> initiating empty dataframe with column names, 
 and looping over the html table, and appending 1 row at a time </b>

In [5]:
# initiate dataframe
col_names = ['Postcode','Borough','Neighborhood']
df = pd.DataFrame(columns=col_names)

In [6]:
# loop over separated html and append to df row by row

for i in range(0,len(table_parts),3):
    
    pc, boro, neigh = table_parts[i].text.strip(), table_parts[i+1].text.strip(), table_parts[i+2].text.strip()
    
    df = df.append({'Postcode':pc,
                   'Borough':boro,
                   'Neighborhood':neigh},ignore_index=True)
df.head()

Unnamed: 0,Postcode,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


<b> removing rows where borough is not assigned and 
 simply grouping by postcode and borough and aggregating 
 neighborhood by comma </b>

In [7]:
# removing rows where borough = not assigned
df['Borough'].replace(to_replace='Not assigned', value = np.nan ,inplace=True)
df.dropna(inplace=True)

# group by postcode and borough and join Neighborhood by ','
dft = df.groupby(['Postcode','Borough'])['Neighborhood'].apply(','.join).reset_index()
dft.head()

Unnamed: 0,Postcode,Borough,Neighborhood
0,M1B,Scarborough,"Rouge,Malvern"
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union"
2,M1E,Scarborough,"Guildwood,Morningside,West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


<b>Only 1 row has neighborhood as not assigned, simply replacing that with the Borough name</b>

In [8]:
dft['Neighborhood'].replace(to_replace='Neighborhood',value="Queen's Park",inplace=True)

In [9]:
dft.shape

(103, 3)

In [10]:
# creating a df to join to the main df 
postcode_lat_lon = 'https://cocl.us/Geospatial_data'

!wget -q -O 'toronto_m.geospatial_data.csv' postcodes_lat_lon

postcodes_data = pd.read_csv(postcode_lat_lon)
postcodes_data.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [11]:
# joining 2 dataframes on postalcode
df2 = pd.merge(dft,postcodes_data,how='left',left_on='Postcode',right_on='Postal Code').drop(labels='Postal Code',axis=1)
df2.head()

Unnamed: 0,Postcode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge,Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood,Morningside,West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


In [12]:
import folium 
from geopy import geocoders

In [14]:
address = 'Toronto, ON'

geolocator = geocoders.Nominatim(user_agent='toronto_explorer')

location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinates of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinates of Toronto are 43.653963, -79.387207.


In [15]:
# create map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(df2['Latitude'], df2['Longitude'], df2['Borough'], df2['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

In [16]:
# The code was removed by Watson Studio for sharing.

Your credentails:
CLIENT_ID: LUZTZGA4FOJOXBM3HKTEP1N1DNUAI1LHYR0SJT1FQILHKVXB
CLIENT_SECRET:Y1YJYNNZF1XFMWI013E5IDNL01ZWSVRAVZWSZJ2KR5OVWPHC


In [17]:
downtown = df2[df2['Borough'] == 'Downtown Toronto'].reset_index(drop=True)
downtown.head()

Unnamed: 0,Postcode,Borough,Neighborhood,Latitude,Longitude
0,M4W,Downtown Toronto,Rosedale,43.679563,-79.377529
1,M4X,Downtown Toronto,"Cabbagetown,St. James Town",43.667967,-79.367675
2,M4Y,Downtown Toronto,Church and Wellesley,43.66586,-79.38316
3,M5A,Downtown Toronto,"Harbourfront,Regent Park",43.65426,-79.360636
4,M5B,Downtown Toronto,"Ryerson,Garden District",43.657162,-79.378937


In [18]:
suburb_lat = downtown.loc[0,'Latitude']
suburb_lon = downtown.loc[0,'Longitude']

suburb_name = downtown.loc[0,'Neighborhood']

print("{a}'s latitude is {b},longitude is {c}.".format(a=suburb_name, b=suburb_lat, c=suburb_lon))

Rosedale's latitude is 43.6795626,longitude is -79.37752940000001.


In [21]:
limit = 100
radius = 500

# url creation

rosedale_url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    suburb_lat, 
    suburb_lon, 
    radius, 
    limit)

results = requests.get(rosedale_url).json()

In [22]:
import json # library to handle JSON files
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

In [23]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']
    
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues = nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Mooredale House,Building,43.678631,-79.380091
1,Rosedale Park,Playground,43.682328,-79.378934
2,Whitney Park,Park,43.682036,-79.373788
3,Alex Murray Parkette,Park,43.6783,-79.382773
4,Milkman's Lane,Trail,43.676352,-79.373842


In [24]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            limit)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [25]:
downtown_venues = getNearbyVenues(names=downtown['Neighborhood'],
                                   latitudes=downtown['Latitude'],
                                   longitudes=downtown['Longitude']
                                  )

Rosedale
Cabbagetown,St. James Town
Church and Wellesley
Harbourfront,Regent Park
Ryerson,Garden District
St. James Town
Berczy Park
Central Bay Street
Adelaide,King,Richmond
Harbourfront East,Toronto Islands,Union Station
Design Exchange,Toronto Dominion Centre
Commerce Court,Victoria Hotel
Harbord,University of Toronto
Chinatown,Grange Park,Kensington Market
CN Tower,Bathurst Quay,Island airport,Harbourfront West,King and Spadina,Railway Lands,South Niagara
Stn A PO Boxes 25 The Esplanade
First Canadian Place,Underground city
Christie


In [26]:
downtown_venues.shape

(1283, 7)

In [27]:
downtown_venues.groupby('Neighborhood')['Venue'].count().sort_values(ascending=False)

Neighborhood
Commerce Court,Victoria Hotel                                                                           100
First Canadian Place,Underground city                                                                   100
Chinatown,Grange Park,Kensington Market                                                                 100
St. James Town                                                                                          100
Design Exchange,Toronto Dominion Centre                                                                 100
Adelaide,King,Richmond                                                                                  100
Harbourfront East,Toronto Islands,Union Station                                                         100
Ryerson,Garden District                                                                                 100
Stn A PO Boxes 25 The Esplanade                                                                          94
Church and Well

# For ease of reading, filtering df for Neighborhoods without ','

In [37]:
downtown_venues = downtown_venues[~downtown_venues['Neighborhood'].str.contains(',')]

<b>Checking number of venues and venue categories in neighborhoods</b>

In [40]:
downtown_venues.groupby('Neighborhood')[['Venue','Venue Category']].count().sort_values(by="Venue",ascending=False)

Unnamed: 0_level_0,Venue,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1
St. James Town,100,100
Stn A PO Boxes 25 The Esplanade,94,94
Church and Wellesley,85,85
Central Bay Street,83,83
Berczy Park,55,55
Christie,16,16
Rosedale,5,5


<b>finding out how many unique categories can be curated from all the returned venues</b>

In [42]:
print('There are {} uniques categories.'.format(len(downtown_venues['Venue Category'].unique())))

There are 134 uniques categories.


In [49]:
downtown_venues_dummied = pd.get_dummies(data=downtown_venues,columns=['Venue Category'],drop_first=True)

In [50]:
downtown_venues_dummied.drop(labels=['Neighborhood Latitude', 'Neighborhood Longitude', 'Venue', 'Venue Latitude', 'Venue Longitude'],axis=1,inplace=True)
downtown_venues_dummied.columns = pd.Index(map(lambda x : str(x)[len('Venue Category_'):], downtown_venues_dummied.columns))

In [53]:
downtown_venues_dummied.rename({'':'Neighborhood'},axis=1,inplace=True)

In [56]:
downtown_grouped = downtown_venues_dummied.groupby('Neighborhood').mean().reset_index()
downtown_grouped.head()

Unnamed: 0,Neighborhood,American Restaurant,Antique Shop,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,BBQ Joint,Baby Store,...,Thai Restaurant,Theater,Theme Restaurant,Trail,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wings Joint,Yoga Studio
0,Berczy Park,0.0,0.0,0.018182,0.0,0.0,0.0,0.0,0.0,0.0,...,0.018182,0.0,0.0,0.0,0.018182,0.0,0.0,0.0,0.0,0.0
1,Central Bay Street,0.012048,0.0,0.0,0.012048,0.0,0.0,0.0,0.0,0.0,...,0.012048,0.0,0.0,0.0,0.012048,0.0,0.0,0.012048,0.0,0.012048
2,Christie,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0625,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Church and Wellesley,0.011765,0.0,0.0,0.0,0.011765,0.0,0.0,0.0,0.0,...,0.011765,0.011765,0.011765,0.0,0.0,0.011765,0.011765,0.0,0.011765,0.011765
4,Rosedale,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0


In [59]:
num_top_venues = 5

for hood in downtown_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = downtown_grouped[downtown_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Berczy Park----
          venue  freq
0   Coffee Shop  0.11
1  Cocktail Bar  0.05
2        Bakery  0.04
3   Cheese Shop  0.04
4    Steakhouse  0.04


----Central Bay Street----
                venue  freq
0         Coffee Shop  0.14
1                Café  0.05
2  Italian Restaurant  0.05
3      Ice Cream Shop  0.05
4        Burger Joint  0.04


----Christie----
           venue  freq
0  Grocery Store  0.19
1           Café  0.19
2           Park  0.12
3     Baby Store  0.06
4     Restaurant  0.06


----Church and Wellesley----
                 venue  freq
0          Coffee Shop  0.07
1  Japanese Restaurant  0.06
2              Gay Bar  0.05
3     Sushi Restaurant  0.05
4           Restaurant  0.04


----Rosedale----
                             venue  freq
0                             Park   0.4
1                       Playground   0.2
2                         Building   0.2
3                            Trail   0.2
4  Molecular Gastronomy Restaurant   0.0


----St. James Town----

In [60]:
# function to return most common venue categories
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]


num_top_venues = 10
indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

In [65]:
# create a new empty dataframe
downtown_venues_sorted = pd.DataFrame(columns=columns)
downtown_venues_sorted['Neighborhood'] = downtown_grouped['Neighborhood']


for ind in np.arange(downtown_grouped.shape[0]):
    downtown_venues_sorted.iloc[ind, 1:] = return_most_common_venues(downtown_grouped.iloc[ind, :], num_top_venues)

downtown_venues_sorted

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Berczy Park,Coffee Shop,Cocktail Bar,Cheese Shop,Bakery,Beer Bar,Seafood Restaurant,Steakhouse,Café,Farmers Market,Comfort Food Restaurant
1,Central Bay Street,Coffee Shop,Ice Cream Shop,Italian Restaurant,Café,Burger Joint,Sandwich Place,Bubble Tea Shop,Middle Eastern Restaurant,Spa,Salad Place
2,Christie,Grocery Store,Café,Park,Baby Store,Italian Restaurant,Diner,Convenience Store,Nightclub,Restaurant,Coffee Shop
3,Church and Wellesley,Coffee Shop,Japanese Restaurant,Sushi Restaurant,Gay Bar,Restaurant,Gastropub,Fast Food Restaurant,Hotel,Pub,Gym
4,Rosedale,Park,Playground,Building,Trail,Dance Studio,Discount Store,Diner,Dessert Shop,Department Store,Deli / Bodega
5,St. James Town,Coffee Shop,Restaurant,Hotel,Café,Italian Restaurant,Cocktail Bar,Beer Bar,Breakfast Spot,Gastropub,Cosmetics Shop
6,Stn A PO Boxes 25 The Esplanade,Coffee Shop,Restaurant,Café,Beer Bar,Seafood Restaurant,Cocktail Bar,Hotel,Fast Food Restaurant,Cheese Shop,Park


# K means clustering

In [66]:
from sklearn.cluster import KMeans


<b>Will create and group neighborhoods in 3 clusters  </b>

In [80]:
# set number of clusters
k = 3

downtown_grouped_clustering = downtown_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=k, random_state=0).fit(downtown_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([1, 1, 2, 1, 0, 1, 1], dtype=int32)

<b> Will create a new df by joining neighborhoods, boroughs, clusters and most common venues</b>

In [81]:
# add clustering labels
downtown_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

In [82]:
df_cluster = pd.merge(downtown_venues_sorted,df2,how='left',left_on='Neighborhood',right_on='Neighborhood')

In [83]:
df_cluster.set_index('Neighborhood')

Unnamed: 0_level_0,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Postcode,Borough,Latitude,Longitude
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
Berczy Park,1,Coffee Shop,Cocktail Bar,Cheese Shop,Bakery,Beer Bar,Seafood Restaurant,Steakhouse,Café,Farmers Market,Comfort Food Restaurant,M5E,Downtown Toronto,43.644771,-79.373306
Central Bay Street,1,Coffee Shop,Ice Cream Shop,Italian Restaurant,Café,Burger Joint,Sandwich Place,Bubble Tea Shop,Middle Eastern Restaurant,Spa,Salad Place,M5G,Downtown Toronto,43.657952,-79.387383
Christie,2,Grocery Store,Café,Park,Baby Store,Italian Restaurant,Diner,Convenience Store,Nightclub,Restaurant,Coffee Shop,M6G,Downtown Toronto,43.669542,-79.422564
Church and Wellesley,1,Coffee Shop,Japanese Restaurant,Sushi Restaurant,Gay Bar,Restaurant,Gastropub,Fast Food Restaurant,Hotel,Pub,Gym,M4Y,Downtown Toronto,43.66586,-79.38316
Rosedale,0,Park,Playground,Building,Trail,Dance Studio,Discount Store,Diner,Dessert Shop,Department Store,Deli / Bodega,M4W,Downtown Toronto,43.679563,-79.377529
St. James Town,1,Coffee Shop,Restaurant,Hotel,Café,Italian Restaurant,Cocktail Bar,Beer Bar,Breakfast Spot,Gastropub,Cosmetics Shop,M5C,Downtown Toronto,43.651494,-79.375418
Stn A PO Boxes 25 The Esplanade,1,Coffee Shop,Restaurant,Café,Beer Bar,Seafood Restaurant,Cocktail Bar,Hotel,Fast Food Restaurant,Cheese Shop,Park,M5W,Downtown Toronto,43.646435,-79.374846


In [75]:
import matplotlib.cm as cm
import matplotlib.colors as colors

In [84]:
# create map
map_clustered = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(k)
ys = [i + x + (i*x)**2 for i in range(k)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(df_cluster['Latitude'], df_cluster['Longitude'], df_cluster['Neighborhood'], df_cluster['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clustered)
       
map_clustered

In [95]:
cluster1 = df_cluster[df_cluster['Cluster Labels'] == 1].drop(labels=['Neighborhood', 'Postcode', 'Borough', 'Latitude', 'Longitude','Cluster Labels'],axis=1)
cluster1

Unnamed: 0,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Coffee Shop,Cocktail Bar,Cheese Shop,Bakery,Beer Bar,Seafood Restaurant,Steakhouse,Café,Farmers Market,Comfort Food Restaurant
1,Coffee Shop,Ice Cream Shop,Italian Restaurant,Café,Burger Joint,Sandwich Place,Bubble Tea Shop,Middle Eastern Restaurant,Spa,Salad Place
3,Coffee Shop,Japanese Restaurant,Sushi Restaurant,Gay Bar,Restaurant,Gastropub,Fast Food Restaurant,Hotel,Pub,Gym
5,Coffee Shop,Restaurant,Hotel,Café,Italian Restaurant,Cocktail Bar,Beer Bar,Breakfast Spot,Gastropub,Cosmetics Shop
6,Coffee Shop,Restaurant,Café,Beer Bar,Seafood Restaurant,Cocktail Bar,Hotel,Fast Food Restaurant,Cheese Shop,Park


In [94]:
cluster2 = df_cluster[df_cluster['Cluster Labels'] == 2].drop(labels=['Neighborhood', 'Postcode', 'Borough', 'Latitude', 'Longitude','Cluster Labels'],axis=1)
cluster2

Unnamed: 0,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,Grocery Store,Café,Park,Baby Store,Italian Restaurant,Diner,Convenience Store,Nightclub,Restaurant,Coffee Shop


In [96]:
cluster0 = df_cluster[df_cluster['Cluster Labels'] == 0].drop(labels=['Neighborhood', 'Postcode', 'Borough', 'Latitude', 'Longitude','Cluster Labels'],axis=1)
cluster0

Unnamed: 0,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
4,Park,Playground,Building,Trail,Dance Studio,Discount Store,Diner,Dessert Shop,Department Store,Deli / Bodega
