In [1]:
import pandas as pd
import numpy as np
import folium
import requests
from sklearn.cluster import KMeans

In [2]:
# download the postcode dataset from Internet and store it
url  = "http://www.citypostcodes.com.au/Canberra"
page = requests.get(url)
if page.status_code == 200:
    print('Page download successful')
else:
    print('Page download error. Error code: {}'.format(page.status_code))

Page download successful


In [3]:
df = pd.read_html(url, header=0)[0]
df.head()

Unnamed: 0,Suburb,Postcode,City
0,Acton Postcode,2601,Canberra
1,Ainslie Postcode,2602,Canberra
2,Amaroo Postcode,2914,Canberra
3,Aranda Postcode,2614,Canberra
4,Australian National University Postcode,200,Canberra


In [4]:
# delete the last rows since it is not the postcode data
df = df.drop([132])

In [5]:
# delete last nine words in 'Suburb'
df['Suburb'] = df['Suburb'].str[:-9]

In [6]:
df.head()

Unnamed: 0,Suburb,Postcode,City
0,Acton,2601,Canberra
1,Ainslie,2602,Canberra
2,Amaroo,2914,Canberra
3,Aranda,2614,Canberra
4,Australian National University,200,Canberra


In [7]:
# check the unique suburb
len(df['Suburb'].unique())

130

In [8]:
# find the duplicated rows
df[df['Suburb'].duplicated()]

Unnamed: 0,Suburb,Postcode,City
8,Belconnen,2616,Canberra
17,Canberra,2600,Canberra


In [9]:
# delete the duplicate rows
df = df.drop([8])
df = df.drop([17])
df.head()

Unnamed: 0,Suburb,Postcode,City
0,Acton,2601,Canberra
1,Ainslie,2602,Canberra
2,Amaroo,2914,Canberra
3,Aranda,2614,Canberra
4,Australian National University,200,Canberra


In [10]:
df.shape

(130, 3)

In [11]:
# download the coordinates dataset of canberra
url  = "https://www.geonames.org/postal-codes/AU/ACT/australian-capital-territory.html"
page = requests.get(url)
if page.status_code == 200:
    print('Page download successful')
else:
    print('Page download error. Error code: {}'.format(page.status_code))

Page download successful


In [12]:
dc = pd.read_csv('Canberra location.csv')
dc.head()

Unnamed: 0,Suburb,Country,State,Latitude,Longitude
0,Braddon,Australia,Australian Capital Territory,-35.27,149.13
1,Kambah,Australia,Australian Capital Territory,-35.39,149.06
2,Deakin,Australia,Australian Capital Territory,-35.32,149.1
3,Yarralumla,Australia,Australian Capital Territory,-35.31,149.1
4,Barton,Australia,Australian Capital Territory,-35.31,149.14


In [13]:
# use the same way as before to delete the duplicates
len(dc['Suburb'].unique())

151

In [14]:
dc[dc['Suburb'].duplicated()]

Unnamed: 0,Suburb,Country,State,Latitude,Longitude
122,Canberra,Australia,Australian Capital Territory,-35.28,149.13
123,Barton,Australia,Australian Capital Territory,-35.31,149.14
138,Belconnen,Australia,Australian Capital Territory,-35.22,149.08


In [15]:
dc = dc.drop([122])
dc = dc.drop([123])
dc = dc.drop([138])

In [16]:
dc.head()

Unnamed: 0,Suburb,Country,State,Latitude,Longitude
0,Braddon,Australia,Australian Capital Territory,-35.27,149.13
1,Kambah,Australia,Australian Capital Territory,-35.39,149.06
2,Deakin,Australia,Australian Capital Territory,-35.32,149.1
3,Yarralumla,Australia,Australian Capital Territory,-35.31,149.1
4,Barton,Australia,Australian Capital Territory,-35.31,149.14


In [17]:
# merge two dataset
df_merge = pd.merge(df, dc, on='Suburb')
df_merge.head()

Unnamed: 0,Suburb,Postcode,City,Country,State,Latitude,Longitude
0,Acton,2601,Canberra,Australia,Australian Capital Territory,-35.28,149.12
1,Ainslie,2602,Canberra,Australia,Australian Capital Territory,-35.26,149.15
2,Amaroo,2914,Canberra,Australia,Australian Capital Territory,-35.17,149.13
3,Aranda,2614,Canberra,Australia,Australian Capital Territory,-35.26,149.08
4,Australian National University,200,Canberra,Australia,Australian Capital Territory,-35.28,149.12


In [18]:
# check the unique Suburb of merged dataset
len(df_merge['Suburb'].unique())

121

In [19]:
df_merge.dtypes

Suburb        object
Postcode      object
City          object
Country       object
State         object
Latitude     float64
Longitude    float64
dtype: object

In [20]:
# Obtain the coordinates of Canberra
lat_cbr = df_merge['Latitude'].mean()
lon_cbr = df_merge['Longitude'].mean()
print('The geographical coordinates of Canberra are {}, {}'.format(lat_cbr, lon_cbr))

The geographical coordinates of Canberra are -35.30322314049588, 149.10900826446286


In [21]:
# create map of Canberra using latitude and longitude values
map_cbr = folium.Map(location=[lat_cbr, lon_cbr], zoom_start=11)

# add markers to map
for lat, lng, sub in zip(df_merge['Latitude'], 
                        df_merge['Longitude'],
                        df_merge['Suburb']):
    label_text = sub
    label = folium.Popup(label_text, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        tooltip = label_text,
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_cbr)  
    
map_cbr

In [22]:
CLIENT_ID = 'XXXX' # your Foursquare ID
CLIENT_SECRET = 'XXXXX' # your Foursquare Secret
VERSION = '20200313' # Foursquare API version
LIMIT = 100 # limit of number of venues returned by Foursquare API
radius = 500 # define radius

In [23]:
def getNearbyVenues(post, suburb, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for post, sub, lat, lng in zip(post, suburb, latitudes, longitudes):
        print(post, sub)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            post, 
            sub,
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Postcode', 
                             'Suburb', 
                             'SuburbLatitude', 
                             'SuburbLongitude', 
                             'VenueName', 
                             'VenueLatitude', 
                             'VenueLongitude', 
                             'VenueCategory']   
    return(nearby_venues)

In [26]:
#Get venues for all neighbourhoods in our dataset
venues_df = getNearbyVenues(post=df_merge['Postcode'],
                                suburb=df_merge['Suburb'],
                                latitudes=df_merge['Latitude'],
                                longitudes=df_merge['Longitude'])

2601 Acton
2602 Ainslie
2914 Amaroo
2614 Aranda
0200 Australian National University
2906 Banks
2600 Barton
2617 Belconnen
2601 Black Mountain
2914 Bonner
2905 Bonython
2612 Braddon
2617 Bruce
2905 Calwell
2612 Campbell
2601 Canberra
2604 Causeway
2611 Chapman
2615 Charnwood
2606 Chifley
2905 Chisholm
2608 Civic Square
2906 Conder
2614 Cook
2605 Curtin
2600 Deakin
2600 Deakin West
2602 Dickson
2602 Downer
2611 Duffy
2615 Dunlop
2600 Duntroon
2903 Erindale Centre
2617 Evatt
2904 Fadden
2607 Farrer
2611 Fisher
2615 Florey
2615 Flynn
2914 Forde
2603 Forrest
2615 Fraser
2609 Fyshwick
2605 Garran
2905 Gilmore
2913 Ginninderra Village
2617 Giralang
2906 Gordon
2904 Gowrie
2900 Greenway
2603 Griffith
2912 Gungahlin
2602 Hackett
2618 Hall
2600 Harman
2914 Harrison
2614 Hawker
2615 Higgins
2611 Holder
2615 Holt
2605 Hughes
2620 Hume
2607 Isaacs
2905 Isabella Plains
2614 Jamison Centre
2540 Jervis Bay
2617 Kaleen
2902 Kambah
2604 Kingston
2615 Kippax
2615 Kippax Centre
2615 Latham
2617 Lawson
260

In [27]:
#Check size of resulting dataframe
venues_df.shape

(707, 8)

In [28]:
venues_df.head()

Unnamed: 0,Postcode,Suburb,SuburbLatitude,SuburbLongitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
0,2601,Acton,-35.28,149.12,Llewellyn Hall,-35.280604,149.123442,Concert Hall
1,2601,Acton,-35.28,149.12,Group Seven Espresso,-35.281864,149.124639,Coffee Shop
2,2601,Acton,-35.28,149.12,Mr Papa,-35.27855,149.122566,Food Truck
3,2601,Acton,-35.28,149.12,National Film & Sound Archive,-35.283131,149.121143,Museum
4,2601,Acton,-35.28,149.12,Union Court,-35.277345,149.120897,Plaza


In [29]:
# save the wenues info
venues_df.to_csv('CBR_Venues.csv')

In [30]:
#Number of venues per neighbourhood
venues_df.groupby('Suburb').count()

Unnamed: 0_level_0,Postcode,SuburbLatitude,SuburbLongitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
Suburb,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Acton,12,12,12,12,12,12,12
Ainslie,4,4,4,4,4,4,4
Amaroo,2,2,2,2,2,2,2
Aranda,2,2,2,2,2,2,2
Australian National University,12,12,12,12,12,12,12
Banks,2,2,2,2,2,2,2
Barton,8,8,8,8,8,8,8
Belconnen,38,38,38,38,38,38,38
Black Mountain,4,4,4,4,4,4,4
Bonner,3,3,3,3,3,3,3


In [31]:
#Number of unique venue categories
print('There are {} uniques categories.'.format(len(venues_df['VenueCategory'].unique())))

There are 169 uniques categories.


In [32]:
# one hot encoding
cbr_onehot = pd.get_dummies(venues_df[['VenueCategory']], prefix="", prefix_sep="")

# add postal, borough and neighborhood column back to dataframe
cbr_onehot['Postcode'] = venues_df['Postcode'] 
cbr_onehot['Suburb'] = venues_df['Suburb'] 

# move postal and suburb column to the first column
fixed_columns = list(cbr_onehot.columns[-3:]) + list(cbr_onehot.columns[:-3])
cbr_onehot = cbr_onehot[fixed_columns]

print(cbr_onehot.shape)
cbr_onehot.head()

(707, 171)


Unnamed: 0,Yoga Studio,Postcode,Suburb,Airport,Airport Lounge,Airport Terminal,Art Gallery,Art Museum,Asian Restaurant,Athletics & Sports,...,Tiki Bar,Toy / Game Store,Track,Trail,Train Station,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Whisky Bar,Wine Bar,Winery
0,0,2601,Acton,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,2601,Acton,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,2601,Acton,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,2601,Acton,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,2601,Acton,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [33]:
# group the rows by suburb
cbr_grouped = cbr_onehot.groupby(["Postcode", "Suburb"]).mean().reset_index()

print(cbr_grouped.shape)
cbr_grouped

(104, 171)


Unnamed: 0,Postcode,Suburb,Yoga Studio,Airport,Airport Lounge,Airport Terminal,Art Gallery,Art Museum,Asian Restaurant,Athletics & Sports,...,Tiki Bar,Toy / Game Store,Track,Trail,Train Station,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Whisky Bar,Wine Bar,Winery
0,0200,Australian National University,0.0,0.0,0.0,0.0,0.000000,0.00,0.000000,0.00,...,0.00,0.00,0.000000,0.00,0.000000,0.0000,0.00000,0.00,0.00,0.0
1,2540,Jervis Bay,0.0,0.0,0.0,0.0,0.000000,0.00,0.000000,0.00,...,0.00,0.00,0.000000,0.00,0.000000,0.0000,0.00000,0.00,0.00,0.0
2,2600,Barton,0.0,0.0,0.0,0.0,0.125000,0.00,0.000000,0.00,...,0.00,0.00,0.000000,0.00,0.000000,0.0000,0.00000,0.00,0.00,0.0
3,2600,Deakin,0.0,0.0,0.0,0.0,0.000000,0.00,0.000000,0.00,...,0.00,0.00,0.000000,0.00,0.000000,0.0000,0.00000,0.00,0.00,0.0
4,2600,Deakin West,0.0,0.0,0.0,0.0,0.000000,0.00,0.000000,0.00,...,0.00,0.00,0.000000,0.00,0.000000,0.0000,0.00000,0.00,0.00,0.0
5,2600,Duntroon,0.0,0.0,0.0,0.0,0.000000,0.00,0.000000,0.00,...,0.00,0.00,0.000000,0.00,0.000000,0.0000,0.00000,0.00,0.00,0.0
6,2600,Harman,0.0,0.0,0.0,0.0,0.000000,0.00,0.000000,0.00,...,0.00,0.00,0.333333,0.00,0.000000,0.0000,0.00000,0.00,0.00,0.0
7,2600,Parkes,0.0,0.0,0.0,0.0,0.052632,0.00,0.000000,0.00,...,0.00,0.00,0.000000,0.00,0.000000,0.0000,0.00000,0.00,0.00,0.0
8,2600,Parliament House,0.0,0.0,0.0,0.0,0.000000,0.00,0.000000,0.00,...,0.00,0.00,0.000000,0.00,0.000000,0.0000,0.00000,0.00,0.00,0.0
9,2600,Russell,0.0,0.0,0.0,0.0,0.000000,0.00,0.000000,0.00,...,0.00,0.00,0.000000,0.00,0.000000,0.0000,0.00000,0.00,0.00,0.0


In [34]:
# display top 10 venues for each suburb
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
areaColumns = ['Postcode', 'Suburb']
freqColumns = []
for ind in np.arange(num_top_venues):
    try:
        freqColumns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        freqColumns.append('{}th Most Common Venue'.format(ind+1))
columns = areaColumns+freqColumns

# create a new dataframe
sub_venues_sorted = pd.DataFrame(columns=columns)
sub_venues_sorted['Postcode'] = cbr_grouped['Postcode']
sub_venues_sorted['Suburb'] = cbr_grouped['Suburb']

for ind in np.arange(cbr_grouped.shape[0]):
    row_categories = cbr_grouped.iloc[ind, :].iloc[2:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    sub_venues_sorted.iloc[ind, 2:] = row_categories_sorted.index.values[0:num_top_venues]

# sub_venues_sorted.sort_values(freqColumns, inplace=True)
print(sub_venues_sorted.shape)
sub_venues_sorted

(104, 12)


Unnamed: 0,Postcode,Suburb,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,0200,Australian National University,Café,Coffee Shop,Plaza,Concert Hall,Food Truck,Museum,Sandwich Place,Dim Sum Restaurant,Pub,Indian Restaurant
1,2540,Jervis Bay,Business Service,Winery,Field,Food Truck,Food Court,Food & Drink Shop,Flower Shop,Flea Market,Fish & Chips Shop,Filipino Restaurant
2,2600,Barton,Park,Hotel,Bar,Flea Market,Burger Joint,Sports Bar,Art Gallery,Health & Beauty Service,Fast Food Restaurant,Food Court
3,2600,Deakin,Café,Gym / Fitness Center,Winery,Fast Food Restaurant,Food Court,Food & Drink Shop,Flower Shop,Flea Market,Fish & Chips Shop,Filipino Restaurant
4,2600,Deakin West,Café,Hotel,Coffee Shop,Breakfast Spot,Gym,Japanese Restaurant,French Restaurant,Music Venue,Event Space,Cantonese Restaurant
5,2600,Duntroon,Golf Course,Farm,Food Court,Food & Drink Shop,Flower Shop,Flea Market,Fish & Chips Shop,Filipino Restaurant,Field,Fast Food Restaurant
6,2600,Harman,IT Services,Track,Gym,Winery,Farm,Food Court,Food & Drink Shop,Flower Shop,Flea Market,Fish & Chips Shop
7,2600,Parkes,Café,Plaza,Hotel Bar,Bookstore,Library,Garden,Movie Theater,Event Space,History Museum,Science Museum
8,2600,Parliament House,Café,Hotel,Coffee Shop,Breakfast Spot,Gym,Japanese Restaurant,French Restaurant,Music Venue,Event Space,Cantonese Restaurant
9,2600,Russell,Playground,Café,Restaurant,Park,Coffee Shop,Plaza,Farm,Flower Shop,Flea Market,Fish & Chips Shop


In [36]:
# set number of clusters
kclusters = 5

cbr_grouped_clustering = cbr_grouped.drop(["Postcode", "Suburb"], 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(cbr_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:103]

array([2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2,
       2, 4, 2, 2, 2, 0, 2, 4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       0, 2, 4, 2, 4, 3, 2, 2, 1, 2, 2, 1, 2, 2, 3])

In [38]:
cbr_grouped.head()

Unnamed: 0,Postcode,Suburb,Yoga Studio,Airport,Airport Lounge,Airport Terminal,Art Gallery,Art Museum,Asian Restaurant,Athletics & Sports,...,Tiki Bar,Toy / Game Store,Track,Trail,Train Station,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Whisky Bar,Wine Bar,Winery
0,200,Australian National University,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2540,Jervis Bay,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2600,Barton,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,2600,Deakin,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,2600,Deakin West,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [42]:
cbr_grouped.shape, df_merge.shape

((104, 171), (121, 7))

In [47]:
cbr_merged = pd.merge(df_merge, cbr_grouped, on=['Suburb', 'Postcode'])
cbr_merged = cbr_merged.loc[:, :"Longitude"]
cbr_merged.head()

Unnamed: 0,Suburb,Postcode,City,Country,State,Latitude,Longitude
0,Acton,2601,Canberra,Australia,Australian Capital Territory,-35.28,149.12
1,Ainslie,2602,Canberra,Australia,Australian Capital Territory,-35.26,149.15
2,Amaroo,2914,Canberra,Australia,Australian Capital Territory,-35.17,149.13
3,Aranda,2614,Canberra,Australia,Australian Capital Territory,-35.26,149.08
4,Australian National University,200,Canberra,Australia,Australian Capital Territory,-35.28,149.12


In [48]:
# create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood.
# cbr_merged = cbr_grouped.copy()

# add clustering labels
cbr_merged["Cluster Labels"] = kmeans.labels_

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
cbr_merged = cbr_merged.join(sub_venues_sorted.drop(["Postcode"], 1).set_index("Suburb"), on="Suburb")

print(cbr_merged.shape)
cbr_merged.head() # check the last columns!

(104, 18)


Unnamed: 0,Suburb,Postcode,City,Country,State,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Acton,2601,Canberra,Australia,Australian Capital Territory,-35.28,149.12,2,Café,Coffee Shop,Plaza,Concert Hall,Food Truck,Museum,Sandwich Place,Dim Sum Restaurant,Pub,Indian Restaurant
1,Ainslie,2602,Canberra,Australia,Australian Capital Territory,-35.26,149.15,2,Business Service,Café,Shopping Plaza,Grocery Store,Winery,Field,Food & Drink Shop,Flower Shop,Flea Market,Fish & Chips Shop
2,Amaroo,2914,Canberra,Australia,Australian Capital Territory,-35.17,149.13,2,Pharmacy,Athletics & Sports,Winery,Fast Food Restaurant,Food Truck,Food Court,Food & Drink Shop,Flower Shop,Flea Market,Fish & Chips Shop
3,Aranda,2614,Canberra,Australia,Australian Capital Territory,-35.26,149.08,2,Bar,Café,Fountain,Food Truck,Food Court,Food & Drink Shop,Flower Shop,Flea Market,Fish & Chips Shop,Filipino Restaurant
4,Australian National University,200,Canberra,Australia,Australian Capital Territory,-35.28,149.12,2,Café,Coffee Shop,Plaza,Concert Hall,Food Truck,Museum,Sandwich Place,Dim Sum Restaurant,Pub,Indian Restaurant


In [49]:
# sort the results by Cluster Labels
print(cbr_merged.shape)
cbr_merged.sort_values(["Cluster Labels"], inplace=True)
cbr_merged

(104, 18)


Unnamed: 0,Suburb,Postcode,City,Country,State,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
103,Yarralumla,2600,Canberra,Australia,Australian Capital Territory,-35.31,149.10,0,Café,Doner Restaurant,Pizza Place,Grocery Store,Winery,Field,Food Court,Food & Drink Shop,Flower Shop,Flea Market
36,Florey,2615,Canberra,Australia,Australian Capital Territory,-35.23,149.05,0,Bus Stop,Winery,Field,Food Truck,Food Court,Food & Drink Shop,Flower Shop,Flea Market,Fish & Chips Shop,Filipino Restaurant
88,Richardson,2905,Canberra,Australia,Australian Capital Territory,-35.43,149.12,0,Grocery Store,Winery,Farm,Food Court,Food & Drink Shop,Flower Shop,Flea Market,Fish & Chips Shop,Filipino Restaurant,Field
49,Hall,2618,Canberra,Australia,Australian Capital Territory,-35.17,149.07,0,Home Service,Market,Trail,Shop & Service,Fast Food Restaurant,Food Court,Food & Drink Shop,Flower Shop,Flea Market,Fish & Chips Shop
99,Waramanga,2611,Canberra,Australia,Australian Capital Territory,-35.35,149.06,1,Grocery Store,Winery,Farm,Food Court,Food & Drink Shop,Flower Shop,Flea Market,Fish & Chips Shop,Filipino Restaurant,Field
96,Torrens,2607,Canberra,Australia,Australian Capital Territory,-35.37,149.09,1,Burger Joint,Rugby Pitch,Shop & Service,Bus Station,Field,Food Truck,Food Court,Food & Drink Shop,Flower Shop,Flea Market
70,Mawson,2607,Canberra,Australia,Australian Capital Territory,-35.36,149.10,2,Locksmith,Winery,Field,Food Truck,Food Court,Food & Drink Shop,Flower Shop,Flea Market,Fish & Chips Shop,Filipino Restaurant
69,Manuka,2603,Canberra,Australia,Australian Capital Territory,-35.31,149.13,2,Café,Hotel,Coffee Shop,Breakfast Spot,Gym,Japanese Restaurant,French Restaurant,Music Venue,Event Space,Cantonese Restaurant
68,Macquarie,2614,Canberra,Australia,Australian Capital Territory,-35.25,149.06,2,Vietnamese Restaurant,Pharmacy,Shopping Plaza,Winery,Farm,Food & Drink Shop,Flower Shop,Flea Market,Fish & Chips Shop,Filipino Restaurant
66,Macarthur,2904,Canberra,Australia,Australian Capital Territory,-35.41,149.13,2,Supermarket,Pizza Place,Gym / Fitness Center,Gym,Café,Sports Club,Shopping Mall,Gas Station,Winery,Fast Food Restaurant


In [53]:
# create map
import matplotlib.cm as cm
import matplotlib.colors as colors

map_clusters = folium.Map(location=[lat_cbr, lon_cbr], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, post, sub, cluster in zip(cbr_merged['Latitude'], cbr_merged['Longitude'], cbr_merged['Postcode'], cbr_merged['Suburb'], cbr_merged['Cluster Labels']):
    label = folium.Popup('{} : {} - Cluster {}'.format(sub, post, cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [54]:
# cluster 1
cbr_merged.loc[cbr_merged['Cluster Labels'] == 0, cbr_merged.columns[[1] + list(range(5, cbr_merged.shape[1]))]]

Unnamed: 0,Postcode,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
103,2600,-35.31,149.1,0,Café,Doner Restaurant,Pizza Place,Grocery Store,Winery,Field,Food Court,Food & Drink Shop,Flower Shop,Flea Market
36,2615,-35.23,149.05,0,Bus Stop,Winery,Field,Food Truck,Food Court,Food & Drink Shop,Flower Shop,Flea Market,Fish & Chips Shop,Filipino Restaurant
88,2905,-35.43,149.12,0,Grocery Store,Winery,Farm,Food Court,Food & Drink Shop,Flower Shop,Flea Market,Fish & Chips Shop,Filipino Restaurant,Field
49,2618,-35.17,149.07,0,Home Service,Market,Trail,Shop & Service,Fast Food Restaurant,Food Court,Food & Drink Shop,Flower Shop,Flea Market,Fish & Chips Shop


In [55]:
# cluster 2
cbr_merged.loc[cbr_merged['Cluster Labels'] == 1, cbr_merged.columns[[1] + list(range(5, cbr_merged.shape[1]))]]

Unnamed: 0,Postcode,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
99,2611,-35.35,149.06,1,Grocery Store,Winery,Farm,Food Court,Food & Drink Shop,Flower Shop,Flea Market,Fish & Chips Shop,Filipino Restaurant,Field
96,2607,-35.37,149.09,1,Burger Joint,Rugby Pitch,Shop & Service,Bus Station,Field,Food Truck,Food Court,Food & Drink Shop,Flower Shop,Flea Market


In [56]:
# cluster 3
cbr_merged.loc[cbr_merged['Cluster Labels'] == 2, cbr_merged.columns[[1] + list(range(5, cbr_merged.shape[1]))]]

Unnamed: 0,Postcode,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
70,2607,-35.36,149.10,2,Locksmith,Winery,Field,Food Truck,Food Court,Food & Drink Shop,Flower Shop,Flea Market,Fish & Chips Shop,Filipino Restaurant
69,2603,-35.31,149.13,2,Café,Hotel,Coffee Shop,Breakfast Spot,Gym,Japanese Restaurant,French Restaurant,Music Venue,Event Space,Cantonese Restaurant
68,2614,-35.25,149.06,2,Vietnamese Restaurant,Pharmacy,Shopping Plaza,Winery,Farm,Food & Drink Shop,Flower Shop,Flea Market,Fish & Chips Shop,Filipino Restaurant
66,2904,-35.41,149.13,2,Supermarket,Pizza Place,Gym / Fitness Center,Gym,Café,Sports Club,Shopping Mall,Gas Station,Winery,Fast Food Restaurant
65,2606,-35.34,149.08,2,Café,Skating Rink,Gym / Fitness Center,Italian Restaurant,Asian Restaurant,Grocery Store,Winery,Fast Food Restaurant,Food & Drink Shop,Flower Shop
64,2602,-35.25,149.13,2,Athletics & Sports,Light Rail Station,Thrift / Vintage Store,Bus Station,Winery,Field,Food Truck,Food Court,Food & Drink Shop,Flower Shop
63,2615,-35.22,149.03,2,Soccer Field,Grocery Store,Winery,Fast Food Restaurant,Food Court,Food & Drink Shop,Flower Shop,Flea Market,Fish & Chips Shop,Filipino Restaurant
101,2611,-35.31,149.07,2,Historic Site,Winery,Farm,Food Court,Food & Drink Shop,Flower Shop,Flea Market,Fish & Chips Shop,Filipino Restaurant,Field
62,2604,-35.32,149.15,2,Go Kart Track,Hotel,Train Station,Liquor Store,Thrift / Vintage Store,Museum,Fast Food Restaurant,Food & Drink Shop,Flower Shop,Flea Market
98,2903,-35.40,149.09,2,Public Bathroom,Winery,Farm,Food Court,Food & Drink Shop,Flower Shop,Flea Market,Fish & Chips Shop,Filipino Restaurant,Field


In [57]:
# cluster 4
cbr_merged.loc[cbr_merged['Cluster Labels'] == 3, cbr_merged.columns[[1] + list(range(5, cbr_merged.shape[1]))]]

Unnamed: 0,Postcode,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
102,2606,-35.35,149.09,3,Sandwich Place,Burger Joint,Liquor Store,Fast Food Restaurant,Coffee Shop,Multiplex,Café,Food Court,Sports Bar,Steakhouse
16,2604,-35.32,149.15,3,Go Kart Track,Hotel,Train Station,Liquor Store,Thrift / Vintage Store,Museum,Fast Food Restaurant,Food & Drink Shop,Flower Shop,Flea Market
93,2606,-35.35,149.09,3,Sandwich Place,Burger Joint,Liquor Store,Fast Food Restaurant,Coffee Shop,Multiplex,Café,Food Court,Sports Bar,Steakhouse


In [58]:
# cluster 5
cbr_merged.loc[cbr_merged['Cluster Labels'] == 4, cbr_merged.columns[[1] + list(range(5, cbr_merged.shape[1]))]]

Unnamed: 0,Postcode,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
92,2611,-35.35,149.05,4,Bar,Australian Restaurant,Optical Shop,Fast Food Restaurant,Food Truck,Food Court,Food & Drink Shop,Flower Shop,Flea Market,Fish & Chips Shop
90,2600,-35.3,149.15,4,Playground,Café,Restaurant,Park,Coffee Shop,Plaza,Farm,Flower Shop,Flea Market,Fish & Chips Shop
67,2615,-35.21,149.01,4,Pool,Bus Stop,Coffee Shop,Field,Food Truck,Food Court,Food & Drink Shop,Flower Shop,Flea Market,Fish & Chips Shop
45,2904,-35.41,149.11,4,Café,Pharmacy,Shopping Mall,Field,Winery,Fast Food Restaurant,Food & Drink Shop,Flower Shop,Flea Market,Fish & Chips Shop
51,2914,-35.2,149.16,4,Soccer Field,Park,Winery,Farm,Food & Drink Shop,Flower Shop,Flea Market,Fish & Chips Shop,Filipino Restaurant,Field
