In [1]:
import requests # library to handle requests
import pandas as pd # library for data analsysis
import numpy as np # library to handle data in a vectorized manner
from geopy.geocoders import Nominatim # module to convert an address into latitude and longitude values
import folium # plotting library
from sklearn.cluster import KMeans
# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors
# libraries for displaying images
from IPython.display import Image
from IPython.core.display import HTML
# tranforming json file into a pandas dataframe library
from pandas.io.json import json_normalize

In [2]:
# read data file
df = pd.read_csv('mrt_lrt_data2.csv')
df.head()

Unnamed: 0,station_name,type,lat,lng
0,Jurong East,MRT,1.333207,103.742308
1,Bukit Batok,MRT,1.349069,103.749596
2,Bukit Gombak,MRT,1.359043,103.751863
3,Choa Chu Kang,MRT,1.385417,103.744316
4,Yew Tee,MRT,1.397383,103.747523


In [3]:
# Basic type information about data
df.dtypes

station_name     object
type             object
lat             float64
lng             float64
dtype: object

In [4]:
#check number of LRT and MRT stations
df_group = df.groupby('type').count()
df_group

Unnamed: 0_level_0,station_name,lat,lng
type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
LRT,38,38,38
MRT,119,119,119


In [5]:
#Get MRT data into new dataframe - confine analysis to MRT stations
df_MRT = df.loc[df['type'] == 'MRT']
df_MRT.head()

Unnamed: 0,station_name,type,lat,lng
0,Jurong East,MRT,1.333207,103.742308
1,Bukit Batok,MRT,1.349069,103.749596
2,Bukit Gombak,MRT,1.359043,103.751863
3,Choa Chu Kang,MRT,1.385417,103.744316
4,Yew Tee,MRT,1.397383,103.747523


In [6]:
# check size of MRT data
df_MRT.shape

(119, 4)

In [7]:
# create map of MRT stations using latitude and longitude values
latitude = 1.333207
longitude = 103.742308
map_stations = folium.Map(location=[latitude, longitude], zoom_start=11)
# add markers to map
for lat, lng, label in zip(df_MRT['lat'], df_MRT['lng'], df_MRT['station_name']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
    [lat, lng],
    radius=5,
    popup=label,
    color='blue',
    fill=True,
    fill_color='#3186cc',
    fill_opacity=0.7,
    parse_html=False).add_to(map_stations)
map_stations

In [8]:
#Define Foursquare Credentials and Version
CLIENT_ID = 'FDY3QVWSW23NMBO4PHY5FOJQ5UVROZ5F0ZX1E1TIUFY0AF2P' # your Foursquare ID
CLIENT_SECRET = 'SWWAWUV1E1JRRGXIBXWOBVRDZINMQD5FCYJBULLTZ0GLPLGB' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version
print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: FDY3QVWSW23NMBO4PHY5FOJQ5UVROZ5F0ZX1E1TIUFY0AF2P
CLIENT_SECRET:SWWAWUV1E1JRRGXIBXWOBVRDZINMQD5FCYJBULLTZ0GLPLGB


In [9]:
#first station locaton 
df_MRT.loc[0, 'station_name']

'Jurong East'

In [10]:
#get latitude and longitude of Jurong East
station_latitude = df_MRT.loc[0, 'lat'] # station latitude value
station_longitude = df_MRT.loc[0, 'lng'] # station longitude value
station_name = df_MRT.loc[0, 'station_name'] # station name
print('Latitude and longitude values of {} are {}, {}.'.format(station_name, station_latitude, station_longitude))

Latitude and longitude values of Jurong East are 1.333207, 103.742308.


In [11]:
LIMIT = 100 # limit of number of venues returned by Foursquare API
radius = 500 # define radius
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID,
    CLIENT_SECRET,
    VERSION,
    station_latitude,
    station_longitude,
    radius,
    LIMIT)
url # display URL

'https://api.foursquare.com/v2/venues/explore?&client_id=FDY3QVWSW23NMBO4PHY5FOJQ5UVROZ5F0ZX1E1TIUFY0AF2P&client_secret=SWWAWUV1E1JRRGXIBXWOBVRDZINMQD5FCYJBULLTZ0GLPLGB&v=20180605&ll=1.333207,103.742308&radius=500&limit=100'

In [12]:
results = requests.get(url).json() #get results
results

{'meta': {'code': 200, 'requestId': '5e70eed6963d29001b3e06fe'},
 'response': {'suggestedFilters': {'header': 'Tap to show:',
   'filters': [{'name': 'Open now', 'key': 'openNow'}]},
  'headerLocation': 'Jurong East',
  'headerFullLocation': 'Jurong East, Singapore',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 74,
  'suggestedBounds': {'ne': {'lat': 1.3377070045000046,
    'lng': 103.74680081871648},
   'sw': {'lat': 1.3287069954999955, 'lng': 103.73781518128351}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '51c0356c498e19c820f5e48e',
       'name': 'UNIQLO',
       'location': {'address': '#02-37 & #03-30, Jem',
        'crossStreet': '50 Jurong Gateway Rd',
        'lat': 1.333175096970959,
        'lng': 103.74316037528905,
        'labeledLat

In [13]:
#define get_category_type function
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [14]:
venues = results['response']['groups'][0]['items']
nearby_venues = json_normalize(venues)

In [15]:
# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]
# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)
# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]
nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,UNIQLO,Clothing Store,1.333175,103.74316
1,MUJI 無印良品,Furniture / Home Store,1.333187,103.743064
2,Song Fa Bak Kut Teh 松發肉骨茶,Chinese Restaurant,1.333394,103.74342
3,Johan Paris,Bakery,1.334083,103.742384
4,The Rink,Skating Rink,1.333424,103.740345


In [16]:
#Map for regions
venues_map = folium.Map(location=[station_latitude, station_longitude], zoom_start=20)
# add a red circle marker to represent the Jurong East station
folium.features.CircleMarker(
    [station_latitude, station_longitude],
    radius=10,
    color='red',
    popup='Jurong East',
    fill = True,
    fill_color = 'red',
    fill_opacity = 0.6
).add_to(venues_map)
# add all venues as blue circle markers
for lat, lng, label in zip(nearby_venues.lat, nearby_venues.lng, nearby_venues.categories):
    folium.features.CircleMarker(
        [lat, lng],
        radius=5,
        color='blue',
        popup=label,
        fill = True,
        fill_color='blue',
        fill_opacity=0.6
    ).add_to(venues_map)

venues_map

In [17]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0])) # number of venues returned

74 venues were returned by Foursquare.


In [18]:
#create function to repeat same process
def getNearbyVenues(names, latitudes, longitudes, radius=500, LIMIT = 100):
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])
    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Station', 
                  'Station_Latitude', 
                  'Station_Longitude', 
                  'Venue', 
                  'Venue_Latitude', 
                  'Venue_Longitude', 
                  'Venue_Category']
    return(nearby_venues)

In [19]:
station_venues = getNearbyVenues(names=df_MRT['station_name'],
                                 latitudes=df_MRT['lat'], 
                                 longitudes=df_MRT['lng'])

Jurong East
Bukit Batok
Bukit Gombak
Choa Chu Kang
Yew Tee
Kranji
Marsiling
Woodlands
Admiralty
Sembawang
Yishun
Khatib
Yio Chu Kang
Ang Mo Kio
Bishan
Braddell
Toa Payoh
Novena
Newton
Orchard
Somerset
Dhoby Ghaut
City Hall
Raffles Place
Marina Bay
Marina South Pier
Tuas Link
Tuas West Road
Tuas Crescent
Gul Circle
Joo Koon
Pioneer
Boon Lay
Lakeside
Chinese Garden
Clementi
Dover
Buona Vista
Commonwealth
Queenstown
Redhill
Tiong Bahru
Outram Park
Tanjong Pagar
Bugis
Lavender
Kallang
Aljunied
Paya Lebar
Eunos
Kembangan
Bedok
Tanah Merah
Simei
Tampines
Pasir Ris
Expo
Changi Airport
HarbourFront
Chinatown
Clarke Quay
Little India
Farrer Park
Boon Keng
Potong Pasir
Woodleigh
Serangoon
Kovan
Hougang
Buangkok
Sengkang
Punggol
Bras Basah
Esplanade
Promenade
Nicoll Highway
Stadium
Mountbatten
Dakota
MacPherson
Tai Seng
Bartley
Lorong Chuan
Marymount
Caldecott
Botanic Gardens
Farrer Road
Holland Village
one-north
Kent Ridge
Haw Par Villa
Pasir Panjang
Labrador Park
Telok Blangah
Bayfront
Bukit Pa

In [20]:
#check size of station_venues
print(station_venues.shape)
station_venues.sample(10)

(4729, 7)


Unnamed: 0,Station,Station_Latitude,Station_Longitude,Venue,Venue_Latitude,Venue_Longitude,Venue_Category
2953,Punggol,1.405191,103.902367,Ichiban Boshi,1.406971,103.902436,Japanese Restaurant
2129,Tampines,1.354467,103.943325,Häagen-Dazs,1.352681,103.944228,Ice Cream Shop
1081,Marina South Pier,1.271422,103.863581,Marina South Pier Cafe,1.271344,103.863916,Breakfast Spot
888,City Hall,1.293119,103.852089,JOE & THE JUICE,1.293493,103.852928,Juice Bar
1602,Tanjong Pagar,1.276385,103.846771,The Wallich Grill-Bar-Lounge,1.273573,103.844342,Restaurant
1697,Bugis,1.300747,103.855873,The Masses,1.297132,103.856812,French Restaurant
3576,Holland Village,1.311189,103.796119,Sunday Folks,1.311676,103.796848,Ice Cream Shop
1456,Outram Park,1.280319,103.839459,Keong Saik Bakery,1.280301,103.841595,Café
1041,Raffles Place,1.284001,103.85155,Dimbulah,1.280787,103.85145,Café
4221,Telok Ayer,1.282285,103.848584,Tandoori Corner,1.281576,103.847753,Indian Restaurant


In [21]:
#find unique categories number
print('There are {} unique venue categories.'.format(len(station_venues['Venue_Category'].unique())))

There are 318 unique venue categories.


In [22]:
#find unique venue number
print('There are {} uniques venues.'.format(len(station_venues['Venue'].unique())))#number of unique venues

There are 3225 uniques venues.


In [23]:
station_venues.groupby('Station').count()

Unnamed: 0_level_0,Station_Latitude,Station_Longitude,Venue,Venue_Latitude,Venue_Longitude,Venue_Category
Station,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Admiralty,9,9,9,9,9,9
Aljunied,51,51,51,51,51,51
Ang Mo Kio,40,40,40,40,40,40
Bartley,7,7,7,7,7,7
Bayfront,49,49,49,49,49,49
Beauty World,76,76,76,76,76,76
Bedok,60,60,60,60,60,60
Bedok North,22,22,22,22,22,22
Bedok Reservoir,14,14,14,14,14,14
Bencoolen,100,100,100,100,100,100


In [24]:
# one hot encoding
station_onehot = pd.get_dummies(station_venues[['Venue_Category']], prefix="", prefix_sep="")
# add station column back to dataframe
station_onehot['Station'] = station_venues['Station']
# move station column to the first column
fixed_columns = [station_onehot.columns[-1]] + list(station_onehot.columns[:-1])
station_onehot = station_onehot[fixed_columns]
station_onehot.head()

Unnamed: 0,Station,Accessories Store,Airport,Airport Lounge,American Restaurant,Arcade,Argentinian Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,...,Water Park,Waterfall,Waterfront,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio,Yunnan Restaurant
0,Jurong East,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Jurong East,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Jurong East,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Jurong East,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Jurong East,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [25]:
#size of new one-hot dataframe
station_onehot.shape

(4729, 319)

In [26]:
#group rows by station and by taking the mean of the frequency of occurrence of each category
station_grouped = station_onehot.groupby('Station').mean().reset_index()
station_grouped

Unnamed: 0,Station,Accessories Store,Airport,Airport Lounge,American Restaurant,Arcade,Argentinian Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,...,Water Park,Waterfall,Waterfront,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio,Yunnan Restaurant
0,Admiralty,0.000000,0.000000,0.000000,0.000000,0.000000,0.00,0.000000,0.00,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000
1,Aljunied,0.000000,0.000000,0.000000,0.000000,0.000000,0.00,0.000000,0.00,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000
2,Ang Mo Kio,0.000000,0.000000,0.000000,0.000000,0.000000,0.00,0.000000,0.00,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000
3,Bartley,0.000000,0.000000,0.000000,0.000000,0.000000,0.00,0.000000,0.00,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000
4,Bayfront,0.020408,0.000000,0.000000,0.000000,0.000000,0.00,0.020408,0.00,0.000000,...,0.000000,0.000000,0.040816,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000
5,Beauty World,0.000000,0.000000,0.000000,0.000000,0.000000,0.00,0.000000,0.00,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000
6,Bedok,0.000000,0.000000,0.000000,0.016667,0.000000,0.00,0.000000,0.00,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.016667,0.000000,0.000000,0.000000
7,Bedok North,0.000000,0.000000,0.000000,0.000000,0.000000,0.00,0.000000,0.00,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000
8,Bedok Reservoir,0.000000,0.000000,0.000000,0.000000,0.000000,0.00,0.000000,0.00,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000
9,Bencoolen,0.000000,0.000000,0.000000,0.000000,0.000000,0.00,0.030000,0.02,0.010000,...,0.000000,0.000000,0.000000,0.020000,0.000000,0.0,0.000000,0.000000,0.020000,0.000000


In [27]:
#get size
station_grouped.shape

(118, 319)

In [28]:
#print each station along with the top 5 most common venues
num_top_venues = 5
for stn in station_grouped['Station']:
    print("----"+stn+"----")
    temp = station_grouped[station_grouped['Station'] == stn].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Admiralty----
               venue  freq
0        Supermarket  0.22
1             Bakery  0.22
2  Food & Drink Shop  0.11
3         Food Court  0.11
4        Bus Station  0.11


----Aljunied----
                venue  freq
0  Chinese Restaurant  0.12
1        Noodle House  0.10
2    Asian Restaurant  0.08
3                Café  0.06
4  Dim Sum Restaurant  0.06


----Ang Mo Kio----
            venue  freq
0     Coffee Shop  0.12
1    Dessert Shop  0.08
2      Food Court  0.05
3  Sandwich Place  0.05
4     Supermarket  0.05


----Bartley----
           venue  freq
0   Noodle House  0.29
1    Bus Station  0.29
2     Food Truck  0.14
3           Café  0.14
4  Metro Station  0.14


----Bayfront----
      venue  freq
0     Hotel  0.08
1  Boutique  0.06
2   Theater  0.06
3    Lounge  0.04
4  Tea Room  0.04


----Beauty World----
                venue  freq
0  Chinese Restaurant  0.11
1                Café  0.11
2   Korean Restaurant  0.09
3          Food Court  0.05
4              Bakery 

               venue  freq
0        Coffee Shop  0.50
1               Park  0.25
2  Electronics Store  0.25
3  Accessories Store  0.00
4       Outlet Store  0.00


----HarbourFront----
                  venue  freq
0    Chinese Restaurant  0.07
1   Japanese Restaurant  0.06
2  Fast Food Restaurant  0.04
3           Coffee Shop  0.04
4      Toy / Game Store  0.04


----Haw Par Villa----
           venue  freq
0  Metro Station   0.1
1     Food Court   0.1
2         Office   0.1
3     Theme Park   0.1
4  Shopping Mall   0.1


----Hillview----
           venue  freq
0           Café  0.10
1    Supermarket  0.07
2    Coffee Shop  0.07
3  Shopping Mall  0.07
4           Pool  0.07


----Holland Village----
                venue  freq
0              Bakery  0.10
1      Ice Cream Shop  0.07
2                 Bar  0.05
3                 Spa  0.05
4  Italian Restaurant  0.05


----Hougang----
                  venue  freq
0    Chinese Restaurant  0.09
1            Food Court  0.09
2  Fast Food R

               venue  freq
0             Bakery  0.09
1  Indian Restaurant  0.09
2         Food Court  0.06
3        Coffee Shop  0.06
4           Wine Bar  0.06


----Somerset----
                 venue  freq
0        Shopping Mall  0.07
1                Hotel  0.07
2  Japanese Restaurant  0.07
3       Clothing Store  0.05
4   Chinese Restaurant  0.04


----Stadium----
                 venue  freq
0                 Café  0.07
1              Stadium  0.07
2  Japanese Restaurant  0.04
3       Ice Cream Shop  0.04
4       Clothing Store  0.04


----Stevens----
                  venue  freq
0   Japanese Restaurant  0.22
1                 Diner  0.22
2                  Café  0.11
3    Italian Restaurant  0.11
4  Fast Food Restaurant  0.11


----Tai Seng----
                venue  freq
0  Chinese Restaurant  0.13
1          Food Court  0.09
2  Seafood Restaurant  0.09
3    Asian Restaurant  0.09
4                Café  0.07


----Tampines----
                 venue  freq
0               Bake

In [37]:
#Sort venues in descending order
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    return row_categories_sorted.index.values[0:num_top_venues]

In [38]:
num_top_venues = 10
indicators = ['st', 'nd', 'rd']
# create columns according to number of top venues
columns = ['Station']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))
# create a new dataframe
station_venues_sorted = pd.DataFrame(columns=columns)
station_venues_sorted['Station'] = station_grouped['Station']
for ind in np.arange(station_grouped.shape[0]):
    station_venues_sorted.iloc[ind, 1:] = return_most_common_venues(station_grouped.iloc[ind, :], num_top_venues)
station_venues_sorted.head()

Unnamed: 0,Station,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Admiralty,Bakery,Supermarket,Paper / Office Supplies Store,Bus Station,Food & Drink Shop,Food Court,Coffee Shop,Yunnan Restaurant,Food Stand,Flea Market
1,Aljunied,Chinese Restaurant,Noodle House,Asian Restaurant,Café,Vegetarian / Vegan Restaurant,Dim Sum Restaurant,Coffee Shop,Seafood Restaurant,Food Court,Indian Restaurant
2,Ang Mo Kio,Coffee Shop,Dessert Shop,Fast Food Restaurant,Supermarket,Sandwich Place,Food Court,Bubble Tea Shop,Japanese Restaurant,Malay Restaurant,Chinese Restaurant
3,Bartley,Noodle House,Bus Station,Café,Metro Station,Food Truck,Yunnan Restaurant,Flower Shop,Food,Food & Drink Shop,Food Court
4,Bayfront,Hotel,Theater,Boutique,Italian Restaurant,Waterfront,Japanese Restaurant,Lounge,Tea Room,Bridge,Bar


In [39]:
# set number of clusters
kclusters = 5
station_grouped_clustering = station_grouped.drop('Station', 1)
# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(station_grouped_clustering)
# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([1, 2, 4, 0, 4, 4, 4, 0, 0, 4])

In [43]:
# add clustering labels
station_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)
station_merged = df_MRT
# merge station_grouped with station_data to add latitude/longitude for each station
station_merged = station_merged.join(station_venues_sorted.set_index('Station'), on='station_name')
station_merged.head() # check dataframe

Unnamed: 0,station_name,type,lat,lng,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Jurong East,MRT,1.333207,103.742308,4.0,Japanese Restaurant,Chinese Restaurant,Coffee Shop,Food Court,Sandwich Place,Shopping Mall,Café,Multiplex,Sporting Goods Shop,Department Store
1,Bukit Batok,MRT,1.349069,103.749596,4.0,Coffee Shop,Chinese Restaurant,Food Court,Department Store,Grocery Store,Bus Station,Multiplex,Frozen Yogurt Shop,Malay Restaurant,Bowling Alley
2,Bukit Gombak,MRT,1.359043,103.751863,4.0,Food Court,Coffee Shop,Stadium,Restaurant,Sandwich Place,Fast Food Restaurant,Malay Restaurant,Gym / Fitness Center,Flea Market,Supermarket
3,Choa Chu Kang,MRT,1.385417,103.744316,4.0,Coffee Shop,Fast Food Restaurant,Italian Restaurant,Food Court,Mexican Restaurant,Sushi Restaurant,Supermarket,Bubble Tea Shop,Burrito Place,Shoe Store
4,Yew Tee,MRT,1.397383,103.747523,4.0,Fast Food Restaurant,Shopping Mall,Japanese Restaurant,Pool,Electronics Store,Sandwich Place,Diner,Food Court,Café,Yunnan Restaurant


In [44]:
station_merged.isnull() #check for NaN values

Unnamed: 0,station_name,type,lat,lng,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
1,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
3,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
5,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
6,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
7,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
8,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
9,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False


In [45]:
station_merged.dropna(axis = 0, inplace = True)
station_merged.isnull()

Unnamed: 0,station_name,type,lat,lng,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
1,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
3,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
5,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
6,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
7,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
8,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
9,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False


In [46]:
#cast cluster labels as integer for map
station_merged['Cluster Labels'] = station_merged['Cluster Labels'].astype(int)

In [47]:
#visualise results
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(station_merged['lat'], station_merged['lng'], station_merged['station_name'], station_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
    [lat, lon],
    radius=5,
    popup=label,
    color=rainbow[cluster - 1],
    fill=True,
    fill_color=rainbow[cluster - 1],
    fill_opacity=0.7).add_to(map_clusters)

map_clusters


In [None]:
#End