## 1.In this project, I get the most common venue categories in each neighborhood in Hong Kong, and then use this feature to group the neighborhoods into clusters. I will use the k-means clustering algorithm to complete this task. Finally, I will use the Folium library to visualize the neighborhoods in Hong Kong and their emerging clusters

## 2.I will use the geodata from https://geodata.gov.hk/gs/? to get the district in Hong Kong and use Foursquare to collect other data as well.

In [1]:
import numpy as np
import pandas as pd 
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
import json
from geopy.geocoders import Nominatim
import requests
from pandas.io.json import json_normalize
import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans
import folium 


In [2]:
column_names = ['Neighborhood', 'Latitude', 'Longitude'] 

In [3]:
neighborhoods = pd.DataFrame(columns=column_names)
neighborhoods

Unnamed: 0,Neighborhood,Latitude,Longitude


In [4]:
dis_data = pd.read_excel('DC_16BC.xlsx')

In [5]:
dis_data.rename(columns={'區議會分區（英文名稱)' : 'District'}, inplace = True)

In [6]:
neighborhoods['Neighborhood']= dis_data['District'][4:22]

In [7]:
neighborhoods.reset_index(drop=True, inplace = True)
neighborhoods['Neighborhood'][13] = 'North District'
neighborhoods['Neighborhood'][3] = 'Southern District'

In [8]:
for i in neighborhoods['Neighborhood']:

    address = i +', HK'

    geolocator = Nominatim(user_agent="hk_explorer")
    location = geolocator.geocode(address)
    latitude = location.latitude
    longitude = location.longitude
    neighborhoods.Latitude[neighborhoods.Neighborhood == i] = latitude
    neighborhoods.Longitude[neighborhoods.Neighborhood == i] = longitude

In [9]:
neighborhoods

Unnamed: 0,Neighborhood,Latitude,Longitude
0,Central and Western,22.2818,114.158
1,Wan Chai,22.279,114.172
2,Eastern,22.2731,114.234
3,Southern District,22.2193,114.225
4,Sham Shui Po,22.3282,114.161
5,Kowloon City,22.3302,114.19
6,Wong Tai Sin,22.3417,114.194
7,Kwun Tong,22.3129,114.226
8,Yau Tsim Mong,22.3188,114.162
9,Kwai Tsing,22.341,114.104


In [10]:
map_newyork = folium.Map(location=[latitude, longitude], zoom_start=10)


for lat, lng, neighborhood in zip(neighborhoods['Latitude'], neighborhoods['Longitude'], neighborhoods['Neighborhood']):
    label = '{}'.format(neighborhood)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_newyork)  
    
map_newyork

In [11]:
CLIENT_ID = 'OXM0XB5KMQBK1WEWY10DAOWS51W2OW0WXN2MMFE5HUEI1O03'
CLIENT_SECRET = 'UKGBWJ4LMOAZSQZNUFDGHAGYKO0M3HPV5VC1LMP2HK1XVJJW'
VERSION = '20180605'

In [12]:
neighborhoods.loc[0, 'Neighborhood']

'Central and Western'

In [13]:
neighborhood_latitude = neighborhoods.loc[0, 'Latitude'] 
neighborhood_longitude = neighborhoods.loc[0, 'Longitude'] 

neighborhood_name = neighborhoods.loc[0, 'Neighborhood'] 

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of Central and Western are 22.281824, 114.1578631.


In [14]:
radius = 500 # define radius

url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude, 
    neighborhood_longitude, 
    radius)
url # display URL

'https://api.foursquare.com/v2/venues/explore?&client_id=OXM0XB5KMQBK1WEWY10DAOWS51W2OW0WXN2MMFE5HUEI1O03&client_secret=UKGBWJ4LMOAZSQZNUFDGHAGYKO0M3HPV5VC1LMP2HK1XVJJW&v=20180605&ll=22.281824,114.1578631&radius=500'

In [15]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5f1c1e341b8cb27516b331a5'},
 'response': {'suggestedFilters': {'header': 'Tap to show:',
   'filters': [{'name': 'Open now', 'key': 'openNow'}]},
  'headerLocation': 'Central and Western District',
  'headerFullLocation': 'Central and Western District, Hong Kong',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 148,
  'suggestedBounds': {'ne': {'lat': 22.286324004500006,
    'lng': 114.16271715449736},
   'sw': {'lat': 22.277323995499994, 'lng': 114.15300904550264}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4c7fa26501df37045bb4e6ac',
       'name': 'Mandarin Grill + Bar (文華扒房＋酒吧)',
       'location': {'address': '1/F, Mandarin Oriental Hong Kong, 5 Connaught Road Central, Central',
        'lat': 22.281928,
   

In [16]:
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [17]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) 


filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]


nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)


nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

  nearby_venues = json_normalize(venues)


Unnamed: 0,name,categories,lat,lng
0,Mandarin Grill + Bar (文華扒房＋酒吧),Steakhouse,22.281928,114.159408
1,Mandarin Oriental Hong Kong (香港文華東方酒店),Hotel,22.281857,114.159382
2,Mott 32 (卅二公館),Dim Sum Restaurant,22.280286,114.15908
3,XYZ,Cycle Studio,22.280877,114.157108
4,Man Wah (文華廳),Cantonese Restaurant,22.281993,114.159242


In [18]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, )
            
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [19]:
hk_venues = getNearbyVenues(names=neighborhoods['Neighborhood'],
                                   latitudes=neighborhoods['Latitude'],
                                   longitudes=neighborhoods['Longitude']
                                  )

Central and Western
Wan Chai
Eastern
Southern District
Sham Shui Po
Kowloon City
Wong Tai Sin
Kwun Tong
Yau Tsim Mong
Kwai Tsing
Tsuen Wan
Tuen Mun
Yuen Long
North District
Tai Po
Sha Tin
Sai Kung
Islands


In [20]:
print(hk_venues.shape)
hk_venues.head()

(371, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Central and Western,22.281824,114.157863,Mandarin Grill + Bar (文華扒房＋酒吧),22.281928,114.159408,Steakhouse
1,Central and Western,22.281824,114.157863,Mandarin Oriental Hong Kong (香港文華東方酒店),22.281857,114.159382,Hotel
2,Central and Western,22.281824,114.157863,Mott 32 (卅二公館),22.280286,114.15908,Dim Sum Restaurant
3,Central and Western,22.281824,114.157863,XYZ,22.280877,114.157108,Cycle Studio
4,Central and Western,22.281824,114.157863,Man Wah (文華廳),22.281993,114.159242,Cantonese Restaurant


In [21]:
hk_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Central and Western,30,30,30,30,30,30
Eastern,2,2,2,2,2,2
Islands,1,1,1,1,1,1
Kowloon City,30,30,30,30,30,30
Kwai Tsing,3,3,3,3,3,3
Kwun Tong,30,30,30,30,30,30
North District,1,1,1,1,1,1
Sai Kung,30,30,30,30,30,30
Sha Tin,30,30,30,30,30,30
Sham Shui Po,30,30,30,30,30,30


In [22]:
print('There are {} uniques categories.'.format(len(hk_venues['Venue Category'].unique())))

There are 104 uniques categories.


In [23]:

hk_onehot = pd.get_dummies(hk_venues[['Venue Category']], prefix="", prefix_sep="")

hk_onehot['Neighborhood'] = hk_venues['Neighborhood'] 

fixed_columns = [hk_onehot.columns[-1]] + list(hk_onehot.columns[:-1])
hk_onehot = hk_onehot[fixed_columns]

hk_onehot.head()

Unnamed: 0,Neighborhood,American Restaurant,Art Gallery,Asian Restaurant,Astrologer,BBQ Joint,Bakery,Bar,Beijing Restaurant,Bookstore,Buffet,Burger Joint,Bus Station,Bus Stop,Butcher,Café,Cantonese Restaurant,Cha Chaan Teng,Chinese Breakfast Place,Chinese Restaurant,City Hall,Climbing Gym,Clothing Store,Coffee Shop,Cupcake Shop,Cycle Studio,Deli / Bodega,Department Store,Dessert Shop,Dim Sum Restaurant,Donburi Restaurant,Dongbei Restaurant,Dumpling Restaurant,Electronics Store,Fast Food Restaurant,Flea Market,French Restaurant,Furniture / Home Store,Gastropub,German Restaurant,Gym,Gym / Fitness Center,Halal Restaurant,Hong Kong Restaurant,Hostel,Hotel,Hotel Bar,Hotpot Restaurant,Ice Cream Shop,Indian Restaurant,Indonesian Restaurant,Italian Restaurant,Japanese Restaurant,Lebanese Restaurant,Light Rail Station,Lounge,Malay Restaurant,Market,Massage Studio,Middle Eastern Restaurant,Modern European Restaurant,Multiplex,Noodle House,Pakistani Restaurant,Paper / Office Supplies Store,Park,Pastry Shop,Performing Arts Venue,Pizza Place,Playground,Plaza,Pool,Pub,Ramen Restaurant,Reservoir,Restaurant,River,Sandwich Place,Seafood Restaurant,Shabu-Shabu Restaurant,Shanghai Restaurant,Shopping Mall,Snack Place,Spanish Restaurant,Speakeasy,Sports Bar,Sri Lankan Restaurant,Steakhouse,Supermarket,Sushi Restaurant,Szechuan Restaurant,Tapas Restaurant,Temple,Thai Restaurant,Theme Park,Toy / Game Store,Tunnel,Udon Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Waterfall,Wine Shop,Yoga Studio,Yunnan Restaurant,Zoo
0,Central and Western,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Central and Western,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Central and Western,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,Central and Western,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,Central and Western,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [24]:
hk_onehot.shape

(371, 105)

In [25]:
hk_grouped = hk_onehot.groupby('Neighborhood').mean().reset_index()
hk_grouped

Unnamed: 0,Neighborhood,American Restaurant,Art Gallery,Asian Restaurant,Astrologer,BBQ Joint,Bakery,Bar,Beijing Restaurant,Bookstore,Buffet,Burger Joint,Bus Station,Bus Stop,Butcher,Café,Cantonese Restaurant,Cha Chaan Teng,Chinese Breakfast Place,Chinese Restaurant,City Hall,Climbing Gym,Clothing Store,Coffee Shop,Cupcake Shop,Cycle Studio,Deli / Bodega,Department Store,Dessert Shop,Dim Sum Restaurant,Donburi Restaurant,Dongbei Restaurant,Dumpling Restaurant,Electronics Store,Fast Food Restaurant,Flea Market,French Restaurant,Furniture / Home Store,Gastropub,German Restaurant,Gym,Gym / Fitness Center,Halal Restaurant,Hong Kong Restaurant,Hostel,Hotel,Hotel Bar,Hotpot Restaurant,Ice Cream Shop,Indian Restaurant,Indonesian Restaurant,Italian Restaurant,Japanese Restaurant,Lebanese Restaurant,Light Rail Station,Lounge,Malay Restaurant,Market,Massage Studio,Middle Eastern Restaurant,Modern European Restaurant,Multiplex,Noodle House,Pakistani Restaurant,Paper / Office Supplies Store,Park,Pastry Shop,Performing Arts Venue,Pizza Place,Playground,Plaza,Pool,Pub,Ramen Restaurant,Reservoir,Restaurant,River,Sandwich Place,Seafood Restaurant,Shabu-Shabu Restaurant,Shanghai Restaurant,Shopping Mall,Snack Place,Spanish Restaurant,Speakeasy,Sports Bar,Sri Lankan Restaurant,Steakhouse,Supermarket,Sushi Restaurant,Szechuan Restaurant,Tapas Restaurant,Temple,Thai Restaurant,Theme Park,Toy / Game Store,Tunnel,Udon Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Waterfall,Wine Shop,Yoga Studio,Yunnan Restaurant,Zoo
0,Central and Western,0.0,0.0,0.0,0.0,0.033333,0.033333,0.033333,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.033333,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.033333,0.033333,0.033333,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.066667,0.066667,0.0,0.0,0.0,0.0,0.066667,0.066667,0.0,0.0,0.066667,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.0,0.0,0.033333,0.0,0.033333,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.0
1,Eastern,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Islands,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Kowloon City,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.033333,0.033333,0.0,0.033333,0.033333,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.2,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.0,0.0,0.0,0.0,0.066667,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.066667,0.0,0.0,0.033333,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Kwai Tsing,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,Kwun Tong,0.0,0.033333,0.0,0.0,0.033333,0.0,0.0,0.033333,0.0,0.0,0.033333,0.0,0.0,0.0,0.033333,0.0,0.066667,0.0,0.166667,0.0,0.033333,0.0,0.033333,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.033333,0.0,0.0,0.033333,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.033333,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.0
6,North District,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
7,Sai Kung,0.033333,0.0,0.0,0.0,0.033333,0.0,0.033333,0.0,0.0,0.0,0.066667,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.066667,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.033333,0.0,0.0,0.033333,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.033333,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.0
8,Sha Tin,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.066667,0.0,0.0,0.033333,0.0,0.0,0.066667,0.033333,0.0,0.0,0.0,0.033333,0.066667,0.0,0.0,0.0,0.033333,0.033333,0.033333,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.033333,0.0,0.0,0.033333,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.033333,0.033333,0.0,0.0,0.0,0.0,0.0,0.033333,0.033333,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.0
9,Sham Shui Po,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.033333,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.0,0.0,0.0,0.033333,0.033333,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.0,0.0,0.0,0.0,0.033333,0.033333,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.033333,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.0


In [26]:
hk_grouped.shape

(17, 105)

In [27]:
num_top_venues = 5

for hood in hk_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = hk_grouped[hk_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Central and Western----
                venue  freq
0  Italian Restaurant  0.07
1               Hotel  0.07
2           Speakeasy  0.07
3           Hotel Bar  0.07
4              Lounge  0.07


----Eastern----
                   venue  freq
0                   Park   0.5
1               Bus Stop   0.5
2    American Restaurant   0.0
3  Performing Arts Venue   0.0
4         Sandwich Place   0.0


----Islands----
                 venue  freq
0            Reservoir   1.0
1  American Restaurant   0.0
2          Art Gallery   0.0
3       Sandwich Place   0.0
4                River   0.0


----Kowloon City----
               venue  freq
0    Thai Restaurant  0.20
1       Dessert Shop  0.20
2  Hotpot Restaurant  0.07
3   Halal Restaurant  0.07
4       Noodle House  0.07


----Kwai Tsing----
                   venue  freq
0                 Tunnel  0.33
1     Chinese Restaurant  0.33
2        Thai Restaurant  0.33
3    American Restaurant  0.00
4  Performing Arts Venue  0.00


----Kwun Tong-

In [28]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [29]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = hk_grouped['Neighborhood']

for ind in np.arange(hk_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(hk_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Central and Western,Japanese Restaurant,Speakeasy,Hotel,Italian Restaurant,Lounge,Hotel Bar,BBQ Joint,Café,Yoga Studio,Gym / Fitness Center
1,Eastern,Bus Stop,Park,Department Store,Dim Sum Restaurant,Donburi Restaurant,Dongbei Restaurant,Dumpling Restaurant,Electronics Store,Fast Food Restaurant,Flea Market
2,Islands,Reservoir,Zoo,Italian Restaurant,Dessert Shop,Dim Sum Restaurant,Donburi Restaurant,Dongbei Restaurant,Dumpling Restaurant,Electronics Store,Fast Food Restaurant
3,Kowloon City,Dessert Shop,Thai Restaurant,Noodle House,Hotpot Restaurant,Halal Restaurant,Japanese Restaurant,Restaurant,Coffee Shop,Pastry Shop,Park
4,Kwai Tsing,Chinese Restaurant,Tunnel,Thai Restaurant,Zoo,Gastropub,Dessert Shop,Dim Sum Restaurant,Donburi Restaurant,Dongbei Restaurant,Dumpling Restaurant


In [30]:
kclusters = 5

hk_grouped_clustering = hk_grouped.drop('Neighborhood', 1)

kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(hk_grouped_clustering)

kmeans.labels_[0:10] 

array([2, 0, 1, 2, 4, 2, 3, 2, 2, 2], dtype=int32)

In [31]:
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

hk_merged = neighborhoods

hk_merged = hk_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')
hk_merged.dropna(axis=0, inplace=True)
hk_merged['Cluster Labels'] = hk_merged['Cluster Labels'].astype('int32')
hk_merged

Unnamed: 0,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Central and Western,22.2818,114.158,2,Japanese Restaurant,Speakeasy,Hotel,Italian Restaurant,Lounge,Hotel Bar,BBQ Joint,Café,Yoga Studio,Gym / Fitness Center
1,Wan Chai,22.279,114.172,2,Japanese Restaurant,Hotel,Cantonese Restaurant,Hong Kong Restaurant,Chinese Restaurant,Steakhouse,Dumpling Restaurant,Noodle House,Hotel Bar,Coffee Shop
2,Eastern,22.2731,114.234,0,Bus Stop,Park,Department Store,Dim Sum Restaurant,Donburi Restaurant,Dongbei Restaurant,Dumpling Restaurant,Electronics Store,Fast Food Restaurant,Flea Market
4,Sham Shui Po,22.3282,114.161,2,Noodle House,Dessert Shop,Shopping Mall,Chinese Restaurant,Snack Place,Hong Kong Restaurant,Italian Restaurant,Vietnamese Restaurant,Sushi Restaurant,Hostel
5,Kowloon City,22.3302,114.19,2,Dessert Shop,Thai Restaurant,Noodle House,Hotpot Restaurant,Halal Restaurant,Japanese Restaurant,Restaurant,Coffee Shop,Pastry Shop,Park
6,Wong Tai Sin,22.3417,114.194,2,Coffee Shop,Fast Food Restaurant,Park,Bus Station,Pool,Chinese Restaurant,Astrologer,Hong Kong Restaurant,Market,Szechuan Restaurant
7,Kwun Tong,22.3129,114.226,2,Chinese Restaurant,Cha Chaan Teng,Japanese Restaurant,Pizza Place,Shopping Mall,Climbing Gym,Restaurant,Department Store,Paper / Office Supplies Store,Café
8,Yau Tsim Mong,22.3188,114.162,2,Japanese Restaurant,Noodle House,Coffee Shop,Clothing Store,Dim Sum Restaurant,Vietnamese Restaurant,Modern European Restaurant,Chinese Restaurant,Shanghai Restaurant,Shopping Mall
9,Kwai Tsing,22.341,114.104,4,Chinese Restaurant,Tunnel,Thai Restaurant,Zoo,Gastropub,Dessert Shop,Dim Sum Restaurant,Donburi Restaurant,Dongbei Restaurant,Dumpling Restaurant
10,Tsuen Wan,22.3717,114.113,2,Dessert Shop,Noodle House,Japanese Restaurant,Italian Restaurant,Coffee Shop,Shopping Mall,Cha Chaan Teng,Sushi Restaurant,Burger Joint,Ramen Restaurant


In [39]:

map_clusters = folium.Map(location=[latitude, longitude], zoom_start=10)

x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

markers_colors = []
for lat, lon, poi, cluster in zip(hk_merged['Latitude'], hk_merged['Longitude'], hk_merged['Neighborhood'], hk_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
      
map_clusters

In [52]:
#Cluster1
hk_merged.loc[hk_merged['Cluster Labels'] == 0, hk_merged.columns[[0] + list(range(4, hk_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,Eastern,Bus Stop,Park,Department Store,Dim Sum Restaurant,Donburi Restaurant,Dongbei Restaurant,Dumpling Restaurant,Electronics Store,Fast Food Restaurant,Flea Market


In [53]:
#Cluster2
hk_merged.loc[hk_merged['Cluster Labels'] == 1, hk_merged.columns[[0] + list(range(4, hk_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
17,Islands,Reservoir,Zoo,Italian Restaurant,Dessert Shop,Dim Sum Restaurant,Donburi Restaurant,Dongbei Restaurant,Dumpling Restaurant,Electronics Store,Fast Food Restaurant


In [55]:
#Cluster3
hk_merged.loc[hk_merged['Cluster Labels'] == 2, hk_merged.columns[[0] + list(range(4, hk_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Central and Western,Japanese Restaurant,Speakeasy,Hotel,Italian Restaurant,Lounge,Hotel Bar,BBQ Joint,Café,Yoga Studio,Gym / Fitness Center
1,Wan Chai,Japanese Restaurant,Hotel,Cantonese Restaurant,Hong Kong Restaurant,Chinese Restaurant,Steakhouse,Dumpling Restaurant,Noodle House,Hotel Bar,Coffee Shop
4,Sham Shui Po,Noodle House,Dessert Shop,Shopping Mall,Chinese Restaurant,Snack Place,Hong Kong Restaurant,Italian Restaurant,Vietnamese Restaurant,Sushi Restaurant,Hostel
5,Kowloon City,Dessert Shop,Thai Restaurant,Noodle House,Hotpot Restaurant,Halal Restaurant,Japanese Restaurant,Restaurant,Coffee Shop,Pastry Shop,Park
6,Wong Tai Sin,Coffee Shop,Fast Food Restaurant,Park,Bus Station,Pool,Chinese Restaurant,Astrologer,Hong Kong Restaurant,Market,Szechuan Restaurant
7,Kwun Tong,Chinese Restaurant,Cha Chaan Teng,Japanese Restaurant,Pizza Place,Shopping Mall,Climbing Gym,Restaurant,Department Store,Paper / Office Supplies Store,Café
8,Yau Tsim Mong,Japanese Restaurant,Noodle House,Coffee Shop,Clothing Store,Dim Sum Restaurant,Vietnamese Restaurant,Modern European Restaurant,Chinese Restaurant,Shanghai Restaurant,Shopping Mall
10,Tsuen Wan,Dessert Shop,Noodle House,Japanese Restaurant,Italian Restaurant,Coffee Shop,Shopping Mall,Cha Chaan Teng,Sushi Restaurant,Burger Joint,Ramen Restaurant
11,Tuen Mun,Coffee Shop,Shopping Mall,Department Store,Cantonese Restaurant,Shanghai Restaurant,Light Rail Station,Hong Kong Restaurant,German Restaurant,Fast Food Restaurant,Park
12,Yuen Long,Noodle House,Chinese Restaurant,Fast Food Restaurant,Japanese Restaurant,Market,Shopping Mall,Sushi Restaurant,Bakery,Bookstore,Coffee Shop


In [56]:
#Cluster4
hk_merged.loc[hk_merged['Cluster Labels'] == 3, hk_merged.columns[[0] + list(range(4, hk_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
13,North District,Waterfall,Zoo,Gastropub,Dessert Shop,Dim Sum Restaurant,Donburi Restaurant,Dongbei Restaurant,Dumpling Restaurant,Electronics Store,Fast Food Restaurant


In [57]:
#Cluster5
hk_merged.loc[hk_merged['Cluster Labels'] == 4, hk_merged.columns[[0] + list(range(4, hk_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
9,Kwai Tsing,Chinese Restaurant,Tunnel,Thai Restaurant,Zoo,Gastropub,Dessert Shop,Dim Sum Restaurant,Donburi Restaurant,Dongbei Restaurant,Dumpling Restaurant
