# SEGMENTING AND CLUSTERING HOTELS OF ALMATY CITY

## 1. Import necessary libraries

In [1]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

Solving environment: done

# All requested packages already installed.

Solving environment: done

# All requested packages already installed.

Libraries imported.


## 2. Define Foursquare Credentials (this cell is hidden because it contains confidential credentials)

In [2]:
# The code was removed by Watson Studio for sharing.

## 3. Let's assume that our trip starts from the center point of the city, Dostyk Street. Let's convert this point to latitude and longitude coordinates

In [3]:
address = 'Dostyk street, Almaty, KZ'

# Nominatim is given a user_agent string identifying this notebook's requests
geolocator = Nominatim(user_agent="foursquare_agent")
location = geolocator.geocode(address)

# geocode() returns None when the address cannot be resolved; stop here with a
# clear message instead of an AttributeError on the attribute reads below
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))

latitude = location.latitude
longitude = location.longitude
print(latitude, longitude)

43.2613956 76.9545801


## 4. Then let's find hotels within a 2 km radius of this starting point

In [4]:
# Search term and search radius used to build the venue-search request
search_query, radius = 'hotel', 2000
print('{} .... OK!'.format(search_query))

hotel .... OK!


### 4.1. Define the corresponding URL

In [5]:
# Build the Foursquare venue-search request URL from the credentials, the
# starting coordinates and the search parameters defined in earlier cells
url = 'https://api.foursquare.com/v2/venues/search?client_id={}&client_secret={}&ll={},{}&v={}&query={}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, latitude, longitude, VERSION, search_query, radius, LIMIT)

# Display the URL with the credentials masked: echoing `url` itself would store
# CLIENT_ID and CLIENT_SECRET in the saved cell output of a shared notebook
url.replace(CLIENT_ID, '<CLIENT_ID>').replace(CLIENT_SECRET, '<CLIENT_SECRET>')

'https://api.foursquare.com/v2/venues/search?client_id=<CLIENT_ID>&client_secret=<CLIENT_SECRET>&ll=43.2613956,76.9545801&v=20190926&query=hotel&radius=2000&limit=30'

### 4.2. Examine results with GET request

In [6]:
# Send the search request. The timeout keeps the cell from hanging forever on a
# stalled connection, and raise_for_status() surfaces HTTP errors here rather
# than as a confusing KeyError when the response is unpacked in a later cell.
response = requests.get(url, timeout=30)
response.raise_for_status()
results = response.json()
results

{'meta': {'code': 200, 'requestId': '5d90feb2c267e9002bce21ae'},
 'response': {'venues': [{'id': '4dd6a5e852b1a5c64438ed7e',
    'name': 'Grand Hotel Tien Shan',
    'location': {'address': 'ул. Богенбай батыра, 115',
     'crossStreet': 'уг. ул. Кунаева',
     'lat': 43.254025551701226,
     'lng': 76.94917524968251,
     'labeledLatLngs': [{'label': 'display',
       'lat': 43.254025551701226,
       'lng': 76.94917524968251}],
     'distance': 930,
     'postalCode': '050000',
     'cc': 'KZ',
     'city': 'Алматы',
     'state': 'Алматы',
     'country': 'Қазақстан',
     'formattedAddress': ['050000',
      'Алматы',
      'Алматы',
      'ул. Богенбай батыра, 115 (уг. ул. Кунаева)',
      'Қазақстан']},
    'categories': [{'id': '4bf58dd8d48988d1fa931735',
      'name': 'Hotel',
      'pluralName': 'Hotels',
      'shortName': 'Hotel',
      'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/travel/hotel_',
       'suffix': '.png'},
      'primary': True}],
    'referralI

## 5. Work with Data

### 5.1. Transform data to Pandas dataframe with some formatting

In [7]:
# The search response keeps its venue records under response -> venues
venues = results['response']['venues']

# Flatten the nested venue records into a dataframe (nested keys become
# dotted column names such as 'location.lat') and preview the last rows
dataframe = json_normalize(venues)
dataframe.tail()

Unnamed: 0,categories,hasPerk,id,location.address,location.cc,location.city,location.country,location.crossStreet,location.distance,location.formattedAddress,location.labeledLatLngs,location.lat,location.lng,location.postalCode,location.state,name,referralId,venuePage.id
25,"[{'id': '52f2ab2ebcbc57f1066b8b4f', 'name': 'B...",False,5353872d498e98da6de2f4e1,,KZ,,Қазақстан,,1821,[Қазақстан],"[{'label': 'display', 'lat': 43.24510422990856...",43.245104,76.956669,,,Bus stop. In front of hotel Kazakhstan,v-1569783474,
26,"[{'id': '4bf58dd8d48988d127941735', 'name': 'C...",False,4d12d6dd957fa1cdfa26719f,,KZ,,Қазақстан,,1890,[Қазақстан],"[{'label': 'display', 'lat': 43.244531, 'lng':...",43.244531,76.95733,,,Premium Hall @ Dostyk Hotel,v-1569783474,
27,"[{'id': '4bf58dd8d48988d1fa931735', 'name': 'H...",False,59ca0fb68ad62e0832f359c7,,KZ,Алма-Ата,Қазақстан,,1950,"[Алмаатинская область, Алма-Ата, Қазақстан]","[{'label': 'display', 'lat': 43.247426, 'lng':...",43.247426,76.940058,,Алмаатинская область,Grand Opera Hotel,v-1569783474,
28,"[{'id': '4bf58dd8d48988d176941735', 'name': 'G...",False,511a421de4b08e47829ec983,,KZ,,Қазақстан,,1911,[Қазақстан],"[{'label': 'display', 'lat': 43.24441848948258...",43.244418,76.951025,,,Dostyk Hotel Gym,v-1569783474,
29,"[{'id': '4bf58dd8d48988d116941735', 'name': 'B...",False,51f1774c498e51a8c533b741,,KZ,,Қазақстан,,1952,[Қазақстан],"[{'label': 'display', 'lat': 43.24405205973003...",43.244052,76.95098,,,Bar Dostyk In Dostyk Hotel,v-1569783474,


### 5.2. Define required information and filter data

In [8]:
# Keep only the venue name, its category list, and its latitude/longitude
filtered_columns = ['name', 'categories', 'location.lat', 'location.lng']
dataframe_hotels = dataframe.loc[:, filtered_columns]

# function that extracts the category of the venue
def get_category_type(row):
    """Return the name of the first category listed for a venue row.

    The category list is read from row['categories'] and, when that key is
    missing, from row['venue.categories'].

    Returns None when the venue has no categories (empty list) or when the
    stored value is not a list at all (e.g. None or NaN).
    """
    try:
        categories_list = row['categories']
    except KeyError:
        # Only a missing key should trigger the fallback; any other error is a
        # real problem and must not be silently swallowed
        categories_list = row['venue.categories']

    # Nothing to report for an empty list or a non-list placeholder value
    if not isinstance(categories_list, (list, tuple)) or len(categories_list) == 0:
        return None

    return categories_list[0]['name']

# Replace each row's raw category list with the name of its first category
dataframe_hotels['categories'] = dataframe_hotels.apply(get_category_type, axis=1)

# Strip the flattened-JSON prefix from the headers, e.g. 'location.lat' -> 'lat'
dataframe_hotels.columns = [header.rsplit('.', 1)[-1] for header in dataframe_hotels.columns]

dataframe_hotels

Unnamed: 0,name,categories,lat,lng
0,Grand Hotel Tien Shan,Hotel,43.254026,76.949175
1,Гостиница «Казахстан» / Kazakhstan Hotel,Hotel,43.244809,76.957196
2,Достық / The Dostyk Hotel («Достық» қонақ үйі),Hotel,43.244793,76.951078
3,Уют / Uyut Hotel,Hotel,43.25994,76.933866
4,City Hotel Tien Shan,Hotel,43.247956,76.951013
5,Ambassador Hotel,Hotel,43.251647,76.940106
6,hotel complex Otrar,Club House,43.260502,76.950768
7,Hotel Voyage,Hotel,43.258795,76.946049
8,Turkistan Hotel,Hotel,43.262868,76.951259
9,Hotel grand Saphire&Spa,Hotel,43.254383,76.949371


## 6. Visualize hotels nearby Dostyk Street

In [9]:
# List the names of all venues returned by the hotel search
dataframe_hotels['name']

0                              Grand Hotel Tien Shan
1           Гостиница «Казахстан» / Kazakhstan Hotel
2     Достық / The Dostyk Hotel («Достық» қонақ үйі)
3                                   Уют / Uyut Hotel
4                               City Hotel Tien Shan
5                                   Ambassador Hotel
6                                hotel complex Otrar
7                                       Hotel Voyage
8                                    Turkistan Hotel
9                            Hotel grand Saphire&Spa
10                               Soluxe Hotel Almaty
11                        Marriott Astana Saad Hotel
12                                     Hotel Berkana
13                  Hotel D' Rami Гостиница "Д'Рами"
14                                     Hotel Berkana
15                     Hotel Kazhol Conference Rooms
16                                         The Hotel
17                                Гостиница «Алматы»
18                                     Отрар /

In [11]:
# Base map centred on the geocoded Dostyk Street starting point
venues_map = folium.Map(location=[latitude, longitude], zoom_start=13)

# Large red circle marker for the starting point itself
start_marker = folium.features.CircleMarker(
    [latitude, longitude],
    radius=10,
    color='red',
    popup='Dostyk Street',
    fill=True,
    fill_color='red',
    fill_opacity=0.6,
)
start_marker.add_to(venues_map)

# One smaller blue circle marker per search result; the popup shows the
# venue's category
hotel_points = zip(dataframe_hotels.lat, dataframe_hotels.lng, dataframe_hotels.categories)
for lat, lng, label in hotel_points:
    hotel_marker = folium.features.CircleMarker(
        [lat, lng],
        radius=5,
        color='blue',
        popup=label,
        fill=True,
        fill_color='blue',
        fill_opacity=0.6,
    )
    hotel_marker.add_to(venues_map)

# display map
venues_map

## 7. Data Exploring

### 7.1. Exploring first hotel

In [12]:
# Name of the hotel stored under row label 0 of the hotels table
dataframe_hotels.loc[0, 'name']

'Grand Hotel Tien Shan'

### 7.2. Get the hotel's latitude and longitude values

In [13]:
# Read the name and coordinates of the hotel stored under row label 0
hotel_name = dataframe_hotels.loc[0, 'name']
hotel_latitude = dataframe_hotels.loc[0, 'lat']
hotel_longitude = dataframe_hotels.loc[0, 'lng']

message = 'Latitude and longitude values of {} are {}, {}.'
print(message.format(hotel_name, hotel_latitude, hotel_longitude))

Latitude and longitude values of Grand Hotel Tien Shan are 43.254025551701226, 76.94917524968251.


### 7.3. Getting the Hotel's Venues

In [14]:
LIMIT = 100 # limit of number of venues returned by Foursquare API
radius = 500 # define radius

# Build the Foursquare "explore" request URL around the selected hotel
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    hotel_latitude, 
    hotel_longitude, 
    radius, 
    LIMIT)

# Display the URL with the credentials masked: echoing `url` itself would store
# CLIENT_ID and CLIENT_SECRET in the saved cell output of a shared notebook
url.replace(CLIENT_ID, '<CLIENT_ID>').replace(CLIENT_SECRET, '<CLIENT_SECRET>')

'https://api.foursquare.com/v2/venues/explore?&client_id=<CLIENT_ID>&client_secret=<CLIENT_SECRET>&v=20190926&ll=43.254025551701226,76.94917524968251&radius=500&limit=100'

In [15]:
# Send the explore request. The timeout keeps the cell from hanging forever on
# a stalled connection, and raise_for_status() surfaces HTTP errors here rather
# than as a confusing KeyError when the response is unpacked in a later cell.
response = requests.get(url, timeout=30)
response.raise_for_status()
results = response.json()
results

{'meta': {'code': 200, 'requestId': '5d90feeb724750002c17d5f5'},
 'response': {'suggestedFilters': {'header': 'Tap to show:',
   'filters': [{'name': 'Open now', 'key': 'openNow'}]},
  'headerLocation': 'Almaty',
  'headerFullLocation': 'Almaty',
  'headerLocationGranularity': 'city',
  'totalResults': 37,
  'suggestedBounds': {'ne': {'lat': 43.25852555620123,
    'lng': 76.95534229855934},
   'sw': {'lat': 43.24952554720122, 'lng': 76.94300820080568}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4dbd34916a23e294ba40708f',
       'name': 'Кунаевские фонтаны',
       'location': {'address': 'Кунаева',
        'crossStreet': 'Богенбай Батыра',
        'lat': 43.25298864996133,
        'lng': 76.94978746164514,
        'labeledLatLngs': [{'label': 'display',
          '

### 7.4. Getting categories

In [16]:
# function that extracts the category of the venue
def get_category_type(row):
    """Return the name of the first category listed for a venue row.

    The category list is read from row['categories'] and, when that key is
    missing, from row['venue.categories'].

    Returns None when the venue has no categories (empty list) or when the
    stored value is not a list at all (e.g. None or NaN).
    """
    try:
        categories_list = row['categories']
    except KeyError:
        # Only a missing key should trigger the fallback; any other error is a
        # real problem and must not be silently swallowed
        categories_list = row['venue.categories']

    # Nothing to report for an empty list or a non-list placeholder value
    if not isinstance(categories_list, (list, tuple)) or len(categories_list) == 0:
        return None

    return categories_list[0]['name']

### 7.5. Check the data in dataframe

In [17]:
# The explore response keeps its recommendations under
# response -> groups[0] -> items
venues = results['response']['groups'][0]['items']

# Columns to retain once the nested records have been flattened
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']

# Flatten the JSON records and keep only the columns of interest
nearby_venues = json_normalize(venues).loc[:, filtered_columns]

# Replace each row's raw category list with the name of its first category
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# Keep only the last dotted component of each header, e.g. 'venue.location.lat' -> 'lat'
nearby_venues.columns = [header.rsplit(".", 1)[-1] for header in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Кунаевские фонтаны,Park,43.252989,76.949787
1,ШЕФ,Comfort Food Restaurant,43.253548,76.952662
2,Театральная мастерская «ДОМ Q»,Theater,43.253646,76.94686
3,Дареджани,Caucasian Restaurant,43.256409,76.949907
4,Grand Hotel Tien Shan,Hotel,43.254026,76.949175


In [18]:
# Report how many venue rows the explore request produced
venue_count = len(nearby_venues)
print('{} venues were returned by Foursquare.'.format(venue_count))

37 venues were returned by Foursquare.


### 7.6. Function for finding nearby venues for each hotel

In [19]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    """Collect the venues Foursquare recommends around each given location.

    Parameters
    ----------
    names, latitudes, longitudes : iterables
        Walked in parallel with zip(); each triple describes one location.
        The name is printed as the location is processed.
    radius : int, optional
        Value sent as the `radius` query parameter of the explore request
        (default 500).

    Returns
    -------
    pandas.DataFrame
        One row per (location, venue) pair with the columns 'hotel_name',
        'hotel_latitude', 'hotel_longitude', 'Venue', 'Venue Latitude',
        'Venue Longitude' and 'Venue Category'. When no venue is collected the
        frame has zero rows but still carries these columns.

    Raises
    ------
    requests.HTTPError
        If the API answers with an HTTP error status.

    Notes
    -----
    CLIENT_ID, CLIENT_SECRET, VERSION and LIMIT are read from the notebook's
    global namespace.
    """
    column_names = ['hotel_name',
                    'hotel_latitude',
                    'hotel_longitude',
                    'Venue',
                    'Venue Latitude',
                    'Venue Longitude',
                    'Venue Category']

    rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)

        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID,
            CLIENT_SECRET,
            VERSION,
            lat,
            lng,
            radius,
            LIMIT)

        # make the GET request; bound the wait and fail loudly on HTTP errors
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        items = response.json()["response"]['groups'][0]['items']

        # keep only the relevant information for each nearby venue
        for item in items:
            venue = item['venue']
            categories = venue['categories']
            # A venue may come back with an empty category list; record None
            # instead of failing with an IndexError on categories[0]
            category_name = categories[0]['name'] if categories else None
            rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                category_name))

    # Passing the column names to the constructor also works when `rows` is
    # empty, where assigning seven names to a column-less frame would raise
    return pd.DataFrame(rows, columns=column_names)

## 8. Create new dataframe for hotels' venues and explore

In [20]:
# Fetch the nearby venues for every row of the hotels table
hotels_venues = getNearbyVenues(
    dataframe_hotels['name'],
    dataframe_hotels['lat'],
    dataframe_hotels['lng'],
)

Grand Hotel Tien Shan
Гостиница «Казахстан» / Kazakhstan Hotel
Достық / The Dostyk Hotel («Достық» қонақ үйі)
Уют / Uyut Hotel
City Hotel Tien Shan
Ambassador Hotel
hotel complex Otrar
Hotel Voyage
Turkistan Hotel
Hotel grand Saphire&Spa
Soluxe Hotel Almaty
Marriott Astana Saad Hotel
Hotel Berkana
Hotel D' Rami Гостиница "Д'Рами"
Hotel Berkana
Hotel Kazhol Conference Rooms
The Hotel
Гостиница «Алматы»
Отрар / Otrar
Premium Hall Kazakhstan Hotel
Renion Residence
Kazzhol Hotel Almaty
Казжол / Kazzhol
The Shilla hotel
The Dostyk Hotel SPA-Center
Bus stop. In front of hotel Kazakhstan
Premium Hall @ Dostyk Hotel
Grand Opera Hotel
Dostyk Hotel Gym
Bar Dostyk In Dostyk Hotel


### 8.1. Check the size of hotels' venues new dataframe

In [21]:
# Print (rows, columns) of the combined venues table, then preview its first rows
print(hotels_venues.shape)
hotels_venues.head()

(1330, 7)


Unnamed: 0,hotel_name,hotel_latitude,hotel_longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Grand Hotel Tien Shan,43.254026,76.949175,Кунаевские фонтаны,43.252989,76.949787,Park
1,Grand Hotel Tien Shan,43.254026,76.949175,ШЕФ,43.253548,76.952662,Comfort Food Restaurant
2,Grand Hotel Tien Shan,43.254026,76.949175,Театральная мастерская «ДОМ Q»,43.253646,76.94686,Theater
3,Grand Hotel Tien Shan,43.254026,76.949175,Дареджани,43.256409,76.949907,Caucasian Restaurant
4,Grand Hotel Tien Shan,43.254026,76.949175,Grand Hotel Tien Shan,43.254026,76.949175,Hotel


### 8.2. How many venues returned for each hotel?

In [22]:
# Count the non-null entries of every column per hotel name
hotels_venues.groupby('hotel_name').count()

Unnamed: 0_level_0,hotel_latitude,hotel_longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
hotel_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Ambassador Hotel,74,74,74,74,74,74
Bar Dostyk In Dostyk Hotel,37,37,37,37,37,37
Bus stop. In front of hotel Kazakhstan,45,45,45,45,45,45
City Hotel Tien Shan,38,38,38,38,38,38
Dostyk Hotel Gym,38,38,38,38,38,38
Grand Hotel Tien Shan,37,37,37,37,37,37
Grand Opera Hotel,68,68,68,68,68,68
Hotel Berkana,80,80,80,80,80,80
"Hotel D' Rami Гостиница ""Д'Рами""",54,54,54,54,54,54
Hotel Kazhol Conference Rooms,36,36,36,36,36,36


### 8.3. How many unique categories of venues?

In [23]:
# Distinct values found in the 'Venue Category' column
unique_categories = hotels_venues['Venue Category'].unique()
print('There are {} uniques categories.'.format(len(unique_categories)))

There are 123 uniques categories.


## 9. Analyze Each Hotel

In [24]:
# One indicator column per venue category; the empty prefix and separator keep
# the bare category name as the column header
hotels_onehot = pd.get_dummies(hotels_venues[['Venue Category']], prefix="", prefix_sep="")

# Attach the hotel name of each venue row to the indicator table
hotels_onehot['hotel_name'] = hotels_venues['hotel_name'] 

# Rotate the column order so that the last column comes first
ordered_columns = hotels_onehot.columns.tolist()
fixed_columns = ordered_columns[-1:] + ordered_columns[:-1]
hotels_onehot = hotels_onehot[fixed_columns]

hotels_onehot.head()

Unnamed: 0,hotel_name,American Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,BBQ Joint,Bakery,Bank,Bar,Bath House,Beer Bar,Big Box Store,Bike Shop,Bookstore,Boutique,Brewery,Bridal Shop,Bubble Tea Shop,Burger Joint,Business Center,Cafeteria,Café,Candy Store,Caucasian Restaurant,Chinese Restaurant,Clothing Store,Cocktail Bar,Coffee Shop,Comfort Food Restaurant,Concert Hall,Convenience Store,Cosmetics Shop,Cupcake Shop,Dance Studio,Diner,Eastern European Restaurant,Electronics Store,Factory,Falafel Restaurant,Fast Food Restaurant,Flea Market,Fountain,Furniture / Home Store,Gaming Cafe,Gastropub,Gay Bar,General Entertainment,General Travel,German Restaurant,Gift Shop,Gourmet Shop,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Gym Pool,Health & Beauty Service,Historic Site,Hobby Shop,Hookah Bar,Hotel,Hotel Bar,Ice Cream Shop,Indian Chinese Restaurant,Indian Restaurant,Italian Restaurant,Karaoke Bar,Korean Restaurant,Lingerie Store,Lounge,Market,Mediterranean Restaurant,Men's Store,Middle Eastern Restaurant,Miscellaneous Shop,Mobile Phone Shop,Modern European Restaurant,Moroccan Restaurant,Movie Theater,Multiplex,Music Store,Music Venue,New American Restaurant,Nightclub,Noodle House,Office,Opera House,Other Nightlife,Park,Pedestrian Plaza,Pet Service,Pet Store,Pharmacy,Pizza Place,Plaza,Pub,Restaurant,Russian Restaurant,Salon / Barbershop,Sandwich Place,Scandinavian Restaurant,Seafood Restaurant,Shoe Store,Shopping Mall,Snack Place,Spa,Spanish Restaurant,Sporting Goods Shop,Sports Bar,Stadium,Steakhouse,Supermarket,Sushi Restaurant,Tea Room,Thai Restaurant,Theater,Turkish Restaurant,Vietnamese Restaurant,Warehouse Store,Wine Shop,Women's Store,Yoga Studio
0,Grand Hotel Tien Shan,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Grand Hotel Tien Shan,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Grand Hotel Tien Shan,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0
3,Grand Hotel Tien Shan,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,Grand Hotel Tien Shan,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [25]:
# (rows, columns) of the one-hot encoded venues table
hotels_onehot.shape

(1330, 124)

### 9.1. Grouping rows by hotel and by taking the mean of the frequency of occurrence of each category

In [26]:
# Average every category indicator column per hotel name, giving the share of
# that hotel's venues falling in each category.
# NOTE: the grouping key is the name, so rows of different hotels that share
# the same name end up merged into a single group.
hotels_grouped = hotels_onehot.groupby('hotel_name').mean().reset_index()
hotels_grouped

Unnamed: 0,hotel_name,American Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,BBQ Joint,Bakery,Bank,Bar,Bath House,Beer Bar,Big Box Store,Bike Shop,Bookstore,Boutique,Brewery,Bridal Shop,Bubble Tea Shop,Burger Joint,Business Center,Cafeteria,Café,Candy Store,Caucasian Restaurant,Chinese Restaurant,Clothing Store,Cocktail Bar,Coffee Shop,Comfort Food Restaurant,Concert Hall,Convenience Store,Cosmetics Shop,Cupcake Shop,Dance Studio,Diner,Eastern European Restaurant,Electronics Store,Factory,Falafel Restaurant,Fast Food Restaurant,Flea Market,Fountain,Furniture / Home Store,Gaming Cafe,Gastropub,Gay Bar,General Entertainment,General Travel,German Restaurant,Gift Shop,Gourmet Shop,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Gym Pool,Health & Beauty Service,Historic Site,Hobby Shop,Hookah Bar,Hotel,Hotel Bar,Ice Cream Shop,Indian Chinese Restaurant,Indian Restaurant,Italian Restaurant,Karaoke Bar,Korean Restaurant,Lingerie Store,Lounge,Market,Mediterranean Restaurant,Men's Store,Middle Eastern Restaurant,Miscellaneous Shop,Mobile Phone Shop,Modern European Restaurant,Moroccan Restaurant,Movie Theater,Multiplex,Music Store,Music Venue,New American Restaurant,Nightclub,Noodle House,Office,Opera House,Other Nightlife,Park,Pedestrian Plaza,Pet Service,Pet Store,Pharmacy,Pizza Place,Plaza,Pub,Restaurant,Russian Restaurant,Salon / Barbershop,Sandwich Place,Scandinavian Restaurant,Seafood Restaurant,Shoe Store,Shopping Mall,Snack Place,Spa,Spanish Restaurant,Sporting Goods Shop,Sports Bar,Stadium,Steakhouse,Supermarket,Sushi Restaurant,Tea Room,Thai Restaurant,Theater,Turkish Restaurant,Vietnamese Restaurant,Warehouse Store,Wine Shop,Women's Store,Yoga Studio
0,Ambassador Hotel,0.013514,0.0,0.0,0.0,0.040541,0.0,0.027027,0.013514,0.0,0.067568,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.013514,0.013514,0.081081,0.013514,0.013514,0.0,0.0,0.0,0.148649,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.027027,0.0,0.0,0.0,0.0,0.0,0.013514,0.0,0.0,0.0,0.0,0.0,0.0,0.013514,0.0,0.013514,0.0,0.013514,0.013514,0.013514,0.013514,0.0,0.0,0.0,0.0,0.027027,0.0,0.013514,0.013514,0.0,0.013514,0.013514,0.013514,0.0,0.027027,0.0,0.013514,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.013514,0.013514,0.0,0.0,0.0,0.013514,0.013514,0.013514,0.067568,0.0,0.013514,0.0,0.013514,0.027027,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.013514,0.013514,0.040541,0.027027,0.0,0.0,0.0,0.0,0.0,0.013514,0.0,0.0
1,Bar Dostyk In Dostyk Hotel,0.0,0.0,0.0,0.027027,0.0,0.0,0.0,0.0,0.0,0.054054,0.0,0.0,0.0,0.0,0.0,0.027027,0.027027,0.0,0.0,0.0,0.0,0.0,0.081081,0.0,0.027027,0.027027,0.0,0.0,0.108108,0.027027,0.0,0.027027,0.027027,0.0,0.0,0.081081,0.027027,0.0,0.0,0.0,0.0,0.0,0.027027,0.0,0.027027,0.027027,0.027027,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.054054,0.0,0.0,0.0,0.0,0.027027,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.027027,0.0,0.0,0.027027,0.0,0.0,0.0,0.0,0.0,0.0,0.027027,0.0,0.027027,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.027027,0.054054,0.0,0.0,0.027027,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.027027,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Bus stop. In front of hotel Kazakhstan,0.0,0.0,0.0,0.0,0.022222,0.0,0.0,0.0,0.0,0.022222,0.0,0.022222,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.0,0.044444,0.022222,0.0,0.044444,0.088889,0.0,0.022222,0.022222,0.0,0.0,0.0,0.044444,0.022222,0.0,0.0,0.0,0.022222,0.0,0.022222,0.0,0.022222,0.0,0.022222,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.022222,0.088889,0.0,0.0,0.0,0.0,0.022222,0.0,0.022222,0.0,0.022222,0.0,0.0,0.0,0.0,0.022222,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.022222,0.022222,0.0,0.0,0.022222,0.022222,0.0,0.0,0.0,0.0,0.022222,0.022222,0.0,0.022222,0.0,0.0,0.022222,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.022222,0.0,0.044444,0.0,0.0,0.0,0.022222,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,City Hotel Tien Shan,0.0,0.0,0.0,0.026316,0.026316,0.0,0.0,0.0,0.0,0.026316,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.026316,0.0,0.0,0.105263,0.0,0.0,0.0,0.0,0.026316,0.131579,0.0,0.0,0.026316,0.026316,0.026316,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.052632,0.0,0.0,0.026316,0.026316,0.0,0.0,0.0,0.026316,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.026316,0.052632,0.0,0.0,0.0,0.0,0.026316,0.026316,0.0,0.0,0.026316,0.0,0.0,0.026316,0.0,0.0,0.0,0.026316,0.0,0.0,0.0,0.0,0.0,0.0,0.026316,0.0,0.0,0.026316,0.0,0.0,0.0,0.0,0.0,0.026316,0.026316,0.0,0.0,0.052632,0.0,0.0,0.0,0.0,0.0,0.026316,0.0,0.0,0.0,0.0,0.0,0.026316,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Dostyk Hotel Gym,0.0,0.0,0.0,0.026316,0.0,0.0,0.0,0.0,0.0,0.026316,0.0,0.0,0.0,0.0,0.0,0.026316,0.026316,0.0,0.0,0.0,0.0,0.0,0.078947,0.0,0.026316,0.026316,0.0,0.0,0.105263,0.026316,0.0,0.026316,0.052632,0.0,0.0,0.078947,0.026316,0.0,0.0,0.0,0.0,0.0,0.026316,0.0,0.026316,0.026316,0.026316,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.026316,0.0,0.0,0.0,0.0,0.0,0.026316,0.052632,0.0,0.0,0.0,0.0,0.026316,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.026316,0.0,0.0,0.026316,0.0,0.0,0.0,0.0,0.0,0.0,0.026316,0.0,0.026316,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.052632,0.0,0.0,0.026316,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.026316,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,Grand Hotel Tien Shan,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.054054,0.0,0.054054,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.054054,0.0,0.0,0.081081,0.0,0.027027,0.0,0.0,0.027027,0.108108,0.027027,0.0,0.0,0.0,0.027027,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.027027,0.0,0.027027,0.027027,0.0,0.0,0.0,0.0,0.0,0.0,0.027027,0.0,0.0,0.0,0.0,0.027027,0.0,0.0,0.0,0.027027,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.027027,0.027027,0.0,0.0,0.0,0.027027,0.027027,0.0,0.027027,0.027027,0.0,0.0,0.0,0.027027,0.027027,0.0,0.027027,0.027027,0.0,0.027027,0.0,0.0,0.027027,0.0,0.0,0.0,0.0,0.027027,0.0,0.0,0.0,0.0,0.0,0.027027
6,Grand Opera Hotel,0.014706,0.014706,0.0,0.0,0.029412,0.0,0.029412,0.029412,0.0,0.073529,0.0,0.0,0.0,0.0,0.0,0.0,0.014706,0.0,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.0,0.0,0.0,0.102941,0.0,0.0,0.0,0.0,0.0,0.0,0.014706,0.044118,0.0,0.0,0.0,0.0,0.0,0.014706,0.0,0.0,0.0,0.0,0.0,0.0,0.014706,0.014706,0.014706,0.0,0.0,0.0,0.014706,0.014706,0.0,0.0,0.0,0.0,0.029412,0.014706,0.014706,0.014706,0.0,0.014706,0.0,0.029412,0.0,0.014706,0.0,0.029412,0.0,0.0,0.0,0.0,0.014706,0.0,0.0,0.0,0.014706,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014706,0.0,0.0,0.0,0.0,0.0,0.0,0.014706,0.073529,0.0,0.014706,0.0,0.014706,0.014706,0.0,0.0,0.0,0.014706,0.0,0.0,0.0,0.014706,0.029412,0.014706,0.029412,0.029412,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014706
7,Hotel Berkana,0.0,0.0,0.0,0.0,0.025,0.0,0.025,0.025,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025,0.0,0.0,0.0,0.0,0.0,0.0,0.05,0.0,0.0,0.0,0.05,0.0,0.125,0.025,0.025,0.0,0.025,0.0,0.0,0.0,0.025,0.025,0.0,0.0,0.0,0.025,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05,0.0,0.0,0.0,0.0,0.0,0.0,0.05,0.025,0.025,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025,0.0,0.025,0.0,0.0,0.1,0.0,0.025,0.0,0.0,0.0,0.025,0.0,0.025,0.0,0.0,0.025,0.0,0.0,0.025,0.025,0.025,0.0,0.0,0.0,0.0,0.0,0.0,0.025,0.0,0.0
8,"Hotel D' Rami Гостиница ""Д'Рами""",0.0,0.0,0.0,0.0,0.018519,0.0,0.018519,0.018519,0.0,0.055556,0.0,0.0,0.0,0.0,0.0,0.0,0.018519,0.0,0.0,0.018519,0.0,0.0,0.074074,0.0,0.0,0.0,0.0,0.0,0.185185,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.037037,0.0,0.0,0.0,0.0,0.0,0.037037,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.018519,0.0,0.0,0.0,0.018519,0.018519,0.018519,0.0,0.0,0.0,0.0,0.018519,0.0,0.018519,0.018519,0.0,0.0,0.0,0.018519,0.0,0.0,0.0,0.037037,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.018519,0.0,0.0,0.0,0.0,0.0,0.018519,0.0,0.0,0.0,0.0,0.0,0.0,0.018519,0.0,0.018519,0.092593,0.0,0.018519,0.0,0.018519,0.018519,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.018519,0.018519,0.018519,0.018519,0.018519,0.0,0.0,0.0,0.0,0.0,0.018519,0.0,0.0
9,Hotel Kazhol Conference Rooms,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.027778,0.0,0.027778,0.0,0.0,0.0,0.0,0.0,0.027778,0.0,0.027778,0.0,0.027778,0.0,0.0,0.027778,0.0,0.0,0.0,0.055556,0.0,0.083333,0.027778,0.0,0.0,0.027778,0.0,0.0,0.0,0.0,0.027778,0.0,0.0,0.055556,0.027778,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.027778,0.0,0.0,0.0,0.055556,0.0,0.0,0.0,0.0,0.027778,0.0,0.027778,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.027778,0.027778,0.0,0.0,0.0,0.027778,0.027778,0.0,0.0,0.0,0.0,0.027778,0.0,0.0,0.027778,0.0,0.0,0.0,0.0,0.055556,0.0,0.027778,0.0,0.0,0.0,0.027778,0.027778,0.0,0.0,0.0,0.027778,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.027778,0.027778,0.0,0.0,0.0


In [28]:
# (rows, columns) of the per-hotel category frequency table
hotels_grouped.shape

(29, 124)

### 9.2. Printing each hotel along with the top 5 most common venues

In [29]:
num_top_venues = 5

for htl in hotels_grouped['hotel_name']:
    print("----"+htl+"----")
    # Transpose the hotel's row into a two-column (venue, freq) table
    temp = hotels_grouped[hotels_grouped['hotel_name'] == htl].T.reset_index()
    temp.columns = ['venue','freq']
    # Drop the leading 'hotel_name' row. The explicit copy makes `temp` an
    # independent frame, so the column assignment below does not write into a
    # slice of another frame (which can raise SettingWithCopyWarning).
    temp = temp.iloc[1:].copy()
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    # Highest frequencies first, renumbered from 0, limited to the top entries
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Ambassador Hotel----
              venue  freq
0       Coffee Shop  0.15
1              Café  0.08
2               Bar  0.07
3        Restaurant  0.07
4  Asian Restaurant  0.04


----Bar Dostyk In Dostyk Hotel----
         venue  freq
0  Coffee Shop  0.11
1        Diner  0.08
2         Café  0.08
3          Bar  0.05
4   Restaurant  0.05


----Bus stop. In front of hotel Kazakhstan----
          venue  freq
0         Hotel  0.09
1   Coffee Shop  0.09
2          Café  0.07
3    Steakhouse  0.04
4  Cocktail Bar  0.04


----City Hotel Tien Shan----
         venue  freq
0  Coffee Shop  0.13
1         Café  0.11
2        Hotel  0.05
3   Restaurant  0.05
4     Fountain  0.05


----Dostyk Hotel Gym----
            venue  freq
0     Coffee Shop  0.11
1           Diner  0.08
2            Café  0.08
3      Restaurant  0.05
4  Cosmetics Shop  0.05


----Grand Hotel Tien Shan----
          venue  freq
0   Coffee Shop  0.11
1          Café  0.08
2  Burger Joint  0.05
3           Bar  0.05
4    

### 9.3. Create new pandas dataframe for above most common venues

In [30]:
# Sort venues before creating dataframe
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the highest-valued entries of a row.

    Parameters
    ----------
    row : pandas.Series
        Its first element is skipped; the remaining elements are ranked by
        value. Callers in this notebook pass a row of hotels_grouped.
    num_top_venues : int
        Maximum number of labels to return.

    Returns
    -------
    numpy.ndarray
        Index labels of the remaining elements, ordered from the largest
        value to the smallest and truncated to `num_top_venues` items.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    return ranked.index.values[:num_top_venues]

### 9.4. Create the new dataframe and display the top 10 venues for each hotel

In [31]:
num_top_venues = 10

# Ordinal suffixes for ranks 1-3; every later rank uses 'th'
indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['hotel_name']
for ind in np.arange(num_top_venues):
    # Explicit bounds check instead of a bare `except:`, which would also
    # swallow unrelated errors (including KeyboardInterrupt)
    suffix = indicators[ind] if ind < len(indicators) else 'th'
    columns.append('{}{} Most Common Venue'.format(ind+1, suffix))

# create a new dataframe with one row per hotel in hotels_grouped
hotels_venues_sorted = pd.DataFrame(columns=columns)
hotels_venues_sorted['hotel_name'] = hotels_grouped['hotel_name']

# fill the ranked venue columns row by row
for ind in np.arange(hotels_grouped.shape[0]):
    hotels_venues_sorted.iloc[ind, 1:] = return_most_common_venues(hotels_grouped.iloc[ind, :], num_top_venues)

hotels_venues_sorted.head()

Unnamed: 0,hotel_name,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Ambassador Hotel,Coffee Shop,Café,Restaurant,Bar,Sushi Restaurant,Asian Restaurant,Eastern European Restaurant,Seafood Restaurant,Hotel,Tea Room
1,Bar Dostyk In Dostyk Hotel,Coffee Shop,Diner,Café,Bar,Restaurant,Hotel,Office,Gastropub,Cosmetics Shop,Nightclub
2,Bus stop. In front of hotel Kazakhstan,Hotel,Coffee Shop,Café,Steakhouse,Caucasian Restaurant,Cocktail Bar,Diner,Fast Food Restaurant,Korean Restaurant,Gay Bar
3,City Hotel Tien Shan,Coffee Shop,Café,Restaurant,Fountain,Hotel,Asian Restaurant,Gastropub,Lounge,Arts & Crafts Store,Pharmacy
4,Dostyk Hotel Gym,Coffee Shop,Diner,Café,Hotel,Cosmetics Shop,Restaurant,Gastropub,Gaming Cafe,Fountain,Middle Eastern Restaurant


## 10. Cluster analysis

### 10.1. Using kmeans let's cluster our hotel-neighborhoods into 5 clusters

In [32]:
# set number of clusters
kclusters = 5

# Drop the hotel name so only the venue-frequency columns are clustered.
# `columns=` replaces the positional axis argument (`drop('hotel_name', 1)`),
# which is deprecated and no longer accepted by newer pandas releases.
hotels_grouped_clustering = hotels_grouped.drop(columns='hotel_name')

# run k-means clustering; n_init is pinned to the historical default of 10
# because newer scikit-learn releases changed the default value
kmeans = KMeans(n_clusters=kclusters, random_state=0, n_init=10).fit(hotels_grouped_clustering)

# check cluster labels generated for the first ten rows of the dataframe
kmeans.labels_[0:10]

array([2, 1, 1, 2, 1, 0, 2, 3, 2, 3], dtype=int32)

### 10.2. Create new dataframe with clusters information

In [33]:
# Draft step 1: inspect the last rows of dataframe_hotels before its columns are edited
dataframe_hotels.tail()

Unnamed: 0,name,categories,lat,lng
25,Bus stop. In front of hotel Kazakhstan,Bus Stop,43.245104,76.956669
26,Premium Hall @ Dostyk Hotel,Conference Room,43.244531,76.95733
27,Grand Opera Hotel,Hotel,43.247426,76.940058
28,Dostyk Hotel Gym,Gym,43.244418,76.951025
29,Bar Dostyk In Dostyk Hotel,Bar,43.244052,76.95098


In [34]:
# Draft step 2: rename the 'name' column to 'hotel_name', the column name
# used by hotels_grouped and hotels_venues_sorted
dataframe_hotels = dataframe_hotels.rename(columns={'name': 'hotel_name'})

In [35]:
# Draft step 3: inspect the last rows of dataframe_hotels again to check the column names
dataframe_hotels.tail()

Unnamed: 0,hotel_name,categories,lat,lng
25,Bus stop. In front of hotel Kazakhstan,Bus Stop,43.245104,76.956669
26,Premium Hall @ Dostyk Hotel,Conference Room,43.244531,76.95733
27,Grand Opera Hotel,Hotel,43.247426,76.940058
28,Dostyk Hotel Gym,Gym,43.244418,76.951025
29,Bar Dostyk In Dostyk Hotel,Bar,43.244052,76.95098


In [36]:
# Add the clustering labels as the first column. DataFrame.insert raises
# ValueError when the column already exists, so overwrite it in that case;
# this keeps the cell safe to re-run.
if 'Cluster Labels' in hotels_venues_sorted.columns:
    hotels_venues_sorted['Cluster Labels'] = kmeans.labels_
else:
    hotels_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

hotels_merged = dataframe_hotels

# join the cluster label and ranked venue columns onto dataframe_hotels, matching rows on hotel_name
hotels_merged = hotels_merged.join(hotels_venues_sorted.set_index('hotel_name'), on='hotel_name')

hotels_merged.head() # check the last columns!

Unnamed: 0,hotel_name,categories,lat,lng,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Grand Hotel Tien Shan,Hotel,43.254026,76.949175,0,Coffee Shop,Café,Bar,Beer Bar,Burger Joint,Yoga Studio,Gym,Plaza,Pizza Place,Pedestrian Plaza
1,Гостиница «Казахстан» / Kazakhstan Hotel,Hotel,43.244809,76.957196,1,Hotel,Coffee Shop,Café,Diner,Caucasian Restaurant,Steakhouse,Park,Chinese Restaurant,Noodle House,Nightclub
2,Достық / The Dostyk Hotel («Достық» қонақ үйі),Hotel,43.244793,76.951078,1,Coffee Shop,Hotel,Diner,Café,Cosmetics Shop,Restaurant,Gaming Cafe,Fountain,Middle Eastern Restaurant,Modern European Restaurant
3,Уют / Uyut Hotel,Hotel,43.25994,76.933866,3,Restaurant,Hotel,Clothing Store,Comfort Food Restaurant,Bridal Shop,Salon / Barbershop,Café,Pet Store,Coffee Shop,New American Restaurant
4,City Hotel Tien Shan,Hotel,43.247956,76.951013,2,Coffee Shop,Café,Restaurant,Fountain,Hotel,Asian Restaurant,Gastropub,Lounge,Arts & Crafts Store,Pharmacy


### 10.3. Visualize Clusters

In [37]:
# create map centred on (latitude, longitude)
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters: one hex colour per cluster, sampled
# evenly from the rainbow colormap
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add one circle marker per hotel, coloured by its cluster label
for lat, lon, poi, cluster in zip(hotels_merged['lat'], hotels_merged['lng'], hotels_merged['hotel_name'], hotels_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    # Index the palette by the label itself. The former `cluster-1` sent
    # label 0 to rainbow[-1] and only worked through negative-index wrap-around.
    marker_color = rainbow[cluster]
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=marker_color,
        fill=True,
        fill_color=marker_color,
        fill_opacity=0.7).add_to(map_clusters)

map_clusters

## 11. Examination and Concerns

In [38]:
# Preview the merged frame before its columns are reordered in the next cell
hotels_merged.head()

Unnamed: 0,hotel_name,categories,lat,lng,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Grand Hotel Tien Shan,Hotel,43.254026,76.949175,0,Coffee Shop,Café,Bar,Beer Bar,Burger Joint,Yoga Studio,Gym,Plaza,Pizza Place,Pedestrian Plaza
1,Гостиница «Казахстан» / Kazakhstan Hotel,Hotel,43.244809,76.957196,1,Hotel,Coffee Shop,Café,Diner,Caucasian Restaurant,Steakhouse,Park,Chinese Restaurant,Noodle House,Nightclub
2,Достық / The Dostyk Hotel («Достық» қонақ үйі),Hotel,43.244793,76.951078,1,Coffee Shop,Hotel,Diner,Café,Cosmetics Shop,Restaurant,Gaming Cafe,Fountain,Middle Eastern Restaurant,Modern European Restaurant
3,Уют / Uyut Hotel,Hotel,43.25994,76.933866,3,Restaurant,Hotel,Clothing Store,Comfort Food Restaurant,Bridal Shop,Salon / Barbershop,Café,Pet Store,Coffee Shop,New American Restaurant
4,City Hotel Tien Shan,Hotel,43.247956,76.951013,2,Coffee Shop,Café,Restaurant,Fountain,Hotel,Asian Restaurant,Gastropub,Lounge,Arts & Crafts Store,Pharmacy


In [39]:
# Target column order: 'categories' is moved in front of 'hotel_name', which
# places 'hotel_name' at position 1 and the ranked venue columns at 5 onward
colTitles = [
    'categories',
    'hotel_name',
    'lat',
    'lng',
    'Cluster Labels',
    '1st Most Common Venue',
    '2nd Most Common Venue',
    '3rd Most Common Venue',
    '4th Most Common Venue',
    '5th Most Common Venue',
    '6th Most Common Venue',
    '7th Most Common Venue',
    '8th Most Common Venue',
    '9th Most Common Venue',
    '10th Most Common Venue',
]
hotels_merged = hotels_merged.reindex(columns=colTitles)

#### CLUSTER 1 - 5

In [40]:
# Cluster 1 (k-means label 0): column 1 plus every column from position 5 onward
hotels_merged.loc[
    hotels_merged['Cluster Labels'] == 0,
    hotels_merged.columns[[1] + list(range(5, hotels_merged.shape[1]))],
]

Unnamed: 0,hotel_name,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Grand Hotel Tien Shan,Coffee Shop,Café,Bar,Beer Bar,Burger Joint,Yoga Studio,Gym,Plaza,Pizza Place,Pedestrian Plaza
9,Hotel grand Saphire&Spa,Coffee Shop,Bar,Café,Beer Bar,Burger Joint,Yoga Studio,Gym,Plaza,Pedestrian Plaza,Park
10,Soluxe Hotel Almaty,Beer Bar,Burger Joint,Café,Coffee Shop,Yoga Studio,Comfort Food Restaurant,Plaza,Pedestrian Plaza,Park,Italian Restaurant


#### Examine Cluster 1:
This cluster can be named **"Short Business Trip Hotels"**. The most common venues are small cafes, bars and fast-food places - all the venues needed for a short trip or by transit tourists. There aren't any sightseeing venues, so it can be assumed that only transit or business tourists stay at these hotels.

In [41]:
# Cluster 2 (k-means label 1): column 1 plus every column from position 5 onward
hotels_merged.loc[
    hotels_merged['Cluster Labels'] == 1,
    hotels_merged.columns[[1] + list(range(5, hotels_merged.shape[1]))],
]

Unnamed: 0,hotel_name,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,Гостиница «Казахстан» / Kazakhstan Hotel,Hotel,Coffee Shop,Café,Diner,Caucasian Restaurant,Steakhouse,Park,Chinese Restaurant,Noodle House,Nightclub
2,Достық / The Dostyk Hotel («Достық» қонақ үйі),Coffee Shop,Hotel,Diner,Café,Cosmetics Shop,Restaurant,Gaming Cafe,Fountain,Middle Eastern Restaurant,Modern European Restaurant
16,The Hotel,Hotel,Coffee Shop,Café,Diner,Caucasian Restaurant,Steakhouse,Park,Chinese Restaurant,Noodle House,Nightclub
19,Premium Hall Kazakhstan Hotel,Coffee Shop,Hotel,Café,Diner,Caucasian Restaurant,Steakhouse,Park,Chinese Restaurant,Noodle House,Nightclub
24,The Dostyk Hotel SPA-Center,Coffee Shop,Diner,Café,Bar,Restaurant,Hotel,Office,Gaming Cafe,Cosmetics Shop,Convenience Store
25,Bus stop. In front of hotel Kazakhstan,Hotel,Coffee Shop,Café,Steakhouse,Caucasian Restaurant,Cocktail Bar,Diner,Fast Food Restaurant,Korean Restaurant,Gay Bar
26,Premium Hall @ Dostyk Hotel,Coffee Shop,Hotel,Café,Diner,Caucasian Restaurant,Steakhouse,Park,Chinese Restaurant,Noodle House,Nightclub
28,Dostyk Hotel Gym,Coffee Shop,Diner,Café,Hotel,Cosmetics Shop,Restaurant,Gastropub,Gaming Cafe,Fountain,Middle Eastern Restaurant
29,Bar Dostyk In Dostyk Hotel,Coffee Shop,Diner,Café,Bar,Restaurant,Hotel,Office,Gastropub,Cosmetics Shop,Nightclub


#### Examine Cluster 2:
This cluster can be named **"Long Business Trip Hotels"**. As can be seen, the most common venues are restaurants and steakhouses, which require more time.

In [42]:
# Cluster 3 (k-means label 2): column 1 plus every column from position 5 onward
hotels_merged.loc[
    hotels_merged['Cluster Labels'] == 2,
    hotels_merged.columns[[1] + list(range(5, hotels_merged.shape[1]))],
]

Unnamed: 0,hotel_name,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
4,City Hotel Tien Shan,Coffee Shop,Café,Restaurant,Fountain,Hotel,Asian Restaurant,Gastropub,Lounge,Arts & Crafts Store,Pharmacy
5,Ambassador Hotel,Coffee Shop,Café,Restaurant,Bar,Sushi Restaurant,Asian Restaurant,Eastern European Restaurant,Seafood Restaurant,Hotel,Tea Room
13,"Hotel D' Rami Гостиница ""Д'Рами""",Coffee Shop,Restaurant,Café,Bar,Mediterranean Restaurant,Fountain,Eastern European Restaurant,Gift Shop,Pub,Pizza Place
17,Гостиница «Алматы»,Coffee Shop,Café,Bar,Restaurant,Fountain,Burger Joint,Seafood Restaurant,Hotel,Salon / Barbershop,Pizza Place
27,Grand Opera Hotel,Coffee Shop,Restaurant,Bar,Café,Eastern European Restaurant,Sushi Restaurant,Korean Restaurant,Steakhouse,Hotel,Tea Room


#### Examine Cluster 3:
This cluster can be named **"Culture Research Trip Hotels"**. There are theatres, walking areas, markets and stores among the most common venues.

In [43]:
# Cluster 4 (k-means label 3): column 1 plus every column from position 5 onward
hotels_merged.loc[
    hotels_merged['Cluster Labels'] == 3,
    hotels_merged.columns[[1] + list(range(5, hotels_merged.shape[1]))],
]

Unnamed: 0,hotel_name,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
3,Уют / Uyut Hotel,Restaurant,Hotel,Clothing Store,Comfort Food Restaurant,Bridal Shop,Salon / Barbershop,Café,Pet Store,Coffee Shop,New American Restaurant
12,Hotel Berkana,Coffee Shop,Restaurant,Hotel,Korean Restaurant,Clothing Store,Café,Asian Restaurant,Salon / Barbershop,Concert Hall,Comfort Food Restaurant
14,Hotel Berkana,Coffee Shop,Restaurant,Hotel,Korean Restaurant,Clothing Store,Café,Asian Restaurant,Salon / Barbershop,Concert Hall,Comfort Food Restaurant
15,Hotel Kazhol Conference Rooms,Coffee Shop,Hotel,Restaurant,Clothing Store,Fast Food Restaurant,Vietnamese Restaurant,Cosmetics Shop,Movie Theater,Comfort Food Restaurant,New American Restaurant
22,Казжол / Kazzhol,Restaurant,Coffee Shop,Hotel,Italian Restaurant,Clothing Store,Fast Food Restaurant,Pet Store,Salon / Barbershop,Korean Restaurant,Burger Joint
23,The Shilla hotel,Clothing Store,Hotel,Coffee Shop,Restaurant,Boutique,Korean Restaurant,Eastern European Restaurant,Moroccan Restaurant,Cosmetics Shop,New American Restaurant


#### Examine Cluster 4:
This cluster can be named **"Domestic Trip to Almaty"**. It seems that these hotels are visited by domestic tourists from other small cities of Kazakhstan for weekend shopping and so on.

In [44]:
# Cluster 5 (k-means label 4): column 1 plus every column from position 5 onward
hotels_merged.loc[
    hotels_merged['Cluster Labels'] == 4,
    hotels_merged.columns[[1] + list(range(5, hotels_merged.shape[1]))],
]

Unnamed: 0,hotel_name,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
6,hotel complex Otrar,Coffee Shop,Nightclub,Pizza Place,Fast Food Restaurant,Chinese Restaurant,Noodle House,Hotel,Gym,Theater,Lounge
7,Hotel Voyage,Coffee Shop,Noodle House,Lounge,Burger Joint,Chinese Restaurant,Seafood Restaurant,Gym,Yoga Studio,German Restaurant,Nightclub
8,Turkistan Hotel,Nightclub,Coffee Shop,Fast Food Restaurant,Shopping Mall,Noodle House,Pizza Place,Theater,Chinese Restaurant,Restaurant,Asian Restaurant
11,Marriott Astana Saad Hotel,Coffee Shop,Noodle House,Chinese Restaurant,Seafood Restaurant,Lounge,Burger Joint,Clothing Store,Greek Restaurant,Hookah Bar,Italian Restaurant
18,Отрар / Otrar,Coffee Shop,Nightclub,Pizza Place,Fast Food Restaurant,Chinese Restaurant,Noodle House,Hotel,Gym,Theater,Lounge
20,Renion Residence,Bar,Hotel,Chinese Restaurant,Historic Site,Grocery Store,Indian Restaurant,Italian Restaurant,Korean Restaurant,Mediterranean Restaurant,Electronics Store
21,Kazzhol Hotel Almaty,Coffee Shop,Nightclub,Gym,Gym / Fitness Center,Fast Food Restaurant,Movie Theater,Pub,Pharmacy,Park,Convenience Store


#### Examine Cluster 5:
This cluster can be named **"Enjoy Fun Hotels"**. It seems that these hotels are visited by tourists who like entertainment.

# Thank you for reviewing!