# Applied Data Science Capstone

*This notebook is used for the Applied Data Science Capstone.*

### Importing the Libraries

In [1]:
import os
import ssl
import urllib.request, urllib.parse, urllib.error

import pandas as pd
import numpy as np

from bs4 import BeautifulSoup
from geopy.geocoders import Nominatim

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

# !conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe


### Setting Up the Beautiful Soup to read Website

In [2]:
# The code was removed by Watson Studio for sharing.

Unnamed: 0,city,lat,lng,population,population_proper
0,Mumbai,18.987807,72.836447,18978000.0,12691836.0
1,Delhi,28.651952,77.231495,15926000.0,7633213.0
2,Kolkata,22.562627,88.363044,14787000.0,4631392.0
3,Chennai,13.084622,80.248357,7163000.0,4328063.0
4,Bengalūru,12.977063,77.587106,6787000.0,5104047.0


### Retrieving all the Required Data into Data Frame

In [3]:
India = India.rename(columns = {'lat':'Latitude', 'lng': 'Longitude', 'city': 'City'})
India.head()

Unnamed: 0,City,Latitude,Longitude,population,population_proper
0,Mumbai,18.987807,72.836447,18978000.0,12691836.0
1,Delhi,28.651952,77.231495,15926000.0,7633213.0
2,Kolkata,22.562627,88.363044,14787000.0,4631392.0
3,Chennai,13.084622,80.248357,7163000.0,4328063.0
4,Bengalūru,12.977063,77.587106,6787000.0,5104047.0


In [4]:
India_Clean=India.drop(columns= ['population', 'population_proper'])
India_Clean.head()

Unnamed: 0,City,Latitude,Longitude
0,Mumbai,18.987807,72.836447
1,Delhi,28.651952,77.231495
2,Kolkata,22.562627,88.363044
3,Chennai,13.084622,80.248357
4,Bengalūru,12.977063,77.587106


### Getting India Coordinates and Displaying Map

In [5]:

address = 'India'

geolocator = Nominatim(user_agent="india")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of India are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of India are 22.3511148, 78.6677428.


In [6]:
# create map of New York using latitude and longitude values
map_india = folium.Map(location=[latitude, longitude], zoom_start=4)

# add markers to map
for lat, lng, city in zip(India_Clean['Latitude'], India_Clean['Longitude'], India_Clean['City']):
    label = ' {}'.format(city)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_india)  
    
map_india

### Using the Foursquare API for Additional Information

In [7]:

CLIENT_ID = 'PE2PMUZURJHESEPZGJSBOI3CCY422YUBSEOBY4UVXMN4QYU0' # your Foursquare ID
CLIENT_SECRET = 'XAR5KYUE3MOQF4AFJPBXKT2HGVKG5PGXN2ZKBHJNULSFAFQQ'
VERSION = '20180605' # Foursquare API version
LIMIT = 100

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: PE2PMUZURJHESEPZGJSBOI3CCY422YUBSEOBY4UVXMN4QYU0
CLIENT_SECRET:XAR5KYUE3MOQF4AFJPBXKT2HGVKG5PGXN2ZKBHJNULSFAFQQ


In [8]:
#Function to apply it to every Postcode we have.
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
#         if(name=='Solāpur' or name == 'Mumbai'): continue    
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['City', 
                  'City Latitude', 
                  'City Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [9]:
India_venues = getNearbyVenues(names=India_Clean['City'],
                                   latitudes=India_Clean['Latitude'],
                                   longitudes=India_Clean['Longitude']
                                  )

Mumbai
Delhi
Kolkata
Chennai
Bengalūru
Hyderabad
Ahmadābād
Hāora
Pune
Sūrat
Mardānpur
Rāmpura
Lucknow
Nāra
Patna
Indore
Vadodara
Bhopal
Coimbatore
Ludhiāna
Āgra
Kalyān
Vishākhapatnam
Kochi
Nāsik
Meerut
Farīdābād
Vārānasi
Ghāziābād
Āsansol
Jamshedpur
Madurai
Jabalpur
Rājkot
Dhanbād
Amritsar
Warangal
Allahābād
Srīnagar
Aurangābād
Bhilai
Solāpur
Ranchi
Jodhpur
Guwāhāti
Chandigarh
Gwalior
Thiruvananthapuram
Tiruchchirāppalli
Hubli
Mysore
Raipur
Salem
Bhubaneshwar
Kota
Jhānsi
Bareilly
Alīgarh
Bhiwandi
Jammu
Morādābād
Mangalore
Kolhāpur
Amrāvati
Dehra Dūn
Mālegaon Camp
Nellore
Gopālpur
Shimoga
Tiruppūr
Raurkela
Nānded
Belgaum
Sāngli
Chānda
Ajmer
Cuttack
Bīkaner
Bhāvnagar
Hisar
Bilāspur
Tirunelveli
Guntūr
Shiliguri
Ujjain
Davangere
Akola
Sahāranpur
Gulbarga
Bhātpāra
Dhūlia
Udaipur
Bellary
Tuticorin
Kurnool
Gaya
Sīkar
Tumkūr
Kollam
Ahmadnagar
Bhīlwāra
Nizāmābād
Parbhani
Shillong
Lātūr
Rājapālaiyam
Bhāgalpur
Muzaffarnagar
Muzaffarpur
Mathura
Patiāla
Saugor
Brahmapur
Shāhbāzpur
New Delhi
Rohtak


In [10]:
India_venues.groupby('City').count().head()
print('There are {} uniques categories.'.format(len(India_venues['Venue Category'].unique())))

There are 156 uniques categories.


In [11]:
# one hot encoding
India_onehot = pd.get_dummies(India_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
India_onehot['City'] = India_venues['City'] 

# move neighborhood column to the first column
fixed_columns = [India_onehot.columns[-1]] + list(India_onehot.columns[:-1])
India_onehot = India_onehot[fixed_columns]
India_onehot.head()

Unnamed: 0,Yoga Studio,ATM,Accessories Store,Adult Boutique,American Restaurant,Andhra Restaurant,Arcade,Art Museum,Arts & Crafts Store,Asian Restaurant,...,Thrift / Vintage Store,Tourist Information Center,Train Station,Travel & Transport,Udupi Restaurant,Vegetarian / Vegan Restaurant,Video Store,Vineyard,Watch Shop,Women's Store
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [12]:
India_onehot.shape
#Here we are showing the mean of what each neighborhood has close by
India_grouped = India_onehot.groupby('City').mean().reset_index()
India_grouped.head()

Unnamed: 0,City,Yoga Studio,ATM,Accessories Store,Adult Boutique,American Restaurant,Andhra Restaurant,Arcade,Art Museum,Arts & Crafts Store,...,Thrift / Vintage Store,Tourist Information Center,Train Station,Travel & Transport,Udupi Restaurant,Vegetarian / Vegan Restaurant,Video Store,Vineyard,Watch Shop,Women's Store
0,Abohar,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Agartala,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Ahmadnagar,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Ahmadābād,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Aizawl,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [13]:
#Now, we are going to show what has more frequency in each of those neighborhoods.
num_top_venues = 5

for hood in India_grouped['City']:
    print("----"+hood+"----")
    temp = India_grouped[India_grouped['City'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Abohar----
                venue  freq
0   Convenience Store  0.17
1                Café  0.17
2                 ATM  0.17
3  Photography Studio  0.17
4       Train Station  0.17


----Agartala----
           venue  freq
0  Historic Site  0.14
1    Coffee Shop  0.14
2      Multiplex  0.14
3           Lake  0.14
4          Hotel  0.14


----Ahmadnagar----
                         venue  freq
0            Indian Restaurant   1.0
1                  Yoga Studio   0.0
2  Northeast Indian Restaurant   0.0
3                Movie Theater   0.0
4           Mughlai Restaurant   0.0


----Ahmadābād----
                  venue  freq
0     Indian Restaurant  0.25
1  Fast Food Restaurant  0.25
2           Snack Place  0.25
3         Historic Site  0.25
4           Yoga Studio  0.00


----Aizawl----
                         venue  freq
0                        Hotel  0.50
1                Shopping Mall  0.25
2    Indian Chinese Restaurant  0.25
3                  Yoga Studio  0.00
4  Northeast In

In [14]:

def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [15]:
#And add it into a pandas dataframe to be able to apply afterwards Kclustering.
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['City']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
cities_sorted = pd.DataFrame(columns=columns)
cities_sorted['City'] = India_grouped['City']

for ind in np.arange(India_grouped.shape[0]):
    cities_sorted.iloc[ind, 1:] = return_most_common_venues(India_grouped.iloc[ind, :], num_top_venues)

cities_sorted.head()

Unnamed: 0,City,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Abohar,Café,Train Station,Fast Food Restaurant,Convenience Store,Photography Studio,ATM,Dairy Store,Electronics Store,Food,Flea Market
1,Agartala,Lake,Coffee Shop,Hotel,Historic Site,Science Museum,Multiplex,Salad Place,Dumpling Restaurant,Fast Food Restaurant,Farmers Market
2,Ahmadnagar,Indian Restaurant,Dumpling Restaurant,Food,Flea Market,Field,Fast Food Restaurant,Farmers Market,Fabric Shop,Electronics Store,Donut Shop
3,Ahmadābād,Snack Place,Historic Site,Fast Food Restaurant,Indian Restaurant,Hotel Bar,Hotel,Flea Market,Field,Ice Cream Shop,Farmers Market
4,Aizawl,Hotel,Indian Chinese Restaurant,Shopping Mall,Electronics Store,Food,Flea Market,Field,Fast Food Restaurant,Farmers Market,Fabric Shop


### Using K-Means to Form 4 Clusters

In [16]:

# set number of clusters, in this case we choose 3 clusters because if its >3 we will have clusters with only one data in it.
kclusters = 4

India_grouped_clustering = India_grouped.drop('City', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(India_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([2, 2, 0, 0, 2, 0, 2, 0, 2, 2], dtype=int32)

In [17]:
India_Clean.head()

Unnamed: 0,City,Latitude,Longitude
0,Mumbai,18.987807,72.836447
1,Delhi,28.651952,77.231495
2,Kolkata,22.562627,88.363044
3,Chennai,13.084622,80.248357
4,Bengalūru,12.977063,77.587106


In [18]:
# add clustering labels
cities_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

India_merged = India_Clean

# merge netherlands_merged with cities_sorted to add latitude/longitude for each neighborhood
India_merged = India_merged.join(cities_sorted.set_index('City'), on='City')

India_merged.head() # check the last columns!


Unnamed: 0,City,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Mumbai,18.987807,72.836447,2.0,Multiplex,Snack Place,Vegetarian / Vegan Restaurant,Plaza,Fast Food Restaurant,Train Station,Women's Store,Field,Farmers Market,Fabric Shop
1,Delhi,28.651952,77.231495,0.0,Indian Restaurant,Snack Place,Market,Flea Market,Hotel,Historic Site,Paper / Office Supplies Store,Hardware Store,Dessert Shop,Mosque
2,Kolkata,22.562627,88.363044,2.0,IT Services,Video Store,Hotel,Park,Market,Multiplex,Dumpling Restaurant,Field,Fast Food Restaurant,Farmers Market
3,Chennai,13.084622,80.248357,0.0,Indian Restaurant,Italian Restaurant,Fast Food Restaurant,Yoga Studio,Dairy Store,Deli / Bodega,Department Store,Dessert Shop,Diner,Donut Shop
4,Bengalūru,12.977063,77.587106,2.0,Vineyard,Plaza,Coffee Shop,Hotel,Metro Station,Women's Store,Electronics Store,Flea Market,Field,Fast Food Restaurant


In [19]:
cities_sorted.head()

Unnamed: 0,Cluster Labels,City,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,2,Abohar,Café,Train Station,Fast Food Restaurant,Convenience Store,Photography Studio,ATM,Dairy Store,Electronics Store,Food,Flea Market
1,2,Agartala,Lake,Coffee Shop,Hotel,Historic Site,Science Museum,Multiplex,Salad Place,Dumpling Restaurant,Fast Food Restaurant,Farmers Market
2,0,Ahmadnagar,Indian Restaurant,Dumpling Restaurant,Food,Flea Market,Field,Fast Food Restaurant,Farmers Market,Fabric Shop,Electronics Store,Donut Shop
3,0,Ahmadābād,Snack Place,Historic Site,Fast Food Restaurant,Indian Restaurant,Hotel Bar,Hotel,Flea Market,Field,Ice Cream Shop,Farmers Market
4,2,Aizawl,Hotel,Indian Chinese Restaurant,Shopping Mall,Electronics Store,Food,Flea Market,Field,Fast Food Restaurant,Farmers Market,Fabric Shop


In [20]:
#Deletint the NaN Cluster Labels and making them int for ploting purposes in the future.
cities_sorted.shape
cities_sorted = cities_sorted.dropna(axis='rows')
cities_sorted['Cluster Labels'] = cities_sorted['Cluster Labels'].astype(int)
cities_sorted.head()

Unnamed: 0,Cluster Labels,City,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,2,Abohar,Café,Train Station,Fast Food Restaurant,Convenience Store,Photography Studio,ATM,Dairy Store,Electronics Store,Food,Flea Market
1,2,Agartala,Lake,Coffee Shop,Hotel,Historic Site,Science Museum,Multiplex,Salad Place,Dumpling Restaurant,Fast Food Restaurant,Farmers Market
2,0,Ahmadnagar,Indian Restaurant,Dumpling Restaurant,Food,Flea Market,Field,Fast Food Restaurant,Farmers Market,Fabric Shop,Electronics Store,Donut Shop
3,0,Ahmadābād,Snack Place,Historic Site,Fast Food Restaurant,Indian Restaurant,Hotel Bar,Hotel,Flea Market,Field,Ice Cream Shop,Farmers Market
4,2,Aizawl,Hotel,Indian Chinese Restaurant,Shopping Mall,Electronics Store,Food,Flea Market,Field,Fast Food Restaurant,Farmers Market,Fabric Shop


## Clusters on Map

In [21]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=4)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(India_merged['Latitude'], India_merged['Longitude'], India_merged['City'], cities_sorted['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

### Display Different Cluster Data Frames

In [22]:
#Cluster 0 (Brown color)
India_merged.loc[India_merged['Cluster Labels'] == 0, India_merged.columns[[0] + list(range(4, India_merged.shape[1]))]]

Unnamed: 0,City,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,Delhi,Indian Restaurant,Snack Place,Market,Flea Market,Hotel,Historic Site,Paper / Office Supplies Store,Hardware Store,Dessert Shop,Mosque
3,Chennai,Indian Restaurant,Italian Restaurant,Fast Food Restaurant,Yoga Studio,Dairy Store,Deli / Bodega,Department Store,Dessert Shop,Diner,Donut Shop
6,Ahmadābād,Snack Place,Historic Site,Fast Food Restaurant,Indian Restaurant,Hotel Bar,Hotel,Flea Market,Field,Ice Cream Shop,Farmers Market
8,Pune,Indian Restaurant,Ice Cream Shop,Women's Store,Jewelry Store,Seafood Restaurant,Road,Coffee Shop,Snack Place,Restaurant,Bar
12,Lucknow,Indian Restaurant,ATM,Market,Electronics Store,Food Court,Food,Flea Market,Field,Fast Food Restaurant,Farmers Market
18,Coimbatore,Indian Restaurant,Stadium,Ice Cream Shop,Hotel,Chinese Restaurant,Travel & Transport,Train Station,Park,Women's Store,Field
19,Ludhiāna,Indian Restaurant,Fast Food Restaurant,Plaza,Shopping Mall,Electronics Store,Food,Flea Market,Field,Farmers Market,Fabric Shop
24,Nāsik,Bus Station,Indian Restaurant,Hotel,Motel,Electronics Store,Food,Flea Market,Field,Fast Food Restaurant,Farmers Market
26,Farīdābād,Indian Restaurant,Convenience Store,Shopping Mall,Electronics Store,Food,Flea Market,Field,Fast Food Restaurant,Farmers Market,Fabric Shop
31,Madurai,Indian Restaurant,Department Store,Clothing Store,IT Services,Hotel Bar,Flea Market,Field,Fast Food Restaurant,Farmers Market,Fabric Shop


In [23]:
#Cluster 1 (Red color)
India_merged.loc[India_merged['Cluster Labels'] == 1, India_merged.columns[[0] + list(range(4, India_merged.shape[1]))]]

Unnamed: 0,City,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
9,Sūrat,Clothing Store,Indian Restaurant,Ice Cream Shop,Food Court,Flea Market,Field,Fast Food Restaurant,Farmers Market,Fabric Shop,Electronics Store
40,Bhilai,Clothing Store,Electronics Store,Food Court,Food,Flea Market,Field,Fast Food Restaurant,Farmers Market,Fabric Shop,Dumpling Restaurant
134,Imphal,Clothing Store,Electronics Store,Food Court,Food,Flea Market,Field,Fast Food Restaurant,Farmers Market,Fabric Shop,Dumpling Restaurant
141,Hāpur,Clothing Store,Electronics Store,Food Court,Food,Flea Market,Field,Fast Food Restaurant,Farmers Market,Fabric Shop,Dumpling Restaurant
187,Pīlibhīt,Clothing Store,Electronics Store,Food Court,Food,Flea Market,Field,Fast Food Restaurant,Farmers Market,Fabric Shop,Dumpling Restaurant


In [24]:
#Cluster 2 (Blue color)
India_merged.loc[India_merged['Cluster Labels'] == 2, India_merged.columns[[0] + list(range(4, India_merged.shape[1]))]]

Unnamed: 0,City,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Mumbai,Multiplex,Snack Place,Vegetarian / Vegan Restaurant,Plaza,Fast Food Restaurant,Train Station,Women's Store,Field,Farmers Market,Fabric Shop
2,Kolkata,IT Services,Video Store,Hotel,Park,Market,Multiplex,Dumpling Restaurant,Field,Fast Food Restaurant,Farmers Market
4,Bengalūru,Vineyard,Plaza,Coffee Shop,Hotel,Metro Station,Women's Store,Electronics Store,Flea Market,Field,Fast Food Restaurant
5,Hyderabad,Home Service,Women's Store,Dumpling Restaurant,Food,Flea Market,Field,Fast Food Restaurant,Farmers Market,Fabric Shop,Electronics Store
7,Hāora,Indian Sweet Shop,Business Service,Electronics Store,Food,Flea Market,Field,Fast Food Restaurant,Farmers Market,Fabric Shop,Dumpling Restaurant
11,Rāmpura,Café,ATM,IT Services,Dumpling Restaurant,Food,Flea Market,Field,Fast Food Restaurant,Farmers Market,Fabric Shop
14,Patna,Board Shop,Pizza Place,Men's Store,Fabric Shop,Food Court,Food,Flea Market,Field,Fast Food Restaurant,Farmers Market
15,Indore,Historic Site,Café,Hotel Bar,Donut Shop,Flea Market,Field,Fast Food Restaurant,Farmers Market,Fabric Shop,Electronics Store
16,Vadodara,Ice Cream Shop,Lake,Market,Clothing Store,Hotel,Dumpling Restaurant,Flea Market,Field,Fast Food Restaurant,Farmers Market
17,Bhopal,Food Court,Market,Historic Site,Electronics Store,Food,Flea Market,Field,Fast Food Restaurant,Farmers Market,Fabric Shop


In [25]:
#Cluster 3 (Violet color)
India_merged.loc[India_merged['Cluster Labels'] == 3, India_merged.columns[[0] + list(range(4, India_merged.shape[1]))]]

Unnamed: 0,City,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
41,Solāpur,Business Service,ATM,Women's Store,Food Court,Food,Flea Market,Field,Fast Food Restaurant,Farmers Market,Fabric Shop
46,Gwalior,ATM,Women's Store,Electronics Store,Food Court,Food,Flea Market,Field,Fast Food Restaurant,Farmers Market,Fabric Shop
67,Gopālpur,ATM,Hotel,Women's Store,Dumpling Restaurant,Food,Flea Market,Field,Fast Food Restaurant,Farmers Market,Fabric Shop
79,Hisar,ATM,Train Station,Health & Beauty Service,Women's Store,Dumpling Restaurant,Food,Flea Market,Field,Fast Food Restaurant,Farmers Market
81,Tirunelveli,ATM,Women's Store,Electronics Store,Food Court,Food,Flea Market,Field,Fast Food Restaurant,Farmers Market,Fabric Shop
89,Bhātpāra,Platform,ATM,Electronics Store,Food Court,Food,Flea Market,Field,Fast Food Restaurant,Farmers Market,Fabric Shop
95,Gaya,ATM,Fried Chicken Joint,Women's Store,Electronics Store,Food,Flea Market,Field,Fast Food Restaurant,Farmers Market,Fabric Shop
102,Parbhani,ATM,Women's Store,Electronics Store,Food Court,Food,Flea Market,Field,Fast Food Restaurant,Farmers Market,Fabric Shop
108,Muzaffarpur,Health & Beauty Service,ATM,Electronics Store,Dumpling Restaurant,Food,Flea Market,Field,Fast Food Restaurant,Farmers Market,Fabric Shop
115,Rohtak,ATM,Park,Women's Store,Electronics Store,Food Court,Food,Flea Market,Field,Fast Food Restaurant,Farmers Market


1. Cluster 0 (Brown) is mainly Indian restaurants.
2. Cluster 1 (Red) is clothing stores.
3. Cluster 2 (Blue) is multiplexes.
4. Cluster 3 (Violet) is ATMs.

## Thank You !👍🏻