# Import necessary libraries

In [1]:
import pandas as pd
import numpy as np
from geopy.geocoders import Nominatim
import folium
import requests
from sklearn.cluster import KMeans
import matplotlib.cm as cm
import matplotlib.colors as colors
from pandas.io.json import json_normalize
import pgeocode
from geopy.geocoders import Nominatim

# Read dataset

In [2]:
temp=pd.read_csv('Locality_village_pincode_final_mar-2017.csv',encoding= 'unicode_escape')

In [3]:
temp.head()

Unnamed: 0,Village/Locality name,Officename ( BO/SO/HO),Pincode,Sub-distname,Districtname,StateName
0,Aliganj,Lodi Road H.O,110003,Defence Colony,SOUTH EAST DELHI,DELHI
1,Kasturba Nagar,Lodi Road H.O,110003,Defence Colony,SOUTH EAST DELHI,DELHI
2,Jeewan Nagar,Jungpura S.O,110014,Defence Colony,SOUTH EAST DELHI,DELHI
3,Tehkhand,Okhla Industrial Estate S.O,110020,Defence Colony,SOUTH EAST DELHI,DELHI
4,Zakir Nagar SO,New Friends Colony S.O,110025,Defence Colony,SOUTH EAST DELHI,DELHI


# Drop unnecessary columns

In [4]:
temp.drop(['Officename ( BO/SO/HO)','Sub-distname','StateName'],axis=1,inplace=True)

# Renaming columns according to project

In [5]:
temp.columns=['Neighborhood', 'Pincode', 'Districtname']
temp.head()

Unnamed: 0,Neighborhood,Pincode,Districtname
0,Aliganj,110003,SOUTH EAST DELHI
1,Kasturba Nagar,110003,SOUTH EAST DELHI
2,Jeewan Nagar,110014,SOUTH EAST DELHI
3,Tehkhand,110020,SOUTH EAST DELHI
4,Zakir Nagar SO,110025,SOUTH EAST DELHI


# Separate data of Amritsar and Delhi into 2 dataframes

In [6]:
Asr_df=pd.DataFrame(columns=temp.columns)
Del_df=pd.DataFrame(columns=temp.columns)

In [7]:
Asr_idx=0
Del_idx=0
for i in temp.index:
    if ('amritsar' in temp.iloc[i]['Districtname'].lower()):
        Asr_df.loc[Asr_idx]=temp.loc[i]
        Asr_idx+=1
    elif  ('delhi' in temp.iloc[i]['Districtname'].lower()):
        Del_df.loc[Del_idx]=temp.loc[i]
        Del_idx+=1

# Combine Neighborhoods for same pincode

In [8]:
Asr_df=Asr_df.groupby(['Pincode','Districtname'])['Neighborhood'].apply(','.join).reset_index()
Asr_df.head()

Unnamed: 0,Pincode,Districtname,Neighborhood
0,143001,AMRITSAR,"Amritsar -I,Amritsar -I,Amritsar -I,Amritsar -..."
1,143002,AMRITSAR,"Amritsar -I,Amritsar -I"
2,143005,AMRITSAR,Amritsar -I
3,143006,AMRITSAR,"Amritsar -I,Amritsar -I,Amritsar -I,Amritsar -..."
4,143008,AMRITSAR,"Gumtala Sub Urban,Kherabad,Naushehra,Nangli,Pa..."


In [9]:
Del_df=Del_df.groupby(['Pincode','Districtname'])['Neighborhood'].apply(','.join).reset_index()
Del_df.head()

Unnamed: 0,Pincode,Districtname,Neighborhood
0,110003,SOUTH EAST DELHI,"Aliganj,Kasturba Nagar"
1,110014,SOUTH EAST DELHI,Jeewan Nagar
2,110020,SOUTH EAST DELHI,Tehkhand
3,110025,SOUTH EAST DELHI,Zakir Nagar SO
4,110036,NORTH DELHI,Mungeshpur


# Get latitude and longitude for pincodes

In [10]:
def coord(df):
    nomi=pgeocode.Nominatim('in')
    coordinates=pd.DataFrame(columns=['Pincode','latitude','longitude'])
    for i in df.index:
        pin=str(df.iloc[i][0])
        ll=nomi.query_postal_code(pin)[['latitude','longitude']]
        coordinates.loc[i]=[int(pin)]+[ll[0]]+[ll[1]]
    return coordinates

In [11]:
Asr_coordinates=coord(Asr_df)
Del_coordinates=coord(Del_df)

In [12]:
Asr_coordinates.head()

Unnamed: 0,Pincode,latitude,longitude
0,143001.0,31.5729,75.0058
1,143002.0,31.6029,74.8732
2,143005.0,31.6029,74.8732
3,143006.0,31.6035,74.909
4,143008.0,31.665,74.870817


In [13]:
Del_coordinates.head()

Unnamed: 0,Pincode,latitude,longitude
0,110003.0,28.6431,77.22527
1,110014.0,28.6109,77.1792
2,110020.0,28.5345,77.2779
3,110025.0,28.5672,77.2725
4,110036.0,28.7986,77.16158


In [14]:
Asr_coordinates['Pincode']=Asr_coordinates['Pincode'].astype(int)

In [15]:
Del_coordinates['Pincode']=Del_coordinates['Pincode'].astype(int)

# Merging dataframe with coordinates dataframe for each city

In [16]:
Asr_df=pd.merge(Asr_df,Asr_coordinates,on='Pincode')
Asr_df.head()

Unnamed: 0,Pincode,Districtname,Neighborhood,latitude,longitude
0,143001,AMRITSAR,"Amritsar -I,Amritsar -I,Amritsar -I,Amritsar -...",31.5729,75.0058
1,143002,AMRITSAR,"Amritsar -I,Amritsar -I",31.6029,74.8732
2,143005,AMRITSAR,Amritsar -I,31.6029,74.8732
3,143006,AMRITSAR,"Amritsar -I,Amritsar -I,Amritsar -I,Amritsar -...",31.6035,74.909
4,143008,AMRITSAR,"Gumtala Sub Urban,Kherabad,Naushehra,Nangli,Pa...",31.665,74.870817


In [17]:
Del_df=pd.merge(Del_df,Del_coordinates,on='Pincode')
Del_df.head()

Unnamed: 0,Pincode,Districtname,Neighborhood,latitude,longitude
0,110003,SOUTH EAST DELHI,"Aliganj,Kasturba Nagar",28.6431,77.22527
1,110014,SOUTH EAST DELHI,Jeewan Nagar,28.6109,77.1792
2,110020,SOUTH EAST DELHI,Tehkhand,28.5345,77.2779
3,110025,SOUTH EAST DELHI,Zakir Nagar SO,28.5672,77.2725
4,110036,NORTH DELHI,Mungeshpur,28.7986,77.16158


# Get coordinates of Amritsar and plot data on map

In [18]:
address = 'Amritsar'
geolocator = Nominatim(user_agent="my")
location = geolocator.geocode(address,timeout=30)
latitude = location.latitude
longitude = location.longitude
print('Coordinates of Amritsar are {}, {}.'.format(latitude, longitude))

Coordinates of Amritsar are 31.6343083, 74.8736788.


In [19]:
Asr_map=folium.Map(location=[latitude,longitude])
for bor,neigh,lat,long in zip(Asr_df['Districtname'],Asr_df['Neighborhood'],Asr_df['latitude'],Asr_df['longitude']):
    label='{},{}'.format(neigh,bor)
    label=folium.Popup(label,parse_html=True)
    folium.CircleMarker([lat,long],
                       radius=5,
                       popup=label,
                       color='blue',
                       fill=True,
                       fill_color='#3572as',
                       fill_opacity=0.8).add_to(Asr_map)
Asr_map

# Get coordinates of Delhi and plot data on map

In [20]:
address = 'Delhi'
geolocator = Nominatim(user_agent="my")
location = geolocator.geocode(address,timeout=30)
latitude = location.latitude
longitude = location.longitude
print('Coordinates of Delhi are {}, {}.'.format(latitude, longitude))

Coordinates of Delhi are 28.6517178, 77.2219388.


In [21]:
Del_map=folium.Map(location=[latitude,longitude])
for bor,neigh,lat,long in zip(Del_df['Districtname'],Del_df['Neighborhood'],Del_df['latitude'],Del_df['longitude']):
    label='{},{}'.format(neigh,bor)
    label=folium.Popup(label,parse_html=True)
    folium.CircleMarker([lat,long],
                       radius=5,
                       popup=label,
                       color='red',
                       fill=True,
                       fill_color='#3222as',
                       fill_opacity=0.8).add_to(Del_map)
Del_map

# Foursquare api credentials

In [22]:
CLIENT_ID='K5KEGKFN5IT23AQKZTNVFJUI3VGIT3PQLAR3WBCB5Y1YBFDQ'
CLIENT_SECRET='UWKQH1X1GT1L0XFCBV3502IFF0FRFE1BVCPSERAVMFRENWKB'
VERSION = '20200328' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: K5KEGKFN5IT23AQKZTNVFJUI3VGIT3PQLAR3WBCB5Y1YBFDQ
CLIENT_SECRET:UWKQH1X1GT1L0XFCBV3502IFF0FRFE1BVCPSERAVMFRENWKB


# Function to get category of the venue

In [23]:
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

# Function to retreive nearby venues

In [24]:
LIMIT=500
def getNearbyVenues(names, latitudes, longitudes, radius=2000):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

# get venues in the neighborhood of amritsar within 2000 metres

In [25]:
Asr_venues = getNearbyVenues(names=Asr_df['Neighborhood'],
                                   latitudes=Asr_df['latitude'],
                                   longitudes=Asr_df['longitude']
                                  )

Amritsar -I,Amritsar -I,Amritsar -I,Amritsar -I,Amritsar -I,Amritsar -I,Amritsar -I,Amritsar -I,Amritsar -I,Amritsar -I
Amritsar -I,Amritsar -I
Amritsar -I
Amritsar -I,Amritsar -I,Amritsar -I,Amritsar -I,Amritsar -I,Amritsar -I,Amritsar -I,Thanda,Amritsar -I,Amritsar -I,Amritsar -I,Amritsar -I
Gumtala Sub Urban,Kherabad,Naushehra,Nangli,Pandori Waraich,Naushehra,Malawali,Gumtala Sub Urban,Pandori Waraich,Amritsar -I,Amritsar -I,District Jail,Nangli,Amritsar- II
Angarh,Fatahpur,Iban Kalan,Thande,Budh Singh Wala,Iban Khurd,Suwahe Wali Havelian,Bohru,Mule Chak,Bohru,Sangna,Amritsar -I,Bharariwal,Kirtangarh Thande,Sangna,Bohad,Dhapai,Ibban Kalan,Ibban Kalan,Mandiala,Narianpur,Thathgarh
Amritsar- II,Chabba,Chatiwind,Warpal,Gohalwar Warpal,Chabba,Daburji,Gilwali,Mehma,Wanchari,Mehma,Warpal,Chatiwind,Bala Chak,Gilwali,Warpal
Adliwala,Bagga,Heir,Jagdev Kalan,Jhanjoti,Kotli Muglan,Adliwala,Dhariwal,Dhaul Kalan,Malu Nangal,Bal,Heir,Jagdev Kalan,Jhanjoti,Malu Nangal,Miran Kot Khurd,Bagga,Bua Nang

Dalla Rajputan
Dhariwal
Amritsar- II,Mudhal,Jahangir,Khan Kot,Nizampura,Makhanwindi,Mudhal,Nizampura,Othian,Vallah,Vallah,Fatehgarh Shukarchak,Fatehgarh Shukarchak,Khankot Garden Colony,Mehoka,Sohian Khurd,Sohian Khurd,Makhan Windi,Qila Jiwan Singh,Jahangir
Abdal,Jhande,Maan,Pakhar Pura,Ajaibwali,Bhoya Fatehgarh,Kotla Saidan,Pakhar Pura,Kotla Ahangaran,Ludhar,Mahanian Koharan,Talwandi Dasaundha Singh,Waryam Nangal,Abdal,Jaintipura,Jethuwal,Mago Soe,Rangilpura,Talwandi Ghuman,Alkare,Chogawan Roopowali,Dudhala,Gopalpura,Jaintipura,Kaler Mangat,Mehnian Brahmana,Waryam Nangal,Ajaibwali,Chachowali,Dadian,Kathu Nangal,Mago Soe,Chogawan Roopowali,Jagatpur Bajaj,Jethuwal,Ludhar,Talwandi Dasaundha Singh
Hadaitpur,Kotli Aulakh,Lehrka,Darike,Kadrabad,Kadrabad,Kotli Mallian,Kotli Mallian,Mukandpura,Ram Diwali Hinduan,Bhullar Hans,Dhadde,Kotli Dhole Shah,Sarhala,Kadrabad Khurd,Mukandpura,Ram Dewali Urf Brahmanan,Shahzada,Chawinda Devi,Kairon Nangal,Kaler Bala Pa,Khizarpura,Kotli Dhole Shah,Kuralian

# How many venues are returned by the api

In [26]:
Asr_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Adliwala,Bagga,Heir,Jagdev Kalan,Jhanjoti,Kotli Muglan,Adliwala,Dhariwal,Dhaul Kalan,Malu Nangal,Bal,Heir,Jagdev Kalan,Jhanjoti,Malu Nangal,Miran Kot Khurd,Bagga,Bua Nangali,Dhaul Khurd,Gaunsabad,Harse Chhina,Loharka,Bhitte Wadh,Bhitte Wadh,Chainpur,Kotli Sakka,Lalla Afganan,Loharka Khurd,Raja Sansi,Kanboh,Loharka Kalan,Miran Kot Kalan,Miran Kot Kalan,Salimpura,Sehchandar,Balaggan,Harse Chhina,Nangal Tola,Teli Chak",1,1,1,1,1,1
"Akalgarh Dhupaian,Bhattike,Bolara,Jabbowal,Lola,Bhattike,Chatiwind Lehal,Dehriwala,Kot Khera,Lola,Dehriwala,Gosal Afgana,Jabbowal,Tanel,Berianwala,Dhing Nangal,Malowal,Sangrai,Sarai,Shahpur Khurd,Akalgarh Dhupaian,Chogawan Sadhpur,Jiwan Pandher,Joohanagri,Mian Pandher,Saido Lehal,Tanel,Kot Hayat,Kotla Bathungarh,Raipur Khurd,Bhullar Hans,Boparai,Malowal,Talwandi,Tarsikka,Kot Khera,Mallowal,Sadhpura,Shahpur,Taharpura",1,1,1,1,1,1
Amritsar -I,6,6,6,6,6,6
"Amritsar -I,Amritsar -I",6,6,6,6,6,6
"Amritsar -I,Amritsar -I,Amritsar -I,Amritsar -I,Amritsar -I,Amritsar -I,Amritsar -I,Amritsar -I,Amritsar -I,Amritsar -I",6,6,6,6,6,6
"Amritsar -I,Amritsar -I,Amritsar -I,Amritsar -I,Amritsar -I,Amritsar -I,Amritsar -I,Thanda,Amritsar -I,Amritsar -I,Amritsar -I,Amritsar -I",8,8,8,8,8,8
"Amritsar -I,Dhaul Khurd,Kotli Mian Khan,Kotli Nasir Khan,Wadala Bhittewad,Kathania,Kotla Dal Singh,Mahal Sub Urban,Wadala Bhitewadh,Amritsar -I,Chota Pind,Dhand,Gumanpura,Heir,Dhaul Kalan,Kale Ghanupur,Mahal Sub Urban,Basarke Gillan,Basarke Gillan,Gumanpura,Kale Ghanupur,Kathanian,Ramuwal,Tajuchak,Wadala Bhitewadh,Wadali Guru,Mahal,Rudala,Dhatal,Kasail,Kathania,Mulla Behram,Wadali Guru,Khapar Kheri,Khapar Kheri,Khaper Kheri",6,6,6,6,6,6
"Amritsar- II,Chabba,Chatiwind,Warpal,Gohalwar Warpal,Chabba,Daburji,Gilwali,Mehma,Wanchari,Mehma,Warpal,Chatiwind,Bala Chak,Gilwali,Warpal",4,4,4,4,4,4
"Amritsar- II,Mudhal,Jahangir,Khan Kot,Nizampura,Makhanwindi,Mudhal,Nizampura,Othian,Vallah,Vallah,Fatehgarh Shukarchak,Fatehgarh Shukarchak,Khankot Garden Colony,Mehoka,Sohian Khurd,Sohian Khurd,Makhan Windi,Qila Jiwan Singh,Jahangir",1,1,1,1,1,1
"Athwal,Budha Theh,Dadupura,Jajjiani,Jathu Nangal,Rampura,Thariwal,Bhaini Gillan,Bhangali,Borewal Afgana,Harian,Jagdev Bar,Nangal Pannuwan,Wadala Viram,Bal Kalan,Bal Khurd,Bhoma,Budha Theh,Gosal Afgana,Kotla Pitu,Kotla Sultan Singh,Marari Kalan,Shafipur,Sham Nagar,Amritsar -I,Burj Nauabad,Dialpur,Jagdev Bar,Kotla Majhewala,Marari Khurd,Nag,Tarpai,Umarpura,Begowal,Birbalpura,Gosal Zimidaran,Hamja,Marari Kalan,Bhaini Lidhran,Chande,Kotla Sultan Singh,Nag,Rakhnag,Sham Nagar,Sohian Kalan,Sohian Kalan,Taragarh,Bal Khurd,Begewal,Bhangali,Borewal Kang,Saprai Wind,Tarpai,Wadala Viram,Bal Kalan,Dhing Nangal,Jalalpura,Majitha,Muradpura,Ramana Chak",2,2,2,2,2,2


In [27]:
print('There are {} uniques categories.'.format(len(Asr_venues['Venue Category'].unique())))

There are 32 uniques categories.


# Analyzing each area

In [28]:
Asr_onehot = pd.get_dummies(Asr_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
Asr_onehot['Neighborhood']=Asr_venues['Neighborhood']

# move neighborhood column to the first column
fixed_columns = [Asr_onehot.columns[-1]] + list(Asr_onehot.columns[:-1])
Asr_onehot = Asr_onehot[fixed_columns]

Asr_onehot.shape

(67, 33)

In [29]:
Asr_onehot.head()

Unnamed: 0,Neighborhood,ATM,Accessories Store,Asian Restaurant,Bakery,Breakfast Spot,Business Service,Café,Chinese Restaurant,Clothing Store,...,Punjabi Restaurant,Resort,Restaurant,Sandwich Place,Shopping Mall,Snack Place,Supermarket,Theme Park,Toll Booth,Vegetarian / Vegan Restaurant
0,"Amritsar -I,Amritsar -I,Amritsar -I,Amritsar -...",0,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
1,"Amritsar -I,Amritsar -I,Amritsar -I,Amritsar -...",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
2,"Amritsar -I,Amritsar -I,Amritsar -I,Amritsar -...",0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
3,"Amritsar -I,Amritsar -I,Amritsar -I,Amritsar -...",0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
4,"Amritsar -I,Amritsar -I,Amritsar -I,Amritsar -...",0,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [30]:
Asr_grouped = Asr_onehot.groupby('Neighborhood').mean().reset_index()
Asr_grouped

Unnamed: 0,Neighborhood,ATM,Accessories Store,Asian Restaurant,Bakery,Breakfast Spot,Business Service,Café,Chinese Restaurant,Clothing Store,...,Punjabi Restaurant,Resort,Restaurant,Sandwich Place,Shopping Mall,Snack Place,Supermarket,Theme Park,Toll Booth,Vegetarian / Vegan Restaurant
0,"Adliwala,Bagga,Heir,Jagdev Kalan,Jhanjoti,Kotl...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,"Akalgarh Dhupaian,Bhattike,Bolara,Jabbowal,Lol...",1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Amritsar -I,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.166667,...,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.166667
3,"Amritsar -I,Amritsar -I",0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.166667,...,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.166667
4,"Amritsar -I,Amritsar -I,Amritsar -I,Amritsar -...",0.0,0.0,0.0,0.166667,0.0,0.0,0.333333,0.0,0.0,...,0.166667,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.166667,0.0
5,"Amritsar -I,Amritsar -I,Amritsar -I,Amritsar -...",0.0,0.125,0.0,0.0,0.0,0.0,0.125,0.125,0.0,...,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0
6,"Amritsar -I,Dhaul Khurd,Kotli Mian Khan,Kotli ...",0.166667,0.0,0.166667,0.0,0.0,0.0,0.166667,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,"Amritsar- II,Chabba,Chatiwind,Warpal,Gohalwar ...",0.25,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.25,0.0
8,"Amritsar- II,Mudhal,Jahangir,Khan Kot,Nizampur...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,"Athwal,Budha Theh,Dadupura,Jajjiani,Jathu Nang...",0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


# Function to get most common venue

In [31]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

# Get top 5 venues for neighborhoods of Amritsar and storing it in a dataframe

In [32]:
num_top_venues = 5

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
Asr_neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
Asr_neighborhoods_venues_sorted['Neighborhood'] = Asr_grouped['Neighborhood']
for ind in np.arange(Asr_grouped.shape[0]):
    Asr_neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(Asr_grouped.iloc[ind, :], num_top_venues)

Asr_neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,"Adliwala,Bagga,Heir,Jagdev Kalan,Jhanjoti,Kotl...",Farm,Vegetarian / Vegan Restaurant,Toll Booth,Accessories Store,Asian Restaurant
1,"Akalgarh Dhupaian,Bhattike,Bolara,Jabbowal,Lol...",ATM,Toll Booth,Accessories Store,Asian Restaurant,Bakery
2,Amritsar -I,Vegetarian / Vegan Restaurant,Clothing Store,Jewelry Store,Market,Business Service
3,"Amritsar -I,Amritsar -I",Vegetarian / Vegan Restaurant,Clothing Store,Jewelry Store,Market,Business Service
4,"Amritsar -I,Amritsar -I,Amritsar -I,Amritsar -...",Café,Bakery,Punjabi Restaurant,Sandwich Place,Toll Booth


In [33]:
Asr_neighborhoods_venues_sorted.count()

Neighborhood             21
1st Most Common Venue    21
2nd Most Common Venue    21
3rd Most Common Venue    21
4th Most Common Venue    21
5th Most Common Venue    21
dtype: int64

# Making clusters using k-means

In [34]:
kclusters = 5

Asr_grouped_clustering = Asr_grouped.drop('Neighborhood',1)

# run k-means clustering
kmeans = KMeans(init='k-means++',n_clusters=kclusters,n_init=25).fit(Asr_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([2, 1, 0, 0, 0, 0, 0, 0, 4, 1], dtype=int32)

In [35]:
Asr_neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

Asr_merged = Asr_df

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
Asr_merged = Asr_merged.merge(Asr_neighborhoods_venues_sorted, on='Neighborhood')

Asr_merged.head()

Unnamed: 0,Pincode,Districtname,Neighborhood,latitude,longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,143001,AMRITSAR,"Amritsar -I,Amritsar -I,Amritsar -I,Amritsar -...",31.5729,75.0058,0,Café,Bakery,Punjabi Restaurant,Sandwich Place,Toll Booth
1,143002,AMRITSAR,"Amritsar -I,Amritsar -I",31.6029,74.8732,0,Vegetarian / Vegan Restaurant,Clothing Store,Jewelry Store,Market,Business Service
2,143005,AMRITSAR,Amritsar -I,31.6029,74.8732,0,Vegetarian / Vegan Restaurant,Clothing Store,Jewelry Store,Market,Business Service
3,143407,AMRITSAR,Amritsar -I,31.5515,75.539283,0,Vegetarian / Vegan Restaurant,Clothing Store,Jewelry Store,Market,Business Service
4,143006,AMRITSAR,"Amritsar -I,Amritsar -I,Amritsar -I,Amritsar -...",31.6035,74.909,0,Café,Department Store,Accessories Store,Diner,Shopping Mall


# Visualising clusters formed by k-means

In [36]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(Asr_merged['latitude'], Asr_merged['longitude'], Asr_merged['Neighborhood'], Asr_merged['Cluster Labels']):
    cluster=int(cluster)
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

# Cluster 0
This cluster is popular for Cafe's,vegetarian restraunts,Fast food restraunts, Punjabi restraunts, Hotel's, ATM's, Supermarkets, Theme park, Departmental store, Clothing stores. This cluster will be suitable for foodies, shoppers and due to presence of theme parks this cluster will be good for people who have children.  

In [37]:
Asr_merged.loc[Asr_merged['Cluster Labels'] == 0, Asr_merged.columns[[1] + list(range(5, Asr_merged.shape[1]))]]

Unnamed: 0,Districtname,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,AMRITSAR,0,Café,Bakery,Punjabi Restaurant,Sandwich Place,Toll Booth
1,AMRITSAR,0,Vegetarian / Vegan Restaurant,Clothing Store,Jewelry Store,Market,Business Service
2,AMRITSAR,0,Vegetarian / Vegan Restaurant,Clothing Store,Jewelry Store,Market,Business Service
3,AMRITSAR,0,Vegetarian / Vegan Restaurant,Clothing Store,Jewelry Store,Market,Business Service
4,AMRITSAR,0,Café,Department Store,Accessories Store,Diner,Shopping Mall
5,AMRITSAR,0,Fast Food Restaurant,Hotel,Snack Place,Shopping Mall,Sandwich Place
6,AMRITSAR,0,ATM,Breakfast Spot,Sandwich Place,Toll Booth,Bakery
9,AMRITSAR,0,Indian Restaurant,Preschool,Asian Restaurant,Café,Hotel
10,AMRITSAR,0,ATM,Theme Park,Sandwich Place,Restaurant,Café
15,AMRITSAR,0,Punjabi Restaurant,Dhaba,Restaurant,Vegetarian / Vegan Restaurant,Farm


# Cluster 1
This Cluster is popular with ATM's, Toll booths, Accessories stores, Asian Restraunts and bakeries. This cluster shows that this cluster is popular with tourists and people who like Asian food and bakery products,

In [38]:
Asr_merged.loc[Asr_merged['Cluster Labels'] == 1, Asr_merged.columns[[1] + list(range(5, Asr_merged.shape[1]))]]

Unnamed: 0,Districtname,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
8,AMRITSAR,1,ATM,Toll Booth,Accessories Store,Asian Restaurant,Bakery
11,AMRITSAR,1,ATM,Dhaba,Toll Booth,Accessories Store,Asian Restaurant
12,AMRITSAR,1,ATM,Toll Booth,Accessories Store,Asian Restaurant,Bakery
13,AMRITSAR,1,ATM,Toll Booth,Accessories Store,Asian Restaurant,Bakery
14,AMRITSAR,1,ATM,Toll Booth,Accessories Store,Asian Restaurant,Bakery
16,AMRITSAR,1,ATM,Toll Booth,Accessories Store,Asian Restaurant,Bakery
17,AMRITSAR,1,ATM,Toll Booth,Accessories Store,Asian Restaurant,Bakery
18,AMRITSAR,1,ATM,Supermarket,Hotel,Accessories Store,Asian Restaurant
21,AMRITSAR,1,ATM,Dhaba,Toll Booth,Accessories Store,Asian Restaurant


# Cluster 2
There are very less neighborhoods that belong to this cluster. Farm, Vegetarian/Vegan Restaurant, Toll booth, Accessories store and Asian Restraunt are popular venues in this cluster. Presence of farm indicate this cluster lie on outskirts of the city. 

In [39]:
Asr_merged.loc[Asr_merged['Cluster Labels'] == 2, Asr_merged.columns[[1] + list(range(5, Asr_merged.shape[1]))]]

Unnamed: 0,Districtname,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
7,AMRITSAR,2,Farm,Vegetarian / Vegan Restaurant,Toll Booth,Accessories Store,Asian Restaurant


# Cluster 3
There are very less neighborhoods that belong to this cluster. Clothing stores, Vegetarian/Vegan Restaurant, Toll booth, Accessories store, Asian Restraunt are popular venues in this cluster.

In [40]:
Asr_merged.loc[Asr_merged['Cluster Labels'] == 3, Asr_merged.columns[[1] + list(range(5, Asr_merged.shape[1]))]]

Unnamed: 0,Districtname,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
19,AMRITSAR,3,Clothing Store,Vegetarian / Vegan Restaurant,Toll Booth,Accessories Store,Asian Restaurant


# Cluster 4
There are very less neighborhoods that belong to this cluster. Indian Restaurant, Toll booth, Accessories store, Asian Restraunt and bakery are popular venues in this cluster.

In [41]:
Asr_merged.loc[Asr_merged['Cluster Labels'] == 4, Asr_merged.columns[[1] + list(range(5, Asr_merged.shape[1]))]]

Unnamed: 0,Districtname,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
20,AMRITSAR,4,Indian Restaurant,Toll Booth,Accessories Store,Asian Restaurant,Bakery


# get venues in the neighborhood of Delhi within 2000 metres

In [43]:
Del_venues = getNearbyVenues(names=Del_df['Neighborhood'],
                                   latitudes=Del_df['latitude'],
                                   longitudes=Del_df['longitude']
                                  )

Aliganj,Kasturba Nagar
Jeewan Nagar
Tehkhand
Zakir Nagar SO
Mungeshpur
Daryapur Village,Sanot,Mungeshpur Village,Daryapur
Tikri Khurd village,tikri Khurd
Tikri Khurd village,tikri Khurd
Sanoth Village
Sanoth Village
Sahabad Dairy
Sahabad Dairy
Sahabad Daulat pur village
Sahabad Daulat pur village
Molar Band,Tughlakabad,Jaitpur,Lal Kuan,Pul Prahladpur
Jagdamba Bihar,Mohan nagar,sagarpur,Madanpuri,Sant Maholla,shivpuri,Nangal raya,Gandhi Market,Bharampuri,Veer nagar
Shahpur Jat
Majnu Ka tilla
Majnu Ka tilla
Majnu Ka Tilla area
Majnu Ka Tilla area
New Multan Nagar Village
Deoli
Goyla Khurd
Ali BO,Madanpur Khadar BO
Naharpur,Naharpur village
Prem Nagar, Agar Nagar,Prem Nagar
gokulpuri,gokulpuri


In [44]:
Del_venues

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Aliganj,Kasturba Nagar",28.6431,77.22527,Naturals Ice Cream,28.634455,77.222139,Ice Cream Shop
1,"Aliganj,Kasturba Nagar",28.6431,77.22527,bloomrooms @ New Delhi Railway Station,28.645537,77.217701,Hotel
2,"Aliganj,Kasturba Nagar",28.6431,77.22527,Sagar Ratna,28.635487,77.220650,Indian Restaurant
3,"Aliganj,Kasturba Nagar",28.6431,77.22527,Karim's | करीम | کریم (Karim's),28.649498,77.233691,Indian Restaurant
4,"Aliganj,Kasturba Nagar",28.6431,77.22527,Connaught Place | कनॉट प्लेस (Connaught Place),28.632731,77.220018,Plaza
...,...,...,...,...,...,...,...
400,"gokulpuri,gokulpuri",28.6114,77.29820,"Mother Dairy Crossing, Bus Stand",28.618193,77.284735,Bus Station
401,"gokulpuri,gokulpuri",28.6114,77.29820,chawla's chicken corner,28.618381,77.283051,Restaurant
402,"gokulpuri,gokulpuri",28.6114,77.29820,Reliance Fresh,28.618315,77.283001,Convenience Store
403,"gokulpuri,gokulpuri",28.6114,77.29820,Kashmiri Bakery,28.600891,77.311802,Bakery


# Analyze each area

In [45]:
Del_onehot = pd.get_dummies(Del_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
Del_onehot['Neighborhood'] =Del_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [Del_onehot.columns[-1]] + list(Del_onehot.columns[:-1])
Del_onehot = Del_onehot[fixed_columns]

Del_onehot.head()

Unnamed: 0,Women's Store,ATM,Airport,Airport Food Court,Airport Lounge,Airport Service,Arcade,Art Gallery,Asian Restaurant,Auto Workshop,...,Sporting Goods Shop,Sports Bar,Tea Room,Thai Restaurant,Theme Park,Tibetan Restaurant,Trail,Train Station,Travel Lounge,Vietnamese Restaurant
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [46]:
Del_grouped = Del_onehot.groupby('Neighborhood').mean().reset_index()
Del_grouped

Unnamed: 0,Neighborhood,Women's Store,ATM,Airport,Airport Food Court,Airport Lounge,Airport Service,Arcade,Art Gallery,Asian Restaurant,...,Sporting Goods Shop,Sports Bar,Tea Room,Thai Restaurant,Theme Park,Tibetan Restaurant,Trail,Train Station,Travel Lounge,Vietnamese Restaurant
0,"Ali BO,Madanpur Khadar BO",0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0
1,"Aliganj,Kasturba Nagar",0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.01,...,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.0
2,"Daryapur Village,Sanot,Mungeshpur Village,Dary...",0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Deoli,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0
4,Goyla Khurd,0.0,0.0,0.027778,0.027778,0.138889,0.055556,0.0,0.0,0.0,...,0.0,0.027778,0.0,0.0,0.0,0.0,0.0,0.0,0.027778,0.0
5,"Jagdamba Bihar,Mohan nagar,sagarpur,Madanpuri,...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Jeewan Nagar,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.022727,...,0.0,0.0,0.022727,0.0,0.0,0.0,0.0,0.0,0.0,0.022727
7,Majnu Ka Tilla area,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,...,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.0
8,Majnu Ka tilla,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,...,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.0
9,"Molar Band,Tughlakabad,Jaitpur,Lal Kuan,Pul Pr...",0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0


# Get top 5 most common venues for each neighborhood in delhi and store it in a data frame

In [47]:
num_top_venues = 5

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
Del_neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
Del_neighborhoods_venues_sorted['Neighborhood'] = Del_grouped['Neighborhood']

for ind in np.arange(Asr_grouped.shape[0]):
    Del_neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(Del_grouped.iloc[ind, :], num_top_venues)

Del_neighborhoods_venues_sorted
#Del_neighborhoods_venues_sorted = Del_neighborhoods_venues_sorted[Del_neighborhoods_venues_sorted['1st Most Common Venue'].notna()]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,"Ali BO,Madanpur Khadar BO",Indian Restaurant,Train Station,ATM,Indian Sweet Shop,Duty-free Shop
1,"Aliganj,Kasturba Nagar",Indian Restaurant,Hotel,Café,Bar,Snack Place
2,"Daryapur Village,Sanot,Mungeshpur Village,Dary...",ATM,Vietnamese Restaurant,Food & Drink Shop,Deli / Bodega,Department Store
3,Deoli,Flea Market,Plaza,Mattress Store,Pool,Indian Restaurant
4,Goyla Khurd,Coffee Shop,Airport Lounge,Hotel,Airport Service,Café
5,"Jagdamba Bihar,Mohan nagar,sagarpur,Madanpuri,...",Indian Restaurant,Shoe Store,Sporting Goods Shop,Vietnamese Restaurant,Cricket Ground
6,Jeewan Nagar,Indian Restaurant,Café,Hotel,Chinese Restaurant,Hotel Bar
7,Majnu Ka Tilla area,Restaurant,Hotel,Asian Restaurant,Korean Restaurant,Metro Station
8,Majnu Ka tilla,Restaurant,Hotel,Asian Restaurant,Korean Restaurant,Metro Station
9,"Molar Band,Tughlakabad,Jaitpur,Lal Kuan,Pul Pr...",Train Station,Construction & Landscaping,ATM,Food Service,Liquor Store


# Make cluster using kmeans

In [48]:
kclusters = 5


Del_grouped_clustering=Del_grouped.drop('Neighborhood',1)
# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(Del_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_

array([0, 1, 3, 1, 1, 1, 1, 1, 1, 0, 4, 1, 1, 1, 1, 1, 2, 0, 1, 2, 1, 1],
      dtype=int32)

# Add kmeans labels to the sorted data frame and merge it with the Del_df

In [49]:
Del_neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

Del_merged = Del_df

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
Del_merged = Del_merged.merge(Del_neighborhoods_venues_sorted, on='Neighborhood')

Del_merged.head()

Unnamed: 0,Pincode,Districtname,Neighborhood,latitude,longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,110003,SOUTH EAST DELHI,"Aliganj,Kasturba Nagar",28.6431,77.22527,1,Indian Restaurant,Hotel,Café,Bar,Snack Place
1,110014,SOUTH EAST DELHI,Jeewan Nagar,28.6109,77.1792,1,Indian Restaurant,Café,Hotel,Chinese Restaurant,Hotel Bar
2,110020,SOUTH EAST DELHI,Tehkhand,28.5345,77.2779,1,Café,Pizza Place,Hotel,Soccer Stadium,Restaurant
3,110025,SOUTH EAST DELHI,Zakir Nagar SO,28.5672,77.2725,1,Hotel,Café,Indian Restaurant,Pizza Place,Fast Food Restaurant
4,110036,NORTH DELHI,Mungeshpur,28.7986,77.16158,4,Farm,Resort,Event Service,Vietnamese Restaurant,Construction & Landscaping


# Remove rows from Del_merged for which venue is NaN

In [50]:
Del_merged = Del_merged[Del_merged['1st Most Common Venue'].notna()]

# Visualising cluster formed by kmeans

In [51]:
# create map
Del_map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(Del_merged['latitude'], Del_merged['longitude'], Del_merged['Neighborhood'], Del_merged['Cluster Labels']):
    cluster=int(cluster)
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(Del_map_clusters)
       
Del_map_clusters

# Cluster 0
This cluster doesnot comprises of many neighborhoods. This cluster is popular with Train Station, Construction sites, ATM, Indian Restraunts, Shoe Store, Pizza Palace, Indian sweet shop. This cluster would be interesting to those who depend more on the public commute since these neighborhoods are closer to train stations.

In [52]:
Del_merged.loc[Del_merged['Cluster Labels'] == 0, Del_merged.columns[[1] + list(range(5, Del_merged.shape[1]))]]

Unnamed: 0,Districtname,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
14,SOUTH EAST DELHI,0,Train Station,Construction & Landscaping,ATM,Food Service,Liquor Store
16,SOUTH EAST DELHI,0,ATM,Shoe Store,Pizza Place,Breakfast Spot,Soccer Field
24,SOUTH EAST DELHI,0,Indian Restaurant,Train Station,ATM,Indian Sweet Shop,Duty-free Shop


# Cluster 1
The neighborhood of this cluster is popular with a mix of Cafe's,Indian, Chinese, Vietnamese, Korean, Asian restraunts, Mobile phone shops, Fast food restraunts and many more. This cluster would be interesting to those who depend more on the public commute since these neighborhoods are closer to metro stations. This cluster is also good for foodies as it has a wide variety of restraunts with different cusines like Indian, Vietnamese, Korean, Chinese, Italian, Asian and is also popular for cafe's, coffee shops, food court. These neighborhoods also provide for some recreational places like Multiplex, Lake, Pool, Plaza, Spa. This cluster is also good for people who are interested in sports due to presence of soccer stadium, cricket ground, Sporting goods shop. This cluster is also good for shoppers due to presence of shopping malls, departmental store.

In [53]:
Del_merged.loc[Del_merged['Cluster Labels'] == 1, Del_merged.columns[[1] + list(range(5, Del_merged.shape[1]))]]

Unnamed: 0,Districtname,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,SOUTH EAST DELHI,1,Indian Restaurant,Hotel,Café,Bar,Snack Place
1,SOUTH EAST DELHI,1,Indian Restaurant,Café,Hotel,Chinese Restaurant,Hotel Bar
2,SOUTH EAST DELHI,1,Café,Pizza Place,Hotel,Soccer Stadium,Restaurant
3,SOUTH EAST DELHI,1,Hotel,Café,Indian Restaurant,Pizza Place,Fast Food Restaurant
10,NORTH DELHI,1,Mobile Phone Shop,Metro Station,Food Court,Bank,Plaza
11,NORTH DELHI,1,Mobile Phone Shop,Metro Station,Food Court,Bank,Plaza
12,NORTH WEST DELHI,1,Mobile Phone Shop,Metro Station,Food Court,Bank,Plaza
13,NORTH WEST DELHI,1,Mobile Phone Shop,Metro Station,Food Court,Bank,Plaza
15,WEST DELHI,1,Indian Restaurant,Shoe Store,Sporting Goods Shop,Vietnamese Restaurant,Cricket Ground
17,CENTRAL DELHI,1,Restaurant,Hotel,Asian Restaurant,Korean Restaurant,Metro Station


# Cluster 2
This cluster is popuar with Women's store, ATM's, Electronics, Food and drink shops and Deli / Bodega (restraunt).
This area is also good for foodies who prefer american food.

In [54]:
Del_merged.loc[Del_merged['Cluster Labels'] == 2, Del_merged.columns[[1] + list(range(5, Del_merged.shape[1]))]]

Unnamed: 0,Districtname,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
6,NORTH DELHI,2,Women's Store,ATM,Electronics Store,Food & Drink Shop,Deli / Bodega
7,NORTH DELHI,2,Women's Store,ATM,Electronics Store,Food & Drink Shop,Deli / Bodega
8,NORTH WEST DELHI,2,Women's Store,ATM,Electronics Store,Food & Drink Shop,Deli / Bodega
9,NORTH WEST DELHI,2,Women's Store,ATM,Electronics Store,Food & Drink Shop,Deli / Bodega


# Cluster 3 
There are very less neighborhoods that belong to this cluster. This cluster is popular with ATM's, Vietnamese Restraunts, Food and drink shop, Deli / Bodega and departmental store.

In [55]:
Del_merged.loc[Del_merged['Cluster Labels'] == 3, Del_merged.columns[[1] + list(range(5, Del_merged.shape[1]))]]

Unnamed: 0,Districtname,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
5,NORTH DELHI,3,ATM,Vietnamese Restaurant,Food & Drink Shop,Deli / Bodega,Department Store


# Cluster 4
There are not many neighborhoods that belong to this cluster. The popular areas in this cluster are Farm, Resort, Event Service, Vietnamese Restraunts and construction and landscaping. All popular venues nearby show that these areas lie on the outskirts of Delhi.

In [56]:
Del_merged.loc[Del_merged['Cluster Labels'] == 4, Del_merged.columns[[1] + list(range(5, Del_merged.shape[1]))]]

Unnamed: 0,Districtname,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
4,NORTH DELHI,4,Farm,Resort,Event Service,Vietnamese Restaurant,Construction & Landscaping
