### Importing all the required libraries

In [45]:
import pandas as pd
import numpy as np
import json
import requests
import geocoder
from sklearn.cluster import KMeans
import folium
from bs4 import BeautifulSoup
from geopy.geocoders import Nominatim 

### Scraping website to get neighborhoods of pune city

In [46]:
url="https://en.wikipedia.org/wiki/Template:Neighbourhoods_of_Pune"
data=requests.get(url).text
neighbor_data=BeautifulSoup(data,'lxml')
print(neighbor_data.prettify())

<!DOCTYPE html>
<html class="client-nojs" dir="ltr" lang="en">
 <head>
  <meta charset="utf-8"/>
  <title>
   Template:Neighbourhoods of Pune - Wikipedia
  </title>
  <script>
   document.documentElement.className="client-js";RLCONF={"wgBreakFrames":!1,"wgSeparatorTransformTable":["",""],"wgDigitTransformTable":["",""],"wgDefaultDateFormat":"dmy","wgMonthNames":["","January","February","March","April","May","June","July","August","September","October","November","December"],"wgRequestId":"3ed801e2-97dd-43a9-a336-4d36829b72a1","wgCSPNonce":!1,"wgCanonicalNamespace":"Template","wgCanonicalSpecialPageName":!1,"wgNamespaceNumber":10,"wgPageName":"Template:Neighbourhoods_of_Pune","wgTitle":"Neighbourhoods of Pune","wgCurRevisionId":941587668,"wgRevisionId":941587668,"wgArticleId":26322286,"wgIsArticle":!0,"wgIsRedirect":!1,"wgAction":"view","wgUserName":null,"wgUserGroups":["*"],"wgCategories":["Neighbourhoods in Pune"],"wgPageContentLanguage":"en","wgPageContentModel":"wikitext","wgRelevan

#### Adding some more neighborhoods of Pune that were left in the data

In [47]:
column=['Neighborhood']
Neighbour=pd.DataFrame(columns=column)
table=neighbor_data.find('td',class_="navbox-list navbox-odd hlist")
for a in table.find_all('a'):
    if a.text:
       Neighbour=Neighbour.append({'Neighborhood':a.text},ignore_index=True)
Neighbour=Neighbour.append({'Neighborhood':'Dhankawadi'},ignore_index=True)
Neighbour=Neighbour.append({'Neighborhood':'Nigdi'},ignore_index=True)
Neighbour=Neighbour.append({'Neighborhood':'Lavale Village'},ignore_index=True)
Neighbour=Neighbour.append({'Neighborhood':'Mulshi'},ignore_index=True)
Neighbour=Neighbour.append({'Neighborhood':'Akurdi'},ignore_index=True)
Neighbour=Neighbour.append({'Neighborhood':'Fergusson College Road'},ignore_index=True)
Neighbour=Neighbour.append({'Neighborhood':'Model Colony'},ignore_index=True)
Neighbour=Neighbour.append({'Neighborhood':'Shivaji Nagar'},ignore_index=True)    
Neighbour=Neighbour.append({'Neighborhood':'Deccan'},ignore_index=True) 
Neighbour=Neighbour.sort_values('Neighborhood')
Neighbour.reset_index(drop=True)



Unnamed: 0,Neighborhood
0,Akurdi
1,Aundh
2,Balewadi
3,Baner
4,Bavdhan
5,Bhosari
6,Camp
7,Chakan
8,Charholi Budruk
9,Chinchwad


### Getting latitude and longitude of each neighbour via geocoder

In [48]:
def get_latlng(neighborhood):
    # initialize your variable to None
    lat_lng_coords = None
    # loop until you get the coordinates
    while(lat_lng_coords is None):
        g = geocoder.arcgis('{}, Pune, India'.format(neighborhood))
        lat_lng_coords = g.latlng
    return lat_lng_coords
# Call the function to get the coordinates, store in a new list using list comprehension
coords = [ get_latlng(neighborhood) for neighborhood in Neighbour["Neighborhood"].tolist()]


In [49]:
# Create temporary dataframe to populate the coordinates into Latitude and Longitude
df_coords = pd.DataFrame(coords, columns=['Latitude', 'Longitude'])
# Merge the coordinates into the original dataframe
Neighbour['Latitude'] = df_coords['Latitude']
Neighbour['Longitude'] = df_coords['Longitude']
print(Neighbour.shape)
Neighbour

(50, 3)


Unnamed: 0,Neighborhood,Latitude,Longitude
45,Akurdi,18.56522,73.91391
0,Aundh,18.76408,73.69573
1,Balewadi,18.56345,73.81227
2,Baner,18.57602,73.77983
3,Bavdhan,18.5482,73.77316
4,Bhosari,18.50747,73.78236
5,Camp,18.63873,73.83748
6,Chakan,18.52459,73.8788
7,Charholi Budruk,18.73415,73.85856
8,Chinchwad,18.64072,73.90397



### Using FOURSQUARE to get venues at each neighborhood

In [50]:
CLIENT_ID = 'ROWO3FEENYIKSG5PLO04KNTQPOAWGOCSWZ1XSJADC1MOIMHY' # Put Your Client Id
CLIENT_SECRET = 'BELNUT4PTLW5D0B50RTTQG00F2XAMWMFQIHGYOIC03TJNTEV' # Put You Client Secret 
VERSION = '20210707'
LIMIT = 100
print('Your credentails:')
print('CLIENT_ID: Hidden')
print('CLIENT_SECRET: Hidden')

Your credentails:
CLIENT_ID: Hidden
CLIENT_SECRET: Hidden


In [51]:
LIMIT=100
def getNearbyVenues( neighborhoods, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for neighborhood, lat, lng in zip( neighborhoods, latitudes, longitudes):
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        print(results)
        
        # return only relevant information for each nearby venue
        venues_list.append([(
        
            neighborhood, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = [
                  
                  'Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [52]:
neighborhood_venues = getNearbyVenues(
                                             neighborhoods=Neighbour['Neighborhood'],
                                             latitudes=Neighbour['Latitude'],
                                             longitudes=Neighbour['Longitude'])

[{'reasons': {'count': 0, 'items': [{'summary': 'This spot is popular', 'type': 'general', 'reasonName': 'globalInteractionReason'}]}, 'venue': {'id': '5104f72fe4b0b154d1eeb637', 'name': 'Chopsticks Spice Malabar', 'location': {'address': 'Vimannagar', 'crossStreet': 'Datta mandir chowk', 'lat': 18.565244525751083, 'lng': 73.91311556130246, 'labeledLatLngs': [{'label': 'display', 'lat': 18.565244525751083, 'lng': 73.91311556130246}], 'distance': 83, 'cc': 'IN', 'city': 'Pune', 'state': 'Mahārāshtra', 'country': 'India', 'formattedAddress': ['Vimannagar (Datta mandir chowk)', 'Pune', 'Mahārāshtra', 'India']}, 'categories': [{'id': '4bf58dd8d48988d14f941735', 'name': 'Southern / Soul Food Restaurant', 'pluralName': 'Southern / Soul Food Restaurants', 'shortName': 'Southern / Soul', 'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/food/southern_', 'suffix': '.png'}, 'primary': True}], 'photos': {'count': 0, 'groups': []}}, 'referralId': 'e-0-5104f72fe4b0b154d1eeb637-0'}, {'reaso

In [53]:
pd.set_option('display.max_columns', None) #to display all the columns of table
pd.set_option('display.max_rows', None) #to display all the rows of table

neighborhood_venues

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Akurdi,18.56522,73.91391,Chopsticks Spice Malabar,18.565245,73.913116,Southern / Soul Food Restaurant
1,Akurdi,18.56522,73.91391,Irani Cafe,18.563905,73.915019,Café
2,Akurdi,18.56522,73.91391,Perks 'N' Brews,18.567873,73.914766,Coffee Shop
3,Akurdi,18.56522,73.91391,Khalsa Dairy,18.565183,73.914468,Cheese Shop
4,Akurdi,18.56522,73.91391,Picantos Mexican Grill,18.565423,73.910978,Mexican Restaurant
5,Akurdi,18.56522,73.91391,Chopsticks,18.565228,73.913179,Indian Restaurant
6,Akurdi,18.56522,73.91391,Himalaya Momos,18.566085,73.91082,Dumpling Restaurant
7,Akurdi,18.56522,73.91391,PVR Cinemas,18.562506,73.916613,Multiplex
8,Akurdi,18.56522,73.91391,"Incognito - Restaurant, Bar And Cafe",18.562864,73.91641,American Restaurant
9,Akurdi,18.56522,73.91391,Falahaar,18.56539,73.918158,Juice Bar


In [54]:
# displaying the first venue of each neighborhood
grouped=neighborhood_venues.groupby('Neighborhood')
grouped.first()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Akurdi,18.56522,73.91391,Chopsticks Spice Malabar,18.565245,73.913116,Southern / Soul Food Restaurant
Balewadi,18.56345,73.81227,Picantos Mexican Grill,18.560654,73.812447,Mexican Restaurant
Baner,18.57602,73.77983,The Chocolate Room,18.574755,73.77733,Coffee Shop
Bavdhan,18.5482,73.77316,Rastafa,18.549217,73.772406,Italian Restaurant
Bhosari,18.50747,73.78236,Cafe Coffee Day,18.508258,73.782817,Coffee Shop
Camp,18.63873,73.83748,Sanket Restaurant and Bar,18.641249,73.839928,Restaurant
Chakan,18.52459,73.8788,Poona Club,18.522974,73.87914,Indian Restaurant
Dapodi,18.64741,73.80002,Tata Motors Cars Service Centre - Panchjanya A...,18.648558,73.799283,Auto Dealership
Deccan,18.56906,73.88159,Domino's Pizza,18.570222,73.878692,Pizza Place
Dehu,18.51957,73.83574,Panchvati Gaurav,18.517879,73.838623,Indian Restaurant


In [55]:
venues=neighborhood_venues.groupby('Neighborhood').count()
venues

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Akurdi,45,45,45,45,45,45
Balewadi,16,16,16,16,16,16
Baner,3,3,3,3,3,3
Bavdhan,3,3,3,3,3,3
Bhosari,13,13,13,13,13,13
Camp,1,1,1,1,1,1
Chakan,19,19,19,19,19,19
Dapodi,3,3,3,3,3,3
Deccan,5,5,5,5,5,5
Dehu,6,6,6,6,6,6


### Mapping the venues and their neighborhoods using geolocator

In [56]:
address= 'Pune, India'
geolocator= Nominatim(user_agent="Pune-explorer")
location=geolocator.geocode(address)
latitude=location.latitude
longitude= location.longitude
print("The coordinates of the Pune are {},{} .".format(latitude,longitude))

The coordinates of the Pune are 18.521428,73.8544541 .


In [57]:
map_Pune = folium.Map(location=[latitude, longitude], zoom_start=11)
for lat, lng, venue, neighborhood in zip(neighborhood_venues['Venue Latitude'], neighborhood_venues['Venue Longitude'],neighborhood_venues['Venue'],  neighborhood_venues['Neighborhood']):
    label = '{}, {}'.format(neighborhood, venue)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=4,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#87cefa',
        fill_opacity=0.5,
        parse_html=False).add_to(map_Pune)
map_Pune

### Scraping the data of webpage that consists of main colleges in Pune

In [58]:
url="https://www.shiksha.com/colleges/pune-3"
data=requests.get(url).text
college_data=BeautifulSoup(data,'lxml')
print(college_data.prettify())

<!DOCTYPE html>
<html>
 <head>
  <meta charset="utf-8"/>
  <meta content="IE=edge" http-equiv="X-UA-Compatible"/>
  <title id="seoTitle">
   Colleges in Pune - Reviews, Fees, Ranks &amp; Admissions of all Pune colleges at Shiksha
  </title>
  <meta content="Shiksha, education, colleges,universities, institutes,career, career options, career prospects,engineering, mba, medical, mbbs,study abroad, foreign education, college, university, institute,courses, coaching, technical education, higher education,forum, community, education career experts,ask experts, admissions,results, events,scholarships" name="keywords"/>
  <meta content="Find list of all top 1259 colleges in Pune at Shiksha.com. Check Fees, Admission process, Discussion, Reviews and Placements of all Pune colleges listed on Shiksha" id="metaDescription" name="description"/>
  <link href="https://www.shiksha.com/colleges/pune" id="canonicalUrl" rel="canonical"/>
  <link href="https://www.shiksha.com/colleges/pune-2" rel="prev"/

In [59]:
column=['Colleges','Neighborhood']
college=pd.DataFrame(columns=column)

content=college_data.find('div',id='rspnsv-tpl')
collegesname=0
location=0
for table in content.find_all('div',{"class":"elipsysBox"}):
                    
        collegesname=table.h2.text
        
    
        Neighborhood=table.span.text[:-6]
        college=college.append({'Neighborhood':Neighborhood,'Colleges':collegesname},ignore_index=True)
        
college.sort_values('Neighborhood')  
college.dropna(axis=0)
#Adding some more colleges that were left
college=college.append({'Colleges':'Symbiosis International University','Neighborhood':'Lavale Village'},ignore_index=True)
college=college.append({'Colleges':'Bharati Vidyapeeth College of Engineering','Neighborhood':'Dhankawadi'},ignore_index=True)
college=college.append({'Colleges':'Bharati Vidyapeeth Medical College','Neighborhood':'Dhankawadi'},ignore_index=True)
college=college.append({'Colleges':'Yashwantrao Mohite College','Neighborhood':'Kothrud'},ignore_index=True)
college=college.append({'Colleges':'Symbiosis Institute of International Business (SIIB)','Neighborhood':'Hinjewadi'},ignore_index=True)
college=college.append({'Colleges':'Symbiosis Law School (SLS), Pune','Neighborhood':'Viman Nagar'},ignore_index=True)
college=college.append({'Colleges':'SICSR','Neighborhood':'Model Colony'},ignore_index=True)
college

Unnamed: 0,Colleges,Neighborhood
0,Pune Business School,Nigdi
1,RIIM - Arihant Group of Institutes,Bavdhan
2,"MIT SOM College, MIT WPU",Kothrud
3,"Christ - lavasa, Pune Campus",Mulshi
4,Jayawantrao Sawant Institute of Management and...,Hadapsar
5,Dr. D.Y. Patil Vidyapeeth Global Business Scho...,Tathawade
6,Frankfinn Institute of Air Hostess Training,Kothrud
7,Government Polytechnic Pune,Shivaji Nagar
8,Indian School of Technology and Management,Chinchwad
9,VIT Pune - Vishwakarma Institute of Technology,Bibwewadi


#### Merging two dataframes; one of college list and other one is of neighborhood and their latitudes and longitudes

In [60]:
df=pd.merge(college,Neighbour,how='inner',on='Neighborhood')
df=df.sort_values('Neighborhood')
df.set_index('Neighborhood')

Unnamed: 0_level_0,Colleges,Latitude,Longitude
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Akurdi,Dr. D.Y. Patil College of Pharmacy,18.56522,73.91391
Akurdi,Dr. D. Y. Patil College of Agriculture Busines...,18.56522,73.91391
Balewadi,MITCON Institute of Management,18.56345,73.81227
Bavdhan,RIIM - Arihant Group of Institutes,18.5482,73.77316
Chinchwad,ASM's Institute of Business Management and Res...,18.64072,73.90397
Chinchwad,Indian School of Technology and Management,18.64072,73.90397
Deccan,NMIMS Global Access School for Continuing Educ...,18.56906,73.88159
Dhankawadi,Bharati Vidyapeeth Medical College,18.53723,73.83808
Dhankawadi,Bharati Vidyapeeth College of Engineering,18.53723,73.83808
Fergusson College Road,"NIEM- The Institute of Event Management, Pune",18.604,73.75038


In [61]:
df[["Neighborhood","Latitude","Longitude","Colleges"]] # To get columns in definite order

Unnamed: 0,Neighborhood,Latitude,Longitude,Colleges
17,Akurdi,18.56522,73.91391,Dr. D.Y. Patil College of Pharmacy
16,Akurdi,18.56522,73.91391,Dr. D. Y. Patil College of Agriculture Busines...
20,Balewadi,18.56345,73.81227,MITCON Institute of Management
3,Bavdhan,18.5482,73.77316,RIIM - Arihant Group of Institutes
13,Chinchwad,18.64072,73.90397,ASM's Institute of Business Management and Res...
12,Chinchwad,18.64072,73.90397,Indian School of Technology and Management
18,Deccan,18.56906,73.88159,NMIMS Global Access School for Continuing Educ...
22,Dhankawadi,18.53723,73.83808,Bharati Vidyapeeth Medical College
21,Dhankawadi,18.53723,73.83808,Bharati Vidyapeeth College of Engineering
19,Fergusson College Road,18.604,73.75038,"NIEM- The Institute of Event Management, Pune"


In [62]:
dataframe=df.groupby('Neighborhood').count()
dataframe

Unnamed: 0_level_0,Colleges,Latitude,Longitude
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Akurdi,2,2,2
Balewadi,1,1,1
Bavdhan,1,1,1
Chinchwad,2,2,2
Deccan,1,1,1
Dhankawadi,2,2,2
Fergusson College Road,1,1,1
Hadapsar,2,2,2
Kothrud,4,4,4
Lavale Village,2,2,2


### Mapping the college dataframe along with their neighborhoods 

In [63]:
map_PuneColleges = folium.Map(location=[latitude, longitude], zoom_start=11)
for lat, lng, college, neighborhood in zip(df['Latitude'], df['Longitude'],df['Colleges'], df['Neighborhood']):
    label = '{}, {}'.format(neighborhood, college)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=4,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#87cefa',
        fill_opacity=0.5,
        parse_html=False).add_to(map_PuneColleges)
map_PuneColleges

# Methodology

##### In this project, we have to find a suitable location to open a PG or hostel for students

##### Thus, in the first step, we have collected data of neighborhoods in Pune through web scraping followed by finding their latitudes and longitudes via geocoder . Then, we have used Foursquare to find out the venues at each neighborhood and  finally, we have scraped data of colleges in Pune and mapped all these datas .

##### In the next step, we will perform data analysis in order to get information crystal clear about where to open a hostel and will cluster the data through k-means clustering.


## Analysis

##### Let's first analyze each neighborhood and find out more about it each one.



In [92]:
# one hot encoding
neighborhood_onehot = pd.get_dummies(neighborhood_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
neighborhood_onehot['Neighborhood'] = neighborhood_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [neighborhood_onehot.columns[-1]] + list(neighborhood_onehot.columns[:-1])
neighborhood_onehot = neighborhood_onehot[fixed_columns]
pd.set_option('display.max_columns', None) #to display all the columns of table
pd.set_option('display.max_rows', None) #to display all the rows of table

neighborhood_onehot

Unnamed: 0,Neighborhood,ATM,American Restaurant,Asian Restaurant,Auto Dealership,BBQ Joint,Bakery,Bed & Breakfast,Beer Garden,Bistro,Boarding House,Bookstore,Breakfast Spot,Burger Joint,Bus Station,Café,Campground,Cheese Shop,Chinese Restaurant,Clothing Store,Coffee Shop,Convenience Store,Cosmetics Shop,Creperie,Dance Studio,Deli / Bodega,Dessert Shop,Dim Sum Restaurant,Diner,Donut Shop,Dumpling Restaurant,Farmers Market,Fast Food Restaurant,Flea Market,Food Court,Food Truck,Fruit & Vegetable Store,Garden,Gourmet Shop,Grocery Store,Gym,Gym / Fitness Center,Historic Site,Home Service,Hotel,Ice Cream Shop,Indian Chinese Restaurant,Indian Restaurant,Indie Movie Theater,Italian Restaurant,Juice Bar,Korean Restaurant,Lake,Lounge,Men's Store,Mexican Restaurant,Mobile Phone Shop,Motel,Mountain,Movie Theater,Multiplex,Office,Organic Grocery,Park,Pharmacy,Pizza Place,Plaza,Pub,Restaurant,River,Sandwich Place,Seafood Restaurant,Shop & Service,Shopping Mall,Smoke Shop,Snack Place,South Indian Restaurant,Southern / Soul Food Restaurant,Sporting Goods Shop,Tea Room,Tex-Mex Restaurant,Thai Restaurant,Theme Park,Tourist Information Center,Vegetarian / Vegan Restaurant
0,Akurdi,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0
1,Akurdi,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Akurdi,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,Akurdi,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,Akurdi,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
5,Akurdi,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
6,Akurdi,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
7,Akurdi,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
8,Akurdi,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
9,Akurdi,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


##### Next, let's group rows by neighborhood and by taking the mean of the frequency of occurrence of each category



In [65]:
venue_mean=neighborhood_onehot.groupby("Neighborhood").mean().reset_index()
venue_mean

Unnamed: 0,Neighborhood,ATM,American Restaurant,Asian Restaurant,Auto Dealership,BBQ Joint,Bakery,Bed & Breakfast,Beer Garden,Bistro,Boarding House,Bookstore,Breakfast Spot,Burger Joint,Bus Station,Café,Campground,Cheese Shop,Chinese Restaurant,Clothing Store,Coffee Shop,Convenience Store,Cosmetics Shop,Creperie,Dance Studio,Deli / Bodega,Dessert Shop,Dim Sum Restaurant,Diner,Donut Shop,Dumpling Restaurant,Farmers Market,Fast Food Restaurant,Flea Market,Food Court,Food Truck,Fruit & Vegetable Store,Garden,Gourmet Shop,Grocery Store,Gym,Gym / Fitness Center,Historic Site,Home Service,Hotel,Ice Cream Shop,Indian Chinese Restaurant,Indian Restaurant,Indie Movie Theater,Italian Restaurant,Juice Bar,Korean Restaurant,Lake,Lounge,Men's Store,Mexican Restaurant,Mobile Phone Shop,Motel,Mountain,Movie Theater,Multiplex,Office,Organic Grocery,Park,Pharmacy,Pizza Place,Plaza,Pub,Restaurant,River,Sandwich Place,Seafood Restaurant,Shop & Service,Shopping Mall,Smoke Shop,Snack Place,South Indian Restaurant,Southern / Soul Food Restaurant,Sporting Goods Shop,Tea Room,Tex-Mex Restaurant,Thai Restaurant,Theme Park,Tourist Information Center,Vegetarian / Vegan Restaurant
0,Akurdi,0.0,0.022222,0.044444,0.0,0.0,0.0,0.0,0.0,0.022222,0.0,0.0,0.0,0.0,0.0,0.133333,0.0,0.022222,0.022222,0.044444,0.111111,0.0,0.0,0.0,0.0,0.022222,0.022222,0.0,0.0,0.022222,0.022222,0.0,0.0,0.0,0.0,0.0,0.022222,0.0,0.022222,0.0,0.0,0.0,0.0,0.0,0.0,0.022222,0.0,0.155556,0.0,0.0,0.022222,0.0,0.0,0.022222,0.0,0.022222,0.0,0.0,0.0,0.0,0.022222,0.0,0.0,0.0,0.0,0.022222,0.0,0.022222,0.0,0.0,0.0,0.0,0.0,0.022222,0.0,0.0,0.044444,0.022222,0.0,0.0,0.022222,0.0,0.0,0.0,0.022222
1,Balewadi,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.1875,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0625,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0
2,Baner,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Bavdhan,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Bhosari,0.0,0.0,0.076923,0.0,0.0,0.0,0.076923,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.153846,0.0,0.0,0.0,0.0,0.076923,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.076923,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.384615,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.076923,0.0,0.0,0.0,0.0,0.0,0.076923,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,Camp,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Chakan,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.105263,0.0,0.0,0.0,0.052632,0.0,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.105263,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.052632,0.0,0.0,0.210526,0.0,0.0,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.105263,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.052632,0.052632,0.0,0.052632,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.052632
7,Dapodi,0.0,0.0,0.0,0.333333,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,Deccan,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2
9,Dehu,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [66]:
venue_mean.shape# size of dataframe

(36, 85)

##### Printing each neighborhood with top 5  most common venues 

In [67]:
num_top_venues = 5

for hood in venue_mean['Neighborhood']:
    print("----"+hood+"----")
    temp = venue_mean[venue_mean['Neighborhood'] == hood].T.reset_index() #T function is to transpose
    temp.columns = ['venue','freq']
    temp = temp.iloc[2:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')
    

----Akurdi----
                     venue  freq
0        Indian Restaurant  0.16
1                     Café  0.13
2              Coffee Shop  0.11
3  South Indian Restaurant  0.04
4           Clothing Store  0.04


----Balewadi----
                  venue  freq
0     Indian Restaurant  0.19
1   Sporting Goods Shop  0.12
2  Fast Food Restaurant  0.12
3           Bus Station  0.06
4     Mobile Phone Shop  0.06


----Baner----
                 venue  freq
0          Coffee Shop  0.33
1                  Gym  0.33
2       Breakfast Spot  0.33
3  American Restaurant  0.00
4   Mexican Restaurant  0.00


----Bavdhan----
                 venue  freq
0    Indian Restaurant  0.33
1   Italian Restaurant  0.33
2             Mountain  0.33
3  American Restaurant  0.00
4   Mexican Restaurant  0.00


----Bhosari----
               venue  freq
0  Indian Restaurant  0.38
1               Café  0.15
2         Restaurant  0.08
3        Coffee Shop  0.08
4         Food Court  0.08


----Camp----
           

##### Now let's create a new dataframe and display the top 10 venues for each neighborhood to find out more about the social characteristics.



In [68]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[2:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [69]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = venue_mean['Neighborhood']

for ind in np.arange(venue_mean.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(venue_mean.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Akurdi,Indian Restaurant,Café,Coffee Shop,South Indian Restaurant,Clothing Store,Asian Restaurant,American Restaurant,Donut Shop,Dumpling Restaurant,Fruit & Vegetable Store
1,Balewadi,Indian Restaurant,Sporting Goods Shop,Fast Food Restaurant,Bus Station,Mobile Phone Shop,Ice Cream Shop,Plaza,Grocery Store,Korean Restaurant,Snack Place
2,Baner,Coffee Shop,Gym,Breakfast Spot,American Restaurant,Mexican Restaurant,Organic Grocery,Office,Multiplex,Movie Theater,Mountain
3,Bavdhan,Indian Restaurant,Italian Restaurant,Mountain,American Restaurant,Mexican Restaurant,Organic Grocery,Office,Multiplex,Movie Theater,Motel
4,Bhosari,Indian Restaurant,Café,Restaurant,Coffee Shop,Food Court,Smoke Shop,Asian Restaurant,Bed & Breakfast,Juice Bar,Motel


#### Now, we'll merge the dataframe of top 10 common venues of neighborhoods with dataframe of neighborhoods with number of famous colleges in Pune

In [74]:
neighborhoods_venuesColleges_sorted=pd.merge(neighborhoods_venues_sorted,dataframe,on="Neighborhood")
Analysed_data=neighborhoods_venuesColleges_sorted.drop(["Latitude","Longitude"],axis=1)
Analysed_data

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Colleges
0,Akurdi,Indian Restaurant,Café,Coffee Shop,South Indian Restaurant,Clothing Store,Asian Restaurant,American Restaurant,Donut Shop,Dumpling Restaurant,Fruit & Vegetable Store,2
1,Balewadi,Indian Restaurant,Sporting Goods Shop,Fast Food Restaurant,Bus Station,Mobile Phone Shop,Ice Cream Shop,Plaza,Grocery Store,Korean Restaurant,Snack Place,1
2,Bavdhan,Indian Restaurant,Italian Restaurant,Mountain,American Restaurant,Mexican Restaurant,Organic Grocery,Office,Multiplex,Movie Theater,Motel,1
3,Deccan,Vegetarian / Vegan Restaurant,Shopping Mall,Fast Food Restaurant,Chinese Restaurant,Pizza Place,Tex-Mex Restaurant,Organic Grocery,Multiplex,Movie Theater,Mountain,1
4,Dhankawadi,Multiplex,Indian Restaurant,Lounge,Coffee Shop,Clothing Store,Chinese Restaurant,Bookstore,Pharmacy,Italian Restaurant,Asian Restaurant,2
5,Fergusson College Road,Café,Home Service,Indian Chinese Restaurant,Indian Restaurant,Mobile Phone Shop,Organic Grocery,Office,Multiplex,Movie Theater,Mountain,1
6,Kothrud,Breakfast Spot,Vegetarian / Vegan Restaurant,Indian Restaurant,Pizza Place,Italian Restaurant,Ice Cream Shop,Restaurant,Lounge,Convenience Store,Mexican Restaurant,4
7,Lavale Village,Indian Restaurant,Bakery,Gym,Office,American Restaurant,Mexican Restaurant,Organic Grocery,Multiplex,Movie Theater,Mountain,2
8,Model Colony,Pizza Place,Indian Restaurant,Chinese Restaurant,Fast Food Restaurant,Dance Studio,Motel,Park,Organic Grocery,Office,Multiplex,1
9,Mulshi,Vegetarian / Vegan Restaurant,Restaurant,Bed & Breakfast,Mobile Phone Shop,Mexican Restaurant,Organic Grocery,Office,Multiplex,Movie Theater,Mountain,1


# K-Means Clustering

##### Lets merge the dataframes of mean values of venues at each neighborhood with the dataframe of  counts of colleges in each neighborhood

In [71]:
data = pd.merge(venue_mean,dataframe, on="Neighborhood")
data.head(10)
#venue_mean = venue_mean.merge(dataframe, left_on='Neighborhood', right_on='Neighborhood').drop(columns=['Neighborhood']).fillna(0)
#venue_mean


Unnamed: 0,Neighborhood,ATM,American Restaurant,Asian Restaurant,Auto Dealership,BBQ Joint,Bakery,Bed & Breakfast,Beer Garden,Bistro,Boarding House,Bookstore,Breakfast Spot,Burger Joint,Bus Station,Café,Campground,Cheese Shop,Chinese Restaurant,Clothing Store,Coffee Shop,Convenience Store,Cosmetics Shop,Creperie,Dance Studio,Deli / Bodega,Dessert Shop,Dim Sum Restaurant,Diner,Donut Shop,Dumpling Restaurant,Farmers Market,Fast Food Restaurant,Flea Market,Food Court,Food Truck,Fruit & Vegetable Store,Garden,Gourmet Shop,Grocery Store,Gym,Gym / Fitness Center,Historic Site,Home Service,Hotel,Ice Cream Shop,Indian Chinese Restaurant,Indian Restaurant,Indie Movie Theater,Italian Restaurant,Juice Bar,Korean Restaurant,Lake,Lounge,Men's Store,Mexican Restaurant,Mobile Phone Shop,Motel,Mountain,Movie Theater,Multiplex,Office,Organic Grocery,Park,Pharmacy,Pizza Place,Plaza,Pub,Restaurant,River,Sandwich Place,Seafood Restaurant,Shop & Service,Shopping Mall,Smoke Shop,Snack Place,South Indian Restaurant,Southern / Soul Food Restaurant,Sporting Goods Shop,Tea Room,Tex-Mex Restaurant,Thai Restaurant,Theme Park,Tourist Information Center,Vegetarian / Vegan Restaurant,Colleges,Latitude,Longitude
0,Akurdi,0.0,0.022222,0.044444,0.0,0.0,0.0,0.0,0.0,0.022222,0.0,0.0,0.0,0.0,0.0,0.133333,0.0,0.022222,0.022222,0.044444,0.111111,0.0,0.0,0.0,0.0,0.022222,0.022222,0.0,0.0,0.022222,0.022222,0.0,0.0,0.0,0.0,0.0,0.022222,0.0,0.022222,0.0,0.0,0.0,0.0,0.0,0.0,0.022222,0.0,0.155556,0.0,0.0,0.022222,0.0,0.0,0.022222,0.0,0.022222,0.0,0.0,0.0,0.0,0.022222,0.0,0.0,0.0,0.0,0.022222,0.0,0.022222,0.0,0.0,0.0,0.0,0.0,0.022222,0.0,0.0,0.044444,0.022222,0.0,0.0,0.022222,0.0,0.0,0.0,0.022222,2,2,2
1,Balewadi,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.1875,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0625,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,1,1,1
2,Bavdhan,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,1,1
3,Deccan,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,1,1,1
4,Dhankawadi,0.0,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.066667,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.133333,0.0,0.066667,0.0,0.0,0.0,0.133333,0.0,0.0,0.0,0.0,0.0,0.0,0.266667,0.0,0.0,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2,2,2
5,Fergusson College Road,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.2,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,1,1
6,Kothrud,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.071429,0.0,0.0,0.0,0.0,0.0,0.071429,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.071429,0.0,0.071429,0.0,0.071429,0.0,0.0,0.0,0.071429,0.0,0.071429,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.071429,0.0,0.071429,0.0,0.0,0.071429,0.0,0.0,0.0,0.071429,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.071429,4,4,4
7,Lavale Village,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2,2,2
8,Model Colony,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.285714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.285714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,1,1
9,Mulshi,0.2,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,1,1,1


##### Run k-means to cluster the neighborhood into 5 clusters.



In [72]:
# set number of clusters
kclusters = 5

clustering = data.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([0, 3, 4, 3, 0, 1, 2, 0, 3, 3])

##### Let's create a new dataframe that includes the cluster as well as the top 10 venues and colleges for each neighborhood.



In [88]:
# add clustering labels
Analysed_data['Cluster Labels'] = kmeans.labels_
#cluster=neighborhoods_venuesColleges_sorted.drop(["Latitude","Longitude"],axis=1)
#cluster


Pune_merged = Neighbour.copy()

# merge toronto_grouped with df_toronto_borough to add latitude/longitude for each neighborhood
Pune_merged= Pune_merged.join(Analysed_data.set_index('Neighborhood'), on='Neighborhood').dropna()
Pune_merged['Cluster Labels']=Pune_merged['Cluster Labels'].astype(int)
Pune_merged['Colleges']=Pune_merged['Colleges'].astype(int)
Pune_merged.reset_index(drop=True).head(11) # check the last columns!

Unnamed: 0,Neighborhood,Latitude,Longitude,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Colleges,Cluster Labels
0,Akurdi,18.56522,73.91391,Indian Restaurant,Café,Coffee Shop,South Indian Restaurant,Clothing Store,Asian Restaurant,American Restaurant,Donut Shop,Dumpling Restaurant,Fruit & Vegetable Store,2,0
1,Balewadi,18.56345,73.81227,Indian Restaurant,Sporting Goods Shop,Fast Food Restaurant,Bus Station,Mobile Phone Shop,Ice Cream Shop,Plaza,Grocery Store,Korean Restaurant,Snack Place,1,3
2,Bavdhan,18.5482,73.77316,Indian Restaurant,Italian Restaurant,Mountain,American Restaurant,Mexican Restaurant,Organic Grocery,Office,Multiplex,Movie Theater,Motel,1,4
3,Deccan,18.56906,73.88159,Vegetarian / Vegan Restaurant,Shopping Mall,Fast Food Restaurant,Chinese Restaurant,Pizza Place,Tex-Mex Restaurant,Organic Grocery,Multiplex,Movie Theater,Mountain,1,3
4,Dhankawadi,18.53723,73.83808,Multiplex,Indian Restaurant,Lounge,Coffee Shop,Clothing Store,Chinese Restaurant,Bookstore,Pharmacy,Italian Restaurant,Asian Restaurant,2,0
5,Fergusson College Road,18.604,73.75038,Café,Home Service,Indian Chinese Restaurant,Indian Restaurant,Mobile Phone Shop,Organic Grocery,Office,Multiplex,Movie Theater,Mountain,1,1
6,Kothrud,18.54646,73.90067,Breakfast Spot,Vegetarian / Vegan Restaurant,Indian Restaurant,Pizza Place,Italian Restaurant,Ice Cream Shop,Restaurant,Lounge,Convenience Store,Mexican Restaurant,4,2
7,Lavale Village,18.49903,73.85843,Indian Restaurant,Bakery,Gym,Office,American Restaurant,Mexican Restaurant,Organic Grocery,Multiplex,Movie Theater,Mountain,2,0
8,Model Colony,18.49538,73.90008,Pizza Place,Indian Restaurant,Chinese Restaurant,Fast Food Restaurant,Dance Studio,Motel,Park,Organic Grocery,Office,Multiplex,1,3
9,Mulshi,18.61441,73.77385,Vegetarian / Vegan Restaurant,Restaurant,Bed & Breakfast,Mobile Phone Shop,Mexican Restaurant,Organic Grocery,Office,Multiplex,Movie Theater,Mountain,1,3


##### Now, we'll visualize the resulting cluster

In [87]:
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=12)#Location of Pune city

# set color scheme for the clusters
colors=['red', 'yellow', 'orange', 'green', 'blue']

# add markers to the map
markers_colors = []
for lat, lng, neighborhood,cluster in zip(Pune_merged['Latitude'], Pune_merged['Longitude'],Pune_merged['Neighborhood'],Pune_merged['Cluster Labels']):
    label =  folium.Popup(str(neighborhood) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=4,
        popup=label,
        color=colors[cluster-1],
        fill=True,
        fill_color=colors[cluster-1],
        fill_opacity=0.5,
        parse_html=False).add_to(map_clusters)
map_clusters

##### Let's see how much registers there are in each cluster.



In [79]:
for cluster in np.sort(Pune_merged['Cluster Labels'].unique()):
    print(f'There are {Pune_merged.loc[Pune_merged["Cluster Labels"] == cluster].shape[0]} registers on Cluster {cluster}')

There are 3 registers on Cluster 0
There are 2 registers on Cluster 1
There are 1 registers on Cluster 2
There are 4 registers on Cluster 3
There are 1 registers on Cluster 4


##### Now, let's examine each cluster to find out more about it.



#### Cluster 1

In [81]:
Pune_merged.loc[Pune_merged['Cluster Labels'] == 0, Pune_merged.columns[[0] + list(range(3, Pune_merged.shape[1]))]]


Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Colleges,Cluster Labels
45,Akurdi,Indian Restaurant,Café,Coffee Shop,South Indian Restaurant,Clothing Store,Asian Restaurant,American Restaurant,Donut Shop,Dumpling Restaurant,Fruit & Vegetable Store,2,0
41,Dhankawadi,Multiplex,Indian Restaurant,Lounge,Coffee Shop,Clothing Store,Chinese Restaurant,Bookstore,Pharmacy,Italian Restaurant,Asian Restaurant,2,0
43,Lavale Village,Indian Restaurant,Bakery,Gym,Office,American Restaurant,Mexican Restaurant,Organic Grocery,Multiplex,Movie Theater,Mountain,2,0


#### Cluster 2

In [82]:
Pune_merged.loc[Pune_merged['Cluster Labels'] == 1, Pune_merged.columns[[0] + list(range(3, Pune_merged.shape[1]))]]


Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Colleges,Cluster Labels
46,Fergusson College Road,Café,Home Service,Indian Chinese Restaurant,Indian Restaurant,Mobile Phone Shop,Organic Grocery,Office,Multiplex,Movie Theater,Mountain,1,1
36,Viman Nagar,Gym / Fitness Center,Café,American Restaurant,Mexican Restaurant,Organic Grocery,Office,Multiplex,Movie Theater,Mountain,Motel,1,1


#### Cluster 3


In [83]:
Pune_merged.loc[Pune_merged['Cluster Labels'] == 2, Pune_merged.columns[[0] + list(range(3, Pune_merged.shape[1]))]]


Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Colleges,Cluster Labels
20,Kothrud,Breakfast Spot,Vegetarian / Vegan Restaurant,Indian Restaurant,Pizza Place,Italian Restaurant,Ice Cream Shop,Restaurant,Lounge,Convenience Store,Mexican Restaurant,4,2


#### Cluster 4

In [84]:
Pune_merged.loc[Pune_merged['Cluster Labels'] == 3, Pune_merged.columns[[0] + list(range(3, Pune_merged.shape[1]))]]


Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Colleges,Cluster Labels
1,Balewadi,Indian Restaurant,Sporting Goods Shop,Fast Food Restaurant,Bus Station,Mobile Phone Shop,Ice Cream Shop,Plaza,Grocery Store,Korean Restaurant,Snack Place,1,3
49,Deccan,Vegetarian / Vegan Restaurant,Shopping Mall,Fast Food Restaurant,Chinese Restaurant,Pizza Place,Tex-Mex Restaurant,Organic Grocery,Multiplex,Movie Theater,Mountain,1,3
47,Model Colony,Pizza Place,Indian Restaurant,Chinese Restaurant,Fast Food Restaurant,Dance Studio,Motel,Park,Organic Grocery,Office,Multiplex,1,3
44,Mulshi,Vegetarian / Vegan Restaurant,Restaurant,Bed & Breakfast,Mobile Phone Shop,Mexican Restaurant,Organic Grocery,Office,Multiplex,Movie Theater,Mountain,1,3


#### Cluster 5

In [85]:
Pune_merged.loc[Pune_merged['Cluster Labels'] == 4, Pune_merged.columns[[0] + list(range(3, Pune_merged.shape[1]))]]


Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Colleges,Cluster Labels
3,Bavdhan,Indian Restaurant,Italian Restaurant,Mountain,American Restaurant,Mexican Restaurant,Organic Grocery,Office,Multiplex,Movie Theater,Motel,1,4


# Result and Discussion

#### We found out that although there are many neighborhoods in Pune but we wanted the ones with famous colleges so that the clients can open a hostel or PG(Paying Guest) over there for students who migrate from different cities to Pune for their higher education and need a place to stay.

#### Thus, according to our analysis,there are 11 main neighborhoods where hostels can be opened.
#### We got this data by analysing top venues for each neighborhood in Pune and then filtering only those neighborhoods where there are famous colleges.
#### Then, we clustered the neighborhoods and found out that Akurdi, Dhankawadi, Lavale Village are clustered in one type of neighborhood  ;while Balewadi,Deccan,Model Colony,Mulshi	are clusterd in other type of neighborhood and likewise similar type of neighborhoods were clustered together.

# Conclusion

#### The objectiive of this project was to find the most suitable location in Pune to open a hostel for college students. 
#### Hence, after analysis, we found 11 most suitable neighborhoods to open a hostel at ;and now, its upto client to go the through details provided for each neighborhood and choose the the neighborhood which fits in the requirement.

#### We suggest Kothrud,Akurdi, Dhankawadi, Lavale Village as the most suitable neighborhoods among the top 11.