# Objective

In this project, we are going to select places to locate a new coffee shop or cafe in Central, Bangalore, India

In [1]:
# library for BeautifulSoup
from bs4 import BeautifulSoup

# library to handle data in a vectorized manner
import numpy as np

# library for data analsysis
import pandas as pd
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

# library to handle JSON files
import json
print('numpy, pandas, ..., imported...')

!pip -q install geopy
# conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
print('geopy installed...')
# convert an address into latitude and longitude values
from geopy.geocoders import Nominatim
print('Nominatim imported...')

# library to handle requests
import requests
print('requests imported...')

# tranform JSON file into a pandas dataframe
from pandas.io.json import json_normalize
print('json_normalize imported...')

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors
print('matplotlib imported...')

# import k-means from clustering stage
from sklearn.cluster import KMeans
print('Kmeans imported...')

# install the Geocoder
!pip -q install geocoder
import geocoder

# import time
import time

# !conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
!pip -q install folium
print('folium installed...')
import folium # map rendering library
print('folium imported...')
print('...Done')

numpy, pandas, ..., imported...
geopy installed...
Nominatim imported...
requests imported...
json_normalize imported...
matplotlib imported...
Kmeans imported...
folium installed...
folium imported...
...Done


In [2]:
wikipedia_link = 'https://en.wikipedia.org/wiki/List_of_neighbourhoods_in_Bangalore'
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 7.0; Win64; x64; rv:64.0) Gecko/20100101 Firefox/64.0'}
wikipedia_page = requests.get(wikipedia_link, headers = headers)
wikipedia_page

<Response [200]>

In [4]:
html = requests.get('https://en.wikipedia.org/wiki/List_of_neighbourhoods_in_Bangalore')
bsoup = BeautifulSoup(html.text, 'xml')
table = bsoup.find('table' , {'class':'wikitable sortable'}).tbody


In [5]:
rows = table.find_all('tr')

columns =  [i.text.replace('\n', '')
           for i in rows[0].find_all('th')]

df1 = pd.DataFrame(columns = columns)
df1

Unnamed: 0,Name,Image,Summary


In [6]:
# Extracts every row with corresponding columns
# Then appends the values to the create pd dataframe "df"
# Please note that the first row (row[0]) is skipped because it is already the header
for i in range(1, len(rows)):
    tds = rows[i].find_all('td')
    
    
    if len(tds) == 7:
        values = [tds[0].text, tds[1].text, tds[2].text.replace('\n', ''.replace('\xa0','')), tds[3].text, tds[4].text.replace('\n', ''.replace('\xa0','')), tds[5].text.replace('\n', ''.replace('\xa0','')), tds[6].text.replace('\n', ''.replace('\xa0',''))]
    else:
        values = [td.text.replace('\n', '').replace('\xa0','') for td in tds]
        
        df1 = df1.append(pd.Series(values, index = columns), ignore_index = True)

df1

Unnamed: 0,Name,Image,Summary
0,Cantonment area,,The Cantonment area in Bangalore was used as a...
1,Domlur,,"Formerly part of the Cantonment area, Domlur h..."
2,Indiranagar,,Indiranagar is a sought-after residential and ...
3,Jeevanbheemanagar,,
4,Malleswaram,,
5,Pete area,,Established by Kempe Gowda I at the time of cr...
6,Sadashivanagar,,Sadashivanagar is an upscale neighbourhood in ...
7,Seshadripuram,,Seshadripuram was established in 1892 to reduc...
8,Shivajinagar,,Shivajinagar is one of the older areas of the ...
9,Ulsoor,,Ulsoor (or Halasuru) is one of the oldest area...


In [7]:
df1.drop(['Image'], axis=1,inplace=True)

In [8]:
df1.drop(['Summary'], axis=1,inplace=True)

In [9]:
df1.insert(0, "Area",True)

In [10]:
df1['Area'] = 'Central'

In [11]:
df1

Unnamed: 0,Area,Name
0,Central,Cantonment area
1,Central,Domlur
2,Central,Indiranagar
3,Central,Jeevanbheemanagar
4,Central,Malleswaram
5,Central,Pete area
6,Central,Sadashivanagar
7,Central,Seshadripuram
8,Central,Shivajinagar
9,Central,Ulsoor


In [28]:
#create a new dataframe of the Central Bangalore
bbintang = df1[df1['Area'] == 'Central'].reset_index(drop=True)

In [48]:
import geopy

In [50]:
import geopy
GOOGLE_API_KEY='AIzaSyAQWqMTOcyLBRDR2skO4F_5QEWzNDOlUHw'

#function to get latitude and longitude
def get_latlng(postal_code):
    lat_lng_coords = None
    while(lat_lng_coords is None):
        g = geocoder.google('{}, Bangalore'.format(postal_code), key=GOOGLE_API_KEY)
        lat_lng_coords = g.latlng
    return lat_lng_coords

#put new column of latitude and logitude into dataframe
postal_codes1 = df1['Name']    
coords = [ get_latlng(postal_code) for postal_code in postal_codes1.tolist() ]

df_kl_coords = pd.DataFrame(coords, columns=['Latitude', 'Longitude'])
df1['Latitude'] = df_kl_coords['Latitude']
df1['Longitude'] = df_kl_coords['Longitude']
df1

Unnamed: 0,Area,Name,Latitude,Longitude
0,Central,Cantonment area,12.993771,77.597787
1,Central,Domlur,12.960986,77.638732
2,Central,Indiranagar,12.978369,77.640836
3,Central,Jeevanbheemanagar,12.964163,77.658076
4,Central,Malleswaram,13.005511,77.569236
5,Central,Pete area,12.967078,77.641985
6,Central,Sadashivanagar,13.006818,77.581285
7,Central,Seshadripuram,12.988905,77.574044
8,Central,Shivajinagar,12.98565,77.605693
9,Central,Ulsoor,12.9817,77.628415


In [44]:
from geopy.geocoders import Nominatim
import folium

address = 'Central Bangalore,India'
geolocator = Nominatim()
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude

# create map of New York using latitude and longitude values
map_kl = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for neighborhood,lat,long in zip(df1['Name'], df1['Latitude'],df1['Longitude']):
    label = '{}'.format(neighborhood)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, long],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_kl)  
    
map_kl



In [45]:
#Define Foursquare Credentials and Version
CLIENT_ID = 'AXLFWTQT1T0CD1NP1ISKU2LBKWTK0TGM4HLLXDPNEC3LDAHW' # your Foursquare ID
CLIENT_SECRET = 'J5SV0QWUXBGUQTXMOMKIUPYCCSSL3Q0XH4WNGIOHV012KH2I' # your Foursquare Secret
VERSION = '20191106'

In [53]:
LIMIT = 100
#function to repeat the same process to all area
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Name', 
                  'Area Latitude', 
                  'Area Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [54]:
idr = df1[df1['Name'] == 'Indiranagar'].reset_index(drop=True)
#run the above function on each neighborhood and create a new dataframe
idr_venues = getNearbyVenues(names=idr['Name'],
                                   latitudes=idr['Latitude'],
                                   longitudes=idr['Longitude']
                                  )

#check the size of the resulting dataframe
print(idr_venues.shape)
#check how many venues were returned for each area
print('There are {} uniques categories in Indiranagar.'.format(len(idr_venues['Venue Category'].unique())))
idr_venues.groupby('Venue Category').count()

Indiranagar
(76, 7)
There are 37 uniques categories in Indiranagar.


Unnamed: 0_level_0,Name,Area Latitude,Area Longitude,Venue,Venue Latitude,Venue Longitude
Venue Category,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Andhra Restaurant,3,3,3,3,3,3
Bakery,3,3,3,3,3,3
Bar,3,3,3,3,3,3
Bengali Restaurant,1,1,1,1,1,1
Bike Shop,1,1,1,1,1,1
Boutique,1,1,1,1,1,1
Breakfast Spot,1,1,1,1,1,1
Brewery,1,1,1,1,1,1
Burger Joint,1,1,1,1,1,1
Café,9,9,9,9,9,9


In [55]:
dom = df1[df1['Name'] == 'Domlur'].reset_index(drop=True)
#run the above function on each neighborhood and create a new dataframe
dom_venues = getNearbyVenues(names=dom['Name'],
                                   latitudes=dom['Latitude'],
                                   longitudes=dom['Longitude']
                                  )

#check the size of the resulting dataframe
print(dom_venues.shape)
dom_venues


Domlur
(8, 7)


Unnamed: 0,Name,Area Latitude,Area Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Domlur,12.960986,77.638732,Lavonne,12.963909,77.638579,Café
1,Domlur,12.960986,77.638732,Barbeque Nation,12.962684,77.641599,BBQ Joint
2,Domlur,12.960986,77.638732,Domino's Pizza,12.961,77.639,Pizza Place
3,Domlur,12.960986,77.638732,Mainland China,12.962458,77.641727,Chinese Restaurant
4,Domlur,12.960986,77.638732,Cafe 42,12.963698,77.642039,Café
5,Domlur,12.960986,77.638732,Sree Nandhini Palace,12.96199,77.641813,Indian Restaurant
6,Domlur,12.960986,77.638732,Rajdhani,12.963834,77.641643,Rajasthani Restaurant
7,Domlur,12.960986,77.638732,Subway,12.963812,77.64066,Sandwich Place


In [56]:
print('There are {} uniques categories in Domlur'.format(len(idr_venues['Venue Category'].unique())))
dom_venues.groupby('Venue Category').count()

There are 37 uniques categories in Domlur


Unnamed: 0_level_0,Name,Area Latitude,Area Longitude,Venue,Venue Latitude,Venue Longitude
Venue Category,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
BBQ Joint,1,1,1,1,1,1
Café,2,2,2,2,2,2
Chinese Restaurant,1,1,1,1,1,1
Indian Restaurant,1,1,1,1,1,1
Pizza Place,1,1,1,1,1,1
Rajasthani Restaurant,1,1,1,1,1,1
Sandwich Place,1,1,1,1,1,1


In [183]:
can = df1[df1['Name'] == 'Cantonment area'].reset_index(drop=True)
#run the above function on each neighborhood and create a new dataframe
can_venues = getNearbyVenues(names=can['Name'],
                                   latitudes=can['Latitude'],
                                   longitudes=can['Longitude']
                                  )
#check the size of the resulting dataframe
print(can_venues.shape)
print('There are {} uniques categories in Cantonment Area'.format(len(can_venues['Venue Category'].unique())))
can_venues.groupby('Venue Category').count()


Cantonment area
(11, 7)
There are 9 uniques categories in Cantonment Area


Unnamed: 0_level_0,Name,Area Latitude,Area Longitude,Venue,Venue Latitude,Venue Longitude
Venue Category,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Badminton Court,1,1,1,1,1,1
Concert Hall,1,1,1,1,1,1
Event Space,1,1,1,1,1,1
Indian Restaurant,3,3,3,3,3,3
Music Venue,1,1,1,1,1,1
Pool Hall,1,1,1,1,1,1
Pub,1,1,1,1,1,1
Steakhouse,1,1,1,1,1,1
Tea Room,1,1,1,1,1,1


In [184]:
jee = df1[df1['Name'] == 'Jeevanbheemanagar'].reset_index(drop=True)
#run the above function on each neighborhood and create a new dataframe
jee_venues = getNearbyVenues(names=jee['Name'],
                                   latitudes=jee['Latitude'],
                                   longitudes=jee['Longitude']
                                  )
#check the size of the resulting dataframe
print(jee_venues.shape)
print('There are {} uniques categories in jeevanbheemanagar'.format(len(jee_venues['Venue Category'].unique())))
jee_venues.groupby('Venue Category').count()

Jeevanbheemanagar
(1, 7)
There are 1 uniques categories in jeevanbheemanagar


Unnamed: 0_level_0,Name,Area Latitude,Area Longitude,Venue,Venue Latitude,Venue Longitude
Venue Category,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Bakery,1,1,1,1,1,1


In [58]:
mal = df1[df1['Name'] == 'Malleswaram'].reset_index(drop=True)
#run the above function on each neighborhood and create a new dataframe
mal_venues = getNearbyVenues(names=mal['Name'],
                                   latitudes=mal['Latitude'],
                                   longitudes=mal['Longitude']
                                  )
#check the size of the resulting dataframe
print(mal_venues.shape)
print('There are {} uniques categories in Malleswaram'.format(len(mal_venues['Venue Category'].unique())))
mal_venues.groupby('Venue Category').count()

Malleswaram
(17, 7)
There are 14 uniques categories in Malleswaram


Unnamed: 0_level_0,Name,Area Latitude,Area Longitude,Venue,Venue Latitude,Venue Longitude
Venue Category,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Café,1,1,1,1,1,1
Camera Store,1,1,1,1,1,1
Chinese Restaurant,1,1,1,1,1,1
Coffee Shop,2,2,2,2,2,2
Dessert Shop,1,1,1,1,1,1
Donut Shop,1,1,1,1,1,1
Fast Food Restaurant,1,1,1,1,1,1
Flea Market,1,1,1,1,1,1
Gourmet Shop,1,1,1,1,1,1
Ice Cream Shop,2,2,2,2,2,2


# Analyze  Neighborhoods in Central Bangalore

In [67]:
cen = df1[df1['Area'] == 'Central'].reset_index(drop=True)
#run the above function on each neighborhood and create a new dataframe
cen_venues = getNearbyVenues(names=cen['Name'],
                                   latitudes=cen['Latitude'],
                                   longitudes=cen['Longitude']
                                  )
           

Cantonment area
Domlur
Indiranagar
Jeevanbheemanagar
Malleswaram
Pete area
Sadashivanagar
Seshadripuram
Shivajinagar
Ulsoor
Vasanth Nagar


In [69]:
# one hot encoding
bintang_onehot = pd.get_dummies(cen_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
bintang_onehot['Name'] = cen_venues['Name'] 

# move neighborhood column to the first column
fixed_columns = [bintang_onehot.columns[-1]] + list(bintang_onehot.columns[:-1])
bintang_onehot = bintang_onehot[fixed_columns]

#examine the new dataframe size after one hot encoding
print('{} rows were returned after one hot encoding.'.format(bintang_onehot.shape[0]))

#group rows by neighborhood and by taking the mean of the frequency of occurrence of each category
bintang_grouped = bintang_onehot.groupby('Name').mean().reset_index()

#examine the new dataframe size after one hot encoding
print('{} rows were returned after grouping.'.format(bintang_grouped.shape[0]))

278 rows were returned after one hot encoding.
11 rows were returned after grouping.


In [70]:
#print each neighborhood in Central,Bangalore along with the top 5 most common venues
num_top_venues = 5

for hood in bintang_grouped['Name']:
    print("----"+hood+"----")
    temp = bintang_grouped[bintang_grouped['Name'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Cantonment area----
               venue  freq
0  Indian Restaurant  0.27
1           Tea Room  0.09
2        Event Space  0.09
3        Music Venue  0.09
4       Concert Hall  0.09


----Domlur----
                   venue  freq
0                   Café  0.25
1      Indian Restaurant  0.12
2  Rajasthani Restaurant  0.12
3     Chinese Restaurant  0.12
4         Sandwich Place  0.12


----Indiranagar----
                  venue  freq
0     Indian Restaurant  0.14
1                  Café  0.12
2                   Pub  0.07
3     Andhra Restaurant  0.04
4  Fast Food Restaurant  0.04


----Jeevanbheemanagar----
                       venue  freq
0                     Bakery   1.0
1        American Restaurant   0.0
2  Middle Eastern Restaurant   0.0
3               Outlet Store   0.0
4               Optical Shop   0.0


----Malleswaram----
                           venue  freq
0  Vegetarian / Vegan Restaurant  0.12
1                 Ice Cream Shop  0.12
2                    Coffee Shop

In [72]:
#write a function to sort the venues in descending order
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

#create the new dataframe and display the top 10 venues for each neighborhood
num_top_venues = 8

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Name']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
areas_venues_sorted = pd.DataFrame(columns=columns)
areas_venues_sorted['Name'] = bintang_grouped['Name']

for ind in np.arange(bintang_grouped.shape[0]):
    areas_venues_sorted.iloc[ind, 1:] = return_most_common_venues(bintang_grouped.iloc[ind, :], num_top_venues)

areas_venues_sorted

Unnamed: 0,Name,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue
0,Cantonment area,Indian Restaurant,Concert Hall,Tea Room,Steakhouse,Pool Hall,Badminton Court,Pub,Event Space
1,Domlur,Café,Pizza Place,Sandwich Place,Chinese Restaurant,BBQ Joint,Indian Restaurant,Rajasthani Restaurant,Concert Hall
2,Indiranagar,Indian Restaurant,Café,Pub,Bakery,Dessert Shop,Pizza Place,Bar,Fast Food Restaurant
3,Jeevanbheemanagar,Bakery,Women's Store,Event Space,Concert Hall,Cupcake Shop,Deli / Bodega,Department Store,Dessert Shop
4,Malleswaram,Vegetarian / Vegan Restaurant,Ice Cream Shop,Coffee Shop,Women's Store,Café,Indian Restaurant,Flea Market,Fast Food Restaurant
5,Pete area,Café,Italian Restaurant,Clothing Store,Lounge,Pub,Cocktail Bar,Restaurant,Coffee Shop
6,Sadashivanagar,Indian Restaurant,Coffee Shop,Ice Cream Shop,Department Store,Café,Women's Store,Seafood Restaurant,Gym
7,Seshadripuram,Clothing Store,Ice Cream Shop,Indian Restaurant,Fast Food Restaurant,Arcade,Coffee Shop,Donut Shop,Electronics Store
8,Shivajinagar,Indian Restaurant,Clothing Store,Donut Shop,Tea Room,South Indian Restaurant,Fast Food Restaurant,Market,Coffee Shop
9,Ulsoor,Café,Bakery,Burger Joint,Women's Store,Electronics Store,Cupcake Shop,Deli / Bodega,Department Store


# K Means Clutering of the neighborhoods CENTRAL BANGALORE

In [83]:
from sklearn.cluster import KMeans

# set number of clusters
kclusters = 4

bintang_grouped_clustering = bintang_grouped.drop('Name', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(bintang_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

#create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood.
bintang_merged = bbintang

# add clustering labels
bintang_merged['Cluster Labels'] = kmeans.labels_

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
bintang_merged = bintang_merged.join(areas_venues_sorted.set_index('Name'), on='Name')

bintang_merged

Unnamed: 0,Area,Name,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue
0,Central,Cantonment area,12.993771,77.597787,3,Indian Restaurant,Concert Hall,Tea Room,Steakhouse,Pool Hall,Badminton Court,Pub,Event Space
1,Central,Domlur,12.960986,77.638732,0,Café,Pizza Place,Sandwich Place,Chinese Restaurant,BBQ Joint,Indian Restaurant,Rajasthani Restaurant,Concert Hall
2,Central,Indiranagar,12.978369,77.640836,0,Indian Restaurant,Café,Pub,Bakery,Dessert Shop,Pizza Place,Bar,Fast Food Restaurant
3,Central,Jeevanbheemanagar,12.964163,77.658076,2,Bakery,Women's Store,Event Space,Concert Hall,Cupcake Shop,Deli / Bodega,Department Store,Dessert Shop
4,Central,Malleswaram,13.005511,77.569236,0,Vegetarian / Vegan Restaurant,Ice Cream Shop,Coffee Shop,Women's Store,Café,Indian Restaurant,Flea Market,Fast Food Restaurant
5,Central,Pete area,12.967078,77.641985,0,Café,Italian Restaurant,Clothing Store,Lounge,Pub,Cocktail Bar,Restaurant,Coffee Shop
6,Central,Sadashivanagar,13.006818,77.581285,0,Indian Restaurant,Coffee Shop,Ice Cream Shop,Department Store,Café,Women's Store,Seafood Restaurant,Gym
7,Central,Seshadripuram,12.988905,77.574044,0,Clothing Store,Ice Cream Shop,Indian Restaurant,Fast Food Restaurant,Arcade,Coffee Shop,Donut Shop,Electronics Store
8,Central,Shivajinagar,12.98565,77.605693,3,Indian Restaurant,Clothing Store,Donut Shop,Tea Room,South Indian Restaurant,Fast Food Restaurant,Market,Coffee Shop
9,Central,Ulsoor,12.9817,77.628415,1,Café,Bakery,Burger Joint,Women's Store,Electronics Store,Cupcake Shop,Deli / Bodega,Department Store


In [84]:
# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

#Finally, let's visualize the resulting clusters

bb_clusters = folium.Map(location=[12.9265, 77.6753], zoom_start=13)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(bintang_merged['Latitude'], bintang_merged['Longitude'], bintang_merged['Name'], bintang_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(bb_clusters)
       
bb_clusters

# Results

In [87]:
#Cluster 1
bintang_merged.loc[bintang_merged['Cluster Labels'] == 0, bintang_merged.columns[[1] + list(range(5, bintang_merged.shape[1]))]]

Unnamed: 0,Name,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue
1,Domlur,Café,Pizza Place,Sandwich Place,Chinese Restaurant,BBQ Joint,Indian Restaurant,Rajasthani Restaurant,Concert Hall
2,Indiranagar,Indian Restaurant,Café,Pub,Bakery,Dessert Shop,Pizza Place,Bar,Fast Food Restaurant
4,Malleswaram,Vegetarian / Vegan Restaurant,Ice Cream Shop,Coffee Shop,Women's Store,Café,Indian Restaurant,Flea Market,Fast Food Restaurant
5,Pete area,Café,Italian Restaurant,Clothing Store,Lounge,Pub,Cocktail Bar,Restaurant,Coffee Shop
6,Sadashivanagar,Indian Restaurant,Coffee Shop,Ice Cream Shop,Department Store,Café,Women's Store,Seafood Restaurant,Gym
7,Seshadripuram,Clothing Store,Ice Cream Shop,Indian Restaurant,Fast Food Restaurant,Arcade,Coffee Shop,Donut Shop,Electronics Store
10,Vasanth Nagar,Indian Restaurant,Italian Restaurant,Chinese Restaurant,Coffee Shop,Fast Food Restaurant,Real Estate Office,Vegetarian / Vegan Restaurant,Concert Hall


In [88]:
#Cluster 2 for Central 
bintang_merged.loc[bintang_merged['Cluster Labels'] == 1, bintang_merged.columns[[1] + list(range(5, bintang_merged.shape[1]))]]

Unnamed: 0,Name,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue
9,Ulsoor,Café,Bakery,Burger Joint,Women's Store,Electronics Store,Cupcake Shop,Deli / Bodega,Department Store


In [89]:
#Cluster 3
bintang_merged.loc[bintang_merged['Cluster Labels'] == 2, bintang_merged.columns[[1] + list(range(5, bintang_merged.shape[1]))]]

Unnamed: 0,Name,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue
3,Jeevanbheemanagar,Bakery,Women's Store,Event Space,Concert Hall,Cupcake Shop,Deli / Bodega,Department Store,Dessert Shop


In [90]:
#Cluster 4
bintang_merged.loc[bintang_merged['Cluster Labels'] == 3, bintang_merged.columns[[1] + list(range(5, bintang_merged.shape[1]))]]

Unnamed: 0,Name,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue
0,Cantonment area,Indian Restaurant,Concert Hall,Tea Room,Steakhouse,Pool Hall,Badminton Court,Pub,Event Space
8,Shivajinagar,Indian Restaurant,Clothing Store,Donut Shop,Tea Room,South Indian Restaurant,Fast Food Restaurant,Market,Coffee Shop


# Discussion

To place a coffee shop / cafe in the neighborhoods of Central Bangalore, 4 different clusters are explored.

1st Cluster - Domlur and Pete Area hass Cafe  as the first most commmon venue ,hence there is no use in developing a cafe due to the competitors 
              and also the other restaurants
              Indiranagar, Malleswaram, Sadashivanagar also have cafe
              Seshadripuram and Vasanth nagar has only coffeee shops , so it gives a good opportunity to develop a cafe in these 2 places 
2nd Cluster - Ulsur has a cafe , so can leave off that neighborhood
3rd Cluster - Jeevanbheemanagar doessnt have a coffee shop or a cafe around
4th Cluster - Cantonment area and Shivajinagar has restaurants around , so its better to devvelop a coffee shop here too.

# CONCLUSION

Using Foursquare API, we can captured data of common places around Central Bangalore. 
Using it, we refer back to our main objectives, which is to determine;

* the similarity or dissimilarirty of neighborhoods
* classification of area located inside the city whether to place a coffee shop or not

**In conclusion, places Seshadripuram,Jeevanbheemanagar	and Cantonment Area are suited to place a coffee shop.  