# Capstone

**Opening a New Shopping Mall in Mumbai (India)**

---

**1. Import libraries**

In [1]:
import json
import os

import folium
import geocoder
import matplotlib.cm as cm
import matplotlib.colors as colors
import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup
from geopy.geocoders import Nominatim
from pandas.io.json import json_normalize
from sklearn.cluster import KMeans
# Show every column when a DataFrame is displayed (None removes the column cap).
pd.set_option("display.max_columns", None)

**2. Get data from Wikipedia**

In [2]:
# Download the Wikipedia category page that lists the suburbs of Mumbai.
# A timeout keeps the cell from hanging, and raise_for_status() stops us from
# silently parsing an HTTP error page as if it were the category listing.
WIKI_URL = "https://en.wikipedia.org/wiki/Category:Suburbs_of_Mumbai"
response = requests.get(WIKI_URL, timeout=30)
response.raise_for_status()
data = response.text
soup = BeautifulSoup(data, 'html.parser')

# The suburb names are the <li> entries of the first "mw-category" <div>.
# Fail with an explicit message (instead of an opaque IndexError) if the page
# layout no longer contains that container.
category_div = soup.find("div", class_="mw-category")
if category_div is None:
    raise RuntimeError(
        "No <div class='mw-category'> found on {}; the page layout may have changed.".format(WIKI_URL)
    )
neighborhoodList = [row.text for row in category_div.find_all("li")]

bom_df = pd.DataFrame({"Neighborhood": neighborhoodList})
print("shape = ",bom_df.shape)
bom_df.head()

shape =  (42, 1)


Unnamed: 0,Neighborhood
0,Andheri
1,Anushakti Nagar
2,Baiganwadi
3,Bandra
4,Bhandup


**3. Get coordinates**

In [3]:
def get_latlng(neighborhood, max_attempts=10):
    """Look up the coordinates of a Mumbai neighbourhood via the ArcGIS geocoder.

    The query sent to the geocoder is '<neighborhood>, Mumbai, India'. A lookup
    whose ``latlng`` is ``None`` is retried, but only up to ``max_attempts``
    times so that an unresolvable name or an unreachable service cannot hang
    the notebook forever.

    Parameters
    ----------
    neighborhood : str
        Neighbourhood name to geocode.
    max_attempts : int, optional
        Maximum number of geocoder calls before giving up (default 10).

    Returns
    -------
    The ``latlng`` value reported by the geocoder for the first successful
    lookup (used by the caller as a latitude/longitude pair).

    Raises
    ------
    RuntimeError
        If every attempt yields ``None``.
    """
    query = '{}, Mumbai, India'.format(neighborhood)
    for _ in range(max_attempts):
        lat_lng_coords = geocoder.arcgis(query).latlng
        if lat_lng_coords is not None:
            return lat_lng_coords
    raise RuntimeError(
        "Could not geocode {!r} after {} attempts".format(query, max_attempts)
    )
# Geocode every neighbourhood, in the same order as the rows of bom_df.
coords = [get_latlng(name) for name in bom_df["Neighborhood"]]
df_coords = pd.DataFrame(coords, columns=['Latitude', 'Longitude'])

# Copy both coordinate columns onto bom_df (the two frames share the same
# default row index, so values line up row by row).
for coord_column in ('Latitude', 'Longitude'):
    bom_df[coord_column] = df_coords[coord_column]

print("shape = ",bom_df.shape)
bom_df.head()

shape =  (42, 3)


Unnamed: 0,Neighborhood,Latitude,Longitude
0,Andheri,19.118459,72.841763
1,Anushakti Nagar,19.04283,72.92734
2,Baiganwadi,19.06294,72.92663
3,Bandra,19.05437,72.84017
4,Bhandup,19.14556,72.94856


**4. Coordinates of Mumbai, India**

In [4]:
# Geocode the city itself; these coordinates are used as the centre of the maps below.
address = 'Mumbai, India'
geolocator = Nominatim(user_agent="my-application")
location = geolocator.geocode(address)

# geocode() yields None when nothing matches; fail with a clear message rather
# than an AttributeError on the attribute access below.
if location is None:
    raise RuntimeError("Nominatim returned no result for {!r}".format(address))

latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Mumbai, India {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Mumbai, India 18.9387711, 72.8353355.


**5. Map of Mumbai**

In [5]:
# Base map centred on the city coordinates computed above.
map_bom = folium.Map(location=[latitude, longitude], zoom_start=11)

# Appearance shared by every neighbourhood marker.
marker_style = dict(
    radius=5,
    color='black',
    fill=True,
    fill_color='grey',
    fill_opacity=0.7,
)

# One circle per neighbourhood, with the neighbourhood name as its popup text.
marker_columns = (bom_df[column] for column in ('Latitude', 'Longitude', 'Neighborhood'))
for lat, lng, neighborhood in zip(*marker_columns):
    popup = folium.Popup(str(neighborhood), parse_html=True)
    folium.CircleMarker([lat, lng], popup=popup, **marker_style).add_to(map_bom)

map_bom

**6. Foursquare - top 100 venues within a radius of 1000 meters**

In [6]:
# Foursquare credentials are read from the environment so that secrets never
# live in the notebook. Any credentials that were previously committed in this
# cell should be treated as compromised and rotated.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID')
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET')
if not CLIENT_ID or not CLIENT_SECRET:
    raise RuntimeError(
        "Set the FOURSQUARE_CLIENT_ID and FOURSQUARE_CLIENT_SECRET environment "
        "variables before running this cell."
    )

VERSION = '20180605'
radius = 1000   # search radius around each neighbourhood, in meters
LIMIT = 100     # maximum number of venues requested per neighbourhood
EXPLORE_URL = "https://api.foursquare.com/v2/venues/explore"
venues = []

for lat, lng, neighborhood in zip(bom_df['Latitude'], bom_df['Longitude'], bom_df['Neighborhood']):
    # Let requests build and escape the query string instead of formatting it by hand.
    response = requests.get(
        EXPLORE_URL,
        params={
            'client_id': CLIENT_ID,
            'client_secret': CLIENT_SECRET,
            'v': VERSION,
            'll': '{},{}'.format(lat, lng),
            'radius': radius,
            'limit': LIMIT,
        },
        timeout=30,
    )
    # Surface HTTP failures (bad credentials, quota exceeded, ...) immediately
    # instead of failing later with a KeyError on the JSON payload.
    response.raise_for_status()

    groups = response.json()["response"].get('groups') or []
    results = groups[0]['items'] if groups else []

    for venue in results:
        details = venue['venue']
        categories = details.get('categories') or []
        if not categories:
            # A venue without a category cannot be one-hot encoded later; skip
            # it rather than crash on categories[0].
            continue
        venues.append((
            neighborhood,
            lat,
            lng,
            details['name'],
            details['location']['lat'],
            details['location']['lng'],
            categories[0]['name'],
        ))

In [7]:
# Tabulate the collected venue tuples. Passing the column names at construction
# time (rather than assigning .columns afterwards) also yields a correctly
# labelled, empty frame when no venues were collected, instead of raising a
# length-mismatch error.
VENUE_COLUMNS = ['Neighborhood', 'Latitude', 'Longitude', 'VenueName', 'VenueLatitude', 'VenueLongitude', 'VenueCategory']
venues_df = pd.DataFrame(venues, columns=VENUE_COLUMNS)
print("shape =",venues_df.shape)
venues_df.head()

shape = (1081, 7)


Unnamed: 0,Neighborhood,Latitude,Longitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
0,Andheri,19.118459,72.841763,Merwans Cake shop,19.1193,72.845418,Bakery
1,Andheri,19.118459,72.841763,Radha Krishna Veg Restaurant,19.11513,72.84306,Indian Restaurant
2,Andheri,19.118459,72.841763,Shawarma Factory,19.124591,72.840398,Falafel Restaurant
3,Andheri,19.118459,72.841763,Temptations,19.113767,72.841337,Ice Cream Shop
4,Andheri,19.118459,72.841763,McDonald's,19.119691,72.846102,Fast Food Restaurant


**7. Analyze Each Area**

In [8]:
# One indicator column per venue category (column names are the bare category names).
category_dummies = pd.get_dummies(venues_df[['VenueCategory']], prefix="", prefix_sep="")

# The label column is called 'Neighborhoods' (plural): 'Neighborhood' is itself
# one of the venue categories, so the singular name is already taken.
category_dummies['Neighborhoods'] = venues_df['Neighborhood']

# Move the label column, which was appended last, to the front.
*category_columns, label_column = category_dummies.columns
bom_onehot = category_dummies[[label_column, *category_columns]]

print("shape =",bom_onehot.shape)
bom_onehot.head()

shape = (1081, 149)


Unnamed: 0,Neighborhoods,Accessories Store,Adult Boutique,Antique Shop,Aquarium,Arcade,Art Gallery,Arts & Crafts Store,Arts & Entertainment,Asian Restaurant,Athletics & Sports,BBQ Joint,Bagel Shop,Bakery,Bank,Bar,Beach,Bed & Breakfast,Bike Rental / Bike Share,Bistro,Bookstore,Boutique,Breakfast Spot,Burger Joint,Bus Station,Butcher,Café,Chinese Restaurant,Clothing Store,Cocktail Bar,Coffee Shop,Comedy Club,Comfort Food Restaurant,Concert Hall,Convenience Store,Convention Center,Cosmetics Shop,Cricket Ground,Cupcake Shop,Dance Studio,Deli / Bodega,Department Store,Design Studio,Dessert Shop,Diner,Donut Shop,Electronics Store,Event Space,Falafel Restaurant,Farm,Farmers Market,Fast Food Restaurant,Fish Market,Flea Market,Flower Shop,Food,Food & Drink Shop,Food Court,Food Truck,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop,Gaming Cafe,Garden,Gastropub,General Entertainment,Gift Shop,Gourmet Shop,Grocery Store,Gym,Gym / Fitness Center,Halal Restaurant,Harbor / Marina,Historic Site,Hookah Bar,Hot Dog Joint,Hotel,Hotel Bar,Ice Cream Shop,Indian Restaurant,Indie Movie Theater,Intersection,Italian Restaurant,Japanese Restaurant,Juice Bar,Korean Restaurant,Latin American Restaurant,Light Rail Station,Liquor Store,Lounge,Maharashtrian Restaurant,Market,Men's Store,Metro Station,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Mobile Phone Shop,Motorcycle Shop,Mountain,Movie Theater,Mughlai Restaurant,Multicuisine Indian Restaurant,Multiplex,Music Venue,Neighborhood,Nightclub,Opera House,Paper / Office Supplies Store,Park,Performing Arts Venue,Pet Store,Pharmacy,Pizza Place,Platform,Playground,Plaza,Pub,Recreation Center,Residential Building (Apartment / Condo),Rest Area,Restaurant,Sandwich Place,Scenic Lookout,Sculpture Garden,Seafood Restaurant,Shoe Store,Shop & Service,Shopping Mall,Smoke Shop,Snack Place,South Indian Restaurant,Spa,Sporting Goods Shop,Sports Bar,Sports Club,Stadium,Steakhouse,Supermarket,Tea Room,Thai 
Restaurant,Theater,Trail,Train,Train Station,Vegetarian / Vegan Restaurant,Whisky Bar,Women's Store,Yoga Studio
0,Andheri,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Andheri,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Andheri,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,Andheri,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,Andheri,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


**8. Create a new DataFrame for Shopping Mall data only**

In [9]:
# Mean of each indicator column per neighbourhood, i.e. the share of that
# neighbourhood's venues falling in each category.
bom_grouped = bom_onehot.groupby("Neighborhoods", as_index=False).mean()

# Keep only the neighbourhood label and its share of shopping malls.
bom_mall = bom_grouped.loc[:, ["Neighborhoods", "Shopping Mall"]]
print("shape =",bom_mall.shape)
bom_mall.head()

shape = (41, 2)


Unnamed: 0,Neighborhoods,Shopping Mall
0,Andheri,0.0
1,Anushakti Nagar,0.0
2,Baiganwadi,0.0
3,Bandra,0.0
4,Bhandup,0.0


**9. Cluster Areas - k-means**

In [10]:
kclusters = 3

# Cluster on the shopping-mall share only. `columns=` is used because pandas 2.x
# no longer accepts the axis as a positional argument to drop().
bom_clustering = bom_mall.drop(columns=["Neighborhoods"])

# n_init is pinned explicitly so results do not depend on scikit-learn's
# version-specific default; random_state keeps the run reproducible.
kmeans = KMeans(n_clusters=kclusters, n_init=10, random_state=0).fit(bom_clustering)

# Attach the cluster label to each neighbourhood, bring back its coordinates
# from bom_df, and order the rows by cluster. Built as one chain so that
# re-running the cell never mutates a previously displayed frame.
bom_merged = (
    bom_mall
    .assign(**{"Cluster Labels": kmeans.labels_})
    .rename(columns={"Neighborhoods": "Neighborhood"})
    .join(bom_df.set_index("Neighborhood"), on="Neighborhood")
    .sort_values(["Cluster Labels"])
)
print("shape =",bom_merged.shape)
bom_merged

shape = (41, 5)


Unnamed: 0,Neighborhood,Shopping Mall,Cluster Labels,Latitude,Longitude
0,Andheri,0.0,0,19.118459,72.841763
39,Western Suburbs (Mumbai),0.0,0,19.19701,72.82768
21,Kurla,0.0,0,19.06498,72.88069
22,Mahavir Nagar (Kandivali),0.0,0,19.211982,72.837573
23,Mankhurd,0.0,0,19.04853,72.93222
24,"Matharpacady, Mumbai",0.0,0,19.04492,72.867205
27,Mumbra,0.0,0,19.19054,73.02266
29,Seven Bungalows,0.0,0,19.131342,72.816342
30,Shil Phata,0.0,0,19.14658,73.04005
31,"Sion, Mumbai",0.0,0,19.04359,72.86412


**10. Cluster map**

In [11]:
# Map centred on the city, with one marker per neighbourhood coloured by cluster.
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# One hex colour per cluster, sampled evenly from the rainbow colormap.
colors_array = cm.rainbow(np.linspace(1, 0, kclusters))
rainbow = [colors.rgb2hex(rgba) for rgba in colors_array]

for lat, lon, poi, cluster in zip(bom_merged['Latitude'], bom_merged['Longitude'], bom_merged['Neighborhood'], bom_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' - Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=7,
        popup=label,
        color='black',
        fill=True,
        # cluster - 1 is -1 for cluster 0, which selects the last colour; every
        # cluster label therefore still maps to its own distinct colour.
        fill_color=rainbow[cluster - 1],
        fill_opacity=1).add_to(map_clusters)
map_clusters

**11. Cluster = 1**

In [12]:
# Neighbourhoods that k-means assigned to cluster 1.
bom_merged[bom_merged['Cluster Labels'].eq(1)]

Unnamed: 0,Neighborhood,Shopping Mall,Cluster Labels,Latitude,Longitude
5,Borivali,0.021277,1,19.22936,72.85751
16,Juhu,0.02439,1,19.01492,72.84522
7,Chembur,0.02439,1,19.06218,72.90241
28,Pestom sagar,0.025641,1,19.07064,72.90217
26,Mulund,0.030303,1,19.17183,72.95565
25,Mira Road,0.041667,1,19.280032,72.867932
32,"Sonapur, Bhandup",0.034483,1,19.16394,72.93544


**12. Cluster = 2**

In [13]:
# Neighbourhoods that k-means assigned to cluster 2.
bom_merged[bom_merged['Cluster Labels'].eq(2)]

Unnamed: 0,Neighborhood,Shopping Mall,Cluster Labels,Latitude,Longitude
9,Devipada,0.076923,2,19.22469,72.86605
