# Week 5 - IBM Applied Data Science Capstone Project
This notebook contains the code for week 5

In [20]:
# Importing libraries
import pandas as pd
import numpy as np
import json
import requests
from geopy.geocoders import Nominatim
from geopy.exc import GeocoderTimedOut
from pandas.io.json import json_normalize
from bs4 import BeautifulSoup
import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans
import folium

## Scrapping the data of Mumbai from the Wikipedia page
There are a total of 135 neighborhoods in Mumbai

In [28]:
# send the GET request
data = requests.get("https://en.wikipedia.org/wiki/Category:Neighbourhoods_in_Mumbai").text

# parse data from the html into a beautifulsoup object
soup = BeautifulSoup(data, 'html.parser')

# create a list to store neighborhood data
neighborhoodList = []

# append the data into the list
for row in soup.find_all("div", class_="mw-category")[0].findAll("li"):
    neighborhoodList.append(row.text)

# create a new DataFrame from the list
mum_df = pd.DataFrame({"Neighborhood": neighborhoodList})

# removing an extra name - "List of neighbourhoods in Mumbai"
mum_df = mum_df[mum_df.Neighborhood != 'List of neighbourhoods in Mumbai']

print(mum_df.shape)
mum_df.head()
with pd.option_context('display.max_rows', None, 'display.max_columns', None):  # more options can be specified also
    print(mum_df)

(135, 1)
                      Neighborhood
1                Aarey Milk Colony
2                         Agripada
3                   Altamount Road
4                   Amboli, Mumbai
5                      Amrut Nagar
6                       Antop Hill
7                  Anushakti Nagar
8                           Asalfa
9                     Badhwar Park
10                      Baiganwadi
11                  Ballard Estate
12                          Bandra
13            Bandra Kurla Complex
14                    Bangur Nagar
15                      Bhuleshwar
16                     Bori Bunder
17                    Breach Candy
18                         Byculla
19                   C.G.S. colony
20                           Cavel
21             Chandanwadi, Mumbai
22                      Chandivali
23                     Chinchpokli
24                    Chira Bazaar
25                     Chor Bazaar
26                      Churchgate
27                          Colaba
28         

In [30]:
# define a function to get coordinates
def get_latlng(neighborhood):
    geolocator = Nominatim()
    city ="Mumbai"
    country ="India"
    loc = geolocator.geocode(neighborhood + ',' + city + ',' + country , timeout=10)
    try:
        if (loc != None):
            lat_lng_coords = [loc.latitude, loc.longitude]
        else:
            lat_lng_coords = [0, 0]
    except GeocoderTimedOut as e:
        print("Error: geocode failed on input %s with message %s"%(my_address, e.message))
    return lat_lng_coords

In [32]:
# call the function to get the coordinates, store in a new list using list comprehension
# get_latlng(neighborhood) for
coords = [ get_latlng(neighborhood) for neighborhood in mum_df["Neighborhood"].tolist() ]
print(coords)

  This is separate from the ipykernel package so we can avoid doing imports until


[[19.1561292, 72.8707223], [18.9753024, 72.8248975], [18.9663549, 72.8091632], [19.1319915, 72.8499596], [19.1008449, 72.9118196], [19.0207608, 72.8652556], [19.0395778, 72.9221562], [0, 0], [18.91904145, 72.8264976296761], [19.0618939, 72.9247918], [18.9366512, 72.8391325], [19.0549792, 72.8402203], [19.067115, 72.8657245], [19.1688142, 72.8336777], [18.9537706, 72.8274755], [18.9394438, 72.8333427], [18.97256625, 72.80426769748523], [18.9766219, 72.8327936], [0, 0], [0, 0], [0, 0], [19.1091482, 72.8945793], [18.9871984, 72.8326716], [0, 0], [18.9593519, 72.829914], [18.9359567, 72.8273404], [18.915091, 72.8259691], [18.9871866, 72.8438965], [18.9136406, 72.8209295], [18.9634974, 72.8087763], [18.9949978, 72.8328707], [19.12829205, 72.83019335523952], [19.019282, 72.8428757], [19.028379, 72.8415661], [0, 0], [0, 0], [0, 0], [19.0475502, 72.9051895], [19.044463, 72.8586177], [18.9441404, 72.8283521], [19.1740198, 72.8695219], [18.9622265, 72.836795], [18.9515935, 72.8256965], [19.04928

In [33]:
# create temporary dataframe to populate the coordinates into Latitude and Longitude
df_coords = pd.DataFrame(coords, columns=['Latitude', 'Longitude'])

In [35]:
# merge the coordinates into the original dataframe
mum_df['Latitude'] = df_coords['Latitude']
mum_df['Longitude'] = df_coords['Longitude']

# check the neighborhoods and the coordinates
print(mum_df.shape)
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    print(mum_df)

(135, 3)
                      Neighborhood   Latitude  Longitude
1                Aarey Milk Colony  18.975302  72.824898
2                         Agripada  18.966355  72.809163
3                   Altamount Road  19.131992  72.849960
4                   Amboli, Mumbai  19.100845  72.911820
5                      Amrut Nagar  19.020761  72.865256
6                       Antop Hill  19.039578  72.922156
7                  Anushakti Nagar   0.000000   0.000000
8                           Asalfa  18.919041  72.826498
9                     Badhwar Park  19.061894  72.924792
10                      Baiganwadi  18.936651  72.839133
11                  Ballard Estate  19.054979  72.840220
12                          Bandra  19.067115  72.865724
13            Bandra Kurla Complex  19.168814  72.833678
14                    Bangur Nagar  18.953771  72.827476
15                      Bhuleshwar  18.939444  72.833343
16                     Bori Bunder  18.972566  72.804268
17                    

***
### There are some locations for which the output is either 0 or NaN because the latitude and longitude could not be found. These neighborhoods are removed using the code below

In [40]:
mum_df = mum_df.dropna(how='any',axis=0) 
mum_df = mum_df[mum_df.Latitude != 0.00]
print(mum_df.shape)
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    print(mum_df)

(111, 3)
                    Neighborhood   Latitude  Longitude
1              Aarey Milk Colony  18.975302  72.824898
2                       Agripada  18.966355  72.809163
3                 Altamount Road  19.131992  72.849960
4                 Amboli, Mumbai  19.100845  72.911820
5                    Amrut Nagar  19.020761  72.865256
6                     Antop Hill  19.039578  72.922156
8                         Asalfa  18.919041  72.826498
9                   Badhwar Park  19.061894  72.924792
10                    Baiganwadi  18.936651  72.839133
11                Ballard Estate  19.054979  72.840220
12                        Bandra  19.067115  72.865724
13          Bandra Kurla Complex  19.168814  72.833678
14                  Bangur Nagar  18.953771  72.827476
15                    Bhuleshwar  18.939444  72.833343
16                   Bori Bunder  18.972566  72.804268
17                  Breach Candy  18.976622  72.832794
21           Chandanwadi, Mumbai  19.109148  72.894579
2

Now we have a total of 111 neighborhoods in Mumbai which we will analyse

In [41]:
# save the DataFrame as CSV file
mum_df.to_csv("mum_df.csv", index=False)

## Creating a map of Mumbai with neighborhoods superimposed on top

In [44]:
# get the coordinates of Mumbai
address = 'Mumbai, India'
geolocator = Nominatim(user_agent="my-application")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Mumbai, India is {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Mumbai, India is 18.9387711, 72.8353355.


In [46]:
# create map of Mumbai using latitude and longitude values
map_mum = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, neighborhood in zip(mum_df['Latitude'], mum_df['Longitude'], mum_df['Neighborhood']):
    label = '{}'.format(neighborhood)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_mum)  
    
map_mum

In [47]:
# save the map as HTML file
map_mum.save('map_mum.html')

## Using the foursquare API to explore the neighborhoods

In [48]:
# define Foursquare Credentials and Version
CLIENT_ID = 'MNXTCWDXGW50SGZZTKICVZIECP102VKJUIINMCD24ZTOMHWO'
CLIENT_SECRET = 'K0PISV5S2YHLFYABHVPJCGWU03VLQYVXO24S0YIAZMPDHRUO'
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET: ' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: MNXTCWDXGW50SGZZTKICVZIECP102VKJUIINMCD24ZTOMHWO
CLIENT_SECRET: K0PISV5S2YHLFYABHVPJCGWU03VLQYVXO24S0YIAZMPDHRUO


In [49]:
radius = 2000
LIMIT = 100

venues = []

for lat, long, neighborhood in zip(mum_df['Latitude'], mum_df['Longitude'], mum_df['Neighborhood']):
    
    # create the API request URL
    url = "https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}".format(
        CLIENT_ID,
        CLIENT_SECRET,
        VERSION,
        lat,
        long,
        radius, 
        LIMIT)
    
    # make the GET request
    results = requests.get(url).json()["response"]['groups'][0]['items']
    
    # return only relevant information for each nearby venue
    for venue in results:
        venues.append((
            neighborhood,
            lat, 
            long, 
            venue['venue']['name'], 
            venue['venue']['location']['lat'], 
            venue['venue']['location']['lng'],  
            venue['venue']['categories'][0]['name']))

In [50]:
# convert the venues list into a new DataFrame
venues_df = pd.DataFrame(venues)

# define the column names
venues_df.columns = ['Neighborhood', 'Latitude', 'Longitude', 'VenueName', 'VenueLatitude', 'VenueLongitude', 'VenueCategory']

print(venues_df.shape)
venues_df.head()


(9093, 7)


Unnamed: 0,Neighborhood,Latitude,Longitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
0,Aarey Milk Colony,18.975302,72.824898,Celejor,18.975844,72.823679,Bakery
1,Aarey Milk Colony,18.975302,72.824898,Mahalaxmi Race Course (Royal Western India Tur...,18.980535,72.818588,Club House
2,Aarey Milk Colony,18.975302,72.824898,Tote On The Turf,18.980266,72.820294,Nightclub
3,Aarey Milk Colony,18.975302,72.824898,Willingdon Sports Club,18.976925,72.815256,Golf Course
4,Aarey Milk Colony,18.975302,72.824898,Neel,18.980407,72.820403,Indian Restaurant


In [51]:
venues_df.groupby(["Neighborhood"]).count()

Unnamed: 0_level_0,Latitude,Longitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Aarey Milk Colony,89,89,89,89,89,89
Agripada,96,96,96,96,96,96
Altamount Road,61,61,61,61,61,61
"Amboli, Mumbai",100,100,100,100,100,100
Amrut Nagar,76,76,76,76,76,76
...,...,...,...,...,...,...
Vidyavihar,4,4,4,4,4,4
Virar,16,16,16,16,16,16
Wadala,100,100,100,100,100,100
Western Suburbs (Mumbai),64,64,64,64,64,64


### Let's find out how many unique categories can be curated from all the returned venues

In [66]:
print('There are {} uniques categories.'.format(len(venues_df['VenueCategory'].unique())))
# print out the list of categories
print(venues_df['VenueCategory'].unique()[:300])

There are 218 uniques categories.
['Bakery' 'Club House' 'Nightclub' 'Golf Course' 'Indian Restaurant'
 'Middle Eastern Restaurant' 'History Museum' 'Scenic Lookout'
 'Coffee Shop' 'Ice Cream Shop' 'Bengali Restaurant' 'Restaurant'
 'Music Venue' 'Bar' 'Asian Restaurant' 'Deli / Bodega' 'Spa' 'Juice Bar'
 'Chinese Restaurant' 'Cupcake Shop' 'Café' 'Gym / Fitness Center'
 'Stadium' 'Italian Restaurant' 'Fast Food Restaurant' 'Dessert Shop'
 'BBQ Joint' 'Theater' 'Snack Place' 'Planetarium' 'Park' 'Hotel'
 'Racetrack' 'Vegetarian / Vegan Restaurant' 'Art Gallery'
 'Modern European Restaurant' 'Pizza Place' 'Lounge' 'Electronics Store'
 'Multiplex' 'Zoo' 'Fish Market' 'Antique Shop' 'Farmers Market' 'Market'
 'Bank' 'Brewery' 'Donut Shop' 'Sandwich Place' 'Bookstore'
 'Japanese Restaurant' "Men's Store" 'Other Great Outdoors'
 'Salon / Barbershop' 'Steakhouse' 'Department Store' 'Mexican Restaurant'
 'Beach' 'Harbor / Marina' 'Gastropub' 'Breakfast Spot' 'Opera House'
 'Food Truck' 'Froze

In [54]:
# check if the results contain "Shopping Mall"
"Shopping Mall" in venues_df['VenueCategory'].unique()

True

## Analyze each neighborhood

In [55]:
# one hot encoding
mum_onehot = pd.get_dummies(venues_df[['VenueCategory']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
mum_onehot['Neighborhoods'] = venues_df['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [mum_onehot.columns[-1]] + list(mum_onehot.columns[:-1])
mum_onehot = mum_onehot[fixed_columns]

print(mum_onehot.shape)
mum_onehot.head()

(9093, 219)


Unnamed: 0,Neighborhoods,ATM,Afghan Restaurant,Airport Lounge,Airport Service,American Restaurant,Antique Shop,Aquarium,Arcade,Art Gallery,...,Track,Trail,Train Station,Tunnel,Vegetarian / Vegan Restaurant,Water Park,Whisky Bar,Wine Bar,Women's Store,Zoo
0,Aarey Milk Colony,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Aarey Milk Colony,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Aarey Milk Colony,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Aarey Milk Colony,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Aarey Milk Colony,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


### Next, let's group rows by neighborhood and by taking the mean of the frequency of occurrence of each category


In [64]:
mum_grouped = mum_onehot.groupby(["Neighborhoods"]).mean().reset_index()

print(mum_grouped.shape)
mum_grouped

(111, 219)


Unnamed: 0,Neighborhoods,ATM,Afghan Restaurant,Airport Lounge,Airport Service,American Restaurant,Antique Shop,Aquarium,Arcade,Art Gallery,...,Track,Trail,Train Station,Tunnel,Vegetarian / Vegan Restaurant,Water Park,Whisky Bar,Wine Bar,Women's Store,Zoo
0,Aarey Milk Colony,0.0,0.00,0.0,0.0,0.000000,0.011236,0.0,0.0,0.022472,...,0.0,0.000000,0.000000,0.0,0.011236,0.0,0.0,0.0,0.0,0.011236
1,Agripada,0.0,0.00,0.0,0.0,0.000000,0.000000,0.0,0.0,0.000000,...,0.0,0.000000,0.000000,0.0,0.020833,0.0,0.0,0.0,0.0,0.000000
2,Altamount Road,0.0,0.00,0.0,0.0,0.016393,0.000000,0.0,0.0,0.000000,...,0.0,0.000000,0.000000,0.0,0.016393,0.0,0.0,0.0,0.0,0.000000
3,"Amboli, Mumbai",0.0,0.01,0.0,0.0,0.010000,0.000000,0.0,0.0,0.000000,...,0.0,0.000000,0.000000,0.0,0.020000,0.0,0.0,0.0,0.0,0.000000
4,Amrut Nagar,0.0,0.00,0.0,0.0,0.000000,0.000000,0.0,0.0,0.000000,...,0.0,0.000000,0.026316,0.0,0.039474,0.0,0.0,0.0,0.0,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
106,Vidyavihar,0.0,0.00,0.0,0.0,0.000000,0.000000,0.0,0.0,0.000000,...,0.0,0.000000,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000
107,Virar,0.0,0.00,0.0,0.0,0.000000,0.000000,0.0,0.0,0.000000,...,0.0,0.000000,0.062500,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000
108,Wadala,0.0,0.00,0.0,0.0,0.000000,0.000000,0.0,0.0,0.000000,...,0.0,0.000000,0.000000,0.0,0.020000,0.0,0.0,0.0,0.0,0.000000
109,Western Suburbs (Mumbai),0.0,0.00,0.0,0.0,0.015625,0.000000,0.0,0.0,0.000000,...,0.0,0.015625,0.000000,0.0,0.046875,0.0,0.0,0.0,0.0,0.000000


In [72]:
len(mum_grouped[mum_grouped["Shopping Mall"] > 0])

33

### Create a new DataFrame for Shopping Mall data only


In [110]:
mum_mall = mum_grouped[["Neighborhoods","Shopping Mall"]]
print(mum_mall.shape)
mum_mall.head()

(111, 2)


Unnamed: 0,Neighborhoods,Shopping Mall
0,Aarey Milk Colony,0.0
1,Agripada,0.0
2,Altamount Road,0.016393
3,"Amboli, Mumbai",0.02
4,Amrut Nagar,0.013158


## Cluster Neighborhoods
Run k-means to cluster the neighborhoods in Mumbai into 3 clusters.

In [111]:
# set number of clusters
kclusters = 3

mum_clustering = mum_mall.drop(["Neighborhoods"], 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(mum_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([0, 0, 2, 2, 2, 0, 0, 0, 0, 0])

In [112]:
# create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood.
mum_merged = mum_mall.copy()

# add clustering labels
mum_merged["Cluster Labels"] = kmeans.labels_

In [113]:
mum_merged.rename(columns={"Neighborhoods": "Neighborhood"}, inplace=True)
mum_merged.head()

Unnamed: 0,Neighborhood,Shopping Mall,Cluster Labels
0,Aarey Milk Colony,0.0,0
1,Agripada,0.0,0
2,Altamount Road,0.016393,2
3,"Amboli, Mumbai",0.02,2
4,Amrut Nagar,0.013158,2


In [114]:
# add latitude/longitude for each neighborhood
mum_merged = mum_merged.join(mum_df.set_index("Neighborhood"), on="Neighborhood")

print(mum_merged.shape)
mum_merged.head() 

(111, 5)


Unnamed: 0,Neighborhood,Shopping Mall,Cluster Labels,Latitude,Longitude
0,Aarey Milk Colony,0.0,0,18.975302,72.824898
1,Agripada,0.0,0,18.966355,72.809163
2,Altamount Road,0.016393,2,19.131992,72.84996
3,"Amboli, Mumbai",0.02,2,19.100845,72.91182
4,Amrut Nagar,0.013158,2,19.020761,72.865256


In [115]:
# sort the results by Cluster Labels
print(mum_merged.shape)
mum_merged.sort_values(["Cluster Labels"], inplace=True)
mum_merged

(111, 5)


Unnamed: 0,Neighborhood,Shopping Mall,Cluster Labels,Latitude,Longitude
0,Aarey Milk Colony,0.000000,0,18.975302,72.824898
77,Mira Road,0.000000,0,18.968178,72.828601
75,"Matunga Road, Mumbai",0.000000,0,18.969734,72.840620
73,Marine Lines,0.000000,0,19.117220,72.882342
72,"Marine Drive, Mumbai",0.000000,0,18.945670,72.823781
...,...,...,...,...,...
24,Cumbala Hill,0.020000,2,18.994998,72.832871
47,Irla,0.030000,2,19.092544,72.901952
63,Madh Island,0.020000,2,18.982568,72.824160
80,Nariman Point,0.041667,2,18.909766,72.809831


In [116]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(mum_merged['Latitude'], mum_merged['Longitude'], mum_merged['Neighborhood'], mum_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' - Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [117]:
# save the map as HTML file
map_clusters.save('map_clusters.html')

## Examine Clusters

### Cluster 0

In [118]:
mum_merged.loc[mum_merged['Cluster Labels'] == 0]

Unnamed: 0,Neighborhood,Shopping Mall,Cluster Labels,Latitude,Longitude
0,Aarey Milk Colony,0.0,0,18.975302,72.824898
77,Mira Road,0.0,0,18.968178,72.828601
75,"Matunga Road, Mumbai",0.0,0,18.969734,72.840620
73,Marine Lines,0.0,0,19.117220,72.882342
72,"Marine Drive, Mumbai",0.0,0,18.945670,72.823781
...,...,...,...,...,...
35,Fort (Mumbai precinct),0.0,0,19.128794,72.825554
34,Fanas Wadi,0.0,0,19.049285,72.829376
110,Yashodham,0.0,0,18.949472,72.830716
32,Dindoshi,0.0,0,18.962226,72.836795


### Cluster 1

In [119]:
mum_merged.loc[mum_merged['Cluster Labels'] == 1]

Unnamed: 0,Neighborhood,Shopping Mall,Cluster Labels,Latitude,Longitude
59,"Land's End, Bandra",0.333333,1,19.241887,72.895317


### Cluster 2

In [120]:
mum_merged.loc[mum_merged['Cluster Labels'] == 2]

Unnamed: 0,Neighborhood,Shopping Mall,Cluster Labels,Latitude,Longitude
68,Malabar Hill,0.030303,2,19.186719,72.848588
2,Altamount Road,0.016393,2,19.131992,72.84996
3,"Amboli, Mumbai",0.02,2,19.100845,72.91182
105,"Versova, Mumbai",0.03,2,19.079629,72.897491
4,Amrut Nagar,0.013158,2,19.020761,72.865256
27,Dadar,0.02,2,19.028379,72.841566
43,Guru Tegh Bahadur Nagar,0.02,2,19.019962,72.847893
101,Tardeo,0.012658,2,19.060204,72.888099
42,Gowalia Tank,0.011765,2,19.036991,72.862165
99,Shivaji Park,0.02,2,19.027236,72.838348


## Observations:

Most of the shopping malls are concentrated in the southern and central area of Mumbai, with the highest number in cluster 0 and moderate number in cluster 2. On the other hand, cluster 1 has only 1 shopping mall. This represents a great opportunity and high potential areas to open new shopping malls as there is very little to no competition from existing malls. Meanwhile, shopping malls in cluster 0 are likely suffering from intense competition due to oversupply and high concentration of shopping malls. From another perspective, this also shows that the oversupply of shopping malls mostly happened in the southern area of the city, with the suburb area still have very few shopping malls. Therefore, this project recommends property developers to capitalize on these findings to open new shopping malls in neighborhoods in cluster 1 with little to no competition. Property developers with unique selling propositions to stand out from the competition can also open new shopping malls in neighborhoods in cluster 2 with moderate competition. Lastly, property developers are advised to avoid neighborhoods in cluster 0 which already have high concentration of shopping malls and suffering from intense competition.