# IBM Applied Data Science Capstone Course by Coursera

### Opening a New Shopping Mall in Bhubaneswar, India

- Build a dataframe of neighborhoods in Bhubaneswar, India by web scraping the data from Wikipedia page
- Get the geographical coordinates of the neighborhoods
- Obtain the venue data for the neighborhoods from Foursquare API
- Explore and cluster the neighborhoods
- Select the best cluster to open a new shopping mall

#### Import libraries

In [3]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", None)

import json # library to handle JSON files

from geopy.geocoders import Nominatim # convert an address into latitude and longitude values
import geocoder # to get coordinates

import requests # library to handle requests
from bs4 import BeautifulSoup # library to parse HTML and XML documents

from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

import folium # map rendering library
import io
print("Libraries imported.")

Libraries imported.


#### Import data from github page into a DataFrame

In [4]:
url = "https://raw.githubusercontent.com/dasrasmikant/Coursera_Capstone/main/Neighbourhood.csv" # Make sure the url is the raw version of the file on GitHub
download = requests.get(url).content

# Reading the downloaded content and turning it into a pandas dataframe

bh_df = pd.read_csv(io.StringIO(download.decode('utf-8')))

# Printing out the first 5 rows of the dataframe

print (bh_df.head())

      Neighbourhood
0      Bapuji Nagar
1          Andharua
2         BJB Nagar
3           Hanspal
4  Badaraghunathpur


In [5]:

bh_df.shape

(56, 1)

In [6]:
def get_latilong(location):
    lati_long_coords = None
    while(lati_long_coords is None):
        g = geocoder.arcgis('{}, Bhubaneswar, Odisha'.format(location))
        lati_long_coords = g.latlng
    return lati_long_coords
    
get_latilong('Bapuji Nagar')

[20.26150000000007, 85.83193000000006]

In [8]:
# Retrieving location Co-ordinates
location_names = bh_df['Neighbourhood']
#coords = [get_latilong(location_name) for location_name in location_names.tolist() ]
coords=[]
for location_name in location_names.tolist():    
    print(location_name,get_latilong(location_name))
    coords.append(get_latilong(location_name))

Bapuji Nagar [20.26150000000007, 85.83193000000006]
Andharua [20.319890000000044, 85.77184000000005]
BJB Nagar [20.251050000000077, 85.84402000000006]
Hanspal [20.31746000000004, 85.88205000000005]
Badaraghunathpur [20.227350000000058, 85.73241000000007]
Chandaka [20.366970000000038, 85.76606000000004]
Orakal [20.268790000000024, 85.84100000000007]
Baramunda [20.271310000000028, 85.80236000000008]
Chandrasekharpur [20.327420000000075, 85.81540000000007]
Mancheswar [20.321920000000034, 85.84625000000005]
Bharatpur [20.30320000000006, 85.77822000000003]
Gajapati Nagar [20.31224000000003, 85.82811000000004]
Jatani [20.249900000000025, 85.82577000000003]
Khordha [20.268790000000024, 85.84100000000007]
Patia [20.355300000000057, 85.82766000000004]
Kalinga Nagar [20.267270000000053, 85.75958000000008]
Madhusudan Nagar [20.284050000000036, 85.83210000000008]
Patrapada [20.242850000000033, 85.76710000000008]
Khandagiri [20.26231000000007, 85.78550000000007]
Nayapalli [20.288800000000037, 85.81

In [9]:
# Adding Columns Latitude & Longitude
df_coords = pd.DataFrame(coords, columns=['Latitude', 'Longitude'])
bh_df['Latitude'] = df_coords['Latitude']
bh_df['Longitude'] = df_coords['Longitude']
print(bh_df)

       Neighbourhood  Latitude  Longitude
0       Bapuji Nagar  20.26150   85.83193
1           Andharua  20.31989   85.77184
2          BJB Nagar  20.25105   85.84402
3            Hanspal  20.31746   85.88205
4   Badaraghunathpur  20.22735   85.73241
5           Chandaka  20.36697   85.76606
6             Orakal  20.26879   85.84100
7          Baramunda  20.27131   85.80236
8   Chandrasekharpur  20.32742   85.81540
9         Mancheswar  20.32192   85.84625
10         Bharatpur  20.30320   85.77822
11    Gajapati Nagar  20.31224   85.82811
12            Jatani  20.24990   85.82577
13           Khordha  20.26879   85.84100
14             Patia  20.35530   85.82766
15     Kalinga Nagar  20.26727   85.75958
16  Madhusudan Nagar  20.28405   85.83210
17         Patrapada  20.24285   85.76710
18        Khandagiri  20.26231   85.78550
19         Nayapalli  20.28880   85.81258
20       Phulnakhara  20.26879   85.84100
21   Kharavela Nagar  20.27576   85.83971
22          Old Town  20.24009   8

#### Create a map of Bhubaneswar with neighborhoods superimposed on top

In [13]:
# get the coordinates of Bhubaneswar
address = 'Bhubaneswar, Odisha, India'

geolocator = Nominatim(user_agent="smy-application")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Bhubaneswar, India {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Bhubaneswar, India 20.2667774, 85.8435592.


In [15]:
# create map of Bhubaneswar using latitude and longitude values
map_bh = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, neighborhood in zip(bh_df['Latitude'], bh_df['Longitude'], bh_df['Neighborhood']):
    label = '{}'.format(neighborhood)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_kl)  
    
map_bh

KeyError: 'Neighborhood'