# The battle of neighborhood - Segmenting and clustering the communities of Dubai (Data science capstone project final)

In [3]:
#installing the necessary libraries
import sys
!{sys.executable} -m pip install beautifulsoup4 
!{sys.executable} -m pip install requests
!{sys.executable} -m pip install geopy

print('installation finish')

installation finish


In [1]:
#importing all the necessary objects 
import pandas as pd
from bs4 import BeautifulSoup
import requests
import numpy as np
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values
from pandas.io.json import json_normalize  # tranform JSON file into a pandas dataframe

import folium # map rendering library

# import k-means from clustering stage
from sklearn.cluster import KMeans

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

print('Successfully imported')

Successfully imported


In [335]:
#set the max no. of rows to be displayed of dataframe
pd.set_option('display.max_rows', 100)

In [2]:
url='https://en.wikipedia.org/wiki/List_of_communities_in_Dubai'
response=requests.get(url).text

soup=BeautifulSoup(response, 'html.parser')
#print(soup.prettify())

In [3]:
mytable=soup.find('table',{'class':'wikitable sortable'})
#mytable

# Here are two methods to scrap the table from wikipedia

# Method 1

In [4]:
df_new = pd.read_html('https://en.wikipedia.org/wiki/List_of_communities_in_Dubai')[0]


In [5]:
df_new.head()

Unnamed: 0,Community Number,Community (English),Community (Arabic),Area(km2),Population(2000),Population density(/km2)
0,126.0,Abu Hail,أبو هيل,1.27 km²,21414.0,"16,861.4/km²"
1,711.0,Al Awir First,العوير الأولى,,,
2,721.0,Al Awir Second,العوير الثانية,,,
3,333.0,Al Bada,البدع,0.82 km²,18816.0,22946/km²
4,122.0,Al Baraha,البراحة,1.104 km²,7823.0,"7,086/km²"


In [6]:
df_new.shape

(131, 6)

# Method 2

In [7]:
#Assigning the names of the column
column_names = ['CommunityNumber','Community_english', 'Community_arabic','Area', 'Population','Population_density']
df = pd.DataFrame(columns = column_names)


In [8]:
#searching through each row in the table of wikipedia and assigning the data of each cell to newly formed dataframe i.e. df
rows=mytable.find_all('tr')

for tr in rows:
    row_data=[]
    for td in tr.find_all('td'):
        row_data.append(td.text.strip())
    if(len(row_data)==6):
        df.loc[len(df)]=row_data

In [9]:
df.head()

Unnamed: 0,CommunityNumber,Community_english,Community_arabic,Area,Population,Population_density
0,126,Abu Hail,أبو هيل,1.27 km²,21414.0,"16,861.4/km²"
1,711,Al Awir First,العوير الأولى,,,
2,721,Al Awir Second,العوير الثانية,,,
3,333,Al Bada,البدع,0.82 km²,18816.0,22946/km²
4,122,Al Baraha,البراحة,1.104 km²,7823.0,"7,086/km²"


In [10]:
#dropping the non-required columns
df.drop(['Area','Population','Population_density'], axis=1, inplace=True)

In [11]:
df.head()

Unnamed: 0,CommunityNumber,Community_english,Community_arabic
0,126,Abu Hail,أبو هيل
1,711,Al Awir First,العوير الأولى
2,721,Al Awir Second,العوير الثانية
3,333,Al Bada,البدع
4,122,Al Baraha,البراحة


In [12]:
#renaming the column 'Community_english' to Community
df.rename(columns={'Community_english':'Community'}, inplace=True)
df.head(131)

Unnamed: 0,CommunityNumber,Community,Community_arabic
0,126,Abu Hail,أبو هيل
1,711,Al Awir First,العوير الأولى
2,721,Al Awir Second,العوير الثانية
3,333,Al Bada,البدع
4,122,Al Baraha,البراحة
...,...,...,...
126,271,Wadi Alamardi,وادي العمردي
127,621,Warsan First,ورسان الاولى
128,622,Warsan Second,ورسان الثانية
129,325,Za'abeel First,زعبيل الأولى


In [13]:
df.shape

(131, 3)

In [14]:
df.isnull().sum()

CommunityNumber     0
Community           0
Community_arabic    0
dtype: int64

# Adding Geospatial data

In [55]:
#Get Latitude and Longitude for communitites and ignoring the communities whose data is not available
from geopy.exc import GeocoderTimedOut
from geopy.exc import GeocoderNotFound

address= df['Community'].apply(lambda x: x.split('-')[-1]+', Dubai').unique()
geolocater= Nominatim(user_agent="dubai-explorer")
location=[]
empty=[]

def getcoords(add):
    try:
        coords= geolocater.geocode(add, timeout=10)
        location.append([add, coords.latitude, coords.longitude])
        print("the coords are {}".format(location[-1]))
    
    except GeocoderTimedOut:
        return getcoords(add)
    
    except:
        empty.append([add])
        print("Couldn't find coords of {}".format(empty[-1]))
        
for add in address:
        getcoords(add)

the coords are ['Abu Hail, Dubai', 25.28594185, 55.32944354478134]
Couldn't find coords of ['Al Awir First, Dubai']
Couldn't find coords of ['Al Awir Second, Dubai']
Couldn't find coords of ['Al Bada, Dubai']
the coords are ['Al Baraha, Dubai', 25.281368, 55.319412717201146]
Couldn't find coords of ['Al Barsha First, Dubai']
Couldn't find coords of ['Al Barsha Second, Dubai']
Couldn't find coords of ['Al Barsha South First, Dubai']
Couldn't find coords of ['Al Barsha South Second, Dubai']
Couldn't find coords of ['Al Barsha South Third, Dubai']
Couldn't find coords of ['Al Barsha Third, Dubai']
the coords are ['Al Buteen, Dubai', 25.26305655, 55.3205840389995]
Couldn't find coords of ['Al Dhagaya, Dubai']
the coords are ['Al Garhoud, Dubai', 25.23983135, 55.35566829285183]
Couldn't find coords of ['Al Guoz Fourth, Dubai']
Couldn't find coords of ['Al Hamriya, Dubai, Dubai']
Couldn't find coords of ['Al Hamriya Port, Dubai']
the coords are ['Al Hudaiba, Dubai', 25.2400505, 55.2774585314

In [56]:
#we can see after cleaning we are left with 65 communities out of 131 communities in Dubai
len(location)

65

In [None]:
dubai_data= pd.DataFrame(location, columns=['Community','Latitude','Longitude'])
dubai_data.to_csv('dubai_communities_location.csv')


In [134]:
dubai_data.head()

Unnamed: 0,Community,Latitude,Longitude
0,"Abu Hail, Dubai",25.285942,55.329444
1,"Al Baraha, Dubai",25.281368,55.319413
2,"Al Buteen, Dubai",25.263057,55.320584
3,"Al Garhoud, Dubai",25.239831,55.355668
4,"Al Hudaiba, Dubai",25.24005,55.277459


In [135]:
CLIENT_ID = 'XHED4EHOOSFZ41A1QJXZ21TLW2KGUUUEH45ETS5QPVJDTXTU' # your Foursquare ID
CLIENT_SECRET = 'H53SGUBWTK44GUDTUXOI242XB5DVFTISNO4X0MGNVQKL4FEL' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

In [59]:
#exploring the venues of 'Abu Hail, Dubai'
dubai_data.loc[0,'Community']

'Abu Hail, Dubai'

In [60]:
neighborhood_latitude = dubai_data.loc[0, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = dubai_data.loc[0, 'Longitude'] # neighborhood longitude value

neighborhood_name = dubai_data.loc[0, 'Community'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of Abu Hail, Dubai are 25.28594185, 55.32944354478134.


In [61]:
LIMIT=100 # limit of number of venues returned by Foursquare API
radius=500

url='https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
CLIENT_ID,
CLIENT_SECRET,
VERSION,
neighborhood_latitude,
neighborhood_longitude,
radius,
LIMIT)

url

'https://api.foursquare.com/v2/venues/explore?&client_id=L2VUPJ1QXZIVAIXW22HYACND1PW0I5US0RMKOAMCKNMVIJW3&client_secret=ND0PLJO4OAMIF4T3UODPTUMVE32V0QQMQD4UI5RKGPEOD4YV&v=20180605&ll=25.28594185,55.32944354478134&radius=500&limit=100'

In [62]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5f1d154d802bda45aceb7f9c'},
 'response': {'headerLocation': 'Deira',
  'headerFullLocation': 'Deira, Dubai',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 4,
  'suggestedBounds': {'ne': {'lat': 25.290441854500006,
    'lng': 55.33441110192604},
   'sw': {'lat': 25.281441845499995, 'lng': 55.324475987636646}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '515849dee4b03ece53c49f44',
       'name': 'Pond Park - Al Qusais',
       'location': {'lat': 25.28806045780595,
        'lng': 55.332605711347874,
        'labeledLatLngs': [{'label': 'display',
          'lat': 25.28806045780595,
          'lng': 55.332605711347874}],
        'distance': 396,
        'cc': 'AE',
        'city': 'دبي',
        'state': 'دبي',
   

In [63]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [64]:
venues=results['response']['groups'][0]['items']
venues

[{'reasons': {'count': 0,
   'items': [{'summary': 'This spot is popular',
     'type': 'general',
     'reasonName': 'globalInteractionReason'}]},
  'venue': {'id': '515849dee4b03ece53c49f44',
   'name': 'Pond Park - Al Qusais',
   'location': {'lat': 25.28806045780595,
    'lng': 55.332605711347874,
    'labeledLatLngs': [{'label': 'display',
      'lat': 25.28806045780595,
      'lng': 55.332605711347874}],
    'distance': 396,
    'cc': 'AE',
    'city': 'دبي',
    'state': 'دبي',
    'country': 'الإمارات العربية المتحدة',
    'formattedAddress': ['دبي', 'الإمارات العربية المتحدة']},
   'categories': [{'id': '4bf58dd8d48988d163941735',
     'name': 'Park',
     'pluralName': 'Parks',
     'shortName': 'Park',
     'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/parks_outdoors/park_',
      'suffix': '.png'},
     'primary': True}],
   'photos': {'count': 0, 'groups': []}},
  'referralId': 'e-0-515849dee4b03ece53c49f44-0'},
 {'reasons': {'count': 0,
   'items': [{'summary

In [65]:
#Now we are ready to clean the json and structure it into a pandas dataframe.
nearby_venues=json_normalize(venues)
filtered_columns=['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues=nearby_venues.loc[:, filtered_columns]
nearby_venues['venue.categories']=nearby_venues.apply(get_category_type, axis=1)
nearby_venues.columns=[col.split(".")[-1] for col in nearby_venues.columns]
nearby_venues.head()

  


Unnamed: 0,name,categories,lat,lng
0,Pond Park - Al Qusais,Park,25.28806,55.332606
1,Zahr El Laymoun,Middle Eastern Restaurant,25.289486,55.330753
2,Lively,Track,25.285194,55.325276
3,Jannati Health Club and Spa,Spa,25.285408,55.325168


In [66]:
#we can see we have got only four venues in Abu Hail within 500m radius
print("{} venues were returned by foursquare".format(nearby_venues.shape[0]))

4 venues were returned by foursquare


# Dividing the dataframe 'dubai_data' into 4 parts horizontally to get the details of their neighborhood. The division has been done to avoid the error returned by Foursqare. At last all four parts have been concatenated.

In [148]:
dubai_data1=dubai_data[:15]
print(dubai_data1.shape)
dubai_data1

(15, 3)


Unnamed: 0,Community,Latitude,Longitude
0,"Abu Hail, Dubai",25.285942,55.329444
1,"Al Baraha, Dubai",25.281368,55.319413
2,"Al Buteen, Dubai",25.263057,55.320584
3,"Al Garhoud, Dubai",25.239831,55.355668
4,"Al Hudaiba, Dubai",25.24005,55.277459
5,"Al Jaddaf, Dubai",25.07501,55.188761
6,"Al Jafiliya, Dubai",25.23336,55.29205
7,"Al Karama, Dubai",25.244403,55.304755
8,"Al Kifaf, Dubai",25.227887,55.274809
9,"Al Mamzar, Dubai",25.303247,55.343453


In [136]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    LIMIT1=10
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT1)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])
        
        nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [138]:
#get the near by venues of above data frame i.e. dubai_data1
dubai_venues = getNearbyVenues(names=dubai_data1['Community'],
                                   latitudes=dubai_data1['Latitude'],
                                   longitudes=dubai_data1['Longitude']
                                  )

Abu Hail, Dubai
Al Baraha, Dubai
Al Buteen, Dubai
Al Garhoud, Dubai
Al Hudaiba, Dubai
Al Jaddaf, Dubai
Al Jafiliya, Dubai
Al Karama, Dubai
Al Kifaf, Dubai
Al Mamzar, Dubai
Al Manara, Dubai
Al Mankhool, Dubai
Al Mina, Dubai
Al Muraqqabat, Dubai
Al Murar, Dubai


In [139]:
print(dubai_venues.shape)
dubai_venues.head()

(118, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Abu Hail, Dubai",25.285942,55.329444,Pond Park - Al Qusais,25.28806,55.332606,Park
1,"Abu Hail, Dubai",25.285942,55.329444,Zahr El Laymoun,25.289486,55.330753,Middle Eastern Restaurant
2,"Abu Hail, Dubai",25.285942,55.329444,Lively,25.285194,55.325276,Track
3,"Abu Hail, Dubai",25.285942,55.329444,Jannati Health Club and Spa,25.285408,55.325168,Spa
4,"Al Baraha, Dubai",25.281368,55.319413,TGI Friday's,25.279242,55.316245,American Restaurant


In [149]:
dubai_data2=dubai_data[15:30]
print(dubai_data2.shape)
dubai_data2

(15, 3)


Unnamed: 0,Community,Latitude,Longitude
15,"Al Mushrif, Dubai",25.266905,55.296018
16,"Al Muteena, Dubai",25.273892,55.32263
17,"Al Quoz First, Dubai",25.125079,55.214446
18,"Al Quoz Industrial First, Dubai",25.125079,55.214446
19,"Al Qusais First, Dubai",25.209832,55.275282
20,"Al Qusais Industrial Fifth, Dubai",14.587386,120.984895
21,"Al Qusais Industrial First, Dubai",42.339734,-71.046516
22,"Al Qusais Industrial Fourth, Dubai",43.68713,-92.980409
23,"Al Qusais Industrial Second, Dubai",14.625784,121.078077
24,"Al Qusais Industrial Third, Dubai",25.326002,55.433145


In [150]:
#get the near by venues of above data frame i.e. dubai_data2
dubai_venues2 = getNearbyVenues(names=dubai_data2['Community'],
                                   latitudes=dubai_data2['Latitude'],
                                   longitudes=dubai_data2['Longitude']
                                  )

Al Mushrif, Dubai
Al Muteena, Dubai
Al Quoz First, Dubai
Al Quoz Industrial First, Dubai
Al Qusais First, Dubai
Al Qusais Industrial Fifth, Dubai
Al Qusais Industrial First, Dubai
Al Qusais Industrial Fourth, Dubai
Al Qusais Industrial Second, Dubai
Al Qusais Industrial Third, Dubai
Al Qusais Second, Dubai
Al Qusais Third, Dubai
Al Raffa, Dubai
Al Ras, Dubai
Al Rashidiya, Dubai


In [157]:
print(dubai_venues2.shape)
dubai_venues2.head()

(110, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Al Mushrif, Dubai",25.266905,55.296018,AMIT Garmin أميت. وكيل جارمن,25.266268,55.295146,Electronics Store
1,"Al Mushrif, Dubai",25.266905,55.296018,Mazmi Coffee&More,25.265007,55.296505,Coffee Shop
2,"Al Mushrif, Dubai",25.266905,55.296018,أرض الزعفران,25.26984,55.2979,Flower Shop
3,"Al Mushrif, Dubai",25.266905,55.296018,Jafer Biman Ali Juices جعفر بيمان علي,25.267746,55.297646,Juice Bar
4,"Al Mushrif, Dubai",25.266905,55.296018,Grand Souq Deira,25.267317,55.296957,Market


In [155]:
dubai_data3=dubai_data[30:50]
print(dubai_data3.shape)
dubai_data3

(20, 3)


Unnamed: 0,Community,Latitude,Longitude
30,"Al Rigga, Dubai",25.267316,55.312268
31,"Al Sabkha, Dubai",25.267078,55.303537
32,"Al Satwa, Dubai",25.221161,55.273836
33,"Al Shindagha, Dubai",25.275464,55.294855
34,"Al Wasl, Dubai",25.198898,55.257049
35,"Ayal Nasir, Dubai",25.275307,55.30312
36,"Business Bay, Dubai",25.188828,55.265022
37,"Downtown Dubai, Dubai",25.195173,55.273532
38,"Emirates Hill First, Dubai",25.077855,55.178838
39,"Emirates Hill Second, Dubai",25.317471,55.413125


In [156]:
#get the near by venues of above data frame i.e. dubai_data3
dubai_venues3 = getNearbyVenues(names=dubai_data3['Community'],
                                   latitudes=dubai_data3['Latitude'],
                                   longitudes=dubai_data3['Longitude']
                                  )

Al Rigga, Dubai
Al Sabkha, Dubai
Al Satwa, Dubai
Al Shindagha, Dubai
Al Wasl, Dubai
Ayal Nasir, Dubai
Business Bay, Dubai
Downtown Dubai, Dubai
Emirates Hill First, Dubai
Emirates Hill Second, Dubai
Emirates Hill Third, Dubai
Hatta, Dubai
Hor Al Anz, Dubai
Hor Al Anz East, Dubai
Jebel Ali 1, Dubai
Jebel Ali 2, Dubai
Jebel Ali Industrial, Dubai
Jebel Ali Palm, Dubai
Palm Jumeira, Dubai
Jumeira Second, Dubai


In [158]:
print(dubai_venues3.shape)
dubai_venues3.head()

(115, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Al Rigga, Dubai",25.267316,55.312268,Chinese Red House,25.266292,55.312758,Chinese Restaurant
1,"Al Rigga, Dubai",25.267316,55.312268,Minato,25.265595,55.309761,Japanese Restaurant
2,"Al Rigga, Dubai",25.267316,55.312268,The China Club,25.265315,55.309711,Dim Sum Restaurant
3,"Al Rigga, Dubai",25.267316,55.312268,Shabestan (Shabestan مطعم شبستان الإيراني),25.265303,55.309696,Persian Restaurant
4,"Al Rigga, Dubai",25.267316,55.312268,Royal Club Lounge,25.26468,55.310727,Lounge


In [162]:
dubai_data4=dubai_data[50:65]
print(dubai_data4.shape)
dubai_data4

(15, 3)


Unnamed: 0,Community,Latitude,Longitude
50,"Marsa Dubai, Dubai",25.087754,55.146172
51,"Mirdif, Dubai",25.221335,55.423499
52,"Nad Shamma, Dubai",25.217577,55.383785
53,"Naif, Dubai",25.272775,55.311351
54,"Port Saeed, Dubai",25.246647,55.333871
55,"Arabian Ranches, Dubai",25.04933,55.265839
56,"Ras Al Khor, Dubai",25.802717,51.592465
57,"Rigga Al Buteen, Dubai",25.265992,55.317428
58,"Trade Centre 1, Dubai",42.216425,43.062483
59,"Trade Centre 2, Dubai",25.228012,55.289702


In [169]:
#get the near by venues of above data frame i.e. dubai_data4
dubai_venues4 = getNearbyVenues(names=dubai_data4['Community'],
                                   latitudes=dubai_data4['Latitude'],
                                   longitudes=dubai_data4['Longitude']
                                  )

Marsa Dubai, Dubai
Mirdif, Dubai
Nad Shamma, Dubai
Naif, Dubai
Port Saeed, Dubai
Arabian Ranches, Dubai
Ras Al Khor, Dubai
Rigga Al Buteen, Dubai
Trade Centre 1, Dubai
Trade Centre 2, Dubai
Umm Al Sheif, Dubai
Umm Ramool, Dubai
Wadi Alamardi, Dubai
Za'abeel First, Dubai
Za'abeel Second, Dubai


In [170]:
print(dubai_venues4.shape)
dubai_venues4.head()

(94, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Marsa Dubai, Dubai",25.087754,55.146172,Buddha Bar,25.08656,55.144828,Cocktail Bar
1,"Marsa Dubai, Dubai",25.087754,55.146172,Deniz Restaurant & Cafe,25.087833,55.14693,Restaurant
2,"Marsa Dubai, Dubai",25.087754,55.146172,The Açaí Spot - Marina,25.086301,55.147448,Coffee Shop
3,"Marsa Dubai, Dubai",25.087754,55.146172,Observatory Bar & Grill,25.087903,55.146222,Hotel Bar
4,"Marsa Dubai, Dubai",25.087754,55.146172,Dubai Marriott Harbour Hotel & Suites,25.087784,55.146433,Hotel


In [171]:
#concatenating all 4 above venues 
dubai_venues_combined = pd.concat([dubai_venues, dubai_venues2, dubai_venues3, dubai_venues4], ignore_index=True, sort=False)


In [172]:
print('The shape of dubai_venues_combined is: ', dubai_venues_combined.shape)
dubai_venues_combined.head()

The shape of dubai_venues_combined is:  (437, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Abu Hail, Dubai",25.285942,55.329444,Pond Park - Al Qusais,25.28806,55.332606,Park
1,"Abu Hail, Dubai",25.285942,55.329444,Zahr El Laymoun,25.289486,55.330753,Middle Eastern Restaurant
2,"Abu Hail, Dubai",25.285942,55.329444,Lively,25.285194,55.325276,Track
3,"Abu Hail, Dubai",25.285942,55.329444,Jannati Health Club and Spa,25.285408,55.325168,Spa
4,"Al Baraha, Dubai",25.281368,55.319413,TGI Friday's,25.279242,55.316245,American Restaurant


In [289]:
#finding 1

#finiding the no. of various restaurants in Dubai
only_restaurant = dubai_venues_combined[dubai_venues_combined['Venue Category'].str.contains('Restaurant')].reset_index(drop=True)
only_restaurant.index = np.arange(1, len(only_restaurant )+1)

In [290]:
#printing the restaurants along with their numbers
print (only_restaurant['Venue Category'].value_counts())


Restaurant                   19
Middle Eastern Restaurant    19
Indian Restaurant            18
Asian Restaurant             14
Japanese Restaurant           7
Mexican Restaurant            6
Fast Food Restaurant          6
Chinese Restaurant            5
Italian Restaurant            5
Korean Restaurant             4
Filipino Restaurant           4
Seafood Restaurant            3
Greek Restaurant              2
Lebanese Restaurant           2
Thai Restaurant               2
American Restaurant           2
French Restaurant             2
Iraqi Restaurant              2
Persian Restaurant            2
Tapas Restaurant              1
Syrian Restaurant             1
Yemeni Restaurant             1
Latin American Restaurant     1
Turkish Restaurant            1
African Restaurant            1
Comfort Food Restaurant       1
New American Restaurant       1
Spanish Restaurant            1
North Indian Restaurant       1
Dim Sum Restaurant            1
Name: Venue Category, dtype: int64


In [300]:
#finding 2

#finiding the no. of various restaurants in Dubai
only_hotel = dubai_venues_combined[dubai_venues_combined['Venue Category'].str.contains('Hotel')].reset_index(drop=True)
only_hotel.index = np.arange(1, len(only_hotel )+1)

In [301]:
#printing the restaurants along with their numbers
print (only_hotel['Venue Category'].value_counts())

Hotel         33
Hotel Bar      2
Hotel Pool     1
Name: Venue Category, dtype: int64


In [173]:
#groupig the venues by communities to get the number of venues in each communities
dubai_venues_combined.groupby('Neighborhood').count()


Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Abu Hail, Dubai",4,4,4,4,4,4
"Al Baraha, Dubai",10,10,10,10,10,10
"Al Buteen, Dubai",10,10,10,10,10,10
"Al Garhoud, Dubai",10,10,10,10,10,10
"Al Hudaiba, Dubai",10,10,10,10,10,10
...,...,...,...,...,...,...
"Umm Al Sheif, Dubai",2,2,2,2,2,2
"Umm Ramool, Dubai",5,5,5,5,5,5
"Wadi Alamardi, Dubai",1,1,1,1,1,1
"Za'abeel First, Dubai",10,10,10,10,10,10


In [174]:
print('There are {} uniques categories.'.format(len(dubai_venues_combined['Venue Category'].unique())))


There are 127 uniques categories.


In [176]:
#Analyzing each neighborhood

# one hot encoding
dubai_onehot = pd.get_dummies(dubai_venues_combined[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
dubai_onehot['Neighborhood'] = dubai_venues_combined['Neighborhood'] 

dubai_onehot.drop(['Neighborhood'],axis=1,inplace=True) 
dubai_onehot.insert(loc=0, column='Neighborhood', value=dubai_venues_combined['Neighborhood'] )

dubai_onehot.head()

Unnamed: 0,Neighborhood,African Restaurant,American Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Auto Garage,BBQ Joint,...,Tennis Court,Thai Restaurant,Track,Train Station,Tunnel,Turkish Restaurant,Video Game Store,Waterfront,Women's Store,Yemeni Restaurant
0,"Abu Hail, Dubai",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,"Abu Hail, Dubai",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,"Abu Hail, Dubai",0,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0
3,"Abu Hail, Dubai",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,"Al Baraha, Dubai",0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [177]:
dubai_onehot.shape

(437, 128)

In [178]:
dubai_grouped = dubai_onehot.groupby('Neighborhood').mean().reset_index()
dubai_grouped

Unnamed: 0,Neighborhood,African Restaurant,American Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Auto Garage,BBQ Joint,...,Tennis Court,Thai Restaurant,Track,Train Station,Tunnel,Turkish Restaurant,Video Game Store,Waterfront,Women's Store,Yemeni Restaurant
0,"Abu Hail, Dubai",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,"Al Baraha, Dubai",0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Al Buteen, Dubai",0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,...,0.0,0.0,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,"Al Garhoud, Dubai",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,"Al Hudaiba, Dubai",0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,...,0.0,0.0,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
57,"Umm Al Sheif, Dubai",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0
58,"Umm Ramool, Dubai",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.4,0.0,...,0.0,0.0,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0
59,"Wadi Alamardi, Dubai",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0
60,"Za'abeel First, Dubai",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [179]:
dubai_grouped.shape

(62, 128)

In [181]:
#Printing each neighborhood with top 5 most common venues
num_top_venues = 5

for hood in dubai_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = dubai_grouped[dubai_grouped['Neighborhood'] == hood].T.reset_index()#transpose the columns of each neighborhood into rows
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Abu Hail, Dubai----
                       venue  freq
0                      Track  0.25
1                       Park  0.25
2  Middle Eastern Restaurant  0.25
3                        Spa  0.25
4      Performing Arts Venue  0.00


----Al Baraha, Dubai----
                       venue  freq
0                      Hotel   0.2
1  Middle Eastern Restaurant   0.2
2                       Café   0.1
3        American Restaurant   0.1
4                        Spa   0.1


----Al Buteen, Dubai----
                  venue  freq
0        Sandwich Place   0.2
1            Restaurant   0.2
2           Coffee Shop   0.1
3   Fried Chicken Joint   0.1
4  Fast Food Restaurant   0.1


----Al Garhoud, Dubai----
              venue  freq
0             Hotel   0.2
1       Post Office   0.2
2      Dessert Shop   0.1
3  Basketball Court   0.1
4    Shipping Store   0.1


----Al Hudaiba, Dubai----
               venue  freq
0  Convenience Store   0.2
1               Café   0.1
2          Juice Bar   0.1
3 

                       venue  freq
0                       Café   0.2
1  Middle Eastern Restaurant   0.2
2            Thai Restaurant   0.1
3         Seafood Restaurant   0.1
4                 Smoke Shop   0.1


----Hor Al Anz, Dubai----
               venue  freq
0             Market   0.2
1  Currency Exchange   0.2
2         Campground   0.2
3  Convenience Store   0.2
4     History Museum   0.2


----Jebel Ali 1, Dubai----
                venue  freq
0  Mexican Restaurant  0.25
1  Italian Restaurant  0.12
2       Metro Station  0.12
3                Café  0.12
4       Deli / Bodega  0.12


----Jebel Ali 2, Dubai----
                venue  freq
0         Coffee Shop   1.0
1  African Restaurant   0.0
2         Post Office   0.0
3          Playground   0.0
4         Pizza Place   0.0


----Jebel Ali Palm, Dubai----
                venue  freq
0               Hotel   0.2
1                 Spa   0.1
2        Cocktail Bar   0.1
3           Racetrack   0.1
4  Italian Restaurant   0.1


----

In [182]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [307]:
#Finding top 10 venues in each communities
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = dubai_grouped['Neighborhood']

for ind in np.arange(dubai_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(dubai_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Abu Hail, Dubai",Middle Eastern Restaurant,Park,Track,Spa,Filipino Restaurant,Food & Drink Shop,Food,Flower Shop,Flea Market,Fish Market
1,"Al Baraha, Dubai",Hotel,Middle Eastern Restaurant,Lounge,Coffee Shop,Café,Mobile Phone Shop,Spa,American Restaurant,Gym,Food & Drink Shop
2,"Al Buteen, Dubai",Restaurant,Sandwich Place,Ice Cream Shop,Asian Restaurant,Coffee Shop,Fast Food Restaurant,Fried Chicken Joint,Hotel,Historic Site,Flower Shop
3,"Al Garhoud, Dubai",Hotel,Post Office,Gastropub,Dessert Shop,Coffee Shop,Restaurant,Shipping Store,Basketball Court,Yemeni Restaurant,Flea Market
4,"Al Hudaiba, Dubai",Convenience Store,Ice Cream Shop,Cafeteria,Café,Korean Restaurant,Filipino Restaurant,Juice Bar,Asian Restaurant,Hotel,Fast Food Restaurant
...,...,...,...,...,...,...,...,...,...,...,...
57,"Umm Al Sheif, Dubai",Pool,French Restaurant,Ice Cream Shop,Department Store,Dessert Shop,Dim Sum Restaurant,Donut Shop,Electronics Store,Farmers Market,Fast Food Restaurant
58,"Umm Ramool, Dubai",Auto Garage,Gym,Food & Drink Shop,Cafeteria,Food,Flower Shop,Flea Market,Fish Market,Yemeni Restaurant,Fried Chicken Joint
59,"Wadi Alamardi, Dubai",Campground,Yemeni Restaurant,Deli / Bodega,Dessert Shop,Dim Sum Restaurant,Donut Shop,Electronics Store,Farmers Market,Fast Food Restaurant,Filipino Restaurant
60,"Za'abeel First, Dubai",Coffee Shop,Middle Eastern Restaurant,Japanese Restaurant,Building,Steakhouse,Lebanese Restaurant,Indian Restaurant,Gym,Food & Drink Shop,Dim Sum Restaurant


In [310]:
df_most_common=neighborhoods_venues_sorted.iloc[:,:2]

In [336]:
df_most_common

Unnamed: 0,Neighborhood,1st Most Common Venue
0,"Abu Hail, Dubai",Middle Eastern Restaurant
1,"Al Baraha, Dubai",Hotel
2,"Al Buteen, Dubai",Restaurant
3,"Al Garhoud, Dubai",Hotel
4,"Al Hudaiba, Dubai",Convenience Store
5,"Al Jaddaf, Dubai",Italian Restaurant
6,"Al Jafiliya, Dubai",Coffee Shop
7,"Al Karama, Dubai",Indian Restaurant
8,"Al Kifaf, Dubai",Electronics Store
9,"Al Mamzar, Dubai",Restaurant


In [323]:
#df_group=df_most_common.groupby('1st Most Common Venue').count().reset_index()
#Arranging in ascending order
most_common1 = df_most_common.sort_values(['1st Most Common Venue'], ascending=[True])


In [333]:
#df_group
print(most_common1.shape)
most_common1

(62, 2)


Unnamed: 0,Neighborhood,1st Most Common Venue
16,"Al Muteena, Dubai",African Restaurant
58,"Umm Ramool, Dubai",Auto Garage
23,"Al Qusais Industrial Second, Dubai",Basketball Court
53,"Palm Jumeira, Dubai",Beach
33,"Al Shindagha, Dubai",Boat or Ferry
...,...,...
24,"Al Qusais Industrial Third, Dubai",Restaurant
9,"Al Mamzar, Dubai",Restaurant
2,"Al Buteen, Dubai",Restaurant
38,"Downtown Dubai, Dubai",Scenic Lookout


In [329]:
#Neighborhood in Dubai famous for restaurant
df_rest = most_common1[most_common1['1st Most Common Venue'].str.contains('Restaurant')].reset_index(drop=True)
print(df_rest.shape)
df_rest

(19, 2)


Unnamed: 0,Neighborhood,1st Most Common Venue
0,"Al Muteena, Dubai",African Restaurant
1,"Al Qusais Industrial Fifth, Dubai",Filipino Restaurant
2,"Emirates Hill Third, Dubai",Indian Restaurant
3,"Marsa Dubai, Dubai",Indian Restaurant
4,"Al Raffa, Dubai",Indian Restaurant
5,"Al Karama, Dubai",Indian Restaurant
6,"Al Jaddaf, Dubai",Italian Restaurant
7,"Al Mina, Dubai",Mexican Restaurant
8,"Jebel Ali 1, Dubai",Mexican Restaurant
9,"Al Muraqqabat, Dubai",Middle Eastern Restaurant


In [330]:
#Neighborhood in Dubai famous for hotel
df_hotel = most_common1[most_common1['1st Most Common Venue'].str.contains('Hotel')].reset_index(drop=True)
print(df_hotel.shape)
df_hotel

(11, 2)


Unnamed: 0,Neighborhood,1st Most Common Venue
0,"Al Qusais Industrial First, Dubai",Hotel
1,"Naif, Dubai",Hotel
2,"Nad Shamma, Dubai",Hotel
3,"Al Murar, Dubai",Hotel
4,"Al Sabkha, Dubai",Hotel
5,"Al Mankhool, Dubai",Hotel
6,"Jebel Ali Palm, Dubai",Hotel
7,"Al Garhoud, Dubai",Hotel
8,"Al Baraha, Dubai",Hotel
9,"Jumeira Second, Dubai",Hotel


In [331]:
#Neighborhood in Dubai famous for park
df_park = most_common1[most_common1['1st Most Common Venue'].str.contains('Park')].reset_index(drop=True)
print(df_park.shape)
df_park

(2, 2)


Unnamed: 0,Neighborhood,1st Most Common Venue
0,"Za'abeel Second, Dubai",Park
1,"Al Qusais Second, Dubai",Park


In [334]:
#Neighborhood in Dubai famous for beach
df_beach = most_common1[most_common1['1st Most Common Venue'].str.contains('Beach')].reset_index(drop=True)
print(df_beach.shape)
df_beach

(1, 2)


Unnamed: 0,Neighborhood,1st Most Common Venue
0,"Palm Jumeira, Dubai",Beach


In [337]:
#Neighborhood in Dubai famous for coffee
df_coffee = most_common1[most_common1['1st Most Common Venue'].str.contains('Coffee Shop')].reset_index(drop=True)
print(df_coffee.shape)
df_coffee

(7, 2)


Unnamed: 0,Neighborhood,1st Most Common Venue
0,"Rigga Al Buteen, Dubai",Coffee Shop
1,"Al Qusais First, Dubai",Coffee Shop
2,"Mirdif, Dubai",Coffee Shop
3,"Za'abeel First, Dubai",Coffee Shop
4,"Al Satwa, Dubai",Coffee Shop
5,"Jebel Ali 2, Dubai",Coffee Shop
6,"Al Jafiliya, Dubai",Coffee Shop


# Now cluster each neighborhood

In [192]:
#renaming the Community column in dubai_data to Neighborhood and updating into a new data frame dubai_new
dubai_new=dubai_data
dubai_new.rename(columns={'Community':'Neighborhood'}, inplace=True)
dubai_new

Unnamed: 0,Neighborhood,Latitude,Longitude
0,"Abu Hail, Dubai",25.285942,55.329444
1,"Al Baraha, Dubai",25.281368,55.319413
2,"Al Buteen, Dubai",25.263057,55.320584
3,"Al Garhoud, Dubai",25.239831,55.355668
4,"Al Hudaiba, Dubai",25.240050,55.277459
...,...,...,...
60,"Umm Al Sheif, Dubai",25.132752,55.205857
61,"Umm Ramool, Dubai",25.229996,55.365626
62,"Wadi Alamardi, Dubai",25.195744,55.494357
63,"Za'abeel First, Dubai",25.209832,55.275282


In [255]:
# set number of clusters
kclusters = 5

dubai_grouped_clustering = dubai_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(dubai_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_

array([1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 0, 0, 3, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 4, 1, 3, 0, 1,
       1, 1, 3, 1, 1, 1, 3, 1, 1, 1, 1, 3, 1, 1, 1, 2, 3, 1])

In [256]:
#neighborhoods_venues_sorted.drop(["Cluster Labels"], axis=1, inplace=True)

In [257]:
#adding clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

dubai_merged = dubai_new

#merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
dubai_merged = dubai_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

dubai_merged.head() # check the last columns!


Unnamed: 0,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Abu Hail, Dubai",25.285942,55.329444,1.0,Middle Eastern Restaurant,Park,Track,Spa,Filipino Restaurant,Food & Drink Shop,Food,Flower Shop,Flea Market,Fish Market
1,"Al Baraha, Dubai",25.281368,55.319413,1.0,Hotel,Middle Eastern Restaurant,Lounge,Coffee Shop,Café,Mobile Phone Shop,Spa,American Restaurant,Gym,Food & Drink Shop
2,"Al Buteen, Dubai",25.263057,55.320584,1.0,Restaurant,Sandwich Place,Ice Cream Shop,Asian Restaurant,Coffee Shop,Fast Food Restaurant,Fried Chicken Joint,Hotel,Historic Site,Flower Shop
3,"Al Garhoud, Dubai",25.239831,55.355668,1.0,Hotel,Post Office,Gastropub,Dessert Shop,Coffee Shop,Restaurant,Shipping Store,Basketball Court,Yemeni Restaurant,Flea Market
4,"Al Hudaiba, Dubai",25.24005,55.277459,1.0,Convenience Store,Ice Cream Shop,Cafeteria,Café,Korean Restaurant,Filipino Restaurant,Juice Bar,Asian Restaurant,Hotel,Fast Food Restaurant


In [264]:
#converting the 'Cluster Labels' column into int
dubai_merged['Cluster Labels'] = dubai_merged['Cluster Labels'].astype('Int64')


In [265]:
dubai_merged.head()

Unnamed: 0,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Abu Hail, Dubai",25.285942,55.329444,1,Middle Eastern Restaurant,Park,Track,Spa,Filipino Restaurant,Food & Drink Shop,Food,Flower Shop,Flea Market,Fish Market
1,"Al Baraha, Dubai",25.281368,55.319413,1,Hotel,Middle Eastern Restaurant,Lounge,Coffee Shop,Café,Mobile Phone Shop,Spa,American Restaurant,Gym,Food & Drink Shop
2,"Al Buteen, Dubai",25.263057,55.320584,1,Restaurant,Sandwich Place,Ice Cream Shop,Asian Restaurant,Coffee Shop,Fast Food Restaurant,Fried Chicken Joint,Hotel,Historic Site,Flower Shop
3,"Al Garhoud, Dubai",25.239831,55.355668,1,Hotel,Post Office,Gastropub,Dessert Shop,Coffee Shop,Restaurant,Shipping Store,Basketball Court,Yemeni Restaurant,Flea Market
4,"Al Hudaiba, Dubai",25.24005,55.277459,1,Convenience Store,Ice Cream Shop,Cafeteria,Café,Korean Restaurant,Filipino Restaurant,Juice Bar,Asian Restaurant,Hotel,Fast Food Restaurant


In [266]:
dubai_merged.dtypes

Neighborhood               object
Latitude                  float64
Longitude                 float64
Cluster Labels              Int64
1st Most Common Venue      object
2nd Most Common Venue      object
3rd Most Common Venue      object
4th Most Common Venue      object
5th Most Common Venue      object
6th Most Common Venue      object
7th Most Common Venue      object
8th Most Common Venue      object
9th Most Common Venue      object
10th Most Common Venue     object
dtype: object

In [252]:
#defining the latitude and longitude of City/place whose map is to be created
address = 'Dubai'

geolocator = Nominatim(user_agent="dubai_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Dubai are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Dubai are 25.0750095, 55.18876088183319.


In [268]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(dubai_merged['Latitude'], dubai_merged['Longitude'], dubai_merged['Neighborhood'], dubai_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        #color=rainbow[cluster-1],
        fill=True,
        #fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [280]:
#save the map as html file
map_clusters.save('dubai_map.html')

# Examining clusters

# Cluster 1

In [270]:
dubai_merged.loc[dubai_merged['Cluster Labels'] == 0, dubai_merged.columns[[1] + list(range(5, dubai_merged.shape[1]))]]


Unnamed: 0,Latitude,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
17,25.125079,Soccer Field,Motorcycle Shop,Jewelry Store,Art Gallery,Donut Shop,Electronics Store,Farmers Market,Fast Food Restaurant,Filipino Restaurant
18,25.125079,Soccer Field,Motorcycle Shop,Jewelry Store,Art Gallery,Donut Shop,Electronics Store,Farmers Market,Fast Food Restaurant,Filipino Restaurant
41,24.799096,Soccer Field,Yemeni Restaurant,French Restaurant,Dessert Shop,Dim Sum Restaurant,Donut Shop,Electronics Store,Farmers Market,Fast Food Restaurant


# Cluster 2

In [271]:
dubai_merged.loc[dubai_merged['Cluster Labels'] == 1, dubai_merged.columns[[1] + list(range(5, dubai_merged.shape[1]))]]


Unnamed: 0,Latitude,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,25.285942,Park,Track,Spa,Filipino Restaurant,Food & Drink Shop,Food,Flower Shop,Flea Market,Fish Market
1,25.281368,Middle Eastern Restaurant,Lounge,Coffee Shop,Café,Mobile Phone Shop,Spa,American Restaurant,Gym,Food & Drink Shop
2,25.263057,Sandwich Place,Ice Cream Shop,Asian Restaurant,Coffee Shop,Fast Food Restaurant,Fried Chicken Joint,Hotel,Historic Site,Flower Shop
3,25.239831,Post Office,Gastropub,Dessert Shop,Coffee Shop,Restaurant,Shipping Store,Basketball Court,Yemeni Restaurant,Flea Market
4,25.24005,Ice Cream Shop,Cafeteria,Café,Korean Restaurant,Filipino Restaurant,Juice Bar,Asian Restaurant,Hotel,Fast Food Restaurant
5,25.07501,Restaurant,Yemeni Restaurant,French Restaurant,Dessert Shop,Dim Sum Restaurant,Donut Shop,Electronics Store,Farmers Market,Fast Food Restaurant
7,25.244403,Park,Bakery,Korean Restaurant,Middle Eastern Restaurant,Donut Shop,Electronics Store,Farmers Market,Fast Food Restaurant,Filipino Restaurant
8,25.227887,Grocery Store,Indian Restaurant,Women's Store,Middle Eastern Restaurant,Asian Restaurant,Japanese Restaurant,Fish Market,Food & Drink Shop,Food
9,25.303247,Athletics & Sports,Yemeni Restaurant,Filipino Restaurant,Food & Drink Shop,Food,Flower Shop,Flea Market,Fish Market,Farmers Market
11,25.250827,Greek Restaurant,Supermarket,Hookah Bar,Asian Restaurant,Chinese Restaurant,Sports Bar,Roof Deck,Flea Market,Fish Market


# Cluster 3

In [272]:
dubai_merged.loc[dubai_merged['Cluster Labels'] == 2, dubai_merged.columns[[1] + list(range(5, dubai_merged.shape[1]))]]


Unnamed: 0,Latitude,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
62,25.195744,Yemeni Restaurant,Deli / Bodega,Dessert Shop,Dim Sum Restaurant,Donut Shop,Electronics Store,Farmers Market,Fast Food Restaurant,Filipino Restaurant


# Cluster 4

In [273]:
dubai_merged.loc[dubai_merged['Cluster Labels'] == 3, dubai_merged.columns[[1] + list(range(5, dubai_merged.shape[1]))]]


Unnamed: 0,Latitude,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
6,25.23336,Sandwich Place,Fast Food Restaurant,Metro Station,Track,Food,Flower Shop,Flea Market,Fish Market,Filipino Restaurant
10,25.143712,Coffee Shop,Cafeteria,Playground,Donut Shop,Electronics Store,Dim Sum Restaurant,Farmers Market,Food & Drink Shop,Fast Food Restaurant
19,25.209832,Middle Eastern Restaurant,Japanese Restaurant,Building,Steakhouse,Lebanese Restaurant,Indian Restaurant,Gym,Food & Drink Shop,Dim Sum Restaurant
32,25.221161,Gym / Fitness Center,Yemeni Restaurant,French Restaurant,Dessert Shop,Dim Sum Restaurant,Donut Shop,Electronics Store,Farmers Market,Fast Food Restaurant
40,25.310742,Coffee Shop,Furniture / Home Store,Currency Exchange,Art Museum,American Restaurant,Donut Shop,Electronics Store,Farmers Market,Fast Food Restaurant
45,25.028782,Yemeni Restaurant,Deli / Bodega,Dessert Shop,Dim Sum Restaurant,Donut Shop,Electronics Store,Farmers Market,Fast Food Restaurant,Filipino Restaurant
51,25.221335,Pizza Place,Park,Café,Gym,Grocery Store,Nail Salon,Seafood Restaurant,Fast Food Restaurant,Fish Market
57,25.265992,Breakfast Spot,Juice Bar,Donut Shop,Café,Shopping Mall,Middle Eastern Restaurant,Korean Restaurant,Pizza Place,Greek Restaurant
63,25.209832,Middle Eastern Restaurant,Japanese Restaurant,Building,Steakhouse,Lebanese Restaurant,Indian Restaurant,Gym,Food & Drink Shop,Dim Sum Restaurant


# Cluster 5

In [274]:
dubai_merged.loc[dubai_merged['Cluster Labels'] == 4, dubai_merged.columns[[1] + list(range(5, dubai_merged.shape[1]))]]


Unnamed: 0,Latitude,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
38,25.077855,Yemeni Restaurant,Fried Chicken Joint,Dessert Shop,Dim Sum Restaurant,Donut Shop,Electronics Store,Farmers Market,Fast Food Restaurant,Filipino Restaurant
