# Approach

We will explore each city individually, then plot all the neighborhoods that we consider on a map using the Folium package. After that the model will be built by clustering all of the similar neighborhoods together and map the new clustered neighborhoods.

In [1]:
import pandas as pd
import requests
import matplotlib.cm as cm
from sklearn.cluster import KMeans
import numpy as np
import folium 
import matplotlib.colors as colors

# Data Collection
reading the json data on https://www.data.gouv.fr/fr/datasets/r/e88c6fda-1d09-42a0-a069-606d3259114e that was already saved on the machine and loading it using pandas

In [2]:
paris_raw=pd.read_json('E:\\correspondances-code-insee-code-postal.json')
paris_raw.head()

Unnamed: 0,datasetid,recordid,fields,geometry,record_timestamp
0,correspondances-code-insee-code-postal,2bf36b38314b6c39dfbcd09225f97fa532b1fc45,"{'code_comm': '645', 'nom_dept': 'ESSONNE', 's...","{'type': 'Point', 'coordinates': [2.2517129721...",2016-09-21T00:29:06.175+02:00
1,correspondances-code-insee-code-postal,7ee82e74e059b443df18bb79fc5a19b1f05e5a88,"{'code_comm': '133', 'nom_dept': 'SEINE-ET-MAR...","{'type': 'Point', 'coordinates': [3.0529405055...",2016-09-21T00:29:06.175+02:00
2,correspondances-code-insee-code-postal,e2cd3186f07286705ed482a10b6aebd9de633c81,"{'code_comm': '378', 'nom_dept': 'ESSONNE', 's...","{'type': 'Point', 'coordinates': [2.1971816504...",2016-09-21T00:29:06.175+02:00
3,correspondances-code-insee-code-postal,868bf03527a1d0a9defe5cf4e6fa0a730d725699,"{'code_comm': '243', 'nom_dept': 'SEINE-ET-MAR...","{'type': 'Point', 'coordinates': [2.7097808131...",2016-09-21T00:29:06.175+02:00
4,correspondances-code-insee-code-postal,21e809b1d4480333c8b6fe7addd8f3b06f343e2c,"{'code_comm': '003', 'nom_dept': 'VAL-DE-MARNE...","{'type': 'Point', 'coordinates': [2.3335102498...",2016-09-21T00:29:06.175+02:00


# Data Preprocessing
- data needs to processed in which we will break down the nested fields

In [3]:
paris_field_data=pd.DataFrame()
for f in paris_raw.fields:
    dict_new=f
    paris_field_data=paris_field_data.append(dict_new, ignore_index=True)
    
paris_field_data.head()
    

Unnamed: 0,code_arr,code_cant,code_comm,code_dept,code_reg,geo_point_2d,geo_shape,id_geofla,insee_com,nom_comm,nom_dept,nom_region,population,postal_code,statut,superficie,z_moyen
0,3,3,645,91,11,"[48.750443119964764, 2.251712972144151]","{'type': 'Polygon', 'coordinates': [[[2.238024...",16275,91645,VERRIERES-LE-BUISSON,ESSONNE,ILE-DE-FRANCE,15.5,91370,Commune simple,999.0,121.0
1,3,20,133,77,11,"[48.41256065214989, 3.052940505560729]","{'type': 'Polygon', 'coordinates': [[[3.076046...",31428,77133,COURCELLES-EN-BASSEE,SEINE-ET-MARNE,ILE-DE-FRANCE,0.2,77126,Commune simple,1082.0,88.0
2,1,9,378,91,11,"[48.52726809075556, 2.19718165044305]","{'type': 'Polygon', 'coordinates': [[[2.203466...",30975,91378,MAUCHAMPS,ESSONNE,ILE-DE-FRANCE,0.3,91730,Commune simple,313.0,150.0
3,5,14,243,77,11,"[48.87307018579678, 2.7097808131278462]","{'type': 'Polygon', 'coordinates': [[[2.727542...",17000,77243,LAGNY-SUR-MARNE,SEINE-ET-MARNE,ILE-DE-FRANCE,20.2,77400,Chef-lieu canton,579.0,71.0
4,3,34,3,94,11,"[48.80588035965699, 2.333510249842654]","{'type': 'Polygon', 'coordinates': [[[2.343851...",32123,94003,ARCUEIL,VAL-DE-MARNE,ILE-DE-FRANCE,19.5,94110,Chef-lieu canton,232.0,70.0


- **feature selection**: we will only select the columns we need

In [4]:
df_2 = paris_field_data[['postal_code','nom_comm','nom_dept','geo_point_2d']]
df_2

Unnamed: 0,postal_code,nom_comm,nom_dept,geo_point_2d
0,91370,VERRIERES-LE-BUISSON,ESSONNE,"[48.750443119964764, 2.251712972144151]"
1,77126,COURCELLES-EN-BASSEE,SEINE-ET-MARNE,"[48.41256065214989, 3.052940505560729]"
2,91730,MAUCHAMPS,ESSONNE,"[48.52726809075556, 2.19718165044305]"
3,77400,LAGNY-SUR-MARNE,SEINE-ET-MARNE,"[48.87307018579678, 2.7097808131278462]"
4,94110,ARCUEIL,VAL-DE-MARNE,"[48.80588035965699, 2.333510249842654]"
...,...,...,...,...
1295,75014,PARIS-14E-ARRONDISSEMENT,PARIS,"[48.82899321160942, 2.327100883257538]"
1296,77250,VILLEMER,SEINE-ET-MARNE,"[48.30365721296873, 2.826057744171064]"
1297,95810,MENOUVILLE,VAL-D'OISE,"[49.15323556351221, 2.107794339870269]"
1298,94490,ORMESSON-SUR-MARNE,VAL-DE-MARNE,"[48.785456274386185, 2.539242143174428]"


# Feature Engineering
We will only consider the city Paris

In [5]:
df_paris=df_2[df_2['nom_dept'].str.contains('PARIS')].reset_index(drop=True)
df_paris.head()

Unnamed: 0,postal_code,nom_comm,nom_dept,geo_point_2d
0,75010,PARIS-10E-ARRONDISSEMENT,PARIS,"[48.87602855694339, 2.361112904561707]"
1,75016,PARIS-16E-ARRONDISSEMENT,PARIS,"[48.86039876035177, 2.262099559395783]"
2,75009,PARIS-9E-ARRONDISSEMENT,PARIS,"[48.87689616237872, 2.337460241388529]"
3,75003,PARIS-3E-ARRONDISSEMENT,PARIS,"[48.86305413181178, 2.359361058970589]"
4,75006,PARIS-6E-ARRONDISSEMENT,PARIS,"[48.84896809191946, 2.332670898588416]"


We will extract the geolocation from the column *geo_point_2d*

In [6]:
t=df_paris['geo_point_2d']


In [7]:
p_ll=df_paris['geo_point_2d'].astype('str')

### Latitude

In [8]:
paris_lat=p_ll.apply(lambda x: x.split(',')[0])
paris_lat=paris_lat.apply(lambda x: x.lstrip('['))
paris_lat

0      48.87602855694339
1      48.86039876035177
2      48.87689616237872
3      48.86305413181178
4      48.84896809191946
5      48.84015541860987
6      48.86790337886785
7      48.85941549762748
8     48.844508659617546
9      48.88686862295828
10     48.86318677744551
11     48.85608259819694
12    48.854228281954754
13    48.892735074561706
14      48.8626304851685
15     48.87252726662346
16     48.82871768452136
17     48.83515623066034
18     48.88733716648682
19     48.82899321160942
Name: geo_point_2d, dtype: object

### Longitude

In [9]:
paris_long=p_ll.apply(lambda x: x.split(',')[1])
paris_long=paris_long.apply(lambda x: x.rstrip(']'))
paris_long

0      2.361112904561707
1      2.262099559395783
2      2.337460241388529
3      2.359361058970589
4      2.332670898588416
5      2.293559372435076
6      2.344107166658533
7      2.378741060237548
8      2.349859385560182
9      2.384694327870042
10     2.400819826729021
11     2.312438687733857
12     2.357361938142205
13     2.348711933867703
14     2.336293446550539
15     2.312582560420059
16     2.362468228516128
17     2.419807034965275
18     2.307485559493426
19     2.327100883257538
Name: geo_point_2d, dtype: object

In [10]:
p_lat= pd.DataFrame(paris_lat.astype(float))
p_lat.columns=['Latitude']
p_long=pd.DataFrame(paris_long.astype(float))
p_long.columns=['Longitude']

In [11]:
paris_final=pd.concat([df_paris.drop('geo_point_2d',axis=1),p_lat, p_long], axis=1)
paris_final.head()

Unnamed: 0,postal_code,nom_comm,nom_dept,Latitude,Longitude
0,75010,PARIS-10E-ARRONDISSEMENT,PARIS,48.876029,2.361113
1,75016,PARIS-16E-ARRONDISSEMENT,PARIS,48.860399,2.2621
2,75009,PARIS-9E-ARRONDISSEMENT,PARIS,48.876896,2.33746
3,75003,PARIS-3E-ARRONDISSEMENT,PARIS,48.863054,2.359361
4,75006,PARIS-6E-ARRONDISSEMENT,PARIS,48.848968,2.332671


In [12]:
from arcgis.geocoding import geocode
from arcgis.gis import GIS
gis = GIS()
paris=geocode(address='Paris, France, FR')[0]
paris_lng_coords= paris['location']['x']
paris_lat_coords= paris['location']['y']
print("The geolocation of Paris: ", paris_lat_coords, paris_lng_coords)

The geolocation of Paris:  48.85341000000005 2.3488000000000397


### Using the above data we will visualize the map of Paris

In [13]:

map_Paris= folium.Map(location=[paris_lat_coords, paris_lng_coords], zoom_start=12)
map_Paris

for latitude, longitude, borough, town in zip(paris_final['Latitude'], paris_final['Longitude'], paris_final['nom_comm'], paris_final['nom_dept']):
    label = '{}, {}'.format(town, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [latitude, longitude],
        radius=5,
        popup=label,
        color='Blue',
        fill=True,
        fill_opacity=0.8
        ).add_to(map_Paris)  
    
map_Paris

In [14]:
CLIENT_ID = 'LYCNOZCPOJNDUQMAJAIX1QNCDP5KXX3LKQUCG5BTX2SJ52TL' 
CLIENT_SECRET = 'FPBNIFQJ0KIZZCPUTVBYFG4AUPJTPN5W1QZATF1LYLKOPQFL'
VERSION = '20180605' 

In [15]:
LIMIT=100

def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius,
            LIMIT
            )
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighbourhood', 
                  'Neighbourhood Latitude', 
                  'Neighbourhood Longitude', 
                  'Venue', 
                  'Venue Category']
    
    return(nearby_venues)

In [16]:
venues_in_Paris = getNearbyVenues(paris_final['nom_comm'], paris_final['Latitude'], paris_final['Longitude'])

PARIS-10E-ARRONDISSEMENT
PARIS-16E-ARRONDISSEMENT
PARIS-9E-ARRONDISSEMENT
PARIS-3E-ARRONDISSEMENT
PARIS-6E-ARRONDISSEMENT
PARIS-15E-ARRONDISSEMENT
PARIS-2E-ARRONDISSEMENT
PARIS-11E-ARRONDISSEMENT
PARIS-5E-ARRONDISSEMENT
PARIS-19E-ARRONDISSEMENT
PARIS-20E-ARRONDISSEMENT
PARIS-7E-ARRONDISSEMENT
PARIS-4E-ARRONDISSEMENT
PARIS-18E-ARRONDISSEMENT
PARIS-1ER-ARRONDISSEMENT
PARIS-8E-ARRONDISSEMENT
PARIS-13E-ARRONDISSEMENT
PARIS-12E-ARRONDISSEMENT
PARIS-17E-ARRONDISSEMENT
PARIS-14E-ARRONDISSEMENT


In [17]:
venues_in_Paris.head()

Unnamed: 0,Neighbourhood,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Category
0,PARIS-10E-ARRONDISSEMENT,48.876029,2.361113,Les Orientalistes,Mediterranean Restaurant
1,PARIS-10E-ARRONDISSEMENT,48.876029,2.361113,Les Enfants Perdus,French Restaurant
2,PARIS-10E-ARRONDISSEMENT,48.876029,2.361113,Marrow,Café
3,PARIS-10E-ARRONDISSEMENT,48.876029,2.361113,Café A,Café
4,PARIS-10E-ARRONDISSEMENT,48.876029,2.361113,Marks & Spencer Food,Food & Drink Shop


We will group the above dataset on venue categories

In [18]:
venues_in_Paris.groupby('Venue Category').max()


Unnamed: 0_level_0,Neighbourhood,Neighbourhood Latitude,Neighbourhood Longitude,Venue
Venue Category,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Afghan Restaurant,PARIS-11E-ARRONDISSEMENT,48.859415,2.378741,Afghanistan
African Restaurant,PARIS-9E-ARRONDISSEMENT,48.876896,2.361113,Wally Le Saharien
American Restaurant,PARIS-19E-ARRONDISSEMENT,48.892735,2.384694,Harper's
Antique Shop,PARIS-9E-ARRONDISSEMENT,48.876896,2.337460,Hôtel des Ventes Drouot
Argentinian Restaurant,PARIS-3E-ARRONDISSEMENT,48.863054,2.359361,Anahi
...,...,...,...,...
Wine Bar,PARIS-9E-ARRONDISSEMENT,48.892735,2.400820,Vingt Vins d'Art
Wine Shop,PARIS-3E-ARRONDISSEMENT,48.892735,2.400820,Trois Fois Vin
Women's Store,PARIS-2E-ARRONDISSEMENT,48.867903,2.344107,& Other Stories
Zoo,PARIS-12E-ARRONDISSEMENT,48.835156,2.419807,Parc zoologique de Paris


### One Hot Encoding
For clustering the above data we will perform one hot encoding on the dataset

In [19]:
Paris_venue_cat = pd.get_dummies(venues_in_Paris[['Venue Category']], prefix="", prefix_sep="")
Paris_venue_cat

Unnamed: 0,Afghan Restaurant,African Restaurant,American Restaurant,Antique Shop,Argentinian Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Auvergne Restaurant,...,Turkish Restaurant,Udon Restaurant,Vegetarian / Vegan Restaurant,Venezuelan Restaurant,Vietnamese Restaurant,Wine Bar,Wine Shop,Women's Store,Zoo,Zoo Exhibit
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1309,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1310,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1311,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1312,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [20]:

Paris_venue_cat['Neighbourhood'] = venues_in_Paris['Neighbourhood'] 

# moving neighborhood column to the first column
fixed_columns = [Paris_venue_cat.columns[-1]] + list(Paris_venue_cat.columns[:-1])
Paris_venue_cat = Paris_venue_cat[fixed_columns]

Paris_venue_cat.head()

Unnamed: 0,Neighbourhood,Afghan Restaurant,African Restaurant,American Restaurant,Antique Shop,Argentinian Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,...,Turkish Restaurant,Udon Restaurant,Vegetarian / Vegan Restaurant,Venezuelan Restaurant,Vietnamese Restaurant,Wine Bar,Wine Shop,Women's Store,Zoo,Zoo Exhibit
0,PARIS-10E-ARRONDISSEMENT,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,PARIS-10E-ARRONDISSEMENT,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,PARIS-10E-ARRONDISSEMENT,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,PARIS-10E-ARRONDISSEMENT,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,PARIS-10E-ARRONDISSEMENT,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [21]:
Paris_grouped = Paris_venue_cat.groupby('Neighbourhood').mean().reset_index()
Paris_grouped.head()


Unnamed: 0,Neighbourhood,Afghan Restaurant,African Restaurant,American Restaurant,Antique Shop,Argentinian Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,...,Turkish Restaurant,Udon Restaurant,Vegetarian / Vegan Restaurant,Venezuelan Restaurant,Vietnamese Restaurant,Wine Bar,Wine Shop,Women's Store,Zoo,Zoo Exhibit
0,PARIS-10E-ARRONDISSEMENT,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.03,...,0.0,0.0,0.01,0.0,0.0,0.02,0.02,0.0,0.0,0.0
1,PARIS-11E-ARRONDISSEMENT,0.022222,0.0,0.0,0.0,0.0,0.0,0.022222,0.0,0.044444,...,0.0,0.0,0.044444,0.0,0.022222,0.044444,0.0,0.0,0.0,0.0
2,PARIS-12E-ARRONDISSEMENT,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.2
3,PARIS-13E-ARRONDISSEMENT,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.180328,...,0.0,0.0,0.0,0.0,0.229508,0.0,0.0,0.0,0.0,0.0
4,PARIS-14E-ARRONDISSEMENT,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [22]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [28]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighbourhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

### We will extract top venue categories using the above defined function and create a new dataframe

In [29]:
neighborhoods_venues_sorted_paris = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted_paris['Neighbourhood'] = Paris_grouped['Neighbourhood']

for ind in np.arange(Paris_grouped.shape[0]):
    neighborhoods_venues_sorted_paris.iloc[ind, 1:] = return_most_common_venues(Paris_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted_paris.head()

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,PARIS-10E-ARRONDISSEMENT,French Restaurant,Bistro,Coffee Shop,Hotel,Café,Pizza Place,Japanese Restaurant,Italian Restaurant,Indian Restaurant,Asian Restaurant
1,PARIS-11E-ARRONDISSEMENT,Restaurant,French Restaurant,Café,Italian Restaurant,Pastry Shop,Wine Bar,Vegetarian / Vegan Restaurant,Cocktail Bar,Bar,Asian Restaurant
2,PARIS-12E-ARRONDISSEMENT,Zoo Exhibit,Supermarket,Monument / Landmark,Bistro,Zoo,Argentinian Restaurant,Art Gallery,African Restaurant,Flower Shop,Fish Market
3,PARIS-13E-ARRONDISSEMENT,Vietnamese Restaurant,Asian Restaurant,Chinese Restaurant,Thai Restaurant,French Restaurant,Juice Bar,Bakery,Hotel,Coffee Shop,Gourmet Shop
4,PARIS-14E-ARRONDISSEMENT,French Restaurant,Japanese Restaurant,Bistro,Hotel,Pizza Place,Italian Restaurant,Brasserie,Food & Drink Shop,Plaza,Bakery


# Building the model
We will cluster the neighborhoods of Paris using K Means.

## K Means
We will create 4 clusters

In [42]:
k_num_clusters = 4

Paris_grouped_clustering = Paris_grouped.drop('Neighbourhood', 1)

kmeans_Paris = KMeans(n_clusters=k_num_clusters, random_state=0).fit(Paris_grouped_clustering)
kmeans_Paris

KMeans(algorithm='auto', copy_x=True, init='k-means++', max_iter=300,
    n_clusters=4, n_init=10, n_jobs=None, precompute_distances='auto',
    random_state=0, tol=0.0001, verbose=0)

In [43]:
kmeans_Paris.labels_

array([0, 0, 2, 0, 1, 0, 3, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0])

We have labeled the city, now we add these labels to our dataset

In [44]:
neighborhoods_venues_sorted_paris.insert(0, 'Cluster Labels', kmeans_Paris.labels_ +1)

ValueError: cannot insert Cluster Labels, already exists

In [39]:
paris_data = paris_final

paris_data = paris_data.join(neighborhoods_venues_sorted_paris.set_index('Neighbourhood'), on='nom_comm')

paris_data.head()

Unnamed: 0,postal_code,nom_comm,nom_dept,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,75010,PARIS-10E-ARRONDISSEMENT,PARIS,48.876029,2.361113,1,French Restaurant,Bistro,Coffee Shop,Hotel,Café,Pizza Place,Japanese Restaurant,Italian Restaurant,Indian Restaurant,Asian Restaurant
1,75016,PARIS-16E-ARRONDISSEMENT,PARIS,48.860399,2.2621,4,Lake,Plaza,Trail,Boat or Ferry,French Restaurant,Bus Station,Bus Stop,Park,Pool,Art Museum
2,75009,PARIS-9E-ARRONDISSEMENT,PARIS,48.876896,2.33746,1,French Restaurant,Hotel,Japanese Restaurant,Bakery,Bistro,Cocktail Bar,Lounge,Wine Bar,Tea Room,Bar
3,75003,PARIS-3E-ARRONDISSEMENT,PARIS,48.863054,2.359361,1,French Restaurant,Coffee Shop,Japanese Restaurant,Bistro,Bakery,Italian Restaurant,Art Gallery,Wine Bar,Cocktail Bar,Chinese Restaurant
4,75006,PARIS-6E-ARRONDISSEMENT,PARIS,48.848968,2.332671,1,French Restaurant,Chocolate Shop,Italian Restaurant,Bistro,Bakery,Fountain,Plaza,Hotel,Pub,Seafood Restaurant


In [40]:
paris_data_nonan = paris_data.dropna(subset=['Cluster Labels'])

Now, we can finally visualize the clusters:


In [45]:
map_clusters_paris = folium.Map(location=[paris_lat_coords, paris_lng_coords], zoom_start=12)

# set color scheme for the clusters
x = np.arange(k_num_clusters)
ys = [i + x + (i*x)**2 for i in range(k_num_clusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(paris_data_nonan['Latitude'], paris_data_nonan['Longitude'], paris_data_nonan['nom_comm'], paris_data_nonan['Cluster Labels']):
    label = folium.Popup('Cluster ' + str(int(cluster) +1) + ' ' + str(poi) , parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[int(cluster-1)],
        fill=True,
        fill_color=rainbow[int(cluster-1)],
        fill_opacity=0.8
        ).add_to(map_clusters_paris)
        
map_clusters_paris

# Result

Paris is multicultural hub having an enormous variety of food cuisines and establishments including resturants, bars and bistros. Paris seems like the relaxing vacation spot with a mix of lakes, historic spots and a wide variety of cusines to try out. Overall, the city of Paris offers a multicultural, diverse and certainly an entertaining experience.

# Conclusion
The aim of this project was to explore the city of Paris and define the neighborhoods in clusters which helps prospective tourists in choosing their destination and overall enhancing their experience.
We can see from our exploring that the city of Paris is a multiculturual hub which gives a feeling of inclusion