# THE BATTLE OF NEIGHBOURHOODS

### Professional Certificate of Data Science

In [1]:
# Import libraries and options
import pandas as pd
pd.options.display.max_colwidth = 150
pd.options.display.width = 150
import numpy as np
import requests
import folium
from geopy.geocoders import Nominatim
import json
import matplotlib as mpl
import matplotlib.cm as cm
import matplotlib.colors as colors

import matplotlib.pyplot as plt
from sklearn.cluster import KMeans

Scrapping Data and Preprocessing

In [2]:
# Scrape wikipedia webpage with Pandas
website_url = 'https://es.wikipedia.org/wiki/Anexo:Distritos_de_Madrid'
tables = pd.read_html(website_url)
madrid_df = tables[0]
madrid_df.drop(labels = ['Número', 'Población','Superficie[n. 1]​ (Ha.)','Densidad(Hab./Ha.)','Imagen'],axis = 1, inplace = True)
madrid_df.drop(labels = 21, axis = 0, inplace = True)
madrid_df.rename(columns = {'Nombre':'Borough', 'Barrios': 'Neighbourhoods'},inplace = True)


In [3]:
# Function to remove string inside brackets
def remove_text_inside_brackets(text, brackets="()[]"):
    count = [0] * (len(brackets) // 2) # count open/close brackets
    saved_chars = []
    for character in text:
        for i, b in enumerate(brackets):
            if character == b: # found bracket
                kind, is_close = divmod(i, 2)
                count[kind] += (-1)**is_close # `+1`: open, `-1`: close
                if count[kind] < 0: # unbalanced bracket
                    count[kind] = 0  # keep it
                else:  # found bracket to remove
                    break
        else: # character is not a [balanced] bracket
            if not any(count): # outside brackets
                saved_chars.append(character)
    return ''.join(saved_chars)

In [4]:
# Preprocess dataframe

# Remove elements inside brackets
for i in madrid_df.index:
    madrid_df.loc[i,'Neighbourhoods'] = remove_text_inside_brackets(madrid_df.loc[i,'Neighbourhoods'])
    
# Replace spanish simbols to properly print map labels
simbols_to_replace = ['á','é','í','ó','ú','Á','É','Í','Ó','Ú','ñ']
simbols_to_obtain = ['a','e','i','o','u','A','E','I','O','U','n']
madrid_df.replace(to_replace = simbols_to_replace, value = simbols_to_obtain, regex = True, method = None, inplace = True)
#regex = True because to_replace contain strings and method = None because to_replace and value are lists

# Insert in dataframe area(hectarea(ha)) poblation(n residents) and desity(residents/hectarea(ha))
madrid_data_df = pd.read_excel('madrid_data.xlsx',header = None, decimal = ',') # In the excel file decimals are with ',' so we need to especify it because if not its read as type object
madrid_df['Area[ha]'] = madrid_data_df[0]
madrid_df['Poblation'] = madrid_data_df[1]
madrid_df['Density[pob/ha]'] = madrid_data_df[2]
# For now I will remove the neighbourhood column as I will not use it
madrid_df.drop(labels = 'Neighbourhoods', axis = 1, inplace = True)

Find latitude and longitude for each Borough

In [5]:
for i in madrid_df.index:
    address = madrid_df.loc[i,'Borough']+', Madrid'
    geolocator = Nominatim(user_agent="foursquare_agent")
    location = geolocator.geocode(address)
    madrid_df.loc[i,'Latitude'] = location.latitude
    madrid_df.loc[i,'Longitude'] = location.longitude
madrid_df    

Unnamed: 0,Borough,Area[ha],Poblation,Density[pob/ha],Latitude,Longitude
0,Centro,522.82,131928,252.34,40.417653,-3.707914
1,Arganzuela,646.22,151965,235.16,40.398068,-3.693734
2,Retiro,546.62,118516,216.82,40.41115,-3.676057
3,Salamanca,539.24,143800,266.67,40.427045,-3.680602
4,Chamartin,917.55,143424,156.31,40.460764,-3.677534
5,Tetuan,537.47,153789,286.13,40.460821,-3.69952
6,Chamberi,467.92,13401,293.64,40.436247,-3.70383
7,Fuencarral-El Pardo,23783.84,238756,10.04,40.556346,-3.778591
8,Moncloa-Aravaca,4653.11,116903,25.12,40.439495,-3.744204
9,Latina,2542.72,233808,91.95,40.403532,-3.736152


Location of each borough in the Madrid map using the Folium library

In [6]:
# Madrid location
address = 'Madrid'
geolocator = Nominatim(user_agent="foursquare_agent")
location = geolocator.geocode(address)
mad_lat = location.latitude
mad_lon = location.longitude
# Create map
map_madrid = folium.Map(location = [mad_lat, mad_lon], zoom_start = 11)
for lat, lng, borough in zip(madrid_df['Latitude'], madrid_df['Longitude'], madrid_df['Borough']):
    label = '{}'.format(borough)
    label = folium.Popup(label, parse_html = True)
    folium.CircleMarker(
        [lat, lng],
        radius = 5,
        popup = label,
        color = 'red',
        fill = True,
        fill_color = 'lightcoral',
        fill_opacity = 0.7,
        parse_html = False    
    ).add_to(map_madrid)
map_madrid   

Now we will explore the boroughs venues

In [7]:
# Import credentials for Foursquare
rows = list()
with open('cred.txt','r') as file:
    for line in file:
        rows.append(file.readlines())
CLIENT_ID = rows[0][0].rstrip(' \n')
CLIENT_SECRET = rows[0][2]
VERSION = '20180605' 
LIMIT = 100

In [8]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [9]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [10]:
madrid_venues_df = getNearbyVenues(names = madrid_df['Borough'],longitudes = madrid_df['Longitude'],latitudes = madrid_df['Latitude'])

Centro
Arganzuela
Retiro
Salamanca
Chamartin
Tetuan
Chamberi
Fuencarral-El Pardo
Moncloa-Aravaca
Latina
Carabanchel
Usera
Puente de Vallecas
Moratalaz
Ciudad Lineal
Hortaleza
Villaverde
Villa de Vallecas
Vicalvaro
San Blas-Canillejas
Barajas


In [11]:
madrid_venues_df

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Centro,40.417653,-3.707914,Plaza de Isabel II,40.418114,-3.709397,Plaza
1,Centro,40.417653,-3.707914,TOC Hostel,40.417264,-3.705928,Hostel
2,Centro,40.417653,-3.707914,Plaza Mayor,40.415527,-3.707506,Plaza
3,Centro,40.417653,-3.707914,Gyoza Go!,40.416179,-3.708612,Dumpling Restaurant
4,Centro,40.417653,-3.707914,Palacio de Gaviria,40.417139,-3.706044,Art Museum
...,...,...,...,...,...,...,...
554,Barajas,40.473318,-3.579845,Mercadillo Barajas,40.470179,-3.577668,Flea Market
555,Barajas,40.473318,-3.579845,Metro Barajas,40.475768,-3.582535,Metro Station
556,Barajas,40.473318,-3.579845,Burger King,40.473312,-3.582063,Fast Food Restaurant
557,Barajas,40.473318,-3.579845,Tryp Alameda Aeropuerto,40.469134,-3.580105,Hotel


Search restaurant venues

In [12]:
madrid_restaurants_df = madrid_venues_df.loc[madrid_venues_df['Venue Category'].str.contains('Restaurant') == True]
madrid_restaurants_df.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
3,Centro,40.417653,-3.707914,Gyoza Go!,40.416179,-3.708612,Dumpling Restaurant
11,Centro,40.417653,-3.707914,Musashi,40.419,-3.707656,Japanese Restaurant
17,Centro,40.417653,-3.707914,Ramen Kagura,40.41685,-3.708624,Ramen Restaurant
20,Centro,40.417653,-3.707914,la gastroteca de santiago,40.416639,-3.710944,Restaurant
27,Centro,40.417653,-3.707914,Museo del Jamón,40.416343,-3.705682,Spanish Restaurant


In [13]:
madrid_restaurants_df.sort_values(by ='Venue Category', inplace = True)
madrid_restaurants_df

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  madrid_restaurants_df.sort_values(by ='Venue Category', inplace = True)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
154,Salamanca,40.427045,-3.680602,Dingo,40.427247,-3.684072,American Restaurant
313,Chamberi,40.436247,-3.703830,Rochela Restaurante,40.433575,-3.702525,American Restaurant
531,Barajas,40.473318,-3.579845,La Torino,40.473706,-3.578169,Argentinian Restaurant
517,Barajas,40.473318,-3.579845,Finca Lucero,40.470693,-3.583052,Argentinian Restaurant
452,Ciudad Lineal,40.448431,-3.650495,La Vaca Argentina,40.451483,-3.649788,Argentinian Restaurant
...,...,...,...,...,...,...,...
497,Villa de Vallecas,40.373958,-3.612163,Nueva York,40.370319,-3.612369,Tapas Restaurant
33,Centro,40.417653,-3.707914,El Mollete,40.419913,-3.710503,Tapas Restaurant
207,Salamanca,40.427045,-3.680602,Thaidy,40.423578,-3.679922,Thai Restaurant
546,Barajas,40.473318,-3.579845,Barajas Doner Kebab,40.472209,-3.579033,Turkish Restaurant


In [14]:
madrid_restaurants_df['Venue Category'].unique().tolist()

['American Restaurant',
 'Argentinian Restaurant',
 'Asian Restaurant',
 'Brazilian Restaurant',
 'Cajun / Creole Restaurant',
 'Chinese Restaurant',
 'Comfort Food Restaurant',
 'Dumpling Restaurant',
 'Falafel Restaurant',
 'Fast Food Restaurant',
 'French Restaurant',
 'Himalayan Restaurant',
 'Indian Restaurant',
 'Italian Restaurant',
 'Japanese Restaurant',
 'Korean Restaurant',
 'Mediterranean Restaurant',
 'Mexican Restaurant',
 'Middle Eastern Restaurant',
 'Ramen Restaurant',
 'Restaurant',
 'Seafood Restaurant',
 'Spanish Restaurant',
 'Sushi Restaurant',
 'Tapas Restaurant',
 'Thai Restaurant',
 'Turkish Restaurant',
 'Vietnamese Restaurant']

Create dummy variable for the venues category

In [15]:
onehot = pd.get_dummies(madrid_restaurants_df['Venue Category'], prefix = '', prefix_sep = '')
onehot.head()

Unnamed: 0,American Restaurant,Argentinian Restaurant,Asian Restaurant,Brazilian Restaurant,Cajun / Creole Restaurant,Chinese Restaurant,Comfort Food Restaurant,Dumpling Restaurant,Falafel Restaurant,Fast Food Restaurant,...,Middle Eastern Restaurant,Ramen Restaurant,Restaurant,Seafood Restaurant,Spanish Restaurant,Sushi Restaurant,Tapas Restaurant,Thai Restaurant,Turkish Restaurant,Vietnamese Restaurant
154,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
313,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
531,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
517,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
452,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


Merge a copy of the madrid_restaurants_df with the dummy variables

In [16]:
madrid_restaurants_df2 = madrid_restaurants_df
complet_df = pd.merge(madrid_restaurants_df2, onehot, on = madrid_restaurants_df2.index)
complet_df.head(5)

Unnamed: 0,key_0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category,American Restaurant,Argentinian Restaurant,...,Middle Eastern Restaurant,Ramen Restaurant,Restaurant,Seafood Restaurant,Spanish Restaurant,Sushi Restaurant,Tapas Restaurant,Thai Restaurant,Turkish Restaurant,Vietnamese Restaurant
0,154,Salamanca,40.427045,-3.680602,Dingo,40.427247,-3.684072,American Restaurant,1,0,...,0,0,0,0,0,0,0,0,0,0
1,313,Chamberi,40.436247,-3.70383,Rochela Restaurante,40.433575,-3.702525,American Restaurant,1,0,...,0,0,0,0,0,0,0,0,0,0
2,531,Barajas,40.473318,-3.579845,La Torino,40.473706,-3.578169,Argentinian Restaurant,0,1,...,0,0,0,0,0,0,0,0,0,0
3,517,Barajas,40.473318,-3.579845,Finca Lucero,40.470693,-3.583052,Argentinian Restaurant,0,1,...,0,0,0,0,0,0,0,0,0,0
4,452,Ciudad Lineal,40.448431,-3.650495,La Vaca Argentina,40.451483,-3.649788,Argentinian Restaurant,0,1,...,0,0,0,0,0,0,0,0,0,0


Prepare dataset for clustering

In [17]:
complet_df.drop(['key_0'], axis = 1, inplace = True)
complet_df.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category,American Restaurant,Argentinian Restaurant,Asian Restaurant,...,Middle Eastern Restaurant,Ramen Restaurant,Restaurant,Seafood Restaurant,Spanish Restaurant,Sushi Restaurant,Tapas Restaurant,Thai Restaurant,Turkish Restaurant,Vietnamese Restaurant
0,Salamanca,40.427045,-3.680602,Dingo,40.427247,-3.684072,American Restaurant,1,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Chamberi,40.436247,-3.70383,Rochela Restaurante,40.433575,-3.702525,American Restaurant,1,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Barajas,40.473318,-3.579845,La Torino,40.473706,-3.578169,Argentinian Restaurant,0,1,0,...,0,0,0,0,0,0,0,0,0,0
3,Barajas,40.473318,-3.579845,Finca Lucero,40.470693,-3.583052,Argentinian Restaurant,0,1,0,...,0,0,0,0,0,0,0,0,0,0
4,Ciudad Lineal,40.448431,-3.650495,La Vaca Argentina,40.451483,-3.649788,Argentinian Restaurant,0,1,0,...,0,0,0,0,0,0,0,0,0,0


In [18]:
cluster_groups = complet_df.drop(['Neighborhood','Neighborhood Latitude','Neighborhood Longitude','Venue','Venue Latitude','Venue Longitude','Venue Category'], axis = 1)

In [19]:
cluster_groups

Unnamed: 0,American Restaurant,Argentinian Restaurant,Asian Restaurant,Brazilian Restaurant,Cajun / Creole Restaurant,Chinese Restaurant,Comfort Food Restaurant,Dumpling Restaurant,Falafel Restaurant,Fast Food Restaurant,...,Middle Eastern Restaurant,Ramen Restaurant,Restaurant,Seafood Restaurant,Spanish Restaurant,Sushi Restaurant,Tapas Restaurant,Thai Restaurant,Turkish Restaurant,Vietnamese Restaurant
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
190,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
191,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
192,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
193,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0


Clustering the types of restaurants using k-means algorithm.

In [20]:
k = 8
kmeans = KMeans(n_clusters = k, random_state = 0)
kmeans.fit(cluster_groups)

KMeans(random_state=0)

In [21]:
madrid_restaurants_df2.insert(0,'Cluster Labels',kmeans.labels_)

In [22]:
madrid_restaurants_df2

Unnamed: 0,Cluster Labels,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
154,0,Salamanca,40.427045,-3.680602,Dingo,40.427247,-3.684072,American Restaurant
313,0,Chamberi,40.436247,-3.703830,Rochela Restaurante,40.433575,-3.702525,American Restaurant
531,0,Barajas,40.473318,-3.579845,La Torino,40.473706,-3.578169,Argentinian Restaurant
517,0,Barajas,40.473318,-3.579845,Finca Lucero,40.470693,-3.583052,Argentinian Restaurant
452,0,Ciudad Lineal,40.448431,-3.650495,La Vaca Argentina,40.451483,-3.649788,Argentinian Restaurant
...,...,...,...,...,...,...,...,...
497,2,Villa de Vallecas,40.373958,-3.612163,Nueva York,40.370319,-3.612369,Tapas Restaurant
33,2,Centro,40.417653,-3.707914,El Mollete,40.419913,-3.710503,Tapas Restaurant
207,0,Salamanca,40.427045,-3.680602,Thaidy,40.423578,-3.679922,Thai Restaurant
546,0,Barajas,40.473318,-3.579845,Barajas Doner Kebab,40.472209,-3.579033,Turkish Restaurant


Create map for all the restaurants

In [23]:
# create map

map_clusters = folium.Map(location=[mad_lat, mad_lon], zoom_start=11)

# set color scheme for the clusters
x = np.arange(k)
ys = [i + x + (i*x)**2 for i in range(k)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

markers_colors = []
for lat, lon, name, cat, cluster in zip(madrid_restaurants_df2['Venue Latitude'], madrid_restaurants_df2['Venue Longitude'], madrid_restaurants_df2['Venue'],madrid_restaurants_df2['Venue Category'], madrid_restaurants_df2['Cluster Labels']):
    label = folium.Popup(name +','+ cat + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=4,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
    
for lat, lng, neighbourhood in zip(madrid_restaurants_df2['Neighborhood Latitude'], madrid_restaurants_df2['Neighborhood Longitude'], madrid_restaurants_df2['Neighborhood']):
    label = '{}'.format(neighbourhood)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_clusters) 

    

    
map_clusters

Now lets examine the clusters created

In [24]:
cluster0 = madrid_restaurants_df2.loc[madrid_restaurants_df2['Cluster Labels'] == 0, madrid_restaurants_df2.columns[[1] + list(range(5, madrid_restaurants_df2.shape[1]))]]
cluster1 = madrid_restaurants_df2.loc[madrid_restaurants_df2['Cluster Labels'] == 1, madrid_restaurants_df2.columns[[1] + list(range(5, madrid_restaurants_df2.shape[1]))]]
cluster2 = madrid_restaurants_df2.loc[madrid_restaurants_df2['Cluster Labels'] == 2, madrid_restaurants_df2.columns[[1] + list(range(5, madrid_restaurants_df2.shape[1]))]]
cluster3 = madrid_restaurants_df2.loc[madrid_restaurants_df2['Cluster Labels'] == 3, madrid_restaurants_df2.columns[[1] + list(range(5, madrid_restaurants_df2.shape[1]))]]
cluster4 = madrid_restaurants_df2.loc[madrid_restaurants_df2['Cluster Labels'] == 4, madrid_restaurants_df2.columns[[1] + list(range(5, madrid_restaurants_df2.shape[1]))]]
cluster5 = madrid_restaurants_df2.loc[madrid_restaurants_df2['Cluster Labels'] == 5, madrid_restaurants_df2.columns[[1] + list(range(5, madrid_restaurants_df2.shape[1]))]]
cluster6 = madrid_restaurants_df2.loc[madrid_restaurants_df2['Cluster Labels'] == 6, madrid_restaurants_df2.columns[[1] + list(range(5, madrid_restaurants_df2.shape[1]))]]
cluster7 = madrid_restaurants_df2.loc[madrid_restaurants_df2['Cluster Labels'] == 7, madrid_restaurants_df2.columns[[1] + list(range(5, madrid_restaurants_df2.shape[1]))]]

In [35]:
cluster0['Venue Category'].value_counts()

Japanese Restaurant          6
Mexican Restaurant           5
Italian Restaurant           4
Argentinian Restaurant       4
Sushi Restaurant             3
Brazilian Restaurant         2
Indian Restaurant            2
Ramen Restaurant             2
American Restaurant          2
Falafel Restaurant           2
Fast Food Restaurant         2
Vietnamese Restaurant        1
Korean Restaurant            1
French Restaurant            1
Turkish Restaurant           1
Middle Eastern Restaurant    1
Dumpling Restaurant          1
Himalayan Restaurant         1
Thai Restaurant              1
Comfort Food Restaurant      1
Cajun / Creole Restaurant    1
Name: Venue Category, dtype: int64

In [25]:
cluster1['Venue Category'].value_counts()

Spanish Restaurant    49
Name: Venue Category, dtype: int64

In [26]:
cluster2['Venue Category'].value_counts()

Tapas Restaurant    38
Name: Venue Category, dtype: int64

In [27]:
cluster3['Venue Category'].value_counts()

Restaurant    35
Name: Venue Category, dtype: int64

In [28]:
cluster4['Venue Category'].value_counts()

Seafood Restaurant    7
Name: Venue Category, dtype: int64

In [29]:
cluster5['Venue Category'].value_counts()

Asian Restaurant    6
Name: Venue Category, dtype: int64

In [30]:
cluster6['Venue Category'].value_counts()

Chinese Restaurant    6
Name: Venue Category, dtype: int64

In [31]:
cluster7['Venue Category'].value_counts()

Mediterranean Restaurant    10
Name: Venue Category, dtype: int64