## Because of necessity of using Foursquare location data I've chosen an aim to compare two big cities: NY and Moscow.

In [2]:
#import all libraries
import numpy as np # library to handle data in a vectorized manner
import pandas as pd # library for data analsysis
pd.set_option('display.max_rows', 10)

import json # library to handle JSON files

#!pip install geopy 
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!pip install folium
import folium # map rendering library


<a id='item1'></a>


##Getting data

In [3]:
#for NY I use an existing data
!wget -q -O 'newyork_data.json' https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-DS0701EN-SkillsNetwork/labs/newyork_data.json
with open('newyork_data.json') as json_data:
    newyork_data = json.load(json_data)
neighborhoods_data = newyork_data['features']

# define the dataframe columns
column_names = ['Borough', 'Neighborhood', 'Latitude', 'Longitude'] 
# instantiate the dataframe
nyneighborhoods = pd.DataFrame(columns=column_names)

In [4]:
#loading data from json
for data in neighborhoods_data:
    borough = neighborhood_name = data['properties']['borough'] 
    neighborhood_name = data['properties']['name']
        
    neighborhood_latlon = data['geometry']['coordinates']
    neighborhood_lat = neighborhood_latlon[1]
    neighborhood_lon = neighborhood_latlon[0]
    
    nyneighborhoods = nyneighborhoods.append({'Borough': borough,
                                          'Neighborhood': neighborhood_name,
                                          'Latitude': neighborhood_lat,
                                          'Longitude': neighborhood_lon}, ignore_index=True)

In [5]:
#for Moscow I have found this geodata:
!wget -q -O 'moscow_data.geojson' http://gis-lab.info/data/mos-adm/mo.geojson
with open('moscow_data.geojson') as json_data:
    moscow_data = json.load(json_data)

neighborhoods_data = moscow_data['features']
# instantiate the dataframe
moneighborhoods = pd.DataFrame(columns=column_names)

In [6]:
#loading and clearingdata from json
for data in neighborhoods_data:
 # while (n<100):
    borough = neighborhood_name = data['properties']['NAME_AO'] 
    neighborhood_name = data['properties']['NAME']
        
    neighborhood_latlon = data['geometry']['coordinates']
    #print(neighborhood_latlon,'\n','len= ',len(neighborhood_latlon))  
    df = pd.DataFrame(neighborhood_latlon)
    list1=df[0][0]
    #print('list1',list1)
    i=1
    while (isinstance(list1[0],list)):
      #print('i= ',i,' ',list1[0],'\n')
      list1=list1[0]
      i+=1
    columns=['Latitude','Longitude'] 
    neighborhood_lat = list1[1]
    neighborhood_lon = list1[0]
    moneighborhoods = moneighborhoods.append({'Borough': borough,
                                              'Neighborhood': neighborhood_name,
                                              'Latitude': neighborhood_lat,
                                              'Longitude': neighborhood_lon}, ignore_index=True)

In [17]:
nyneighborhoods

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Bronx,Wakefield,40.894705,-73.847201
1,Bronx,Co-op City,40.874294,-73.829939
2,Bronx,Eastchester,40.887556,-73.827806
3,Bronx,Fieldston,40.895437,-73.905643
4,Bronx,Riverdale,40.890834,-73.912585
...,...,...,...,...
301,Manhattan,Hudson Yards,40.756658,-74.000111
302,Queens,Hammels,40.587338,-73.805530
303,Queens,Bayswater,40.611322,-73.765968
304,Queens,Queensbridge,40.756091,-73.945631


#Maps for NY and Moscow

In [9]:
#create map for Moscow boroughs
address = 'Moscow'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
molatitude = location.latitude
molongitude = location.longitude

map_moscow = folium.Map(location=[molatitude, molongitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(moneighborhoods['Latitude'], moneighborhoods['Longitude'], moneighborhoods['Borough'], moneighborhoods['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_moscow)  
    
map_moscow

In [10]:
#create map for NY borough
address = 'New York City, NY'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of New York City are {}, {}.'.format(latitude, longitude))

# create map of New York using latitude and longitude values
map_newyork = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(nyneighborhoods['Latitude'], nyneighborhoods['Longitude'], nyneighborhoods['Borough'], nyneighborhoods['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_newyork)  
    
map_newyork

The geograpical coordinate of New York City are 40.7127281, -74.0060152.


##Getting data from Foursqare


In [11]:
CLIENT_ID = 'Q112VBXVEKS5WUG4ZXRI5GUDHZYVEYB1GD151PK5LBRMAFKY' # your Foursquare ID
CLIENT_SECRET = 'KGA2CQJP4GFA4W5N1W0EN1TZEL2X4WLIKADAUKQG1U3I5KNM' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version
LIMIT = 100 # A default Foursquare API limit value


In [12]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        #print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [13]:
#moscow places from Foursquare
mo_venues = getNearbyVenues(names=moneighborhoods['Neighborhood'],
                                   latitudes=moneighborhoods['Latitude'],
                                   longitudes=moneighborhoods['Longitude']
                                  )

In [14]:
#NY places from Foursquare
ny_venues = getNearbyVenues(names=nyneighborhoods['Neighborhood'],
                                   latitudes=nyneighborhoods['Latitude'],
                                   longitudes=nyneighborhoods['Longitude']
                                  )

In [None]:
#check Moscow results
print(mo_venues.shape)
mo_venues.head()

In [None]:
mo_venues.groupby('Neighborhood').count()

In [17]:
#check NY results
print(ny_venues.shape)
ny_venues.head()

(10047, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Wakefield,40.894705,-73.847201,Lollipops Gelato,40.894123,-73.845892,Dessert Shop
1,Wakefield,40.894705,-73.847201,Carvel Ice Cream,40.890487,-73.848568,Ice Cream Shop
2,Wakefield,40.894705,-73.847201,Walgreens,40.896528,-73.8447,Pharmacy
3,Wakefield,40.894705,-73.847201,Rite Aid,40.896649,-73.844846,Pharmacy
4,Wakefield,40.894705,-73.847201,Dunkin',40.890459,-73.849089,Donut Shop


In [1]:
(190**4 - 120/0.3)*1780

2319713088000.0

<a id='item3'></a>


In [18]:
#combine NY and Moscow places for further analysis
mony=mo_venues.append(ny_venues)
mony.reset_index(inplace=True,drop=True)
mony

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Филёвский Парк,55.748210,37.42765,Hills,55.749205,37.423439,Mediterranean Restaurant
1,Филёвский Парк,55.748210,37.42765,Спортивно-экологический комплекс «Лата Трэк»,55.750687,37.426734,Ski Area
2,Филёвский Парк,55.748210,37.42765,Андерсон,55.746490,37.422141,Café
3,Филёвский Парк,55.748210,37.42765,ЧЕТЫРЕ ЛАПЫ,55.746892,37.421252,Pet Store
4,Филёвский Парк,55.748210,37.42765,Горнолыжная База ЦСКА,55.744827,37.432187,Ski Area
...,...,...,...,...,...,...,...
11917,Fox Hills,40.617311,-74.08174,SUBWAY,40.618939,-74.082881,Sandwich Place
11918,Fox Hills,40.617311,-74.08174,Bums Chicken N Ribs Joint,40.618192,-74.085506,BBQ Joint
11919,Fox Hills,40.617311,-74.08174,Nettys playhouse,40.616856,-74.077566,Playground
11920,Fox Hills,40.617311,-74.08174,Stop 1 Supermarket,40.614576,-74.084714,Grocery Store


In [19]:
# one hot encoding
mony_onehot = pd.get_dummies(mony[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
mony_onehot['Neighborhood'] = mony['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [mony_onehot.columns[-1]] + list(mony_onehot.columns[:-1])
mony_onehot = mony_onehot[fixed_columns]

mony_onehot.head()

Unnamed: 0,Zoo Exhibit,ATM,Accessories Store,Adult Boutique,Afghan Restaurant,African Restaurant,Airport Terminal,American Restaurant,Amphitheater,Antique Shop,Aquarium,Arcade,Arepa Restaurant,Argentinian Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,Arts & Entertainment,Asian Restaurant,Athletics & Sports,Auditorium,Australian Restaurant,Austrian Restaurant,Auto Dealership,Auto Workshop,Automotive Shop,BBQ Joint,Baby Store,Bagel Shop,Bakery,Bank,Bar,Baseball Field,Baseball Stadium,Basketball Court,Basketball Stadium,Bath House,Beach,Beach Bar,Bed & Breakfast,...,Thrift / Vintage Store,Tibetan Restaurant,Tiki Bar,Toll Plaza,Tour Provider,Tourist Information Center,Toy / Game Store,Track,Trail,Train,Train Station,Tram Station,Trattoria/Osteria,Tree,Tunnel,Turkish Restaurant,Udon Restaurant,Ukrainian Restaurant,Used Bookstore,Vape Store,Varenyky restaurant,Vegetarian / Vegan Restaurant,Venezuelan Restaurant,Veterinarian,Video Game Store,Video Store,Vietnamese Restaurant,Volcano,Volleyball Court,Warehouse Store,Waste Facility,Watch Shop,Waterfront,Weight Loss Center,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [20]:
mony_onehot['Neighborhood']

0        Филёвский Парк
1        Филёвский Парк
2        Филёвский Парк
3        Филёвский Парк
4        Филёвский Парк
              ...      
11917         Fox Hills
11918         Fox Hills
11919         Fox Hills
11920         Fox Hills
11921         Fox Hills
Name: Neighborhood, Length: 11922, dtype: object

And let's examine the new dataframe size.


In [21]:
mony_onehot.shape

(11922, 484)

#### Next, let's group rows by neighborhood and by taking the mean of the frequency of occurrence of each category


In [22]:
mony_grouped = mony_onehot.groupby('Neighborhood').mean().reset_index()
mony_grouped

Unnamed: 0,Neighborhood,Zoo Exhibit,ATM,Accessories Store,Adult Boutique,Afghan Restaurant,African Restaurant,Airport Terminal,American Restaurant,Amphitheater,Antique Shop,Aquarium,Arcade,Arepa Restaurant,Argentinian Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,Arts & Entertainment,Asian Restaurant,Athletics & Sports,Auditorium,Australian Restaurant,Austrian Restaurant,Auto Dealership,Auto Workshop,Automotive Shop,BBQ Joint,Baby Store,Bagel Shop,Bakery,Bank,Bar,Baseball Field,Baseball Stadium,Basketball Court,Basketball Stadium,Bath House,Beach,Beach Bar,...,Thrift / Vintage Store,Tibetan Restaurant,Tiki Bar,Toll Plaza,Tour Provider,Tourist Information Center,Toy / Game Store,Track,Trail,Train,Train Station,Tram Station,Trattoria/Osteria,Tree,Tunnel,Turkish Restaurant,Udon Restaurant,Ukrainian Restaurant,Used Bookstore,Vape Store,Varenyky restaurant,Vegetarian / Vegan Restaurant,Venezuelan Restaurant,Veterinarian,Video Game Store,Video Store,Vietnamese Restaurant,Volcano,Volleyball Court,Warehouse Store,Waste Facility,Watch Shop,Waterfront,Weight Loss Center,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,"""Мосрентген""",0.0,0.000,0.0,0.0,0.0,0.0,0.0,0.000,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.00,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.0
1,Allerton,0.0,0.000,0.0,0.0,0.0,0.0,0.0,0.000,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.00,0.0,0.0,0.0,0.0,0.096774,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.0
2,Annadale,0.0,0.000,0.0,0.0,0.0,0.0,0.0,0.100,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.00,0.0,0.0,0.0,0.0,0.100000,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.0
3,Arden Heights,0.0,0.000,0.0,0.0,0.0,0.0,0.0,0.000,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.00,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.0
4,Arlington,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.00,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
429,Южное Тушино,0.0,0.000,0.0,0.0,0.0,0.0,0.0,0.000,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.25,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.0
430,Южнопортовый,0.0,0.000,0.0,0.0,0.0,0.0,0.0,0.000,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.00,0.0,0.0,0.0,0.0,0.052632,0.0,0.000000,0.0,0.0,0.0,0.0,0.052632,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.0
431,Якиманка,0.0,0.000,0.0,0.0,0.0,0.0,0.0,0.000,0.0,0.0,0.0,0.095238,0.0,0.0,0.0,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.00,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.0
432,Ярославский,0.0,0.000,0.0,0.0,0.0,0.0,0.0,0.000,0.0,0.0,0.0,0.076923,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.076923,0.00,0.0,0.0,0.0,0.0,0.000000,0.0,0.076923,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.076923,0.0,0.000000,0.0


#### Let's confirm the new size


In [70]:
mony_grouped.shape

(436, 480)

#### Let's put that into a _pandas_ dataframe


First, let's write a function to sort the venues in descending order.


In [23]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

Now let's create the new dataframe and display the top 10 venues for each neighborhood.


In [24]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = mony_grouped['Neighborhood']

for ind in np.arange(mony_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(mony_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"""Мосрентген""",Middle Eastern Restaurant,Café,Yoga Studio,Exhibit,Fabric Shop,Factory,Falafel Restaurant,Farm,Farmers Market,Fast Food Restaurant
1,Allerton,Pizza Place,Bakery,Spa,Chinese Restaurant,Deli / Bodega,Supermarket,Bus Station,Donut Shop,Grocery Store,Fast Food Restaurant
2,Annadale,Pizza Place,Bakery,American Restaurant,Train Station,Food,Diner,Cosmetics Shop,Pharmacy,Restaurant,Food Truck
3,Arden Heights,Pharmacy,Deli / Bodega,Pizza Place,Coffee Shop,Home Service,Yoga Studio,Film Studio,Eye Doctor,Fabric Shop,Factory
4,Arlington,Bus Stop,Grocery Store,Coffee Shop,ATM,Deli / Bodega,Intersection,American Restaurant,Fish & Chips Shop,Film Studio,Filipino Restaurant
...,...,...,...,...,...,...,...,...,...,...,...
429,Южное Тушино,Auto Workshop,Hookah Bar,Park,Café,Yoga Studio,Fish & Chips Shop,Eye Doctor,Fabric Shop,Factory,Falafel Restaurant
430,Южнопортовый,Clothing Store,Hotel,Bath House,Pharmacy,Miscellaneous Shop,Health Food Store,Bakery,Coffee Shop,Historic Site,Paper / Office Supplies Store
431,Якиманка,Café,Outdoor Gym,Arcade,Stables,Surf Spot,Theater,Science Museum,Scenic Lookout,Lounge,Coffee Shop
432,Ярославский,Big Box Store,Clothing Store,Bus Line,Food & Drink Shop,Roof Deck,Auto Dealership,Arcade,Bar,Supermarket,Wine Shop


<a id='item4'></a>


#Cluster Neighborhoods



Run _k_-means to cluster the neighborhood into 5 clusters.


In [25]:
# set number of clusters
kclusters = 9

mony_grouped_clustering = mony_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(mony_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

In [26]:
neighborhoods_venues_sorted

Unnamed: 0,Cluster Labels,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,7,"""Мосрентген""",Middle Eastern Restaurant,Café,Yoga Studio,Exhibit,Fabric Shop,Factory,Falafel Restaurant,Farm,Farmers Market,Fast Food Restaurant
1,0,Allerton,Pizza Place,Bakery,Spa,Chinese Restaurant,Deli / Bodega,Supermarket,Bus Station,Donut Shop,Grocery Store,Fast Food Restaurant
2,0,Annadale,Pizza Place,Bakery,American Restaurant,Train Station,Food,Diner,Cosmetics Shop,Pharmacy,Restaurant,Food Truck
3,0,Arden Heights,Pharmacy,Deli / Bodega,Pizza Place,Coffee Shop,Home Service,Yoga Studio,Film Studio,Eye Doctor,Fabric Shop,Factory
4,7,Arlington,Bus Stop,Grocery Store,Coffee Shop,ATM,Deli / Bodega,Intersection,American Restaurant,Fish & Chips Shop,Film Studio,Filipino Restaurant
...,...,...,...,...,...,...,...,...,...,...,...,...
429,1,Южное Тушино,Auto Workshop,Hookah Bar,Park,Café,Yoga Studio,Fish & Chips Shop,Eye Doctor,Fabric Shop,Factory,Falafel Restaurant
430,7,Южнопортовый,Clothing Store,Hotel,Bath House,Pharmacy,Miscellaneous Shop,Health Food Store,Bakery,Coffee Shop,Historic Site,Paper / Office Supplies Store
431,7,Якиманка,Café,Outdoor Gym,Arcade,Stables,Surf Spot,Theater,Science Museum,Scenic Lookout,Lounge,Coffee Shop
432,7,Ярославский,Big Box Store,Clothing Store,Bus Line,Food & Drink Shop,Roof Deck,Auto Dealership,Arcade,Bar,Supermarket,Wine Shop


Let's create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood.


In [27]:

mony_merged = moneighborhoods.append(nyneighborhoods)
mony_merged.reset_index(inplace=True,drop=True)
mony_merged

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Троицкий,Киевский,55.440830,36.803100
1,Западный,Филёвский Парк,55.748210,37.427650
2,Троицкий,Новофёдоровское,55.451620,36.803570
3,Троицкий,Роговское,55.241390,36.937240
4,Новомосковский,"""Мосрентген""",55.627310,37.439560
...,...,...,...,...
447,Manhattan,Hudson Yards,40.756658,-74.000111
448,Queens,Hammels,40.587338,-73.805530
449,Queens,Bayswater,40.611322,-73.765968
450,Queens,Queensbridge,40.756091,-73.945631


In [28]:
# merge to add latitude/longitude for each neighborhood
mony_merged = mony_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

mony_merged

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Троицкий,Киевский,55.440830,36.803100,,,,,,,,,,,
1,Западный,Филёвский Парк,55.748210,37.427650,7.0,Italian Restaurant,Gym,Ski Area,Café,Mediterranean Restaurant,Gastropub,Gym / Fitness Center,BBQ Joint,Pizza Place,Tennis Court
2,Троицкий,Новофёдоровское,55.451620,36.803570,,,,,,,,,,,
3,Троицкий,Роговское,55.241390,36.937240,7.0,Nightclub,Gastropub,Soccer Field,Farm,Yoga Studio,Fish Market,Eye Doctor,Fabric Shop,Factory,Falafel Restaurant
4,Новомосковский,"""Мосрентген""",55.627310,37.439560,7.0,Middle Eastern Restaurant,Café,Yoga Studio,Exhibit,Fabric Shop,Factory,Falafel Restaurant,Farm,Farmers Market,Fast Food Restaurant
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
447,Manhattan,Hudson Yards,40.756658,-74.000111,7.0,Gym / Fitness Center,Café,American Restaurant,Italian Restaurant,Hotel,Park,Coffee Shop,Restaurant,Dog Run,Gym
448,Queens,Hammels,40.587338,-73.805530,4.0,Beach,Fried Chicken Joint,Gym / Fitness Center,Dog Run,Fast Food Restaurant,Bus Stop,Bus Station,Diner,Food Truck,Building
449,Queens,Bayswater,40.611322,-73.765968,7.0,Playground,Yoga Studio,Event Service,Exhibit,Eye Doctor,Fabric Shop,Factory,Falafel Restaurant,Farm,Farmers Market
450,Queens,Queensbridge,40.756091,-73.945631,7.0,Hotel,Baseball Field,Scenic Lookout,Basketball Court,Gym / Fitness Center,Platform,Beer Garden,Sandwich Place,Cocktail Bar,Spanish Restaurant


In [29]:
mony_merged.dropna(axis=0,how='any',inplace=True)
mony_merged

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,Западный,Филёвский Парк,55.748210,37.427650,7.0,Italian Restaurant,Gym,Ski Area,Café,Mediterranean Restaurant,Gastropub,Gym / Fitness Center,BBQ Joint,Pizza Place,Tennis Court
3,Троицкий,Роговское,55.241390,36.937240,7.0,Nightclub,Gastropub,Soccer Field,Farm,Yoga Studio,Fish Market,Eye Doctor,Fabric Shop,Factory,Falafel Restaurant
4,Новомосковский,"""Мосрентген""",55.627310,37.439560,7.0,Middle Eastern Restaurant,Café,Yoga Studio,Exhibit,Fabric Shop,Factory,Falafel Restaurant,Farm,Farmers Market,Fast Food Restaurant
5,Троицкий,Вороновское,55.354850,36.970080,6.0,Tree,Yoga Studio,Fish Market,Exhibit,Eye Doctor,Fabric Shop,Factory,Falafel Restaurant,Farm,Farmers Market
9,Зеленоградский,Матушкино,56.007950,37.178530,1.0,Auto Workshop,Flower Shop,Yoga Studio,Fish Market,Exhibit,Eye Doctor,Fabric Shop,Factory,Falafel Restaurant,Farm
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
447,Manhattan,Hudson Yards,40.756658,-74.000111,7.0,Gym / Fitness Center,Café,American Restaurant,Italian Restaurant,Hotel,Park,Coffee Shop,Restaurant,Dog Run,Gym
448,Queens,Hammels,40.587338,-73.805530,4.0,Beach,Fried Chicken Joint,Gym / Fitness Center,Dog Run,Fast Food Restaurant,Bus Stop,Bus Station,Diner,Food Truck,Building
449,Queens,Bayswater,40.611322,-73.765968,7.0,Playground,Yoga Studio,Event Service,Exhibit,Eye Doctor,Fabric Shop,Factory,Falafel Restaurant,Farm,Farmers Market
450,Queens,Queensbridge,40.756091,-73.945631,7.0,Hotel,Baseball Field,Scenic Lookout,Basketball Court,Gym / Fitness Center,Platform,Beer Garden,Sandwich Place,Cocktail Bar,Spanish Restaurant


In [30]:
mony_merged.astype({'Cluster Labels': 'int32'},copy=False).dtypes
mony_merged

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,Западный,Филёвский Парк,55.748210,37.427650,7.0,Italian Restaurant,Gym,Ski Area,Café,Mediterranean Restaurant,Gastropub,Gym / Fitness Center,BBQ Joint,Pizza Place,Tennis Court
3,Троицкий,Роговское,55.241390,36.937240,7.0,Nightclub,Gastropub,Soccer Field,Farm,Yoga Studio,Fish Market,Eye Doctor,Fabric Shop,Factory,Falafel Restaurant
4,Новомосковский,"""Мосрентген""",55.627310,37.439560,7.0,Middle Eastern Restaurant,Café,Yoga Studio,Exhibit,Fabric Shop,Factory,Falafel Restaurant,Farm,Farmers Market,Fast Food Restaurant
5,Троицкий,Вороновское,55.354850,36.970080,6.0,Tree,Yoga Studio,Fish Market,Exhibit,Eye Doctor,Fabric Shop,Factory,Falafel Restaurant,Farm,Farmers Market
9,Зеленоградский,Матушкино,56.007950,37.178530,1.0,Auto Workshop,Flower Shop,Yoga Studio,Fish Market,Exhibit,Eye Doctor,Fabric Shop,Factory,Falafel Restaurant,Farm
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
447,Manhattan,Hudson Yards,40.756658,-74.000111,7.0,Gym / Fitness Center,Café,American Restaurant,Italian Restaurant,Hotel,Park,Coffee Shop,Restaurant,Dog Run,Gym
448,Queens,Hammels,40.587338,-73.805530,4.0,Beach,Fried Chicken Joint,Gym / Fitness Center,Dog Run,Fast Food Restaurant,Bus Stop,Bus Station,Diner,Food Truck,Building
449,Queens,Bayswater,40.611322,-73.765968,7.0,Playground,Yoga Studio,Event Service,Exhibit,Eye Doctor,Fabric Shop,Factory,Falafel Restaurant,Farm,Farmers Market
450,Queens,Queensbridge,40.756091,-73.945631,7.0,Hotel,Baseball Field,Scenic Lookout,Basketball Court,Gym / Fitness Center,Platform,Beer Garden,Sandwich Place,Cocktail Bar,Spanish Restaurant


In [31]:
mony_merged.dtypes

Borough                    object
Neighborhood               object
Latitude                  float64
Longitude                 float64
Cluster Labels            float64
                           ...   
6th Most Common Venue      object
7th Most Common Venue      object
8th Most Common Venue      object
9th Most Common Venue      object
10th Most Common Venue     object
Length: 15, dtype: object

Create maps


In [32]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=10) #,width='50%', height='100%', left='0%', top='0%')

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(mony_merged['Latitude'], mony_merged['Longitude'], mony_merged['Neighborhood'], mony_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[int(cluster)-1],
        fill=True,
        fill_color=rainbow[int(cluster)-1],
        fill_opacity=0.7).add_to(map_clusters)


momap_clusters = folium.Map(location=[molatitude, molongitude], zoom_start=10) #,width='50%', height='100%', left='50%', top='0%')

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(mony_merged['Latitude'], mony_merged['Longitude'], mony_merged['Neighborhood'], mony_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[int(cluster)-1],
        fill=True,
        fill_color=rainbow[int(cluster)-1],
        fill_opacity=0.7).add_to(momap_clusters)

In [33]:
#and finally visualise two maps side by side
from IPython.core.display import display, HTML

htmlmap = HTML('<iframe srcdoc="{}" style="float:left; width: {}px; height: {}px; display:inline-block; width: 49%; margin: 0 auto; border: 2px solid black"></iframe>'
           '<iframe srcdoc="{}" style="float:right; width: {}px; height: {}px; display:inline-block; width: 49%; margin: 0 auto; border: 2px solid black"></iframe>'
           .format(momap_clusters.get_root().render().replace('"', '&quot;'),500,500,
                   map_clusters.get_root().render().replace('"', '&quot;'),500,500))
display(htmlmap)