In [36]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

Libraries imported.


In [37]:
with open('Zipcodes_Poly.geojson') as json_data:
    philly_data = json.load(json_data)

In [38]:
philly_data = philly_data['features']

In [39]:
#import list of Philly Neighborhood names with corresponding zip code
neighborhoods = pd.read_csv('philly_neighborhoods.csv')

In [40]:
neighborhoods

Unnamed: 0,Zip Code,Neighborhood
0,19102,Rittenhouse Square
1,19105,Center City
2,19108,Callowhill
3,19110,Rittenhouse Square
4,19102,Avenue of The Arts
5,19103,Logan Square
6,19103,Fitler Square
7,19129,East Falls
8,19130,Art Museum
9,19130,Francisville


In [41]:
neighborhoods = neighborhoods.groupby(['Zip Code'])['Neighborhood'].apply(', '.join).reset_index()
neighborhoods

Unnamed: 0,Zip Code,Neighborhood
0,19102,"Rittenhouse Square, Avenue of The Arts"
1,19103,"Logan Square, Fitler Square"
2,19104,"University City, Belmont, Parkside, Powelton V..."
3,19105,Center City
4,19106,"Society Hill, Old City"
5,19107,"Washington Square West, Midtown Village, China..."
6,19108,Callowhill
7,19110,Rittenhouse Square
8,19111,"Lawndale, Castor Gardens"
9,19112,Navy Yard


In [42]:
#turn json data into pandas dataframe
column_names = ['Zip Code', 'Latitude', 'Longitude']

#instantiate json as dataframe
zip_coordinates = pd.DataFrame(columns=column_names)
zip_coordinates

Unnamed: 0,Zip Code,Latitude,Longitude


In [43]:
for data in philly_data:
    zip_code = data['properties']['CODE']
    neighborhood_latlon = data['geometry']['coordinates']
    
    #calculate the average latitude and longitude to find the centerpoint of each neighborhood
    avg_lon = sum([i[0] for i in neighborhood_latlon[0]]) / len([i[0] for i in neighborhood_latlon[0]])
    avg_lat = sum([i[1] for i in neighborhood_latlon[0]]) / len([i[1] for i in neighborhood_latlon[0]])
    zip_coordinates = zip_coordinates.append({'Zip Code': zip_code, 'Latitude': avg_lat, 'Longitude': avg_lon}, ignore_index=True)

#sets the data type for Zip Code column to integer in order to merge the Neighborhoods and Zip code Datafram
zip_coordinates['Zip Code'] = zip_coordinates['Zip Code'].astype(int) 
zip_coordinates

Unnamed: 0,Zip Code,Latitude,Longitude
0,19120,40.033817,-75.119314
1,19121,39.98511,-75.185927
2,19122,39.977956,-75.145224
3,19123,39.963869,-75.14776
4,19124,40.017222,-75.092961
5,19125,39.977076,-75.125977
6,19126,40.054479,-75.137457
7,19127,40.026074,-75.223739
8,19128,40.041703,-75.21699
9,19102,39.952719,-75.16515


In [44]:
neighborhoods = neighborhoods.merge(zip_coordinates, on='Zip Code')
neighborhoods

Unnamed: 0,Zip Code,Neighborhood,Latitude,Longitude
0,19102,"Rittenhouse Square, Avenue of The Arts",39.952719,-75.16515
1,19103,"Logan Square, Fitler Square",39.954636,-75.175518
2,19104,"University City, Belmont, Parkside, Powelton V...",39.958853,-75.196545
3,19106,"Society Hill, Old City",39.952621,-75.147081
4,19107,"Washington Square West, Midtown Village, China...",39.952426,-75.159222
5,19111,"Lawndale, Castor Gardens",40.066935,-75.072031
6,19112,Navy Yard,39.897171,-75.179071
7,19114,Torresdale,40.061797,-74.994471
8,19115,Somerton,40.083605,-75.052115
9,19116,Bustleton,40.120532,-74.997741


In [45]:
address = 'Philadelphia, PA'
geolocator = Nominatim(user_agent='philly_explorer')
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinates of Philadelphia are {}, {}.'.format(latitude, longitude))

The geograpical coordinates of Philadelphia are 39.9527237, -75.1635262.


In [46]:
# create a map of Philly and map each Neighborhood location using latitude and longitude
map_philly = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, codes,neighborhood in zip(neighborhoods['Latitude'], neighborhoods['Longitude'], neighborhoods['Zip Code'], neighborhoods['Neighborhood']):
    label = '{}, {}'.format(neighborhood, zip_code)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_philly)
    
map_philly

In [47]:
CLIENT_ID = 'DOPOTW2LFF0F01CX5OQETOADOCJQSP4DUQRZNNF412IMEPBQ' # your Foursquare ID
CLIENT_SECRET = 'VZQUFWD1A2IJQLBTCSPUM1YU2CYXLO4ZWZAHSHXNIYQXI3ZE' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version
LIMIT = 100

In [48]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [49]:
philly_venues = getNearbyVenues(names=neighborhoods['Neighborhood'],
                                   latitudes=neighborhoods['Latitude'],
                                   longitudes=neighborhoods['Longitude']
                                  )

Rittenhouse Square, Avenue of The Arts
Logan Square, Fitler Square
University City, Belmont, Parkside, Powelton Village
Society Hill, Old City
Washington Square West, Midtown Village, Chinatown
Lawndale, Castor Gardens
Navy Yard
Torresdale
Somerton
Bustleton
Chestnut Hill
Mount Airy
Logan, Olney, Feltonville
Brewerytown
Yorktown, Old Kensington, Jinogi
Northern Liberties, Loft District
Juniata, Frankford, Feltonville
Fishtown, Kensington
Oak Lane
Manayunk
Roxborough
East Falls
Art Museum, Francisville
Wynnefield
Strawberry Mansion
Fairhill, North Philadelphia
Port Richmond
Tacony
Holmesburg
Bridesburg
West Oak Lane
Walnut Hill/West
Hunting Park
Logan
Elmwood, Southwest Philadelphia
University City
Germantown
Passyunk
Graduate Hospital, Naval Square, Southwest Center City
Queen Village, Bella Vista, Pennsport, Italian Market
Whitman, Pennsport, South Philadelphia
Frankford, Mayfair
Cedarbrook
Overbrook, Overbrook Farms, Overbrook Park
Rhawnhurst
Eastwick
Parkwood, Byberry


In [50]:
print(philly_venues.shape)
philly_venues.head()

(1030, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Rittenhouse Square, Avenue of The Arts",39.952719,-75.16515,Dilworth Park,39.952772,-75.164723,Park
1,"Rittenhouse Square, Avenue of The Arts",39.952719,-75.16515,La Colombe Coffee Roasters,39.951659,-75.165238,Coffee Shop
2,"Rittenhouse Square, Avenue of The Arts",39.952719,-75.16515,City Hall Courtyard,39.952484,-75.163592,Plaza
3,"Rittenhouse Square, Avenue of The Arts",39.952719,-75.16515,Del Frisco's Double Eagle Steak House,39.950956,-75.165459,Steakhouse
4,"Rittenhouse Square, Avenue of The Arts",39.952719,-75.16515,JFK Plaza / Love Park,39.954123,-75.165303,Plaza


In [51]:
philly_venues.groupby('Venue Category').count()

Unnamed: 0_level_0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude
Venue Category,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
ATM,1,1,1,1,1,1
Accessories Store,1,1,1,1,1,1
African Restaurant,1,1,1,1,1,1
American Restaurant,15,15,15,15,15,15
Art Gallery,15,15,15,15,15,15
Art Museum,4,4,4,4,4,4
Arts & Crafts Store,5,5,5,5,5,5
Arts & Entertainment,1,1,1,1,1,1
Asian Restaurant,9,9,9,9,9,9
Athletics & Sports,5,5,5,5,5,5


In [52]:
philly_venues = philly_venues[(philly_venues['Venue Category'].str.contains('Restaurant')) | (philly_venues['Venue Category'].str.contains('Steakhouse')) | (philly_venues['Venue Category'].str.contains('Pizza'))]
philly_venues.groupby('Venue Category').count()

Unnamed: 0_level_0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude
Venue Category,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
African Restaurant,1,1,1,1,1,1
American Restaurant,15,15,15,15,15,15
Asian Restaurant,9,9,9,9,9,9
Brazilian Restaurant,2,2,2,2,2,2
Cajun / Creole Restaurant,1,1,1,1,1,1
Caribbean Restaurant,3,3,3,3,3,3
Chinese Restaurant,18,18,18,18,18,18
Colombian Restaurant,1,1,1,1,1,1
Cuban Restaurant,2,2,2,2,2,2
Eastern European Restaurant,1,1,1,1,1,1


In [53]:
# one hot encoding
philly_onehot = pd.get_dummies(philly_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
philly_onehot['Neighborhood'] = philly_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [philly_onehot.columns[-1]] + list(philly_onehot.columns[:-1])
philly_onehot = philly_onehot[fixed_columns]

philly_onehot

Unnamed: 0,Neighborhood,African Restaurant,American Restaurant,Asian Restaurant,Brazilian Restaurant,Cajun / Creole Restaurant,Caribbean Restaurant,Chinese Restaurant,Colombian Restaurant,Cuban Restaurant,Eastern European Restaurant,Ethiopian Restaurant,Falafel Restaurant,Fast Food Restaurant,Filipino Restaurant,French Restaurant,Greek Restaurant,Hawaiian Restaurant,Indian Restaurant,Israeli Restaurant,Italian Restaurant,Japanese Restaurant,Jewish Restaurant,Korean Restaurant,Latin American Restaurant,Malay Restaurant,Mediterranean Restaurant,Mexican Restaurant,Middle Eastern Restaurant,New American Restaurant,Peruvian Restaurant,Pizza Place,Restaurant,Seafood Restaurant,Shanghai Restaurant,Southern / Soul Food Restaurant,Steakhouse,Sushi Restaurant,Szechuan Restaurant,Taiwanese Restaurant,Tapas Restaurant,Thai Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant
3,"Rittenhouse Square, Avenue of The Arts",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0
13,"Rittenhouse Square, Avenue of The Arts",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0
15,"Rittenhouse Square, Avenue of The Arts",0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
16,"Rittenhouse Square, Avenue of The Arts",0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
18,"Rittenhouse Square, Avenue of The Arts",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0
22,"Rittenhouse Square, Avenue of The Arts",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
24,"Rittenhouse Square, Avenue of The Arts",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0
27,"Rittenhouse Square, Avenue of The Arts",0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
29,"Rittenhouse Square, Avenue of The Arts",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0
31,"Rittenhouse Square, Avenue of The Arts",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [54]:
philly_grouped = philly_onehot.groupby('Neighborhood').mean().reset_index()
philly_grouped

Unnamed: 0,Neighborhood,African Restaurant,American Restaurant,Asian Restaurant,Brazilian Restaurant,Cajun / Creole Restaurant,Caribbean Restaurant,Chinese Restaurant,Colombian Restaurant,Cuban Restaurant,Eastern European Restaurant,Ethiopian Restaurant,Falafel Restaurant,Fast Food Restaurant,Filipino Restaurant,French Restaurant,Greek Restaurant,Hawaiian Restaurant,Indian Restaurant,Israeli Restaurant,Italian Restaurant,Japanese Restaurant,Jewish Restaurant,Korean Restaurant,Latin American Restaurant,Malay Restaurant,Mediterranean Restaurant,Mexican Restaurant,Middle Eastern Restaurant,New American Restaurant,Peruvian Restaurant,Pizza Place,Restaurant,Seafood Restaurant,Shanghai Restaurant,Southern / Soul Food Restaurant,Steakhouse,Sushi Restaurant,Szechuan Restaurant,Taiwanese Restaurant,Tapas Restaurant,Thai Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant
0,"Art Museum, Francisville",0.0,0.071429,0.071429,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.071429,0.071429,0.0,0.0,0.0,0.214286,0.0,0.0,0.0,0.071429,0.0,0.071429,0.071429,0.0,0.0,0.0,0.071429,0.071429,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0
1,Bridesburg,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,East Falls,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,"Elmwood, Southwest Philadelphia",1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,"Fairhill, North Philadelphia",0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,"Fishtown, Kensington",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.285714,0.0,0.142857,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,"Frankford, Mayfair",0.0,0.125,0.125,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.125,0.0,0.0,0.25,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,Germantown,0.0,0.166667,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,"Graduate Hospital, Naval Square, Southwest Cen...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0
9,Hunting Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.25,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [55]:
num_top_venues = 5

for hood in philly_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = philly_grouped[philly_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Art Museum, Francisville----
                       venue  freq
0         Italian Restaurant  0.21
1           Sushi Restaurant  0.14
2  Latin American Restaurant  0.07
3          French Restaurant  0.07
4           Asian Restaurant  0.07


----Bridesburg----
                      venue  freq
0               Pizza Place   1.0
1        African Restaurant   0.0
2        Seafood Restaurant   0.0
3          Malay Restaurant   0.0
4  Mediterranean Restaurant   0.0


----East Falls----
                      venue  freq
0         Indian Restaurant   1.0
1        African Restaurant   0.0
2         Korean Restaurant   0.0
3          Malay Restaurant   0.0
4  Mediterranean Restaurant   0.0


----Elmwood, Southwest Philadelphia----
                      venue  freq
0        African Restaurant   1.0
1        Seafood Restaurant   0.0
2          Malay Restaurant   0.0
3  Mediterranean Restaurant   0.0
4        Mexican Restaurant   0.0


----Fairhill, North Philadelphia----
                      

In [56]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [57]:
num_top_venues = 3

for hood in philly_grouped['Neighborhood']:
    temp = philly_grouped[philly_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = philly_grouped['Neighborhood']

for ind in np.arange(philly_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(philly_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue
0,"Art Museum, Francisville",Italian Restaurant,Sushi Restaurant,Pizza Place
1,Bridesburg,Pizza Place,Vietnamese Restaurant,Japanese Restaurant
2,East Falls,Indian Restaurant,Vietnamese Restaurant,Ethiopian Restaurant
3,"Elmwood, Southwest Philadelphia",African Restaurant,Ethiopian Restaurant,Israeli Restaurant
4,"Fairhill, North Philadelphia",Chinese Restaurant,Fast Food Restaurant,Seafood Restaurant


In [58]:
# set number of clusters
kclusters = 5

philly_grouped_clustering = philly_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(philly_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([1, 4, 1, 1, 1, 1, 1, 1, 1, 4])

In [59]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

philly_merged = neighborhoods

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
philly_merged = philly_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')
philly_merged = philly_merged.dropna()
philly_merged['Cluster Labels'] = philly_merged['Cluster Labels'].astype(int)

philly_merged.head(20) # check the last columns!

Unnamed: 0,Zip Code,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue
0,19102,"Rittenhouse Square, Avenue of The Arts",39.952719,-75.16515,1,Seafood Restaurant,American Restaurant,Mediterranean Restaurant
1,19103,"Logan Square, Fitler Square",39.954636,-75.175518,1,American Restaurant,Thai Restaurant,Sushi Restaurant
2,19104,"University City, Belmont, Parkside, Powelton V...",39.958853,-75.196545,4,Pizza Place,Japanese Restaurant,Korean Restaurant
3,19106,"Society Hill, Old City",39.952621,-75.147081,1,New American Restaurant,Pizza Place,American Restaurant
4,19107,"Washington Square West, Midtown Village, China...",39.952426,-75.159222,1,Chinese Restaurant,Shanghai Restaurant,Mediterranean Restaurant
5,19111,"Lawndale, Castor Gardens",40.066935,-75.072031,4,Pizza Place,Chinese Restaurant,Vietnamese Restaurant
6,19112,Navy Yard,39.897171,-75.179071,4,Italian Restaurant,Pizza Place,Japanese Restaurant
8,19115,Somerton,40.083605,-75.052115,3,Asian Restaurant,Vietnamese Restaurant,Ethiopian Restaurant
11,19119,Mount Airy,40.052161,-75.195672,1,Italian Restaurant,Indian Restaurant,Pizza Place
12,19120,"Logan, Olney, Feltonville",40.033817,-75.119314,1,Chinese Restaurant,Seafood Restaurant,Korean Restaurant


In [60]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(philly_merged['Latitude'], philly_merged['Longitude'], philly_merged['Neighborhood'], philly_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters), 
       
map_clusters

In [61]:
cluster1 = philly_merged.loc[philly_merged['Cluster Labels'] == 0, philly_merged.columns[[1] + list(range(5, philly_merged.shape[1]))]]
cluster1

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue
14,"Yorktown, Old Kensington, Jinogi",Restaurant,Colombian Restaurant,Ethiopian Restaurant
16,"Juniata, Frankford, Feltonville",Restaurant,Japanese Restaurant,Israeli Restaurant


In [62]:
cluster2 = philly_merged.loc[philly_merged['Cluster Labels'] == 1, philly_merged.columns[[1] + list(range(5, philly_merged.shape[1]))]]
cluster2

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue
0,"Rittenhouse Square, Avenue of The Arts",Seafood Restaurant,American Restaurant,Mediterranean Restaurant
1,"Logan Square, Fitler Square",American Restaurant,Thai Restaurant,Sushi Restaurant
3,"Society Hill, Old City",New American Restaurant,Pizza Place,American Restaurant
4,"Washington Square West, Midtown Village, China...",Chinese Restaurant,Shanghai Restaurant,Mediterranean Restaurant
11,Mount Airy,Italian Restaurant,Indian Restaurant,Pizza Place
12,"Logan, Olney, Feltonville",Chinese Restaurant,Seafood Restaurant,Korean Restaurant
15,"Northern Liberties, Loft District",Restaurant,American Restaurant,Asian Restaurant
17,"Fishtown, Kensington",Pizza Place,Eastern European Restaurant,Korean Restaurant
19,Manayunk,New American Restaurant,Mexican Restaurant,Italian Restaurant
21,East Falls,Indian Restaurant,Vietnamese Restaurant,Ethiopian Restaurant


In [63]:
cluster3 = philly_merged.loc[philly_merged['Cluster Labels'] == 2, philly_merged.columns[[1] + list(range(5, philly_merged.shape[1]))]]
cluster3

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue
24,Strawberry Mansion,Seafood Restaurant,Vietnamese Restaurant,Ethiopian Restaurant
30,West Oak Lane,Caribbean Restaurant,Seafood Restaurant,Vietnamese Restaurant
40,"Whitman, Pennsport, South Philadelphia",Seafood Restaurant,Vietnamese Restaurant,Ethiopian Restaurant


In [64]:
cluster4 = philly_merged.loc[philly_merged['Cluster Labels'] == 3, philly_merged.columns[[1] + list(range(5, philly_merged.shape[1]))]]
cluster4

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue
8,Somerton,Asian Restaurant,Vietnamese Restaurant,Ethiopian Restaurant


In [65]:
cluster5 = philly_merged.loc[philly_merged['Cluster Labels'] == 4, philly_merged.columns[[1] + list(range(5, philly_merged.shape[1]))]]
cluster5

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue
2,"University City, Belmont, Parkside, Powelton V...",Pizza Place,Japanese Restaurant,Korean Restaurant
5,"Lawndale, Castor Gardens",Pizza Place,Chinese Restaurant,Vietnamese Restaurant
6,Navy Yard,Italian Restaurant,Pizza Place,Japanese Restaurant
18,Oak Lane,Pizza Place,Vietnamese Restaurant,Japanese Restaurant
26,Port Richmond,Pizza Place,Chinese Restaurant,Vietnamese Restaurant
27,Tacony,Fast Food Restaurant,Pizza Place,Vietnamese Restaurant
29,Bridesburg,Pizza Place,Vietnamese Restaurant,Japanese Restaurant
31,Walnut Hill/West,Fast Food Restaurant,Pizza Place,Vietnamese Restaurant
32,Hunting Park,Pizza Place,Restaurant,Seafood Restaurant
44,Rhawnhurst,Pizza Place,Restaurant,Italian Restaurant
