# __Relocation Code - IBM Capstone Project__

In [4]:
# !conda install -c conda-forge geocoder --yes

In [5]:
# Bringing in the necessary libraries
import folium # map rendering library
import geocoder # import geocoder
import json # library to handle JSON files
import matplotlib.cm as cm # Matplotlib and associated plotting modules
import matplotlib.colors as colors
import numpy as np # library to handle data in a vectorized manner
import pandas as pd # library for data analsysis
import requests # library to handle requests
import urllib.request as urlreq

from bs4 import BeautifulSoup as beaut
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values
from geopy.extra.rate_limiter import RateLimiter
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe
from sklearn.cluster import KMeans # Import k-means from clustering stage

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
geolocator = Nominatim(user_agent="tsytsik@gmail.com")
geocode = RateLimiter(geolocator.geocode, min_delay_seconds=1,  max_retries=2)

print('Libraries imported.')

Libraries imported.


In [6]:
bosUrl = 'https://en.wikipedia.org/wiki/Neighborhoods_in_Boston'
bosPage = urlreq.urlopen(bosUrl)
bosSoup = beaut(bosPage, 'html.parser')

denUrl = 'https://en.wikipedia.org/wiki/List_of_neighborhoods_in_Denver'
denPage = urlreq.urlopen(denUrl)
denSoup = beaut(denPage, 'html.parser')

seaUrl = 'https://en.wikipedia.org/wiki/List_of_neighborhoods_in_Seattle'
seaPage = urlreq.urlopen(seaUrl)
seaSoup = beaut(seaPage, 'html.parser')

In [7]:
## BOSTON PARSING SECTION
bosTable = bosSoup.find('ul')
#print(bosTable)

#Looping through the HTML to scrape the essential data
bosList=[]
for row in bosTable.find_all('a'):
    bosList.append(row.find(text=True))
#bosList

In [8]:
## DENVER PARSING SECTION
denTable = denSoup.find_all('ul', class_=False)
# print(denTable)

#Looping through the HTML to scrape the essential data
denList=[]
for i in range(1, 9):
    for row in denTable[i].find_all('li'):
        item = row.a['title'].split(',')[0]
        item = item.split('/')[0]
        denList.append(item)
#denList

In [9]:
## SEATTLE PARSING SECTION
seaTable = seaSoup.find('table', class_='wikitable sortable')
# print(seaTable)

#Looping through the HTML to scrape the essential data
seaList=[]
for sect in seaTable.find_all('tr'):
    seaSub = sect.find('td')
    if seaSub != None:
        seaList.append(seaSub.find(text=True)) 
#seaList

In [10]:
## Converting all of the neighborhood lists into dataframes
bos = pd.DataFrame(bosList, columns=['Hood'])
bos['City'] = 'Boston'
bos['State'] = 'Massachusetts'
bos['Name'] = bos[['Hood', 'City', 'State']].agg(', '.join, axis=1)
bos2 = pd.DataFrame(['Cambridge','Somerville','Brookline','Chelsea','Everett','Medford','Malden','Arlington','Belmont','Watertown','Revere','Winthrop'], columns=['Hood'])
bos2['City'] = ''
bos2['State'] = 'Massachusetts'
bos2['Name'] = bos2[['Hood', 'City', 'State']].agg(', '.join, axis=1)

den = pd.DataFrame(denList, columns=['Hood'])
den['City'] = 'Denver'
den['State'] = 'CO'
den['Name'] = den[['Hood', 'City', 'State']].agg(', '.join, axis=1)

sea = pd.DataFrame(seaList, columns=['Hood'])
sea['City'] = 'Seattle'
sea['State'] = 'WA'
sea['Name'] = sea[['Hood', 'City', 'State']].agg(', '.join, axis=1)
sea['Hood'] = sea['Hood'].str.replace('\n', '')

# Checking their shape
print(bos.shape)
print(bos2.shape)
print(den.shape)
print(sea.shape)

(24, 4)
(12, 4)
(69, 4)
(127, 4)


In [11]:
## Calling the geocoding data for Boston
bos['Location_Detail'] = bos['Name'].apply(geocode)

#Creating latitude and longtitude columns
bos['Latitude'] = bos['Location_Detail'].apply(lambda loc: float(loc.latitude) if loc else None)
bos['Longitude'] = bos['Location_Detail'].apply(lambda loc: float(loc.longitude) if loc else None)

#Checking the data
bos.head()

Unnamed: 0,Hood,City,State,Name,Location_Detail,Latitude,Longitude
0,Allston,Boston,Massachusetts,"Allston, Boston, Massachusetts","(Allston, Boston, Suffolk County, Massachusett...",42.355434,-71.132127
1,Back Bay,Boston,Massachusetts,"Back Bay, Boston, Massachusetts","(Back Bay, Boston, Suffolk County, Massachuset...",42.350707,-71.07973
2,Bay Village,Boston,Massachusetts,"Bay Village, Boston, Massachusetts","(Bay Village, Oak Street, Chinatown, Financial...",42.350011,-71.066948
3,Beacon Hill,Boston,Massachusetts,"Beacon Hill, Boston, Massachusetts","(Beacon Hill, Boston, Suffolk County, Massachu...",42.358708,-71.067829
4,Brighton,Boston,Massachusetts,"Brighton, Boston, Massachusetts","(Brighton, Boston, Suffolk County, Massachuset...",42.350097,-71.156442


In [12]:
## Calling the geocoding data for Boston2
bos2['Location_Detail'] = bos2['Name'].apply(geocode)

#Creating latitude and longtitude columns
bos2['Latitude'] = bos2['Location_Detail'].apply(lambda loc: float(loc.latitude) if loc else None)
bos2['Longitude'] = bos2['Location_Detail'].apply(lambda loc: float(loc.longitude) if loc else None)

#Checking the data
bos2.head()

Unnamed: 0,Hood,City,State,Name,Location_Detail,Latitude,Longitude
0,Cambridge,,Massachusetts,"Cambridge, , Massachusetts","(Cambridge, Middlesex County, Massachusetts, U...",42.3751,-71.105616
1,Somerville,,Massachusetts,"Somerville, , Massachusetts","(Somerville, Middlesex County, Massachusetts, ...",42.387597,-71.099497
2,Brookline,,Massachusetts,"Brookline, , Massachusetts","(Brookline, Massachusetts, United States of Am...",42.331764,-71.121163
3,Chelsea,,Massachusetts,"Chelsea, , Massachusetts","(Chelsea, Suffolk County, Massachusetts, 02150...",42.391764,-71.032828
4,Everett,,Massachusetts,"Everett, , Massachusetts","(Everett, Middlesex County, Massachusetts, Uni...",42.40843,-71.053663


In [13]:
## Calling the geocoding data for Denver
den['Location_Detail'] = den['Name'].apply(geocode)

#Creating latitude and longtitude columns
den['Latitude'] = den['Location_Detail'].apply(lambda loc: float(loc.latitude) if loc else None)
den['Longitude'] = den['Location_Detail'].apply(lambda loc: float(loc.longitude) if loc else None)

#Checking the data
den.head()

Unnamed: 0,Hood,City,State,Name,Location_Detail,Latitude,Longitude
0,Baker,Denver,CO,"Baker, Denver, CO","(Baker, Denver, Denver County, Colorado, Unite...",39.711595,-104.99375
1,Capitol Hill,Denver,CO,"Capitol Hill, Denver, CO","(Capitol Hill, Denver, Denver County, Colorado...",39.735875,-104.979921
2,Central Business District,Denver,CO,"Central Business District, Denver, CO","(Central Business District, Denver, Denver Cou...",39.747378,-104.992737
3,Cherry Creek,Denver,CO,"Cherry Creek, Denver, CO","(Cherry Creek, Denver, Denver County, Colorado...",39.66361,-104.877444
4,Cheesman Park,Denver,CO,"Cheesman Park, Denver, CO","(Cheesman Park, 1599, Cheesman Park, Denver, D...",39.732814,-104.966455


In [14]:
## Calling the geocoding data for Seattle
sea['Location_Detail'] = sea['Name'].apply(geocode)

#Creating latitude and longtitude columns
sea['Latitude'] = sea['Location_Detail'].apply(lambda loc: float(loc.latitude) if loc else None)
sea['Longitude'] = sea['Location_Detail'].apply(lambda loc: float(loc.longitude) if loc else None)

#Checking the data
sea.head()

Unnamed: 0,Hood,City,State,Name,Location_Detail,Latitude,Longitude
0,North Seattle,Seattle,WA,"North Seattle, Seattle, WA","(N, 4200, Mary Gates Memorial Drive Northeast,...",47.660773,-122.291497
1,Broadview,Seattle,WA,"Broadview, Seattle, WA","(Broadview, Seattle, King County, Washington, ...",47.72232,-122.360407
2,Bitter Lake,Seattle,WA,"Bitter Lake, Seattle, WA","(Bitter Lake, Seattle, King County, Washington...",47.726236,-122.348764
3,North Beach,Seattle,WA,"North Beach, Seattle, WA","(North Beach, Seattle, King County, Washington...",47.69621,-122.392362
4,Crown Hill,Seattle,WA,"Crown Hill, Seattle, WA","(Crown Hill, Seattle, King County, Washington,...",47.694715,-122.371459


In [15]:
## A section of the code that was going to be used for add rent information to the neghborhoods
## Removed in the interest of time
# Importing the rental information csv
#rentInfoRaw = pd.read_csv("Neighborhood_Zri_AllHomesPlusMultifamily_Summary.csv")
#rentInfoRaw.head()
#rentInfo = rentInfoRaw[(rentInfoRaw.State.isin(['MA','CO','WA']) & rentInfoRaw.City.isin(['Boston','Denver','Seattle'])) | (rentInfoRaw.State.isin(['MA']) & rentInfoRaw.City.isin(bos2['Hood']))].reset_index()
#rentInfo.rename(columns={'RegionName': 'Hood'}, inplace=True)
#rentInfo[rentInfo['State']=='MA'].sort_values(by='Hood')
#rentInfo.head()

In [16]:
## Combining bos and bos2
bos = bos.append(bos2, ignore_index=True)

## Removing geocoding sites that are not around their respective city
bosClean = bos[(bos['Latitude'] <= np.percentile(bos['Latitude'].dropna().values, 100))
                & (bos['Latitude'] >= np.percentile(bos['Latitude'].dropna().values, 0))
                & (bos['Longitude'] <= np.percentile(bos['Longitude'].dropna().values, 100))
                & (bos['Longitude'] >= np.percentile(bos['Longitude'].dropna().values, 0))]

denClean = den[(den['Latitude'] <= np.percentile(den['Latitude'].dropna().values, 90))
                & (den['Latitude'] >= np.percentile(den['Latitude'].dropna().values, 10))
                & (den['Longitude'] <= np.percentile(den['Longitude'].dropna().values, 90))
                & (den['Longitude'] >= np.percentile(den['Longitude'].dropna().values, 10))]

seaClean = sea[(sea['Latitude'] <= np.percentile(sea['Latitude'].dropna().values, 95))
                & (sea['Latitude'] >= np.percentile(sea['Latitude'].dropna().values, 5))
                & (sea['Longitude'] <= np.percentile(sea['Longitude'].dropna().values, 95))
                & (sea['Longitude'] >= np.percentile(sea['Longitude'].dropna().values, 5))]

totClean = bosClean.append([denClean, seaClean])

In [17]:
# Create map of Boston using latitude and longitude values
bosMap = folium.Map(location=[bosClean['Latitude'].mean(), bosClean['Longitude'].mean()], zoom_start=12)

# add markers to map
for lat, lng, hood in zip(bosClean['Latitude'], bosClean['Longitude'], bosClean['Hood']):
    label = '{}'.format(hood)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(bosMap)  

bosMap

In [18]:
# Create map of Denver using latitude and longitude values
denMap = folium.Map(location=[denClean['Latitude'].mean(), denClean['Longitude'].mean()], zoom_start=11)

# add markers to map
for lat, lng, hood in zip(denClean['Latitude'], denClean['Longitude'], denClean['Hood']):
    label = '{}'.format(hood)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(denMap)  

denMap

In [19]:
# Create map of Seattle using latitude and longitude values
seaMap = folium.Map(location=[seaClean['Latitude'].mean(), seaClean['Longitude'].mean()], zoom_start=12)

# add markers to map
for lat, lng, hood in zip(seaClean['Latitude'], seaClean['Longitude'], seaClean['Hood']):
    label = '{}'.format(hood)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(seaMap)  

seaMap

In [20]:
## Defining functions for ease of access to the Foursquare API
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        # print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Hood', 
                  'Hood Latitude', 
                  'Hood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [21]:
## Credential variables
CLIENT_ID = '0CF1WVVT3IUF4XDKK3RQDJNWB5NTQIKT1YA2YZ5DGJMRPIIJ' # your Foursquare ID
CLIENT_SECRET = 'TMT0GGJNQJZBF1HG2WDHKSIYA4LGAC55GWHBKDFRDZKON0PZ' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version
LIMIT = 100

In [22]:
## Grabbing the nearby venues for the neighborhoods
bosVenues = getNearbyVenues(names= bosClean['Hood'], latitudes= bosClean['Latitude'], longitudes= bosClean['Longitude'])
denVenues = getNearbyVenues(names= denClean['Hood'], latitudes= denClean['Latitude'], longitudes= denClean['Longitude'])
seaVenues = getNearbyVenues(names= seaClean['Hood'], latitudes= seaClean['Latitude'], longitudes= seaClean['Longitude'])
totVenues = bosVenues.append([denVenues, seaVenues])
print('Boston shape: ', bosVenues.shape)
print('Denver shape: ', denVenues.shape)
print('Seattle shape: ', seaVenues.shape)
print('Total shape: ', totVenues.shape)
bosVenues.head()

Boston shape:  (1242, 7)
Denver shape:  (1105, 7)
Seattle shape:  (3171, 7)
Total shape:  (5518, 7)


Unnamed: 0,Hood,Hood Latitude,Hood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Allston,42.355434,-71.132127,Lulu's Allston,42.355068,-71.134107,Comfort Food Restaurant
1,Allston,42.355434,-71.132127,Kaju Tofu House,42.354329,-71.132374,Korean Restaurant
2,Allston,42.355434,-71.132127,Azama Grill,42.354422,-71.132358,Falafel Restaurant
3,Allston,42.355434,-71.132127,Fish Market Sushi Bar,42.353039,-71.132975,Sushi Restaurant
4,Allston,42.355434,-71.132127,Allston Diner,42.354979,-71.134295,Diner


In [23]:
## One hot encoding
# Boston iteration
bosOnehot = pd.get_dummies(bosVenues[['Venue Category']], prefix="", prefix_sep="") # applying dummy varaibles
bosOnehot['Hood'] = bosVenues['Hood'] # add neighborhood column back to dataframe
fixedColumns = [bosOnehot.columns[-1]] + list(bosOnehot.columns[:-1]) # move neighborhood column to the first column
bosOnehot = bosOnehot[fixedColumns]
bosGrouped = bosOnehot.groupby('Hood').mean().reset_index()
bosGrouped.head()

# Denver iteration
denOnehot = pd.get_dummies(denVenues[['Venue Category']], prefix="", prefix_sep="") # applying dummy varaibles
denOnehot['Hood'] = denVenues['Hood'] # add neighborhood column back to dataframe
fixedColumns = [denOnehot.columns[-1]] + list(denOnehot.columns[:-1]) # move neighborhood column to the first column
denOnehot = denOnehot[fixedColumns]
denGrouped = denOnehot.groupby('Hood').mean().reset_index()
denGrouped.head()

# Seattle Iteration
seaOnehot = pd.get_dummies(seaVenues[['Venue Category']], prefix="", prefix_sep="") # applying dummy varaibles
seaOnehot['Hood'] = seaVenues['Hood'] # add neighborhood column back to dataframe
fixedColumns = [seaOnehot.columns[-1]] + list(seaOnehot.columns[:-1]) # move neighborhood column to the first column
seaOnehot = seaOnehot[fixedColumns]
seaGrouped = seaOnehot.groupby('Hood').mean().reset_index()
seaGrouped.head()

# Total Iteration
totOnehot = pd.get_dummies(totVenues[['Venue Category']], prefix="", prefix_sep="") # applying dummy varaibles
totOnehot['Hood'] = totVenues['Hood'] # add neighborhood column back to dataframe
fixedColumns = [totOnehot.columns[-1]] + list(totOnehot.columns[:-1]) # move neighborhood column to the first column
totOnehot = totOnehot[fixedColumns]
totGrouped = totOnehot.groupby('Hood').mean().reset_index()
totGrouped.head()

Unnamed: 0,Hood,ATM,Accessories Store,African Restaurant,Alternative Healer,American Restaurant,Antique Shop,Aquarium,Arcade,Arepa Restaurant,Argentinian Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Australian Restaurant,Auto Dealership,Auto Workshop,Automotive Shop,BBQ Joint,Baby Store,Bagel Shop,Bakery,Bank,Bar,Baseball Field,Baseball Stadium,Basketball Court,Basketball Stadium,Beach,Bed & Breakfast,Beer Bar,Beer Garden,Beer Store,Belgian Restaurant,Big Box Store,Bike Rental / Bike Share,Bike Shop,Bike Trail,Bistro,Board Shop,Boat Rental,Boat or Ferry,Bookstore,Botanical Garden,Boutique,Bowling Green,Boxing Gym,Brasserie,Brazilian Restaurant,Breakfast Spot,Brewery,Bridal Shop,Bubble Tea Shop,Building,Burger Joint,Burmese Restaurant,Burrito Place,Bus Line,Bus Station,Bus Stop,Business Service,Butcher,Cafeteria,Café,Cajun / Creole Restaurant,Camera Store,Candy Store,Caribbean Restaurant,Cemetery,Cheese Shop,Chinese Restaurant,Chiropractor,Chocolate Shop,Church,Churrascaria,Clothing Store,Cocktail Bar,Coffee Shop,College Science Building,College Theater,Colombian Restaurant,Comedy Club,Comfort Food Restaurant,Comic Shop,Community Center,Concert Hall,Construction & Landscaping,Convenience Store,Cosmetics Shop,Creperie,Cuban Restaurant,Cupcake Shop,Cycle Studio,Dance Studio,Deli / Bodega,Department Store,Design Studio,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Distillery,Dive Bar,Doctor's Office,Dog Run,Donut Shop,Drugstore,Dry Cleaner,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Ethiopian Restaurant,Event Space,Exhibit,Eye Doctor,Factory,Fair,Falafel Restaurant,Farm,Farmers Market,Fast Food Restaurant,Field,Financial or Legal Service,Fish & Chips Shop,Fish Market,Flea Market,Flower Shop,Food,Food & Drink Shop,Food Court,Food Service,Food Stand,Food Truck,Fountain,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop,Fruit & Vegetable Store,Furniture / Home Store,Gaming Cafe,Garden,Garden Center,Gas Station,Gastropub,Gay Bar,General Entertainment,General Travel,German Restaurant,Gift Shop,Gluten-free Restaurant,Golf Course,Golf Driving Range,Gourmet Shop,Government Building,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Gymnastics Gym,Halal Restaurant,Harbor / Marina,Hardware Store,Hawaiian Restaurant,Health & Beauty Service,Herbs & Spices Store,Historic Site,History Museum,Hockey Arena,Home Service,Hookah Bar,Hostel,Hot Dog Joint,Hot Spring,Hotel,Hotel Bar,Hotpot Restaurant,IT Services,Ice Cream Shop,Indian Restaurant,Indie Movie Theater,Indie Theater,Intersection,Irish Pub,Italian Restaurant,Japanese Restaurant,Jazz Club,Jewelry Store,Juice Bar,Karaoke Bar,Kids Store,Kitchen Supply Store,Knitting Store,Korean Restaurant,Lake,Latin American Restaurant,Laundry Service,Lawyer,Lebanese Restaurant,Library,Light Rail Station,Lingerie Store,Liquor Store,Lounge,Malay Restaurant,Marijuana Dispensary,Market,Martial Arts Dojo,Massage Studio,Mattress Store,Mediterranean Restaurant,Men's Store,Metro Station,Mexican Restaurant,Meze Restaurant,Middle Eastern Restaurant,Mini Golf,Miscellaneous Shop,Mobile Phone Shop,Modern European Restaurant,Monument / Landmark,Moroccan Restaurant,Motorsports Shop,Movie Theater,Moving Target,Museum,Music Store,Music Venue,Nail Salon,National Park,Neighborhood,New American Restaurant,Nightclub,Noodle House,Office,Opera House,Optical Shop,Organic Grocery,Other Nightlife,Other Repair Shop,Outdoor Sculpture,Outdoor Supply Store,Paper / Office Supplies Store,Park,Pastry Shop,Pawn Shop,Pedestrian Plaza,Performing Arts Venue,Persian Restaurant,Pet Service,Pet Store,Pharmacy,Piano Bar,Pie Shop,Pier,Pilates Studio,Pizza Place,Planetarium,Platform,Playground,Plaza,Poke Place,Pool,Pool Hall,Post Office,Pub,Public Art,Radio Station,Ramen Restaurant,Record Shop,Recreation Center,Rental Car Location,Rental Service,Residential Building (Apartment / Condo),Resort,Restaurant,Rock Club,Roof Deck,Sake Bar,Salad Place,Salon / Barbershop,Sandwich Place,Scenic Lookout,School,Science Museum,Sculpture Garden,Seafood Restaurant,Shipping Store,Shoe Repair,Shoe Store,Shop & Service,Shopping Mall,Shopping Plaza,Skate Park,Skating Rink,Ski Area,Ski Chalet,Smoke Shop,Smoothie Shop,Snack Place,Soccer Field,Soccer Stadium,Social Club,Soup Place,South American Restaurant,Southern / Soul Food Restaurant,Souvenir Shop,Spa,Spanish Restaurant,Speakeasy,Sporting Goods Shop,Sports Bar,Sports Club,Stadium,Stationery Store,Steakhouse,Storage Facility,Strip Club,Supermarket,Supplement Shop,Sushi Restaurant,Szechuan Restaurant,Taco Place,Taiwanese Restaurant,Tanning Salon,Tapas Restaurant,Tattoo Parlor,Tea Room,Tennis Court,Tex-Mex Restaurant,Thai Restaurant,Theater,Theme Park,Theme Park Ride / Attraction,Theme Restaurant,Thrift / Vintage Store,Tour Provider,Tourist Information Center,Toy / Game Store,Track,Trade School,Trail,Train Station,Tram Station,Travel Lounge,Tunnel,Turkish Restaurant,Udon Restaurant,Vacation Rental,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Volleyball Court,Warehouse Store,Waterfront,Weight Loss Center,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio,Zoo,Zoo Exhibit
0,Adams,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Allston,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011905,0.0,0.0,0.02381,0.0,0.0,0.0,0.0,0.011905,0.0,0.0,0.0,0.035714,0.0,0.011905,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011905,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02381,0.0,0.0,0.011905,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011905,0.0,0.0,0.0,0.0,0.0,0.0,0.035714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011905,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.035714,0.0,0.0,0.0,0.0,0.0,0.011905,0.0,0.0,0.02381,0.0,0.0,0.011905,0.0,0.0,0.011905,0.0,0.011905,0.0,0.0,0.0,0.0,0.0,0.0,0.011905,0.0,0.0,0.011905,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02381,0.011905,0.0,0.0,0.0,0.0,0.0,0.0,0.011905,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011905,0.02381,0.0,0.0,0.0,0.011905,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011905,0.0,0.011905,0.0,0.0,0.0,0.0,0.0,0.011905,0.02381,0.0,0.0,0.0,0.0,0.02381,0.011905,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.071429,0.0,0.011905,0.0,0.0,0.0,0.0,0.0,0.0,0.011905,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011905,0.0,0.0,0.02381,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011905,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011905,0.02381,0.0,0.0,0.0,0.0,0.035714,0.0,0.0,0.0,0.011905,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011905,0.011905,0.0,0.0,0.0,0.0,0.011905,0.011905,0.0,0.0,0.0,0.011905,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011905,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011905,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011905,0.0,0.011905,0.0,0.0,0.0,0.0,0.011905,0.0,0.0,0.035714,0.0,0.0,0.0,0.0,0.02381,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.035714,0.0,0.0,0.02381,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011905,0.0,0.0,0.0
2,Arlington,0.0,0.0,0.0,0.0,0.022727,0.0,0.0,0.0,0.0,0.022727,0.0,0.0,0.022727,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.022727,0.022727,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.022727,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.022727,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.022727,0.0,0.0,0.0,0.0,0.0,0.0,0.022727,0.0,0.0,0.0,0.0,0.0,0.0,0.068182,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.022727,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.022727,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.022727,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.022727,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.022727,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.022727,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.022727,0.0,0.0,0.0,0.0,0.0,0.022727,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.045455,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.068182,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.022727,0.0,0.022727,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.045455,0.0,0.0,0.0,0.022727,0.0,0.0,0.0,0.045455,0.0,0.0,0.0,0.0,0.068182,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.022727,0.0,0.0,0.0,0.0,0.022727,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.022727,0.0,0.0,0.0,0.0,0.0,0.0,0.022727,0.0,0.0,0.022727,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.022727,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.022727,0.022727,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.022727,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Atlantic,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Auraria,0.0,0.0,0.0,0.0,0.074074,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.037037,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.037037,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.037037,0.074074,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.074074,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.074074,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.037037,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.037037,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.074074,0.0,0.0,0.0,0.037037,0.0,0.037037,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.037037,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.037037,0.0,0.0,0.0,0.037037,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.037037,0.074074,0.0,0.074074,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.037037,0.0,0.0,0.037037,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [24]:
numTopVenues = 5

for hood in bosGrouped['Hood']:
    # print("----"+hood+"----")
    temp = bosGrouped[bosGrouped['Hood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    # print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(numTopVenues))
    # print('\n')
    
for hood in denGrouped['Hood']:
    # print("----"+hood+"----")
    temp = denGrouped[denGrouped['Hood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    # print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(numTopVenues))
    # print('\n')
    
for hood in seaGrouped['Hood']:
    # print("----"+hood+"----")
    temp = seaGrouped[seaGrouped['Hood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    # print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(numTopVenues))
    # print('\n')
    
for hood in totGrouped['Hood']:
    # print("----"+hood+"----")
    temp = totGrouped[totGrouped['Hood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    # print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(numTopVenues))
    # print('\n')

In [25]:
## Creating a top 10 type array
numTopVenues = 10
indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Hood']
for ind in np.arange(numTopVenues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new Boston dataframe
bosHoodsVenuesSorted = pd.DataFrame(columns=columns)
bosHoodsVenuesSorted['Hood'] = bosGrouped['Hood']

for ind in np.arange(bosGrouped.shape[0]):
    bosHoodsVenuesSorted.iloc[ind, 1:] = return_most_common_venues(bosGrouped.iloc[ind, :], numTopVenues)
    
# create a new Denver dataframe
denHoodsVenuesSorted = pd.DataFrame(columns=columns)
denHoodsVenuesSorted['Hood'] = denGrouped['Hood']

for ind in np.arange(denGrouped.shape[0]):
    denHoodsVenuesSorted.iloc[ind, 1:] = return_most_common_venues(denGrouped.iloc[ind, :], numTopVenues)
    
# create a new Seattle dataframe
seaHoodsVenuesSorted = pd.DataFrame(columns=columns)
seaHoodsVenuesSorted['Hood'] = seaGrouped['Hood']

for ind in np.arange(seaGrouped.shape[0]):
    seaHoodsVenuesSorted.iloc[ind, 1:] = return_most_common_venues(seaGrouped.iloc[ind, :], numTopVenues)

# create a new Total dataframe
totHoodsVenuesSorted = pd.DataFrame(columns=columns)
totHoodsVenuesSorted['Hood'] = totGrouped['Hood']

for ind in np.arange(totGrouped.shape[0]):
    totHoodsVenuesSorted.iloc[ind, 1:] = return_most_common_venues(totGrouped.iloc[ind, :], numTopVenues)
    
bosHoodsVenuesSorted.head()

Unnamed: 0,Hood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Allston,Korean Restaurant,Vegetarian / Vegan Restaurant,Dance Studio,Pizza Place,Chinese Restaurant,Bakery,Thai Restaurant,Pharmacy,Asian Restaurant,Indian Restaurant
1,Arlington,Pizza Place,Mexican Restaurant,Coffee Shop,Park,Liquor Store,Pharmacy,Burger Joint,Gastropub,Jewelry Store,Sandwich Place
2,Back Bay,Coffee Shop,Seafood Restaurant,Hotel,Italian Restaurant,Cycle Studio,Burger Joint,Shopping Mall,Juice Bar,Bar,Chocolate Shop
3,Bay Village,Sandwich Place,Bakery,Spa,Hotel,Performing Arts Venue,Theater,Italian Restaurant,Seafood Restaurant,Scenic Lookout,Boat or Ferry
4,Beacon Hill,Italian Restaurant,Pizza Place,French Restaurant,Gift Shop,Hotel,Sushi Restaurant,Mexican Restaurant,Bar,Kids Store,Plaza


In [26]:
# set number of clusters
kClusters = 7
totKClusters = kClusters * 3
bosGroupedClustering = bosGrouped.drop('Hood', axis=1)
bosKmeans = KMeans(n_clusters=kClusters, random_state=0).fit(bosGroupedClustering)

denGroupedClustering = denGrouped.drop('Hood', axis=1)
denKmeans = KMeans(n_clusters=kClusters, random_state=0).fit(denGroupedClustering)

seaGroupedClustering = seaGrouped.drop('Hood', axis=1)
seaKmeans = KMeans(n_clusters=kClusters, random_state=0).fit(seaGroupedClustering)

totGroupedClustering = totGrouped.drop('Hood', axis=1)
totKmeans = KMeans(n_clusters=totKClusters, random_state=0).fit(totGroupedClustering)

In [27]:
# add clustering labels

bosMerged = bosClean
bosHoodsVenuesSorted.insert(0, 'Cluster Labels', bosKmeans.labels_)

# merge bostonGrouped with bosClean to add latitude/longitude for each neighborhood
bosMerged = bosMerged.join(bosHoodsVenuesSorted.set_index('Hood'), on='Hood')

# Convert floats to ints, necessary later in the code
bosMerged['Cluster Labels'] = bosMerged['Cluster Labels'].fillna(0.0).astype(int)


denMerged = denClean
denHoodsVenuesSorted.insert(0, 'Cluster Labels', denKmeans.labels_)

# merge denverGrouped with denClean to add latitude/longitude for each neighborhood
denMerged = denMerged.join(denHoodsVenuesSorted.set_index('Hood'), on='Hood')

# Convert floats to ints, necessary later in the code
denMerged['Cluster Labels'] = denMerged['Cluster Labels'].fillna(0.0).astype(int)


seaMerged = seaClean
seaHoodsVenuesSorted.insert(0, 'Cluster Labels', seaKmeans.labels_)

# merge seattleGrouped with seaClean to add latitude/longitude for each neighborhood
seaMerged = seaMerged.join(seaHoodsVenuesSorted.set_index('Hood'), on='Hood')

# Convert floats to ints, necessary later in the code
seaMerged['Cluster Labels'] = seaMerged['Cluster Labels'].fillna(0.0).astype(int)


totMerged = totClean
totHoodsVenuesSorted.insert(0, 'Cluster Labels', totKmeans.labels_)

# merge totGrouped with totClean to add latitude/longitude for each neighborhood
totMerged = totMerged.join(totHoodsVenuesSorted.set_index('Hood'), on='Hood')

# Convert floats to ints, necessary later in the code
totMerged['Cluster Labels'] = totMerged['Cluster Labels'].fillna(0.0).astype(int)

bosMerged.head() # check the last columns!

Unnamed: 0,Hood,City,State,Name,Location_Detail,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Allston,Boston,Massachusetts,"Allston, Boston, Massachusetts","(Allston, Boston, Suffolk County, Massachusett...",42.355434,-71.132127,2,Korean Restaurant,Vegetarian / Vegan Restaurant,Dance Studio,Pizza Place,Chinese Restaurant,Bakery,Thai Restaurant,Pharmacy,Asian Restaurant,Indian Restaurant
1,Back Bay,Boston,Massachusetts,"Back Bay, Boston, Massachusetts","(Back Bay, Boston, Suffolk County, Massachuset...",42.350707,-71.07973,2,Coffee Shop,Seafood Restaurant,Hotel,Italian Restaurant,Cycle Studio,Burger Joint,Shopping Mall,Juice Bar,Bar,Chocolate Shop
2,Bay Village,Boston,Massachusetts,"Bay Village, Boston, Massachusetts","(Bay Village, Oak Street, Chinatown, Financial...",42.350011,-71.066948,2,Sandwich Place,Bakery,Spa,Hotel,Performing Arts Venue,Theater,Italian Restaurant,Seafood Restaurant,Scenic Lookout,Boat or Ferry
3,Beacon Hill,Boston,Massachusetts,"Beacon Hill, Boston, Massachusetts","(Beacon Hill, Boston, Suffolk County, Massachu...",42.358708,-71.067829,2,Italian Restaurant,Pizza Place,French Restaurant,Gift Shop,Hotel,Sushi Restaurant,Mexican Restaurant,Bar,Kids Store,Plaza
4,Brighton,Boston,Massachusetts,"Brighton, Boston, Massachusetts","(Brighton, Boston, Suffolk County, Massachuset...",42.350097,-71.156442,2,Bus Station,Bank,Bakery,Pizza Place,Grocery Store,Dry Cleaner,Chinese Restaurant,Pub,Coffee Shop,Deli / Bodega


In [28]:
# create map
bosClusters = folium.Map(location=[bosClean['Latitude'].mean(), bosClean['Longitude'].mean()], zoom_start=12)

# set color scheme for the clusters
x = np.arange(kClusters)
ys = [i + x + (i*x)**2 for i in range(kClusters)]
colors_array = cm.gist_rainbow(np.linspace(0, 1, len(ys)))
gist_rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(bosMerged['Latitude'], bosMerged['Longitude'], bosMerged['Hood'], bosMerged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=gist_rainbow[cluster-1],
        fill=True,
        fill_color=gist_rainbow[cluster-1],
        fill_opacity=0.7).add_to(bosClusters)
       
bosClusters

In [29]:
# create map
denClusters = folium.Map(location=[denClean['Latitude'].mean(), denClean['Longitude'].mean()], zoom_start=12)

# set color scheme for the clusters
x = np.arange(kClusters)
ys = [i + x + (i*x)**2 for i in range(kClusters)]
colors_array = cm.gist_rainbow(np.linspace(0, 1, len(ys)))
gist_rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(denMerged['Latitude'], denMerged['Longitude'], denMerged['Hood'], denMerged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=gist_rainbow[cluster-1],
        fill=True,
        fill_color=gist_rainbow[cluster-1],
        fill_opacity=0.7).add_to(denClusters)
       
denClusters

In [30]:
# create map
seaClusters = folium.Map(location=[seaClean['Latitude'].mean(), seaClean['Longitude'].mean()], zoom_start=12)

# set color scheme for the clusters
x = np.arange(kClusters)
ys = [i + x + (i*x)**2 for i in range(kClusters)]
colors_array = cm.gist_rainbow(np.linspace(0, 1, len(ys)))
gist_rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(seaMerged['Latitude'], seaMerged['Longitude'], seaMerged['Hood'], seaMerged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=gist_rainbow[cluster-1],
        fill=True,
        fill_color=gist_rainbow[cluster-1],
        fill_opacity=0.7).add_to(seaClusters)
       
seaClusters

In [31]:
# create map
totBosClusters = folium.Map(location=[bosClean['Latitude'].mean(), bosClean['Longitude'].mean()], zoom_start=11)

# set color scheme for the clusters
x = np.arange(totKClusters)
ys = [i + x + (i*x)**2 for i in range(totKClusters)]
colors_array = cm.gist_rainbow(np.linspace(0, 1, len(ys)))
gist_rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(totMerged['Latitude'], totMerged['Longitude'], totMerged['Hood'], totMerged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=gist_rainbow[cluster-1],
        fill=True,
        fill_color=gist_rainbow[cluster-1],
        fill_opacity=0.7).add_to(totBosClusters)
       
totBosClusters

In [32]:
# create map
totDenClusters = folium.Map(location=[denClean['Latitude'].mean(), denClean['Longitude'].mean()], zoom_start=11)

# set color scheme for the clusters
x = np.arange(totKClusters)
ys = [i + x + (i*x)**2 for i in range(totKClusters)]
colors_array = cm.gist_rainbow(np.linspace(0, 1, len(ys)))
gist_rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(totMerged['Latitude'], totMerged['Longitude'], totMerged['Hood'], totMerged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=gist_rainbow[cluster-1],
        fill=True,
        fill_color=gist_rainbow[cluster-1],
        fill_opacity=0.7).add_to(totDenClusters)
       
totDenClusters

In [33]:
# create map
totSeaClusters = folium.Map(location=[seaClean['Latitude'].mean(), seaClean['Longitude'].mean()], zoom_start=11)

# set color scheme for the clusters
x = np.arange(totKClusters)
ys = [i + x + (i*x)**2 for i in range(totKClusters)]
colors_array = cm.gist_rainbow(np.linspace(0, 1, len(ys)))
gist_rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(totMerged['Latitude'], totMerged['Longitude'], totMerged['Hood'], totMerged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=gist_rainbow[cluster-1],
        fill=True,
        fill_color=gist_rainbow[cluster-1],
        fill_opacity=0.7).add_to(totSeaClusters)
       
totSeaClusters

In [36]:
## Filtering according to clusters to a get a table view of the results
totMerged.loc[totMerged['Cluster Labels'] == 2]

Unnamed: 0,Hood,City,State,Name,Location_Detail,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
21,Montclair,Denver,CO,"Montclair, Denver, CO","(Montclair, Denver, Denver County, Colorado, 8...",39.731735,-104.912948,2,Park,Zoo Exhibit,Exhibit,Factory,Fair,Falafel Restaurant,Farm,Farmers Market,Fast Food Restaurant,Field
18,View Ridge,Seattle,WA,"View Ridge, Seattle, WA","(View Ridge, Hawthorne Hills, Seattle, King Co...",47.679543,-122.274014,2,Park,Zoo Exhibit,Exhibit,Factory,Fair,Falafel Restaurant,Farm,Farmers Market,Fast Food Restaurant,Field
