In [3]:
#required imports

import numpy as np # library to handle data in a vectorized manner
import pandas as pd

import matplotlib.cm as cm
import matplotlib.colors as colors

import requests
from pandas.io.json import json_normalize

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

!pip install geocoder
import geocoder

!conda install -c conda-forge folium=0.5.0 --yes
import folium # map rendering library

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/Python36

  added / updated specs: 
    - folium=0.5.0


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    openssl-1.1.1e             |       h516909a_0         2.1 MB  conda-forge
    python_abi-3.6             |          1_cp36m           4 KB  conda-forge
    ca-certificates-2019.11.28 |       hecc5488_0         145 KB  conda-forge
    vincent-0.4.4              |             py_1          28 KB  conda-forge
    folium-0.5.0               |             py_0          45 KB  conda-forge
    branca-0.4.0               |             py_0          26 KB  conda-forge
    certifi-2019.11.28         |   py36h9f0ad1d_1         149 KB  conda-forge
    altair-4.0.1               |             py_0         575 KB  conda-forge
    ------------------------------------------------------------
                       

##### First, we load neighborhood data from https://en.wikipedia.org/wiki/Neighborhoods_of_Kansas_City,_Missouri

#### The simplest way to do this was simply to copy and paste the neighborhoods into an Excel csv file attached to this project in the IBM cloud.
#### The below code loads the csv file into a pandas dataframe from the cloud

In [74]:
import types

from botocore.client import Config
import ibm_boto3

def __iter__(self): return 0

# @hidden_cell
# The following code accesses a file in your IBM Cloud Object Storage. It includes your credentials.
# You might want to remove those credentials before you share the notebook.
client_28f422c5859046d5a110e5d2a7ef5d28 = ibm_boto3.client(service_name='s3',
    ibm_api_key_id='foobar',
    ibm_auth_endpoint="https://iam.ng.bluemix.net/oidc/token",
    config=Config(signature_version='oauth'),
    endpoint_url='https://s3-api.us-geo.objectstorage.service.networklayer.com')

body = client_28f422c5859046d5a110e5d2a7ef5d28.get_object(Bucket='whereshouldicreatemysportsbarinka-donotdelete-pr-yolwvme8leirie',Key='KansasCityNeighborhoods.csv')['Body']
# add missing __iter__ method, so pandas accepts body as file-like object
if not hasattr(body, "__iter__"): body.__iter__ = types.MethodType( __iter__, body )

df_data_1 = pd.read_csv(body)
df_data_1.head(10)


Unnamed: 0,Neighborhood
0,18th and Vine
1,49-63 Coalition
2,Armour Fields
3,Armour Hills
4,Ashland Ridge
5,Bannister Acres
6,Barry Harbour
7,Barry Woods/ Park Hill
8,Battleflood Heights
9,Beacon Hill-McFeders


__Add a column for Latitude and Longitude__

In [75]:
kansas_city_data = df_data_1
kansas_city_data['Latitude'] = 0.0
kansas_city_data['Longitude'] = 0.0
kansas_city_data.head()


Unnamed: 0,Neighborhood,Latitude,Longitude
0,18th and Vine,0.0,0.0
1,49-63 Coalition,0.0,0.0
2,Armour Fields,0.0,0.0
3,Armour Hills,0.0,0.0
4,Ashland Ridge,0.0,0.0


__Request geolocation data__

In [76]:
#function to get coordinates
import time
def get_geo_data(neighborhood):
    time.sleep(1)
    # Create geo_locator object instance
    geo_locator = Nominatim(user_agent="kc_explorer")

    # Attempt to obtain geo data for given place name
    try:
        location = geo_locator.geocode(neighborhood, timeout = 5)
    except Exception:
        raise Exception("Location error")

    if not location:
        raise Exception("Location error")

    return location 

In [77]:
# loop the data getting the location for each row
for index, row in kansas_city_data.iterrows():
    neighborhood = row.Neighborhood
    address = neighborhood + ', Kansas City'
    try:
        location = get_geo_data( address )
        kansas_city_data.at[index,'Latitude'] = location.latitude
        kansas_city_data.at[index,'Longitude'] = location.longitude
    except: 
        kansas_city_data.at[index,'Latitude'] = 0.0
        kansas_city_data.at[index,'Longitude'] = 0.0


In [78]:
kansas_city_data

Unnamed: 0,Neighborhood,Latitude,Longitude
0,18th and Vine,39.094440,-94.553886
1,49-63 Coalition,39.065361,-94.556665
2,Armour Fields,39.004839,-94.598302
3,Armour Hills,39.004702,-94.588570
4,Ashland Ridge,39.057391,-94.473155
5,Bannister Acres,38.946507,-94.503142
6,Barry Harbour,0.000000,0.000000
7,Barry Woods/ Park Hill,0.000000,0.000000
8,Battleflood Heights,0.000000,0.000000
9,Beacon Hill-McFeders,0.000000,0.000000


#### The following function was used to spot-check missing neighborhood data from the above summary

In [126]:
neighborhood = 'Washington-Wheatley'
address = neighborhood + ', Kansas City'

try:
    location = get_geo_data( address )
    print('The geograpical coordinate of the Kansas City neighborhood of {} are {}, {}.'.format(neighborhood, location.latitude, location.longitude))
except:
    print('Have an exception.  Missing coordinates of neighborhood {}'.format(neighborhood))


The geograpical coordinate of the Kansas City neighborhood of Washington-Wheatley are 39.0810097, -94.5471853.


#### For the purposes of this report I am removing any remaining missing neighborhood geolocation data.  They are likely sparse neighborhoods.   But before any real money was put on this venture I would likely use a more thorough (perhaps paid) geolocation service.

In [80]:
kansas_city_data = kansas_city_data[(kansas_city_data.Latitude != 0.0 ) & (kansas_city_data.Longitude != 0.0)]

In [81]:
kansas_city_data.shape


(170, 3)

#### Let's visualize the neighborhoods.  First we find the center of Kansas City:

In [62]:
address = 'Kansas City, Missouri'
location = get_geo_data(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Kansas City are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Kansas City are 39.100105, -94.5781416.


#### Next we create a map of the neighborhoods of Kansas City:

In [82]:
# create map of Kansas City using latitude and longitude values
map_kansas_city = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, label in zip(kansas_city_data['Latitude'], kansas_city_data['Longitude'], kansas_city_data['Neighborhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_kansas_city)  
    
map_kansas_city

__Let's start exploring the Kansas City neighborhoods using Foursquare__

In [83]:
LIMIT = 100 # limit of number of venues returned by Foursquare API
CLIENT_ID = 'FWXDOZKTXIXHYOUJ2EY2JJ4E0DOZAS01WEKTDFYOKEZR0DGV' # your Foursquare ID
CLIENT_SECRET = 'AOKQ3MDFNYE43R4Z0LJVKA4YS3J25IYOECNFGUAW43NHPY0U' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: FWXDOZKTXIXHYOUJ2EY2JJ4E0DOZAS01WEKTDFYOKEZR0DGV
CLIENT_SECRET:AOKQ3MDFNYE43R4Z0LJVKA4YS3J25IYOECNFGUAW43NHPY0U


In [84]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [85]:
kansas_city_venues = getNearbyVenues(kansas_city_data.Neighborhood, kansas_city_data.Latitude, kansas_city_data.Longitude)
kansas_city_venues.head(10)

18th and Vine
49-63 Coalition
Armour Fields
Armour Hills
Ashland Ridge
Bannister Acres
Blue Hills
Blue Hills Estates
Blue Valley
Blue Vue Hills
Boone Hills
Boulevard Village
Bradford Place
Briarcliff
Briarcliff West
Bridlespur
Brookside
Calico Farms
CBD-Downtown
Center City
Chaumiere
Citadel
Claymont
Claymont North
Clayton
Coachlight Square
Coleman Highlands
Colonial Square
Columbus Park
Country Club
Country Club District
Country Club Plaza
Country Lane Estates
Countryside
Coves North
Crestwood
Crossgates
Crossroads
Davidson
Dunbar
Eastwood Hills
Fairlane
Gashland
Glen Lake
Glen Oaks
Greenwood
Hanover Place
Harlem
Hickman Mills
Hickman Mills South
Hidden Valley
Highland View
Hill Haven
Hillcrest
Holiday Hills
Holmes Park
Hospital Hill
Hyde Park
Independence Plaza
Indian Mound
Ingleside
Ivanhoe
Jefferson Highlands
KCI
Key Coalition
Kirkside
Knobtown
Knoches Park
Lea Manor
Leeds
Legacy East
Lewis Heights
Library District
Little Blue Valley
Loma Vista
Longview
Lykins
Manheim Park
Maple Pa

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,18th and Vine,39.09444,-94.553886,Arthur Bryant's Barbeque,39.091483,-94.556056,BBQ Joint
1,18th and Vine,39.09444,-94.553886,Gates Bar-B-Q,39.098435,-94.555813,BBQ Joint
2,18th and Vine,39.09444,-94.553886,Family Dollar,39.0907,-94.5512,Discount Store
3,18th and Vine,39.09444,-94.553886,Shell,39.095086,-94.555961,Gas Station
4,18th and Vine,39.09444,-94.553886,OK Furniture,39.094982,-94.552179,Furniture / Home Store
5,18th and Vine,39.09444,-94.553886,Park Parade Liquor,39.09517,-94.555942,Liquor Store
6,18th and Vine,39.09444,-94.553886,Church's Chicken,39.09861,-94.551803,Fried Chicken Joint
7,18th and Vine,39.09444,-94.553886,The Pit Lounge,39.098569,-94.555851,Lounge
8,18th and Vine,39.09444,-94.553886,McDonald's,39.095438,-94.551091,Fast Food Restaurant
9,49-63 Coalition,39.065361,-94.556665,Walgreens,39.0677,-94.553855,Pharmacy


__Let's group the venues by neighborhood__

In [86]:
# one hot encoding
kansas_city_onehot = pd.get_dummies(kansas_city_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
kansas_city_onehot['Neighborhood'] = kansas_city_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [kansas_city_onehot.columns[-1]] + list(kansas_city_onehot.columns[:-1])
kansas_city_onehot = kansas_city_onehot[fixed_columns]

kansas_city_onehot.head()

Unnamed: 0,Neighborhood,ATM,Accessories Store,Airport,Airport Terminal,American Restaurant,Antique Shop,Arcade,Art Gallery,Art Museum,...,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Water Park,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,18th and Vine,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,18th and Vine,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,18th and Vine,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,18th and Vine,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,18th and Vine,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [87]:
kansas_city_onehot.shape

(1731, 246)

In [88]:
kansas_city_grouped = kansas_city_onehot.groupby('Neighborhood').mean().reset_index()
kansas_city_grouped

Unnamed: 0,Neighborhood,ATM,Accessories Store,Airport,Airport Terminal,American Restaurant,Antique Shop,Arcade,Art Gallery,Art Museum,...,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Water Park,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,18th and Vine,0.000000,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.0,0.0,0.000000,0.0,0.000000,0.000000,0.000000
1,49-63 Coalition,0.000000,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.0,0.0,0.000000,0.0,0.000000,0.111111,0.000000
2,Armour Fields,0.000000,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.0,0.0,0.166667,0.0,0.000000,0.000000,0.000000
3,Armour Hills,0.000000,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.0,0.0,0.000000,0.0,0.000000,0.000000,0.000000
4,Ashland Ridge,0.058824,0.000000,0.0,0.0,0.058824,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.0,0.0,0.000000,0.0,0.000000,0.000000,0.000000
5,Blue Hills,0.000000,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.0,0.0,0.000000,0.0,0.000000,0.000000,0.000000
6,Blue Hills Estates,0.000000,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.0,0.0,0.000000,0.0,0.000000,0.000000,0.000000
7,Blue Valley,0.000000,0.000000,0.0,0.0,0.054054,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.027027,0.000000,0.0,0.0,0.000000,0.0,0.000000,0.000000,0.000000
8,Blue Vue Hills,0.000000,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.0,0.0,0.000000,0.0,0.000000,0.000000,0.000000
9,Boone Hills,0.000000,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.0,0.0,0.000000,0.0,0.000000,0.000000,0.000000


#### Let's print each neighborhood along with the top 5 most common venues

In [89]:
num_top_venues = 5

for hood in kansas_city_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = kansas_city_grouped[kansas_city_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----18th and Vine----
                    venue  freq
0               BBQ Joint  0.22
1            Liquor Store  0.11
2  Furniture / Home Store  0.11
3          Discount Store  0.11
4     Fried Chicken Joint  0.11


----49-63 Coalition----
           venue  freq
0  Grocery Store  0.11
1  Women's Store  0.11
2   Intersection  0.11
3            Bar  0.11
4         Market  0.11


----Armour Fields----
              venue  freq
0              Park  0.17
1           Theater  0.17
2  Sculpture Garden  0.17
3          Wine Bar  0.17
4        Playground  0.17


----Armour Hills----
           venue  freq
0          Trail   0.5
1           Park   0.5
2            ATM   0.0
3      Pawn Shop   0.0
4  Moving Target   0.0


----Ashland Ridge----
                  venue  freq
0   Sporting Goods Shop  0.18
1                   Bar  0.12
2  Fast Food Restaurant  0.06
3      Business Service  0.06
4          Burger Joint  0.06


----Blue Hills----
               venue  freq
0               Park   1.0
1 

                 venue  freq
0       Sandwich Place  0.08
1            Nightclub  0.05
2  American Restaurant  0.05
3                  Bar  0.05
4                 Park  0.05


----Holiday Hills----
                 venue  freq
0  Fried Chicken Joint  0.33
1       Discount Store  0.17
2           Public Art  0.17
3                 Road  0.17
4                 Park  0.17


----Holmes Park----
                 venue  freq
0  American Restaurant  0.22
1       Cosmetics Shop  0.11
2         Dance Studio  0.11
3                 Park  0.11
4               Bakery  0.11


----Hospital Hill----
                     venue  freq
0                    Hotel  0.13
1                Hotel Bar  0.07
2           Sandwich Place  0.07
3  New American Restaurant  0.07
4                   Office  0.07


----Hyde Park----
               venue  freq
0    Laundry Service   0.2
1        Bus Station   0.2
2            Exhibit   0.2
3     Scenic Lookout   0.2
4  Convenience Store   0.2


----Independence Plaza----

                 venue  freq
0       Sandwich Place  0.08
1            Nightclub  0.05
2  American Restaurant  0.05
3                  Bar  0.05
4                 Park  0.05


----Pendleton Heights----
                     venue  freq
0                Disc Golf  0.25
1         Community Center  0.25
2              Coffee Shop  0.25
3   Thrift / Vintage Store  0.25
4  New American Restaurant  0.00


----Platte Ridge----
                 venue  freq
0         Burger Joint  0.25
1                Hotel  0.25
2  American Restaurant  0.25
3            BBQ Joint  0.25
4         Optical Shop  0.00


----Plaza Westport----
                 venue  freq
0  American Restaurant  0.09
1   Seafood Restaurant  0.09
2       Clothing Store  0.09
3                Hotel  0.09
4           Restaurant  0.04


----Quality Hill----
           venue  freq
0       Dive Bar  0.12
1          Hotel  0.08
2        Theater  0.08
3     Restaurant  0.04
4  Deli / Bodega  0.04


----Ravenwood----
           venue  freq


                  venue  freq
0  Fast Food Restaurant   1.0
1                   ATM   0.0
2     Mobile Phone Shop   0.0
3                Museum   0.0
4           Music Store   0.0


----Waldo----
            venue  freq
0             Gym  0.08
1     Pizza Place  0.05
2  Sandwich Place  0.05
3        Pharmacy  0.05
4     Coffee Shop  0.05


----Ward Estates----
                        venue  freq
0                     Stadium   0.2
1                        Pool   0.2
2                      Arcade   0.2
3                      Casino   0.2
4  Construction & Landscaping   0.2


----Ward Parkway----
           venue  freq
0    Gas Station   1.0
1            ATM   0.0
2           Park   0.0
3  Moving Target   0.0
4         Museum   0.0


----Ward Parkway Plaza----
                  venue  freq
0  Fast Food Restaurant   0.3
1           Video Store   0.1
2           Pizza Place   0.1
3          Liquor Store   0.1
4    Chinese Restaurant   0.1


----Washington-Wheatley----
               venue 

#### Let's put that into a *pandas* dataframe
First, let's write a function to sort the venues in descending order.

In [90]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

Now let's create the new dataframe and display the top 10 venues for each neighborhood.

In [91]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = kansas_city_grouped['Neighborhood']

for ind in np.arange(kansas_city_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(kansas_city_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head(200)

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,18th and Vine,BBQ Joint,Fried Chicken Joint,Lounge,Fast Food Restaurant,Furniture / Home Store,Discount Store,Liquor Store,Gas Station,Fireworks Store,Fish & Chips Shop
1,49-63 Coalition,Bar,Pizza Place,Chinese Restaurant,Grocery Store,Market,Intersection,Pharmacy,Women's Store,Clothing Store,Flea Market
2,Armour Fields,Sculpture Garden,Playground,Wine Bar,Park,Theater,Café,Fast Food Restaurant,Event Space,Exhibit,Farm
3,Armour Hills,Park,Trail,Yoga Studio,Food & Drink Shop,Food,Fondue Restaurant,Flower Shop,Flea Market,Fish & Chips Shop,Fireworks Store
4,Ashland Ridge,Sporting Goods Shop,Bar,ATM,Bank,Grocery Store,Greek Restaurant,Fast Food Restaurant,Discount Store,Chinese Restaurant,Business Service
5,Blue Hills,Park,Fountain,Food Court,Food & Drink Shop,Food,Fondue Restaurant,Flower Shop,Flea Market,Fish & Chips Shop,Fireworks Store
6,Blue Hills Estates,Mexican Restaurant,Diner,BBQ Joint,Furniture / Home Store,Golf Course,Hunan Restaurant,Food & Drink Shop,Food,Food Court,Fondue Restaurant
7,Blue Valley,Sandwich Place,Nightclub,BBQ Joint,Coffee Shop,Bar,Sports Bar,Park,American Restaurant,Gym,Mobile Phone Shop
8,Blue Vue Hills,Discount Store,Insurance Office,Yoga Studio,Exhibit,Food Service,Food Court,Food & Drink Shop,Food,Fondue Restaurant,Flower Shop
9,Boone Hills,Athletics & Sports,Yoga Studio,Event Space,Food Service,Food Court,Food & Drink Shop,Food,Fondue Restaurant,Flower Shop,Flea Market


### Cluster Neighborhoods
Run *k*-means to cluster the neighborhood into 5 clusters.

In [93]:
# set number of clusters
kclusters = 5

kansas_city_grouped_clustering = kansas_city_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(kansas_city_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:40] 

array([2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2], dtype=int32)

Let's create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood.

In [97]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

kansas_city_merged = kansas_city_data

# merge kansas_city_grouped with kansas_city_data to add latitude/longitude for each neighborhood
kansas_city_merged = kansas_city_merged.merge(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

kansas_city_merged.head(150) # check the last columns!

Unnamed: 0,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,18th and Vine,39.094440,-94.553886,2,BBQ Joint,Fried Chicken Joint,Lounge,Fast Food Restaurant,Furniture / Home Store,Discount Store,Liquor Store,Gas Station,Fireworks Store,Fish & Chips Shop
1,49-63 Coalition,39.065361,-94.556665,2,Bar,Pizza Place,Chinese Restaurant,Grocery Store,Market,Intersection,Pharmacy,Women's Store,Clothing Store,Flea Market
2,Armour Fields,39.004839,-94.598302,2,Sculpture Garden,Playground,Wine Bar,Park,Theater,Café,Fast Food Restaurant,Event Space,Exhibit,Farm
3,Armour Hills,39.004702,-94.588570,2,Park,Trail,Yoga Studio,Food & Drink Shop,Food,Fondue Restaurant,Flower Shop,Flea Market,Fish & Chips Shop,Fireworks Store
4,Ashland Ridge,39.057391,-94.473155,2,Sporting Goods Shop,Bar,ATM,Bank,Grocery Store,Greek Restaurant,Fast Food Restaurant,Discount Store,Chinese Restaurant,Business Service
5,Blue Hills,39.028717,-94.561469,2,Park,Fountain,Food Court,Food & Drink Shop,Food,Fondue Restaurant,Flower Shop,Flea Market,Fish & Chips Shop,Fireworks Store
6,Blue Hills Estates,38.895601,-94.603577,2,Mexican Restaurant,Diner,BBQ Joint,Furniture / Home Store,Golf Course,Hunan Restaurant,Food & Drink Shop,Food,Food Court,Fondue Restaurant
7,Blue Valley,39.100105,-94.578142,2,Sandwich Place,Nightclub,BBQ Joint,Coffee Shop,Bar,Sports Bar,Park,American Restaurant,Gym,Mobile Phone Shop
8,Blue Vue Hills,39.028831,-94.423664,2,Discount Store,Insurance Office,Yoga Studio,Exhibit,Food Service,Food Court,Food & Drink Shop,Food,Fondue Restaurant,Flower Shop
9,Boone Hills,38.962668,-94.584024,2,Athletics & Sports,Yoga Studio,Event Space,Food Service,Food Court,Food & Drink Shop,Food,Fondue Restaurant,Flower Shop,Flea Market


__Visualize the clusters__

In [98]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(kansas_city_merged['Latitude'], kansas_city_merged['Longitude'], kansas_city_merged['Neighborhood'], kansas_city_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

__Examine the clusters__

In [113]:
kansas_city_merged.loc[kansas_city_merged['Cluster Labels'] == 0, kansas_city_merged.columns[[1] + list(range(4, kansas_city_merged.shape[1]))]]

Unnamed: 0,Latitude,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
69,38.978759,Deli / Bodega,Convenience Store,Mexican Restaurant,Diner,Disc Golf,Food Service,Food Court,Food & Drink Shop,Food,Fondue Restaurant
76,39.300557,Convenience Store,Gas Station,Yoga Studio,Event Space,Food Court,Food & Drink Shop,Food,Fondue Restaurant,Flower Shop,Flea Market
83,39.06136,Convenience Store,Yoga Studio,Event Space,Food Court,Food & Drink Shop,Food,Fondue Restaurant,Flower Shop,Flea Market,Fish & Chips Shop
137,39.08101,Convenience Store,Yoga Studio,Event Space,Food Court,Food & Drink Shop,Food,Fondue Restaurant,Flower Shop,Flea Market,Fish & Chips Shop


In [114]:
kansas_city_merged.loc[kansas_city_merged['Cluster Labels'] == 1, kansas_city_merged.columns[[1] + list(range(4, kansas_city_merged.shape[1]))]]

Unnamed: 0,Latitude,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
11,37.70771,Pool,Intersection,Golf Course,Yoga Studio,Event Space,Food & Drink Shop,Food,Fondue Restaurant,Flower Shop,Flea Market
31,39.242908,Pool,Yoga Studio,Event Space,Food Court,Food & Drink Shop,Food,Fondue Restaurant,Flower Shop,Flea Market,Fish & Chips Shop
55,38.910136,Home Service,Pool,Yoga Studio,Event Space,Food Court,Food & Drink Shop,Food,Fondue Restaurant,Flower Shop,Flea Market
58,38.950517,Pool,Construction & Landscaping,Trail,Yoga Studio,Fireworks Store,Exhibit,Farm,Farmers Market,Fast Food Restaurant,Flea Market
124,39.242908,Pool,Yoga Studio,Event Space,Food Court,Food & Drink Shop,Food,Fondue Restaurant,Flower Shop,Flea Market,Fish & Chips Shop
125,39.005663,Pool,Construction & Landscaping,Intersection,Yoga Studio,Fish & Chips Shop,Exhibit,Farm,Farmers Market,Fast Food Restaurant,Fireworks Store
149,38.893104,Bar,Pool,Construction & Landscaping,Yoga Studio,Food Court,Food & Drink Shop,Food,Fondue Restaurant,Flower Shop,Flea Market


In [115]:
kansas_city_merged.loc[kansas_city_merged['Cluster Labels'] == 2, kansas_city_merged.columns[[1] + list(range(4, kansas_city_merged.shape[1]))]]

Unnamed: 0,Latitude,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,39.094440,BBQ Joint,Fried Chicken Joint,Lounge,Fast Food Restaurant,Furniture / Home Store,Discount Store,Liquor Store,Gas Station,Fireworks Store,Fish & Chips Shop
1,39.065361,Bar,Pizza Place,Chinese Restaurant,Grocery Store,Market,Intersection,Pharmacy,Women's Store,Clothing Store,Flea Market
2,39.004839,Sculpture Garden,Playground,Wine Bar,Park,Theater,Café,Fast Food Restaurant,Event Space,Exhibit,Farm
3,39.004702,Park,Trail,Yoga Studio,Food & Drink Shop,Food,Fondue Restaurant,Flower Shop,Flea Market,Fish & Chips Shop,Fireworks Store
4,39.057391,Sporting Goods Shop,Bar,ATM,Bank,Grocery Store,Greek Restaurant,Fast Food Restaurant,Discount Store,Chinese Restaurant,Business Service
5,39.028717,Park,Fountain,Food Court,Food & Drink Shop,Food,Fondue Restaurant,Flower Shop,Flea Market,Fish & Chips Shop,Fireworks Store
6,38.895601,Mexican Restaurant,Diner,BBQ Joint,Furniture / Home Store,Golf Course,Hunan Restaurant,Food & Drink Shop,Food,Food Court,Fondue Restaurant
7,39.100105,Sandwich Place,Nightclub,BBQ Joint,Coffee Shop,Bar,Sports Bar,Park,American Restaurant,Gym,Mobile Phone Shop
8,39.028831,Discount Store,Insurance Office,Yoga Studio,Exhibit,Food Service,Food Court,Food & Drink Shop,Food,Fondue Restaurant,Flower Shop
9,38.962668,Athletics & Sports,Yoga Studio,Event Space,Food Service,Food Court,Food & Drink Shop,Food,Fondue Restaurant,Flower Shop,Flea Market


In [117]:
kansas_city_merged.loc[kansas_city_merged['Cluster Labels'] == 3, kansas_city_merged.columns[[1] + list(range(4, kansas_city_merged.shape[1]))]]

Unnamed: 0,Latitude,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
72,38.904743,Construction & Landscaping,Yoga Studio,Event Space,Food Court,Food & Drink Shop,Food,Fondue Restaurant,Flower Shop,Flea Market,Fish & Chips Shop
121,39.002837,Construction & Landscaping,Yoga Studio,Event Space,Food Court,Food & Drink Shop,Food,Fondue Restaurant,Flower Shop,Flea Market,Fish & Chips Shop


In [118]:
kansas_city_merged.loc[kansas_city_merged['Cluster Labels'] == 4, kansas_city_merged.columns[[1] + list(range(4, kansas_city_merged.shape[1]))]]

Unnamed: 0,Latitude,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
67,39.234585,American Restaurant,Yoga Studio,Event Space,Food Service,Food Court,Food & Drink Shop,Food,Fondue Restaurant,Flower Shop,Flea Market
95,39.042699,American Restaurant,Yoga Studio,Event Space,Food Service,Food Court,Food & Drink Shop,Food,Fondue Restaurant,Flower Shop,Flea Market


#### Let's isolate and map the locations of neighborhoods that have sports bars as a common venue:

In [119]:
df = neighborhoods_venues_sorted
sb = 'Sports Bar'
f1 = (df['1st Most Common Venue'] == sb)
f2 = (df['2nd Most Common Venue'] == sb)
f3 = (df['3rd Most Common Venue'] == sb)
f4 = (df['4th Most Common Venue'] == sb)
f5 = (df['5th Most Common Venue'] == sb)
f5 = (df['5th Most Common Venue'] == sb)
f6 = (df['6th Most Common Venue'] == sb)
f7 = (df['7th Most Common Venue'] == sb)
f8 = (df['8th Most Common Venue'] == sb)
f9 = (df['9th Most Common Venue'] == sb)
f10 = (df['10th Most Common Venue'] == sb)
sports_bar_neighborhoods = df[ f1 | f2 | f3 | f4 | f5 | f6 | f7 | f8 | f9 | f10 ]['Neighborhood']
sports_bars_data = kansas_city_data[kansas_city_data['Neighborhood'].isin(sports_bar_neighborhoods)]
sports_bars_data

Unnamed: 0,Neighborhood,Latitude,Longitude
15,Blue Valley,39.100105,-94.578142
18,Boulevard Village,39.100105,-94.578142
71,Hanover Place,39.059623,-94.588059
78,Hill Haven,39.100105,-94.578142
79,Hillcrest,39.100105,-94.578142
113,Martin City,38.885271,-94.596325
130,Oakwood,39.100105,-94.578142
136,Park Forest,39.100105,-94.578142
138,Park Plaza,39.216051,-94.641819
141,Paseo West,39.100105,-94.578142


### Create a map of Kansas City neighborhoods that currently have sports bars as top-10 venue.  Add a marker for Arrowhead Stadium, home field for the Kansas City Chiefs.

In [123]:
# create map of Kansas City using latitude and longitude values
map_kansas_city_sports_bars = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, label in zip(sports_bars_data['Latitude'], sports_bars_data['Longitude'], sports_bars_data['Neighborhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='red',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_kansas_city_sports_bars)  

folium.Marker( [39.048270, -94.484980], popup='Arrowhead Stadium' ).add_to(map_kansas_city_sports_bars) 

map_kansas_city_sports_bars