In [1]:
# @hidden_cell
# The project token is an authorization token that is used to access project resources like data sources, connections, and used by platform APIs.
from project_lib import Project
project = Project(None, project_id='a2826d3f-d5e2-412c-ab71-9eee03b964b6', project_access_token='p-f9cabcce0ae8899714404f43399d9032eb58b9ab')
pc = project.project_context


<img src = "https://pbs.twimg.com/profile_images/971071764913643522/Crczh-as_400x400.jpg" width = 400> </a>

<h1 align=center><font size = 5>Great Divide Brewing: Michigan Expansion Planning</font></h1>

## Introduction

You have been recruited to assist a up and coming Midwestern Microbrewery, Great Divide Brewing, in locating a brewery and taproom in Michigan.<br/>
Great Divide sells well with Household incomes over $75,000 as well as people over 35.<br/>
It offers a lineup of premium Microbrews in styles that range from Bold IPAs to Easy Drinking Pilsners that appeal to more affluent and established professionals.<br/>
Objective is find a city in Michigan that meets these criteria but also has a low density of Breweries.

## Table of Contents

<div class="alert alert-block alert-info" style="margin-top: 20px">

<font size = 3>

0. <a href="#item0">Install Necessary Libraries</a>
    
1. <a href="#item1">Download and Explore Dataset</a>

2. <a href="#item2">Explore Neighborhoods in New York City</a>

3. <a href="#item3">Analyze Each Neighborhood</a>

4. <a href="#item4">Cluster Neighborhoods</a>

5. <a href="#item5">Examine Clusters</a>    
</font>
</div>

## 0. Install Necessary Libraries

Confirm installation of Beadutifulsoup4, HTML Parsers, and related Libraries for creating dataframes, segmenting, clusting, rendering lat/long values, and plotting in maps.

In [2]:
#Confirm installation of Beadutifulsoup4, lxml and html5 html parsers for support of parsing errors in html (lxml is currently most commonly used), and Request Library for fetching websites
!pip install beautifulsoup4
from bs4 import BeautifulSoup

!pip install lxml
!pip install html5

!pip install requests
import requests



In [3]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

!pip install folium
import folium # map rendering library

print('Libraries imported.')

Libraries imported.


<a id='item1'></a>

## 1. Download and Explore Datasets

Scrape information from https://en.wikipedia.org/wiki/List_of_Michigan_locations_by_per_capita_income.html

#### Load Michigan per capita income HTML data

In [4]:
!wget -q -O 'List_of_Michigan_locations_by_per_capita_income.html' https://en.wikipedia.org/wiki/List_of_Michigan_locations_by_per_capita_income
print('HTML data downloaded!')

HTML data downloaded!


#### Read Michigan per capita income HTML data into BeautfulSoup
Read HTML into BeautifulSoup and display the formatted source code of the webpage

In [5]:
#Read HTML into BeautifulSoup and prettify to format code indents for readability
with open('List_of_Michigan_locations_by_per_capita_income.html') as html_file:
    soup = BeautifulSoup(html_file, 'lxml')
print(soup.prettify())

<!DOCTYPE html>
<html class="client-nojs" dir="ltr" lang="en">
 <head>
  <meta charset="utf-8"/>
  <title>
   List of Michigan locations by per capita income - Wikipedia
  </title>
  <script>
   document.documentElement.className="client-js";RLCONF={"wgBreakFrames":!1,"wgSeparatorTransformTable":["",""],"wgDigitTransformTable":["",""],"wgDefaultDateFormat":"dmy","wgMonthNames":["","January","February","March","April","May","June","July","August","September","October","November","December"],"wgRequestId":"Xos-UgpAICgAAAh4hf4AAAAP","wgCSPNonce":!1,"wgCanonicalNamespace":"","wgCanonicalSpecialPageName":!1,"wgNamespaceNumber":0,"wgPageName":"List_of_Michigan_locations_by_per_capita_income","wgTitle":"List of Michigan locations by per capita income","wgCurRevisionId":949302036,"wgRevisionId":949302036,"wgArticleId":1827915,"wgIsArticle":!0,"wgIsRedirect":!1,"wgAction":"view","wgUserName":null,"wgUserGroups":["*"],"wgCategories":["Articles with short description","United States locations by 

#### Find Michigan County Householf Income Table from Soup

Upon examining the Wikipedia Source Code, the Neighborhood information is located in a class of table called 'Wikitable'.
So, set match condition to retreieve Postal Code table.

In [6]:
County_Income_HTML = soup.find('table' , {'class':'wikitable'})
County_Income_HTML

<table class="wikitable sortable">
<tbody><tr valign="bottom">
<th>Rank
</th>
<th>County
</th>
<th>Per capita<br/>income
</th>
<th>Median<br/>household<br/>income
</th>
<th>Median<br/>family<br/>income
</th>
<th>Population
</th>
<th>Number of<br/>households
</th></tr>
<tr>
<td>1
</td>
<td><a href="/wiki/Kent_County,_Michigan" title="Kent County, Michigan">Kent</a>
</td>
<td>$56,138
</td>
<td>$85,991
</td>
<td>$94,783
</td>
<td>1,202,362
</td>
<td>483,698
</td></tr>
<tr>
<td>2
</td>
<td><a href="/wiki/Leelanau_County,_Michigan" title="Leelanau County, Michigan">Leelanau</a>
</td>
<td>$32,194
</td>
<td>$56,527
</td>
<td>$65,342
</td>
<td>21,708
</td>
<td>9,255
</td></tr>
<tr>
<td>3
</td>
<td><a href="/wiki/Livingston_County,_Michigan" title="Livingston County, Michigan">Livingston</a>
</td>
<td>$31,609
</td>
<td>$72,129
</td>
<td>$82,637
</td>
<td>180,967
</td>
<td>67,380
</td></tr>
<tr>
<td>4
</td>
<td><a href="/wiki/Washtenaw_County,_Michigan" title="Washtenaw County, Michigan">Washten

#### Read Wikitable data into Pandas Dataframe

In [7]:
table_rows = County_Income_HTML.find_all('tr')

res = []
for tr in table_rows:
    td = tr.find_all('td')
    row = [tr.text.strip() for tr in td if tr.text.strip()]
    if row:
        res.append(row)

County_Income = pd.DataFrame(res, columns=["Rank", "County", "Per Capita Income", "Median Household Income", "Median Family Income", "Population",  "Number of Households" ])
County_Income

Unnamed: 0,Rank,County,Per Capita Income,Median Household Income,Median Family Income,Population,Number of Households
0,1,Kent,"$56,138","$85,991","$94,783",1202362,483698.0
1,2,Leelanau,"$32,194","$56,527","$65,342",21708,9255.0
2,3,Livingston,"$31,609","$72,129","$82,637",180967,67380.0
3,4,Washtenaw,"$31,316","$59,065","$82,184",344791,137193.0
4,5,Charlevoix,"$28,403","$48,704","$57,022",25949,10882.0
5,6,Midland,"$28,363","$51,103","$63,299",83629,33437.0
6,7,Emmet,"$28,308","$49,235","$61,600",32694,13601.0
7,United States,"$27,334","$51,914","$62,982",308745538,116716292,
8,8,Clinton,"$27,223","$58,016","$69,611",75382,28766.0
9,9,Grand Traverse,"$27,091","$50,647","$61,780",86986,35328.0


#### Drop Rows that contain erroneous information

The next task is to drop rows where United States or Michigan are shown in table.

In [8]:
County_Income.drop(County_Income[County_Income.Rank.str.contains("i")].index, axis=0, inplace=True, )
County_Income.head()

Unnamed: 0,Rank,County,Per Capita Income,Median Household Income,Median Family Income,Population,Number of Households
0,1,Kent,"$56,138","$85,991","$94,783",1202362,483698
1,2,Leelanau,"$32,194","$56,527","$65,342",21708,9255
2,3,Livingston,"$31,609","$72,129","$82,637",180967,67380
3,4,Washtenaw,"$31,316","$59,065","$82,184",344791,137193
4,5,Charlevoix,"$28,403","$48,704","$57,022",25949,10882


Sort County_Income alphabetically by county to prepare for merging with age information.

In [9]:
County_Income=County_Income.sort_values('Median Household Income', ascending=False,)
County_Income.reset_index(inplace=True, drop=True)
County_Income.head()

Unnamed: 0,Rank,County,Per Capita Income,Median Household Income,Median Family Income,Population,Number of Households
0,1,Kent,"$56,138","$85,991","$94,783",1202362,483698
1,3,Livingston,"$31,609","$72,129","$82,637",180967,67380
2,4,Washtenaw,"$31,316","$59,065","$82,184",344791,137193
3,8,Clinton,"$27,223","$58,016","$69,611",75382,28766
4,2,Leelanau,"$32,194","$56,527","$65,342",21708,9255


In [10]:
County_Income.shape

(83, 7)

#### Obtain City Information of Kent County

Kent County meets the brewery's income criteria. Let's explore the related cities of Kent County by scraping related Kent County Wikipedia page.
https://en.wikipedia.org/wiki/Kent_County,_Michigan

In [11]:
!wget -q -O 'List_of_municipalities_in_Michigan.html' https://en.wikipedia.org/wiki/List_of_municipalities_in_Michigan
print('HTML data downloaded!')

HTML data downloaded!


In [12]:
#Read HTML into BeautifulSoup and prettify to format code indents for readability
with open('List_of_municipalities_in_Michigan.html') as html_file:
    soup = BeautifulSoup(html_file, 'lxml')
print(soup.prettify())

<!DOCTYPE html>
<html class="client-nojs" dir="ltr" lang="en">
 <head>
  <meta charset="utf-8"/>
  <title>
   List of municipalities in Michigan - Wikipedia
  </title>
  <script>
   document.documentElement.className="client-js";RLCONF={"wgBreakFrames":!1,"wgSeparatorTransformTable":["",""],"wgDigitTransformTable":["",""],"wgDefaultDateFormat":"dmy","wgMonthNames":["","January","February","March","April","May","June","July","August","September","October","November","December"],"wgRequestId":"XpGYZwpAEKcAAhGxoi8AAAAI","wgCSPNonce":!1,"wgCanonicalNamespace":"","wgCanonicalSpecialPageName":!1,"wgNamespaceNumber":0,"wgPageName":"List_of_municipalities_in_Michigan","wgTitle":"List of municipalities in Michigan","wgCurRevisionId":950285482,"wgRevisionId":950285482,"wgArticleId":236034,"wgIsArticle":!0,"wgIsRedirect":!1,"wgAction":"view","wgUserName":null,"wgUserGroups":["*"],"wgCategories":["Commons category link is locally defined","Charter townships in Michigan","Cities in Michigan","Lists

In [13]:
mi_cities_HTML = soup.find('table' , {'class':'wikitable'})
mi_cities_HTML

<table class="wikitable sortable">
<tbody><tr>
<th rowspan="2" scope="col">Name
</th>
<th rowspan="2" scope="col">Type<sup class="reference" id="cite_ref-Census_2010_1-3"><a href="#cite_note-Census_2010-1">[1]</a></sup>
</th>
<th rowspan="2" scope="col"><a href="/wiki/List_of_counties_in_Michigan" title="List of counties in Michigan">County</a><sup class="reference" id="cite_ref-Census_2010_1-4"><a href="#cite_note-Census_2010-1">[1]</a></sup>
</th>
<th rowspan="2" scope="col">Population<br/>(2010)<sup class="reference" id="cite_ref-Census_2010_1-5"><a href="#cite_note-Census_2010-1">[1]</a></sup>
</th>
<th rowspan="2" scope="col">Population<br/>(2000)<sup class="reference" id="cite_ref-Census_2010_1-6"><a href="#cite_note-Census_2010-1">[1]</a></sup>
</th>
<th rowspan="2" scope="col">Change (%)
</th>
<th colspan="2" scope="col">Land area<br/>(2010)<sup class="reference" id="cite_ref-Census_2010_1-7"><a href="#cite_note-Census_2010-1">[1]</a></sup>
</th>
<th data-sort-type="number" row

In [14]:
table_rows = mi_cities_HTML.find_all('tr')

res = []
for tr in table_rows:
    td = tr.find_all('td')
    row = [tr.text.strip() for tr in td if tr.text.strip()]
    if row:
        res.append(row)

mi_cities = pd.DataFrame(res, columns=["Neighborhood", "Type", "County", "Population 2010", "Population 2000", "Change %",  "sq. mi.", "sq. km.", "Density"])
#mi_cities[['Population 2010', 'Population 2000']] = mi_cities[['Population 2010', 'Population 2000']].str.replace(',', '').astype(float) #get rid of commas as they prevent data from being read as numeric
mi_cities['Population 2010'] = mi_cities['Population 2010'].str.replace(',', '').astype(float) #get rid of commas as they prevent data from being read as numeric
mi_cities['Population 2000'] = mi_cities['Population 2000'].str.replace(',', '').astype(float) #get rid of commas as they prevent data from being read as numeric
mi_cities

Unnamed: 0,Neighborhood,Type,County,Population 2010,Population 2000,Change %,sq. mi.,sq. km.,Density
0,Acme,Township,Grand Traverse,4375.0,4332.0,+1.0%,25.01,64.8,174.9/sq mi (67.5/km2)
1,Ada,Township,Kent,13142.0,9882.0,+33.0%,36.04,93.3,364.7/sq mi (140.8/km2)
2,Adams,Township,Arenac,563.0,550.0,+2.4%,35.64,92.3,15.8/sq mi (6.1/km2)
3,Adams,Township,Hillsdale,2493.0,2498.0,−0.2%,35.64,92.3,69.9/sq mi (27.0/km2)
4,Adams,Township,Houghton,2573.0,2747.0,−6.3%,47.05,121.9,54.7/sq mi (21.1/km2)
5,Addison,Village,Lenawee,605.0,627.0,−3.5%,0.96,2.5,630.2/sq mi (243.3/km2)
6,Addison,Township,Oakland,6351.0,6439.0,−1.4%,35.52,92.0,178.8/sq mi (69.0/km2)
7,Adrian,City,Lenawee,21133.0,21574.0,−2.0%,7.95,20.6,"2,658.2/sq mi (1,026.4/km2)"
8,Adrian,Charter Township,Lenawee,6035.0,5749.0,+5.0%,33.99,88.0,177.6/sq mi (68.6/km2)
9,Aetna,Township,Mecosta,2299.0,2044.0,+12.5%,35.52,92.0,64.7/sq mi (25.0/km2)


#### Isolate Cities Related to Kent County

In [15]:
kent_cities = mi_cities[mi_cities['County'].isin(['Kent'])]
kent_cities

Unnamed: 0,Neighborhood,Type,County,Population 2010,Population 2000,Change %,sq. mi.,sq. km.,Density
1,Ada,Township,Kent,13142.0,9882.0,+33.0%,36.04,93.3,364.7/sq mi (140.8/km2)
24,Algoma,Township,Kent,9932.0,7596.0,+30.8%,34.13,88.4,291.0/sq mi (112.4/km2)
44,Alpine,Township,Kent,13336.0,13976.0,−4.6%,35.9,93.0,371.5/sq mi (143.4/km2)
196,Bowne,Township,Kent,3084.0,2743.0,+12.4%,35.53,92.0,86.8/sq mi (33.5/km2)
257,Byron,Township,Kent,20317.0,17553.0,+15.7%,36.1,93.0,562.8/sq mi (217.3/km2)
262,Caledonia,Village,Kent,1511.0,1102.0,+37.1%,1.33,3.4,"1,136.1/sq mi (438.6/km2)"
263,Caledonia,Township,Kent,12332.0,8964.0,+37.6%,34.91,90.4,353.3/sq mi (136.4/km2)
274,Cannon,Township,Kent,13336.0,12075.0,+10.4%,35.26,91.3,378.2/sq mi (146.0/km2)
287,Cascade,Charter Township,Kent,17134.0,15107.0,+13.4%,33.88,87.7,505.7/sq mi (195.3/km2)
303,Cedar Springs,City,Kent,3509.0,3112.0,+12.8%,2.03,5.3,"1,728.6/sq mi (667.4/km2)"


In [16]:
kent_cities=kent_cities.sort_values('Population 2010', ascending=False,)
kent_cities.reset_index(inplace=True, drop=True)
kent_cities=kent_cities.head(10)
kent_cities

Unnamed: 0,Neighborhood,Type,County,Population 2010,Population 2000,Change %,sq. mi.,sq. km.,Density
0,Grand Rapids,City,Kent,188040.0,197800.0,−4.9%,44.4,115.0,"4,235.1/sq mi (1,635.2/km2)"
1,Wyoming,City,Kent,72125.0,69368.0,+4.0%,24.64,63.8,"2,927.2/sq mi (1,130.2/km2)"
2,Kentwood,City,Kent,48707.0,45255.0,+7.6%,20.9,54.0,"2,330.5/sq mi (899.8/km2)"
3,Plainfield,Charter Township,Kent,30952.0,30195.0,+2.5%,35.04,90.8,883.3/sq mi (341.1/km2)
4,Gaines,Charter Township,Kent,25146.0,20112.0,+25.0%,35.7,92.0,704.4/sq mi (272.0/km2)
5,Walker,City,Kent,23537.0,21842.0,+7.8%,24.94,64.6,943.7/sq mi (364.4/km2)
6,Byron,Township,Kent,20317.0,17553.0,+15.7%,36.1,93.0,562.8/sq mi (217.3/km2)
7,Cascade,Charter Township,Kent,17134.0,15107.0,+13.4%,33.88,87.7,505.7/sq mi (195.3/km2)
8,Grand Rapids,Charter Township,Kent,16661.0,14056.0,+18.5%,15.34,39.7,"1,086.1/sq mi (419.4/km2)"
9,Grandville,City,Kent,15378.0,16263.0,−5.4%,7.27,18.8,"2,115.3/sq mi (816.7/km2)"


In [17]:
# Remove all columns after 'Population 2010'
kent_cities.drop(kent_cities.iloc[:, 4:9], inplace = True, axis = 1) 
kent_cities

Unnamed: 0,Neighborhood,Type,County,Population 2010
0,Grand Rapids,City,Kent,188040.0
1,Wyoming,City,Kent,72125.0
2,Kentwood,City,Kent,48707.0
3,Plainfield,Charter Township,Kent,30952.0
4,Gaines,Charter Township,Kent,25146.0
5,Walker,City,Kent,23537.0
6,Byron,Township,Kent,20317.0
7,Cascade,Charter Township,Kent,17134.0
8,Grand Rapids,Charter Township,Kent,16661.0
9,Grandville,City,Kent,15378.0


In [18]:
project.save_data(data=kent_cities.to_csv(index=False), file_name = 'kent_cities.csv', overwrite=True)

{'file_name': 'kent_cities.csv',
 'message': 'File saved to project storage.',
 'bucket_name': 'applieddatasciencecapstonebrewery-donotdelete-pr-aus83pbefagv5w',
 'asset_id': '26b1449f-b706-4548-b1fa-9ea2d7800880'}

#### Merge City Latitude, Longitude information

read in latitde and longitude information into dataframe

In [19]:
kent_cities_latlong=pd.read_csv(project.get_file('kent_cities_latlon.csv'))
kent_cities['Latitude'] = kent_cities_latlong['Latitude'].values
kent_cities['Longitude'] = kent_cities_latlong['Longitude'].values
kent_cities

Unnamed: 0,Neighborhood,Type,County,Population 2010,Latitude,Longitude
0,Grand Rapids,City,Kent,188040.0,42.961416,-85.655863
1,Wyoming,City,Kent,72125.0,42.89225,-85.70253
2,Kentwood,City,Kent,48707.0,42.882529,-85.592247
3,Plainfield,Charter Township,Kent,30952.0,43.074193,-85.610032
4,Gaines,Charter Township,Kent,25146.0,42.804475,-85.605024
5,Walker,City,Kent,23537.0,42.984747,-85.747812
6,Byron,Township,Kent,20317.0,42.812529,-85.723917
7,Cascade,Charter Township,Kent,17134.0,42.895864,-85.486688
8,Grand Rapids,Charter Township,Kent,16661.0,42.986416,-85.58225
9,Grandville,City,Kent,15378.0,42.903915,-85.756144


Quickly examine the resulting dataframe.

#### Create a map of Kent County with 10 largest Cities superimposed on top.

In [20]:
address = 'Kent County, MI'

geolocator = Nominatim(user_agent="mi_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinates of Kent County are {}, {}.'.format(latitude, longitude))

The geograpical coordinates of Kent County are 43.0024013, -85.5502718.


In [51]:
# create map of New York using latitude and longitude values

map_kent = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, label in zip(kent_cities['Latitude'], kent_cities['Longitude'], kent_cities['Neighborhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_kent)    
map_kent

**Folium** is a great visualization library. Feel free to zoom into the above map, and click on each circle mark to reveal the name of the city.

Next, we are going to start utilizing the Foursquare API to explore the Cities and segment them.

In [22]:
kent_data = kent_cities[['Neighborhood', 'Latitude', 'Longitude']].reset_index(drop=True) # resets index position of rows
kent_data.head()

Unnamed: 0,Neighborhood,Latitude,Longitude
0,Grand Rapids,42.961416,-85.655863
1,Wyoming,42.89225,-85.70253
2,Kentwood,42.882529,-85.592247
3,Plainfield,43.074193,-85.610032
4,Gaines,42.804475,-85.605024


#### Define Foursquare Credentials and Version

In [23]:
CLIENT_ID = 'AFGXEVNSTEFRFKS3BBBQT1VLN41FMSX5FTQKBFZA5RFOCEUM' # your Foursquare ID
CLIENT_SECRET = '3VPZVIPPZ3T0HQVKRREF5X2WPG34R233ZPGVI45DSLDHGDZD' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: AFGXEVNSTEFRFKS3BBBQT1VLN41FMSX5FTQKBFZA5RFOCEUM
CLIENT_SECRET:3VPZVIPPZ3T0HQVKRREF5X2WPG34R233ZPGVI45DSLDHGDZD


#### Let's explore the first neighborhood in our dataframe.

Get the neighborhood's name.

In [24]:
kent_data.loc[0, 'Neighborhood']

'Grand Rapids'

Get the neighborhood's latitude and longitude values.

In [25]:
neighborhood_latitude = kent_data.loc[0, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = kent_data.loc[0, 'Longitude'] # neighborhood longitude value

neighborhood_name = kent_data.loc[0, 'Neighborhood'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of Grand Rapids are 42.9614157, -85.6558634.


#### Now, let's get the top 100 venues that are in Grand Rapids within a radius of 1000 meters.

First, let's create the GET request URL. Name your URL **url**.

In [26]:
# type your answer here

LIMIT = 100 # limit of number of venues returned by Foursquare API

radius = 1000 # define radius
 # create URL
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude, 
    neighborhood_longitude, 
    radius, 
    LIMIT)
url



'https://api.foursquare.com/v2/venues/explore?&client_id=AFGXEVNSTEFRFKS3BBBQT1VLN41FMSX5FTQKBFZA5RFOCEUM&client_secret=3VPZVIPPZ3T0HQVKRREF5X2WPG34R233ZPGVI45DSLDHGDZD&v=20180605&ll=42.9614157,-85.6558634&radius=1000&limit=100'

Send the GET request and examine the resutls

In [27]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5e93791f29ce6a001b7aa2ba'},
 'response': {'suggestedFilters': {'header': 'Tap to show:',
   'filters': [{'name': '$-$$$$', 'key': 'price'},
    {'name': 'Open now', 'key': 'openNow'}]},
  'headerLocation': 'Heritage Hill',
  'headerFullLocation': 'Heritage Hill, Grand Rapids',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 100,
  'suggestedBounds': {'ne': {'lat': 42.970415709000015,
    'lng': -85.64358812298839},
   'sw': {'lat': 42.95241569099999, 'lng': -85.66813867701161}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4b4fa372f964a520ac0e27e3',
       'name': "Martha's Vineyard",
       'location': {'address': '200 Union Ave NE',
        'crossStreet': 'at Lyon',
        'lat': 42.96754963014905,
        'lng'

From the Foursquare lab in the previous module, we know that all the information is in the *items* key. Before we proceed, let's borrow the **get_category_type** function from the Foursquare lab.

In [28]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

Now we are ready to clean the json and structure it into a *pandas* dataframe.

In [29]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Martha's Vineyard,Gourmet Shop,42.96755,-85.654398
1,Beijing Restaurant,Chinese Restaurant,42.959913,-85.660396
2,Art of the Table,Gourmet Shop,42.955649,-85.653806
3,Wealthy Street Bakery,Bakery,42.955661,-85.653674
4,Nantucket Bakery,Bakery,42.967702,-85.654471


And how many venues were returned by Foursquare?

In [30]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

100 venues were returned by Foursquare.


<a id='item2'></a>

## 2. Explore other Cities with more than 15,000 residents in Kent County

#### Let's create a function to repeat the same process for remaining cities of interest in Kent County

In [31]:
def getNearbyVenues(names, latitudes, longitudes, radius=2000):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

#### Now write the code to run the above function on each neighborhood and create a new dataframe called *kent_venues*.

In [32]:
kent_venues = getNearbyVenues(names=kent_data['Neighborhood'],
                                   latitudes=kent_data['Latitude'],
                                   longitudes=kent_data['Longitude']
                                  )



Grand Rapids
Wyoming
Kentwood
Plainfield
Gaines
Walker
Byron
Cascade
Grand Rapids
Grandville


#### Let's check the size of the resulting dataframe

In [33]:
print(kent_venues.shape)
kent_venues

(389, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Grand Rapids,42.961416,-85.655863,Martha's Vineyard,42.96755,-85.654398,Gourmet Shop
1,Grand Rapids,42.961416,-85.655863,Art of the Table,42.955649,-85.653806,Gourmet Shop
2,Grand Rapids,42.961416,-85.655863,Nantucket Bakery,42.967702,-85.654471,Bakery
3,Grand Rapids,42.961416,-85.655863,Wealthy Street Bakery,42.955661,-85.653674,Bakery
4,Grand Rapids,42.961416,-85.655863,Donkey Taqueria,42.955744,-85.651909,Taco Place
5,Grand Rapids,42.961416,-85.655863,The Winchester,42.95552,-85.652411,Pub
6,Grand Rapids,42.961416,-85.655863,Gita Pita,42.962964,-85.663931,Mediterranean Restaurant
7,Grand Rapids,42.961416,-85.655863,Cherry Deli,42.959474,-85.648066,Deli / Bodega
8,Grand Rapids,42.961416,-85.655863,Maru Sushi & Grill,42.95964,-85.645928,Sushi Restaurant
9,Grand Rapids,42.961416,-85.655863,Bangkok Taste Cuisine,42.962658,-85.664265,Thai Restaurant


Let's check how many venues were returned for each neighborhood

In [34]:
kent_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Byron,28,28,28,28,28,28
Cascade,12,12,12,12,12,12
Gaines,4,4,4,4,4,4
Grand Rapids,152,152,152,152,152,152
Grandville,53,53,53,53,53,53
Kentwood,34,34,34,34,34,34
Plainfield,19,19,19,19,19,19
Walker,34,34,34,34,34,34
Wyoming,53,53,53,53,53,53


#### Let's find out how many unique categories can be curated from all the returned venues

In [35]:
print('There are {} uniques categories.'.format(len(kent_venues['Venue Category'].unique())))

There are 138 uniques categories.


<a id='item3'></a>

## 3. Analyze Each Neighborhood

In [36]:
# one hot encoding
kent_onehot = pd.get_dummies(kent_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
kent_onehot['Neighborhood'] = kent_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [kent_onehot.columns[-1]] + list(kent_onehot.columns[:-1])                                       
kent_onehot = kent_onehot[fixed_columns]
kent_onehot.head()

Unnamed: 0,Neighborhood,American Restaurant,Antique Shop,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Australian Restaurant,Automotive Shop,BBQ Joint,Bagel Shop,Bakery,Bank,Bar,Baseball Field,Beer Bar,Bistro,Bowling Alley,Breakfast Spot,Brewery,Burrito Place,Bus Stop,Business Service,Butcher,Cafeteria,Café,Chinese Restaurant,Coffee Shop,Comic Shop,Concert Hall,Construction & Landscaping,Convenience Store,Convention Center,Cosmetics Shop,Deli / Bodega,Department Store,Dessert Shop,Diner,Disc Golf,Discount Store,Dive Bar,Dog Run,Donut Shop,Dry Cleaner,Electronics Store,Ethiopian Restaurant,Fabric Shop,Factory,Farmers Market,Fast Food Restaurant,Flea Market,Fondue Restaurant,Food,Food & Drink Shop,Frozen Yogurt Shop,Furniture / Home Store,Garden Center,Gas Station,Gastropub,Gay Bar,Golf Course,Gourmet Shop,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Health Food Store,High School,Hockey Arena,Home Service,Hot Dog Joint,Hotel,Ice Cream Shop,Indian Restaurant,Indoor Play Area,Insurance Office,Intersection,Italian Restaurant,Laundromat,Liquor Store,Market,Mediterranean Restaurant,Mexican Restaurant,Miscellaneous Shop,Mobile Phone Shop,Motel,Motorsports Shop,Movie Theater,Moving Target,Multiplex,Museum,Music Store,Music Venue,Nature Preserve,Nightlife Spot,Park,Pet Store,Pharmacy,Pizza Place,Playground,Pub,Record Shop,Rental Car Location,River,Rock Club,Salon / Barbershop,Sandwich Place,Sculpture Garden,Seafood Restaurant,Shipping Store,Shop & Service,Skating Rink,Smoke Shop,Soccer Field,Social Club,Soup Place,South Indian Restaurant,Southern / Soul Food Restaurant,Spa,Sporting Goods Shop,Sports Bar,Steakhouse,Supermarket,Sushi Restaurant,Taco Place,Tapas Restaurant,Tea Room,Thai Restaurant,Theater,Thrift / Vintage Store,Trail,Video Game Store,Video Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Wings Joint,Yoga Studio
0,Grand Rapids,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Grand Rapids,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Grand Rapids,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,Grand Rapids,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,Grand Rapids,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0


And let's examine the new dataframe size.

In [37]:
kent_onehot.shape

(389, 139)

#### Next, let's group rows by neighborhood and by taking the mean of the frequency of occurrence of each category

In [38]:
kent_grouped = kent_onehot.groupby('Neighborhood').mean().reset_index()
kent_grouped

Unnamed: 0,Neighborhood,American Restaurant,Antique Shop,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Australian Restaurant,Automotive Shop,BBQ Joint,Bagel Shop,Bakery,Bank,Bar,Baseball Field,Beer Bar,Bistro,Bowling Alley,Breakfast Spot,Brewery,Burrito Place,Bus Stop,Business Service,Butcher,Cafeteria,Café,Chinese Restaurant,Coffee Shop,Comic Shop,Concert Hall,Construction & Landscaping,Convenience Store,Convention Center,Cosmetics Shop,Deli / Bodega,Department Store,Dessert Shop,Diner,Disc Golf,Discount Store,Dive Bar,Dog Run,Donut Shop,Dry Cleaner,Electronics Store,Ethiopian Restaurant,Fabric Shop,Factory,Farmers Market,Fast Food Restaurant,Flea Market,Fondue Restaurant,Food,Food & Drink Shop,Frozen Yogurt Shop,Furniture / Home Store,Garden Center,Gas Station,Gastropub,Gay Bar,Golf Course,Gourmet Shop,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Health Food Store,High School,Hockey Arena,Home Service,Hot Dog Joint,Hotel,Ice Cream Shop,Indian Restaurant,Indoor Play Area,Insurance Office,Intersection,Italian Restaurant,Laundromat,Liquor Store,Market,Mediterranean Restaurant,Mexican Restaurant,Miscellaneous Shop,Mobile Phone Shop,Motel,Motorsports Shop,Movie Theater,Moving Target,Multiplex,Museum,Music Store,Music Venue,Nature Preserve,Nightlife Spot,Park,Pet Store,Pharmacy,Pizza Place,Playground,Pub,Record Shop,Rental Car Location,River,Rock Club,Salon / Barbershop,Sandwich Place,Sculpture Garden,Seafood Restaurant,Shipping Store,Shop & Service,Skating Rink,Smoke Shop,Soccer Field,Social Club,Soup Place,South Indian Restaurant,Southern / Soul Food Restaurant,Spa,Sporting Goods Shop,Sports Bar,Steakhouse,Supermarket,Sushi Restaurant,Taco Place,Tapas Restaurant,Tea Room,Thai Restaurant,Theater,Thrift / Vintage Store,Trail,Video Game Store,Video Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Wings Joint,Yoga Studio
0,Byron,0.035714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.035714,0.0,0.0,0.0,0.071429,0.035714,0.0,0.0,0.0,0.0,0.0,0.035714,0.0,0.0,0.035714,0.0,0.035714,0.0,0.035714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.035714,0.0,0.0,0.0,0.0,0.0,0.035714,0.0,0.035714,0.0,0.0,0.035714,0.0,0.0,0.0,0.0,0.035714,0.0,0.035714,0.0,0.035714,0.0,0.0,0.035714,0.0,0.0,0.0,0.0,0.0,0.0,0.035714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.035714,0.0,0.071429,0.107143,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.035714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.035714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.035714,0.0,0.035714,0.0,0.0,0.0,0.0,0.0
1,Cascade,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,0.0,0.083333,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Gaines,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Grand Rapids,0.026316,0.006579,0.019737,0.006579,0.0,0.006579,0.0,0.006579,0.0,0.006579,0.006579,0.032895,0.0,0.032895,0.0,0.019737,0.006579,0.0,0.0,0.013158,0.006579,0.0,0.0,0.0,0.0,0.039474,0.019737,0.078947,0.006579,0.006579,0.006579,0.0,0.006579,0.026316,0.013158,0.006579,0.019737,0.0,0.0,0.0,0.0,0.0,0.006579,0.0,0.0,0.006579,0.0,0.0,0.006579,0.0,0.0,0.006579,0.006579,0.006579,0.006579,0.006579,0.006579,0.0,0.006579,0.006579,0.0,0.013158,0.006579,0.006579,0.006579,0.013158,0.0,0.0,0.006579,0.0,0.006579,0.026316,0.019737,0.006579,0.0,0.0,0.019737,0.026316,0.0,0.0,0.006579,0.013158,0.019737,0.0,0.006579,0.0,0.0,0.006579,0.0,0.006579,0.013158,0.0,0.006579,0.0,0.0,0.013158,0.0,0.006579,0.032895,0.0,0.013158,0.0,0.0,0.0,0.006579,0.006579,0.026316,0.013158,0.006579,0.0,0.0,0.0,0.006579,0.0,0.0,0.006579,0.0,0.006579,0.0,0.0,0.0,0.013158,0.013158,0.006579,0.006579,0.006579,0.006579,0.019737,0.013158,0.0,0.0,0.006579,0.0,0.0,0.006579,0.006579,0.006579,0.006579
4,Grandville,0.037736,0.037736,0.0,0.0,0.0,0.0,0.0,0.0,0.018868,0.0,0.0,0.018868,0.018868,0.0,0.037736,0.0,0.0,0.018868,0.0,0.018868,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.018868,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.018868,0.0,0.018868,0.0,0.056604,0.018868,0.0,0.018868,0.0,0.018868,0.0,0.0,0.0,0.0,0.018868,0.0,0.0,0.0,0.0,0.0,0.018868,0.0,0.018868,0.0,0.0,0.0,0.0,0.0,0.0,0.018868,0.0,0.0,0.0,0.0,0.0,0.0,0.018868,0.037736,0.0,0.018868,0.0,0.018868,0.018868,0.0,0.0,0.0,0.0,0.056604,0.018868,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.018868,0.0,0.0,0.0,0.018868,0.018868,0.018868,0.037736,0.0,0.0,0.018868,0.018868,0.0,0.0,0.0,0.018868,0.0,0.0,0.018868,0.018868,0.018868,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.037736,0.0,0.018868,0.0,0.0,0.018868,0.0,0.0,0.0,0.0,0.018868,0.0,0.0,0.018868,0.0,0.0,0.0,0.018868,0.0
5,Kentwood,0.0,0.0,0.0,0.0,0.0,0.0,0.029412,0.0,0.0,0.0,0.0,0.0,0.088235,0.0,0.029412,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.029412,0.0,0.0,0.0,0.058824,0.029412,0.0,0.0,0.0,0.029412,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.0,0.029412,0.0,0.0,0.0,0.0,0.0,0.0,0.029412,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.029412,0.0,0.0,0.029412,0.0,0.029412,0.029412,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058824,0.0,0.029412,0.058824,0.029412,0.0,0.0,0.0,0.0,0.0,0.029412,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.029412,0.0,0.0,0.029412,0.0,0.0,0.0,0.0,0.0,0.058824,0.029412,0.0,0.0,0.0,0.0,0.0,0.0,0.029412,0.0,0.029412,0.0,0.0,0.0,0.0,0.0
6,Plainfield,0.0,0.0,0.0,0.0,0.052632,0.0,0.0,0.0,0.0,0.052632,0.0,0.052632,0.0,0.105263,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.052632,0.0,0.0,0.052632,0.0,0.0,0.0,0.0,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.052632,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,0.105263,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.052632,0.0,0.0,0.0,0.052632,0.0,0.0,0.0,0.0,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,Walker,0.029412,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.029412,0.0,0.029412,0.0,0.029412,0.0,0.0,0.0,0.0,0.029412,0.0,0.029412,0.0,0.0,0.029412,0.029412,0.0,0.0,0.029412,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.0,0.0,0.029412,0.0,0.029412,0.0,0.029412,0.029412,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.029412,0.0,0.0,0.058824,0.029412,0.0,0.029412,0.029412,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.029412,0.0,0.0,0.029412,0.029412,0.0,0.0,0.029412,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.029412,0.0,0.0,0.0,0.0,0.029412,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.029412,0.0,0.0,0.0,0.0,0.0,0.029412,0.0,0.029412,0.0,0.0,0.0,0.0,0.0,0.029412,0.029412,0.029412,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.029412,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,Wyoming,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.018868,0.0,0.0,0.0,0.018868,0.0,0.0,0.0,0.0,0.0,0.018868,0.018868,0.0,0.0,0.018868,0.018868,0.0,0.0,0.018868,0.018868,0.0,0.0,0.0,0.0,0.0,0.0,0.018868,0.0,0.0,0.0,0.0,0.018868,0.0,0.0,0.0,0.018868,0.0,0.0,0.0,0.018868,0.0,0.018868,0.0,0.0,0.0,0.0,0.0,0.018868,0.0,0.075472,0.0,0.0,0.0,0.0,0.018868,0.037736,0.0,0.018868,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.018868,0.0,0.018868,0.037736,0.0,0.0,0.0,0.0,0.018868,0.018868,0.018868,0.0,0.018868,0.0,0.0,0.0,0.0,0.018868,0.0,0.075472,0.0,0.037736,0.132075,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.056604,0.0,0.0,0.018868,0.0,0.0,0.018868,0.0,0.0,0.0,0.0,0.0,0.018868,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.018868,0.018868,0.0,0.0,0.018868,0.0


#### Let's confirm the new size

In [39]:
kent_grouped.shape

(9, 139)

#### Let's print each neighborhood along with the top 5 most common venues

In [40]:
num_top_venues = 5

for hood in kent_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = kent_grouped[kent_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Byron----
                 venue  freq
0          Pizza Place  0.11
1             Pharmacy  0.07
2                 Bank  0.07
3  American Restaurant  0.04
4          High School  0.04


----Cascade----
              venue  freq
0             River  0.17
1              Park  0.17
2      Soccer Field  0.08
3  Insurance Office  0.08
4      Intersection  0.08


----Gaines----
                        venue  freq
0  Construction & Landscaping  0.50
1                        Park  0.25
2                 Golf Course  0.25
3                   Multiplex  0.00
4                      Museum  0.00


----Grand Rapids----
                 venue  freq
0          Coffee Shop  0.08
1                 Café  0.04
2  American Restaurant  0.03
3               Bakery  0.03
4          Pizza Place  0.03


----Grandville----
                venue  freq
0  Mexican Restaurant  0.06
1      Discount Store  0.06
2      Baseball Field  0.04
3      Ice Cream Shop  0.04
4        Antique Shop  0.04


----Kentwood----


#### Let's put that into a *pandas* dataframe

First, let's write a function to sort the venues in descending order.

In [41]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

Now let's create the new dataframe and display the top 10 venues for each neighborhood.

In [42]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = kent_grouped['Neighborhood']

for ind in np.arange(kent_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(kent_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Byron,Pizza Place,Bank,Pharmacy,Ice Cream Shop,Chinese Restaurant,Cafeteria,Business Service,Fast Food Restaurant,Liquor Store,Brewery
1,Cascade,River,Park,Insurance Office,Mobile Phone Shop,Home Service,Golf Course,Disc Golf,Soccer Field,Intersection,Dog Run
2,Gaines,Construction & Landscaping,Park,Golf Course,Dog Run,Factory,Fabric Shop,Ethiopian Restaurant,Electronics Store,Dry Cleaner,Donut Shop
3,Grand Rapids,Coffee Shop,Café,Pizza Place,Bakery,Bar,American Restaurant,Hotel,Sandwich Place,Italian Restaurant,Cosmetics Shop
4,Grandville,Discount Store,Mexican Restaurant,Ice Cream Shop,Pizza Place,Baseball Field,Sporting Goods Shop,American Restaurant,Antique Shop,Electronics Store,Miscellaneous Shop
5,Kentwood,Bank,Discount Store,Supermarket,Pizza Place,Intersection,Park,Chinese Restaurant,Pharmacy,Golf Course,Coffee Shop
6,Plainfield,Liquor Store,Bar,Cosmetics Shop,Bakery,Ice Cream Shop,Food,Nightlife Spot,Disc Golf,Pizza Place,Gym / Fitness Center
7,Walker,Golf Course,Discount Store,American Restaurant,Electronics Store,Bus Stop,Butcher,Sandwich Place,Chinese Restaurant,Coffee Shop,Pizza Place
8,Wyoming,Pizza Place,Park,Gas Station,Sandwich Place,Liquor Store,Grocery Store,Pharmacy,Motorsports Shop,Business Service,Brewery


<a id='item4'></a>

## 4. Cluster Neighborhoods

Run *k*-means to cluster the neighborhood into 3 clusters.

In [43]:
# set number of clusters
kclusters = 3

kent_grouped_clustering = kent_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(kent_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([0, 1, 2, 0, 0, 0, 0, 0, 0], dtype=int32)

Let's create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood.

In [44]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

kent_merged = kent_data

# merge kent_grouped with kent_data to add latitude/longitude for each neighborhood
kent_merged = kent_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

In [45]:
kent_merged.head(110) # check the last columns!

Unnamed: 0,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Grand Rapids,42.961416,-85.655863,0,Coffee Shop,Café,Pizza Place,Bakery,Bar,American Restaurant,Hotel,Sandwich Place,Italian Restaurant,Cosmetics Shop
1,Wyoming,42.89225,-85.70253,0,Pizza Place,Park,Gas Station,Sandwich Place,Liquor Store,Grocery Store,Pharmacy,Motorsports Shop,Business Service,Brewery
2,Kentwood,42.882529,-85.592247,0,Bank,Discount Store,Supermarket,Pizza Place,Intersection,Park,Chinese Restaurant,Pharmacy,Golf Course,Coffee Shop
3,Plainfield,43.074193,-85.610032,0,Liquor Store,Bar,Cosmetics Shop,Bakery,Ice Cream Shop,Food,Nightlife Spot,Disc Golf,Pizza Place,Gym / Fitness Center
4,Gaines,42.804475,-85.605024,2,Construction & Landscaping,Park,Golf Course,Dog Run,Factory,Fabric Shop,Ethiopian Restaurant,Electronics Store,Dry Cleaner,Donut Shop
5,Walker,42.984747,-85.747812,0,Golf Course,Discount Store,American Restaurant,Electronics Store,Bus Stop,Butcher,Sandwich Place,Chinese Restaurant,Coffee Shop,Pizza Place
6,Byron,42.812529,-85.723917,0,Pizza Place,Bank,Pharmacy,Ice Cream Shop,Chinese Restaurant,Cafeteria,Business Service,Fast Food Restaurant,Liquor Store,Brewery
7,Cascade,42.895864,-85.486688,1,River,Park,Insurance Office,Mobile Phone Shop,Home Service,Golf Course,Disc Golf,Soccer Field,Intersection,Dog Run
8,Grand Rapids,42.986416,-85.58225,0,Coffee Shop,Café,Pizza Place,Bakery,Bar,American Restaurant,Hotel,Sandwich Place,Italian Restaurant,Cosmetics Shop
9,Grandville,42.903915,-85.756144,0,Discount Store,Mexican Restaurant,Ice Cream Shop,Pizza Place,Baseball Field,Sporting Goods Shop,American Restaurant,Antique Shop,Electronics Store,Miscellaneous Shop


#### Obtain Neighborhood Latitude, Longitude information

Finally, let's visualize the resulting clusters

In [46]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(kent_merged['Latitude'], kent_merged['Longitude'], kent_merged['Neighborhood'], kent_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

<a id='item5'></a>

## 5. Examine Clusters

#### Cluster 0
Cluster 0 has a variety of cafe's and restaurants in the top 3 venues.
This would be a good place to look for locating the Brewery, so long as bars and breweries are not common in the Neighborhood.

In [47]:
kent_merged.loc[kent_merged['Cluster Labels'] == 0, kent_merged.columns[[0] + list(range(1, kent_merged.shape[1]))]]

Unnamed: 0,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Grand Rapids,42.961416,-85.655863,0,Coffee Shop,Café,Pizza Place,Bakery,Bar,American Restaurant,Hotel,Sandwich Place,Italian Restaurant,Cosmetics Shop
1,Wyoming,42.89225,-85.70253,0,Pizza Place,Park,Gas Station,Sandwich Place,Liquor Store,Grocery Store,Pharmacy,Motorsports Shop,Business Service,Brewery
2,Kentwood,42.882529,-85.592247,0,Bank,Discount Store,Supermarket,Pizza Place,Intersection,Park,Chinese Restaurant,Pharmacy,Golf Course,Coffee Shop
3,Plainfield,43.074193,-85.610032,0,Liquor Store,Bar,Cosmetics Shop,Bakery,Ice Cream Shop,Food,Nightlife Spot,Disc Golf,Pizza Place,Gym / Fitness Center
5,Walker,42.984747,-85.747812,0,Golf Course,Discount Store,American Restaurant,Electronics Store,Bus Stop,Butcher,Sandwich Place,Chinese Restaurant,Coffee Shop,Pizza Place
6,Byron,42.812529,-85.723917,0,Pizza Place,Bank,Pharmacy,Ice Cream Shop,Chinese Restaurant,Cafeteria,Business Service,Fast Food Restaurant,Liquor Store,Brewery
8,Grand Rapids,42.986416,-85.58225,0,Coffee Shop,Café,Pizza Place,Bakery,Bar,American Restaurant,Hotel,Sandwich Place,Italian Restaurant,Cosmetics Shop
9,Grandville,42.903915,-85.756144,0,Discount Store,Mexican Restaurant,Ice Cream Shop,Pizza Place,Baseball Field,Sporting Goods Shop,American Restaurant,Antique Shop,Electronics Store,Miscellaneous Shop


#### Cluster 1
Cluster 1 has appears to be nature and commercial related.
There would probably not be much demand or zoning for a brewery here.

In [48]:
kent_merged.loc[kent_merged['Cluster Labels'] == 1, kent_merged.columns[[0] + list(range(1, kent_merged.shape[1]))]]

Unnamed: 0,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
7,Cascade,42.895864,-85.486688,1,River,Park,Insurance Office,Mobile Phone Shop,Home Service,Golf Course,Disc Golf,Soccer Field,Intersection,Dog Run


#### Cluster 2
Cluster 2 has appears to be recreation and industrial related.
There would probably not be much demand or zoning for a brewery here either.

In [49]:
kent_merged.loc[kent_merged['Cluster Labels'] == 2, kent_merged.columns[[0] + list(range(1, kent_merged.shape[1]))]]

Unnamed: 0,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
4,Gaines,42.804475,-85.605024,2,Construction & Landscaping,Park,Golf Course,Dog Run,Factory,Fabric Shop,Ethiopian Restaurant,Electronics Store,Dry Cleaner,Donut Shop
