### Importing Libraries

In [1]:
# import sys

# !{sys.executable} -m pip install beautifulsoup4
# !{sys.executable} -m pip install lxml
# !{sys.executable} -m pip install html5lib
# !{sys.executable} -m pip install geopy
# !{sys.executable} -m pip install geocoder
# !{sys.executable} -m pip install uszipcode
# !{sys.executable} -m pip install time
# !{sys.executable} -m pip install jupyterthemes
# !{sys.executable} -m jt -t monokai

from bs4 import BeautifulSoup as bs
import os

import requests as rs
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns


import folium
import geopy
import geocoder
from geopy.geocoders import Nominatim
from sklearn.cluster import KMeans
from pandas.io.json import json_normalize
import matplotlib.cm as cm
import matplotlib.colors as colors

from uszipcode import SearchEngine
from uszipcode import Zipcode

import time
# from jupyterthemes import jtplot
# jtplot.style(theme="monokai", context="notebook", ticks=True, grid=False)

### Gathering the Names of Places in Maricopa County, Arizona

In [2]:
# Specify the target URL
print('Collecting data from the Phoenix New Times website: \n\n')

# url = 'https://www.ciclt.net/sn/clt/capitolimpact/gw_ziplist.aspx?FIPS=04013'
url = 'https://www.phoenixnewtimes.com/news/all-24-cities-and-towns-in-maricopa-county-ranked-from-worst-to-first-6641342'

# Fetch raw HTML content with a GET request
contents = rs.get(url)

# Parse the HTML content
soup = bs(contents.content, "lxml")

# Find the relevant content for our purposes
story_body = soup.find('div', class_='story-body')
# print(story_body.prettify())

all_strongs = story_body.find_all('strong')
flag = 3
place_list = []
place_list_data = []

for places in all_strongs:
    if flag%3 == 0:
        place_list.append(places.text)
    flag += 1

place_list_df = pd.DataFrame(place_list, columns=['Names'])
# display(place_list_df)
place_list_df.drop(place_list_df.tail(1).index, axis=0, inplace=True)
# display(place_list_df)
place_list_df[['Prefix', 'Place']] = place_list_df['Names'].str.split(')', expand=True, n=1)
# display(place_list_df)
place_list_df.drop(columns=['Names', 'Prefix'], axis=1, inplace=True)
display(place_list_df)

Collecting data from the Phoenix New Times website: 




Unnamed: 0,Place
0,Youngtown
1,Gila Bend
2,El Mirage
3,Tolleson
4,Surprise
5,Guadalupe
6,Avondale
7,Buckeye
8,Carefree
9,Fountain Hills


### Finding the Coordinates and Median Income of the Locations

In [3]:
lat_lng_coords = None
flag = 2
temp_list = []
geocoded_places = pd.DataFrame(columns=['Place', 'Latitude', 'Longitude', 'Median_Income'])

for locations in place_list_df['Place']:
    search_location = locations + ', Arizona'
    g = geocoder.locationiq(search_location, key='*********************')
    lat_lng_coords = g.latlng
    latitude = lat_lng_coords[0]
    longitude = lat_lng_coords[1]
    
    search = SearchEngine(simple_zipcode=True)
    res = search.by_coordinates(latitude, longitude, radius=30, returns=1)
    median_income = res[0].median_household_income
    
    geocoded_places = geocoded_places.append({'Place': locations, 
                                              'Latitude': latitude, 
                                              'Longitude': longitude, 
                                              'Median_Income': median_income}, ignore_index=True)

#     print(locations)
#     print(search_location)
#     print('Latitude: ', latitude)
#     print('Longitude: ', longitude)
#     print('Median Household Income: ', median_income)

    if flag%2==0:
        time.sleep(1)
    flag+=1

display(geocoded_places)

Unnamed: 0,Place,Latitude,Longitude,Median_Income
0,Youngtown,33.59373,-112.303326,33884
1,Gila Bend,32.947827,-112.716824,30242
2,El Mirage,33.613034,-112.324487,47237
3,Tolleson,33.45005,-112.259309,50066
4,Surprise,33.629227,-112.368019,70302
5,Guadalupe,33.363125,-111.962533,53022
6,Avondale,33.435598,-112.349602,44658
7,Buckeye,33.370275,-112.583867,68839
8,Carefree,33.822261,-111.918203,100338
9,Fountain Hills,33.611711,-111.717361,73608


### Generating a Map using Folium

In [4]:
# Create a Map instance for Phoenix
place = 'Phoenix, Arizona'
geo_address = Nominatim(user_agent="phoenix_explorer")
locate_address = geo_address.geocode(place)
map_phoenix = folium.Map(location=[locate_address.latitude, locate_address.longitude], 
                         zoom_start=9)

# Add markers
for _, row in geocoded_places.iterrows():
    label = '{}, ${}'.format(row.Place, row.Median_Income)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker([row.Latitude, row.Longitude], radius=5, popup=label, 
                        color='#252191', fill=True, fill_color='#6ecedb', 
                        fill_opacity=0.95, parse_html=False).add_to(map_phoenix) 

# Generate the Map
map_phoenix

### Using the Foursquare API to return the top 100 venues within a 500 metre radius

In [5]:
# Foursquare API
explore_url_prefix = 'https://api.foursquare.com/v2/venues/explore'
CLIENT_ID = '****************************************************' # Put Your Client Id
CLIENT_SECRET = '************************************************' # Put You Client Secret 
VERSION = '20190712'
LIMIT = 100

# Function to return nearby venues
def getNearbyVenues(names, latitudes, longitudes, radius=2000):
    
    venues_list=[]
    
    for name, lat, lng in zip(names, latitudes, longitudes):
        print('Collecting venues for: ',name)
        
        # create the API request URL
        url = '{}?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            explore_url_prefix, CLIENT_ID, CLIENT_SECRET, VERSION, lat, lng, radius, LIMIT)
            
        # make the GET request
        results = rs.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([( name, lat, lng, v['venue']['name'], 
                             v['venue']['location']['lat'], 
                             v['venue']['location']['lng'], 
                             v['venue']['categories'][0]['name']) for v in results])

#     print("\nThe length of the Venues List is:", len(venues_list))
    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
#     nearby_venues = pd.DataFrame(venues_list)
    nearby_venues.columns = ['Place', 'Place_Latitude', 'Place_Longitude', 'Venue', 
                             'Venue_Latitude', 'Venue_Longitude', 'Venue_Category']
    
    return(nearby_venues)


# Passing the places for which venues are to be found to the function
phoenix_venues = getNearbyVenues(names=geocoded_places['Place'], 
                                 latitudes=geocoded_places['Latitude'], 
                                 longitudes=geocoded_places['Longitude'], 
                                 radius=2000)

display(phoenix_venues)

Collecting venues for:   Youngtown
Collecting venues for:   Gila Bend
Collecting venues for:   El Mirage
Collecting venues for:   Tolleson
Collecting venues for:   Surprise
Collecting venues for:   Guadalupe
Collecting venues for:   Avondale
Collecting venues for:   Buckeye
Collecting venues for:   Carefree
Collecting venues for:   Fountain Hills
Collecting venues for:   Mesa
Collecting venues for:   Goodyear
Collecting venues for:   Gilbert
Collecting venues for:   Queen Creek
Collecting venues for:   Cave Creek
Collecting venues for:   Wickenburg
Collecting venues for:   Peoria
Collecting venues for:   Litchfield Park
Collecting venues for:   Chandler
Collecting venues for:   Paradise Valley
Collecting venues for:   Glendale
Collecting venues for:   Tempe
Collecting venues for:   Scottsdale
Collecting venues for:   Phoenix


Unnamed: 0,Place,Place_Latitude,Place_Longitude,Venue,Venue_Latitude,Venue_Longitude,Venue_Category
0,Youngtown,33.593730,-112.303326,Brenda's Kitchen,33.603695,-112.302043,American Restaurant
1,Youngtown,33.593730,-112.303326,Starbucks,33.602403,-112.290236,Coffee Shop
2,Youngtown,33.593730,-112.303326,QuikTrip,33.605500,-112.302520,Convenience Store
3,Youngtown,33.593730,-112.303326,Nino's,33.600808,-112.287748,Mexican Restaurant
4,Youngtown,33.593730,-112.303326,Mighty Moo Ice Cream,33.594641,-112.299814,Ice Cream Shop
...,...,...,...,...,...,...,...
1388,Phoenix,33.448437,-112.074142,Roosevelt Square Apartments,33.459479,-112.076028,Residential Building (Apartment / Condo)
1389,Phoenix,33.448437,-112.074142,Margaret T Hance Park,33.462160,-112.072843,Park
1390,Phoenix,33.448437,-112.074142,Barrio Café Gran Reserva,33.457025,-112.089526,Mexican Restaurant
1391,Phoenix,33.448437,-112.074142,Gallo Blanco Cafe y Bar,33.455995,-112.060827,Mexican Restaurant


### One-Hot Encoding for the Type of Venue

In [6]:
phoenix_onehot = pd.get_dummies(phoenix_venues[['Venue_Category']], prefix="", prefix_sep="")
phoenix_onehot.insert(0, 'Place_Name', phoenix_venues['Place'])

display(phoenix_onehot.head(15))

venues_count, categories_count = phoenix_onehot.shape
categories_count -= 1
print("There are {} different venues and {} unique venue categories.".format(venues_count, categories_count))

Unnamed: 0,Place_Name,ATM,Accessories Store,Adult Education Center,Advertising Agency,Airport,Airport Service,American Restaurant,Antique Shop,Arcade,...,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Water Park,Whisky Bar,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,Youngtown,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Youngtown,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Youngtown,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Youngtown,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Youngtown,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5,Youngtown,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
6,Youngtown,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
7,Youngtown,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
8,Youngtown,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9,Youngtown,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


There are 1393 different venues and 230 unique venue categories.


### Grouping by Place with the mean frequency of occurrence of the different types of venues

In [7]:
phoenix_grouped = phoenix_onehot.groupby('Place_Name').mean().reset_index()
display(phoenix_grouped)

Unnamed: 0,Place_Name,ATM,Accessories Store,Adult Education Center,Advertising Agency,Airport,Airport Service,American Restaurant,Antique Shop,Arcade,...,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Water Park,Whisky Bar,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,Avondale,0.0,0.0,0.0,0.0,0.021277,0.021277,0.021277,0.0,0.0,...,0.0,0.0,0.021277,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Buckeye,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Carefree,0.0,0.022727,0.0,0.0,0.022727,0.0,0.090909,0.022727,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.022727,0.0,0.0,0.0
3,Cave Creek,0.0,0.0,0.0,0.0,0.0,0.0,0.025641,0.025641,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Chandler,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.01,0.02,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0
5,El Mirage,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.088235,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Fountain Hills,0.0,0.0,0.0,0.0,0.0,0.0,0.021739,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.021739,0.0,0.0,0.0
7,Gila Bend,0.0,0.0,0.0,0.0,0.0,0.0,0.047619,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,Gilbert,0.0,0.0,0.0,0.0,0.0,0.0,0.031746,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.015873,0.0,0.0,0.0
9,Glendale,0.0,0.0,0.0,0.0,0.0,0.0,0.026316,0.0,0.0,...,0.013158,0.013158,0.013158,0.0,0.0,0.0,0.013158,0.0,0.0,0.0


### Returning the top 5 types of Venues for each Place

In [8]:
num_top_venues = 5
for neighborhood in phoenix_grouped['Place_Name']:
    print("------{}------".format(neighborhood))
    temp = phoenix_grouped[phoenix_grouped['Place_Name'] == neighborhood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

------ Avondale------
                  venue  freq
0    Mexican Restaurant  0.15
1   Rental Car Location  0.06
2  Fast Food Restaurant  0.06
3        Sandwich Place  0.04
4     Convenience Store  0.04


------ Buckeye------
                venue  freq
0  Mexican Restaurant  0.22
1      Discount Store  0.11
2         Pizza Place  0.11
3                Café  0.06
4                Bank  0.06


------ Carefree------
                 venue  freq
0  American Restaurant  0.09
1          Coffee Shop  0.09
2            Locksmith  0.07
3                 Café  0.05
4                  Bar  0.05


------ Cave Creek------
                    venue  freq
0             Coffee Shop  0.10
1      Mexican Restaurant  0.08
2       Convenience Store  0.05
3  Furniture / Home Store  0.05
4                     Bar  0.05


------ Chandler------
                  venue  freq
0    Mexican Restaurant  0.06
1     Convenience Store  0.04
2           Pizza Place  0.03
3  Fast Food Restaurant  0.03
4            Taco

### Ranking the top 10 most common venues for each place

In [9]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    return row_categories_sorted.index.values[0:num_top_venues]

In [10]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Place']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Place'] = phoenix_grouped['Place_Name']

for ind in np.arange(phoenix_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(
        phoenix_grouped.iloc[ind, :], num_top_venues)

display(neighborhoods_venues_sorted)

Unnamed: 0,Place,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Avondale,Mexican Restaurant,Fast Food Restaurant,Rental Car Location,Sandwich Place,Convenience Store,Burger Joint,Fried Chicken Joint,Jewelry Store,Big Box Store,Social Club
1,Buckeye,Mexican Restaurant,Pizza Place,Discount Store,Park,Fast Food Restaurant,Spa,Bank,Business Service,Gas Station,Sandwich Place
2,Carefree,Coffee Shop,American Restaurant,Locksmith,Bar,Italian Restaurant,Café,Shopping Mall,Irish Pub,Steakhouse,Food Court
3,Cave Creek,Coffee Shop,Mexican Restaurant,BBQ Joint,Diner,Steakhouse,Bar,Furniture / Home Store,Convenience Store,Antique Shop,American Restaurant
4,Chandler,Mexican Restaurant,Convenience Store,Discount Store,Bank,Taco Place,Fast Food Restaurant,Italian Restaurant,Sandwich Place,Pizza Place,Video Store
5,El Mirage,Mexican Restaurant,Convenience Store,Pizza Place,Video Store,Pharmacy,Bakery,Fast Food Restaurant,Gym / Fitness Center,Big Box Store,Food
6,Fountain Hills,Pizza Place,Coffee Shop,Italian Restaurant,Bank,Park,Pharmacy,Grocery Store,Mexican Restaurant,Shipping Store,Japanese Restaurant
7,Gila Bend,Fast Food Restaurant,Mexican Restaurant,Discount Store,Hotel,Ice Cream Shop,Truck Stop,Memorial Site,Market,Sandwich Place,Gas Station
8,Gilbert,Sandwich Place,Mexican Restaurant,Park,Breakfast Spot,Bank,Cosmetics Shop,Coffee Shop,Sushi Restaurant,Brewery,American Restaurant
9,Glendale,Convenience Store,Mexican Restaurant,Pizza Place,Fast Food Restaurant,Discount Store,Sandwich Place,Bank,Candy Store,American Restaurant,Breakfast Spot


### Clustering Phoenix into 5 Clusters using K-Means Clustering

In [11]:
# Set number of clusters
k = 5

# Drop the neighborhood name column so that each column contains only the feature set.
phoenix_grouped_clustering = phoenix_grouped.drop('Place_Name', 1)

# Run k-means clustering
kmeans = KMeans(n_clusters=k, random_state=0).fit(phoenix_grouped_clustering)

# Check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

# Add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

phoenix_merged = geocoded_places

# Merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
phoenix_merged = phoenix_merged.join(neighborhoods_venues_sorted.set_index('Place'), on='Place')

# print(phoenix_merged.shape)
display(phoenix_merged.head(24))

Unnamed: 0,Place,Latitude,Longitude,Median_Income,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Youngtown,33.59373,-112.303326,33884,0,Convenience Store,Mexican Restaurant,Ice Cream Shop,Coffee Shop,Pharmacy,Fast Food Restaurant,Fried Chicken Joint,Breakfast Spot,Café,Sandwich Place
1,Gila Bend,32.947827,-112.716824,30242,0,Fast Food Restaurant,Mexican Restaurant,Discount Store,Hotel,Ice Cream Shop,Truck Stop,Memorial Site,Market,Sandwich Place,Gas Station
2,El Mirage,33.613034,-112.324487,47237,0,Mexican Restaurant,Convenience Store,Pizza Place,Video Store,Pharmacy,Bakery,Fast Food Restaurant,Gym / Fitness Center,Big Box Store,Food
3,Tolleson,33.45005,-112.259309,50066,1,Mexican Restaurant,Convenience Store,Intersection,Deli / Bodega,Burger Joint,Fried Chicken Joint,Seafood Restaurant,Factory,Sandwich Place,Park
4,Surprise,33.629227,-112.368019,70302,4,Fast Food Restaurant,Sandwich Place,Baseball Field,Mobile Phone Shop,Sushi Restaurant,Italian Restaurant,Mexican Restaurant,Coffee Shop,Hardware Store,Big Box Store
5,Guadalupe,33.363125,-111.962533,53022,0,Fast Food Restaurant,Sandwich Place,Hotel,Convenience Store,Mexican Restaurant,Gas Station,Coffee Shop,Video Store,Discount Store,Burger Joint
6,Avondale,33.435598,-112.349602,44658,0,Mexican Restaurant,Fast Food Restaurant,Rental Car Location,Sandwich Place,Convenience Store,Burger Joint,Fried Chicken Joint,Jewelry Store,Big Box Store,Social Club
7,Buckeye,33.370275,-112.583867,68839,3,Mexican Restaurant,Pizza Place,Discount Store,Park,Fast Food Restaurant,Spa,Bank,Business Service,Gas Station,Sandwich Place
8,Carefree,33.822261,-111.918203,100338,2,Coffee Shop,American Restaurant,Locksmith,Bar,Italian Restaurant,Café,Shopping Mall,Irish Pub,Steakhouse,Food Court
9,Fountain Hills,33.611711,-111.717361,73608,4,Pizza Place,Coffee Shop,Italian Restaurant,Bank,Park,Pharmacy,Grocery Store,Mexican Restaurant,Shipping Store,Japanese Restaurant


### Visualizing the Clusters

In [12]:
# Create a map instance
map_phoenix = folium.Map(location=[locate_address.latitude, locate_address.longitude], 
                         zoom_start=9)

# Set color scheme for the clusters
x = np.arange(k)
ys = [i + x + (i*x)**2 for i in range(k)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# Add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(phoenix_merged['Latitude'], 
                                  phoenix_merged['Longitude'], 
                                  phoenix_merged['Place'], 
                                  phoenix_merged['Cluster Labels']):
    
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker([lat, lon], radius=5, popup=label, 
                        color=rainbow[cluster-1], fill=True, 
                        fill_color=rainbow[cluster-1], fill_opacity=0.7).add_to(map_phoenix)
       
map_phoenix

#### Cluster 1

In [13]:
# cluster_1 = phoenix_merged.loc[phoenix_merged['Cluster Labels'] == 0, phoenix_merged.columns[ [2] + list(range(5, phoenix_merged.shape[1]))]]
cluster_1 = phoenix_merged.loc[phoenix_merged['Cluster Labels'] == 0]
print('Average Median Income for Cluster 1: $', round(cluster_1['Median_Income'].mean()), 2)
display(cluster_1)

Average Median Income for Cluster 1: $ 40582.0 2


Unnamed: 0,Place,Latitude,Longitude,Median_Income,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Youngtown,33.59373,-112.303326,33884,0,Convenience Store,Mexican Restaurant,Ice Cream Shop,Coffee Shop,Pharmacy,Fast Food Restaurant,Fried Chicken Joint,Breakfast Spot,Café,Sandwich Place
1,Gila Bend,32.947827,-112.716824,30242,0,Fast Food Restaurant,Mexican Restaurant,Discount Store,Hotel,Ice Cream Shop,Truck Stop,Memorial Site,Market,Sandwich Place,Gas Station
2,El Mirage,33.613034,-112.324487,47237,0,Mexican Restaurant,Convenience Store,Pizza Place,Video Store,Pharmacy,Bakery,Fast Food Restaurant,Gym / Fitness Center,Big Box Store,Food
5,Guadalupe,33.363125,-111.962533,53022,0,Fast Food Restaurant,Sandwich Place,Hotel,Convenience Store,Mexican Restaurant,Gas Station,Coffee Shop,Video Store,Discount Store,Burger Joint
6,Avondale,33.435598,-112.349602,44658,0,Mexican Restaurant,Fast Food Restaurant,Rental Car Location,Sandwich Place,Convenience Store,Burger Joint,Fried Chicken Joint,Jewelry Store,Big Box Store,Social Club
10,Mesa,33.415112,-111.831479,36586,0,Mexican Restaurant,Convenience Store,Sandwich Place,Bank,Brewery,Museum,Park,Bar,Hotel,Ice Cream Shop
11,Goodyear,33.435367,-112.357601,44658,0,Mexican Restaurant,Fast Food Restaurant,Rental Car Location,Hotel,Furniture / Home Store,Pizza Place,Vietnamese Restaurant,Discount Store,Convenience Store,Cosmetics Shop
15,Wickenburg,33.968096,-112.730135,42375,0,Fast Food Restaurant,Convenience Store,Mexican Restaurant,American Restaurant,Sandwich Place,Dive Bar,Discount Store,Hotel,Ice Cream Shop,Italian Restaurant
16,Peoria,33.580612,-112.237294,45886,0,Fast Food Restaurant,Video Store,Convenience Store,Mexican Restaurant,Theater,Hardware Store,Ice Cream Shop,Chinese Restaurant,Thrift / Vintage Store,Pizza Place
20,Glendale,33.538686,-112.185994,27267,0,Convenience Store,Mexican Restaurant,Pizza Place,Fast Food Restaurant,Discount Store,Sandwich Place,Bank,Candy Store,American Restaurant,Breakfast Spot


In [14]:
# Visualizing the Cluster
map_phoenix = folium.Map(location=[locate_address.latitude, locate_address.longitude], 
                         zoom_start=9)

for _, row in cluster_1.iterrows():
    label = '{}, ${}'.format(row.Place, row.Median_Income)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker([row.Latitude, row.Longitude], radius=5, popup=label, 
                        color='#252191', fill=True, fill_color='#6ecedb', 
                        fill_opacity=0.95, parse_html=False).add_to(map_phoenix) 

map_phoenix

#### Cluster 2

In [15]:
cluster_2 = phoenix_merged.loc[phoenix_merged['Cluster Labels'] == 1]
print('Average Median Income for Cluster 2: $', round(cluster_2['Median_Income'].mean()), 2)
display(cluster_2)

Average Median Income for Cluster 2: $ 50066.0 2


Unnamed: 0,Place,Latitude,Longitude,Median_Income,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
3,Tolleson,33.45005,-112.259309,50066,1,Mexican Restaurant,Convenience Store,Intersection,Deli / Bodega,Burger Joint,Fried Chicken Joint,Seafood Restaurant,Factory,Sandwich Place,Park


In [16]:
# Visualizing the Cluster
map_phoenix = folium.Map(location=[locate_address.latitude, locate_address.longitude], 
                         zoom_start=9)

for _, row in cluster_2.iterrows():
    label = '{}, ${}'.format(row.Place, row.Median_Income)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker([row.Latitude, row.Longitude], radius=5, popup=label, 
                        color='#252191', fill=True, fill_color='#6ecedb', 
                        fill_opacity=0.95, parse_html=False).add_to(map_phoenix) 

map_phoenix

#### Cluster 3

In [17]:
cluster_3 = phoenix_merged.loc[phoenix_merged['Cluster Labels'] == 2]
print('Average Median Income for Cluster 3: $', round(cluster_3['Median_Income'].mean()), 2)
display(cluster_3)

Average Median Income for Cluster 3: $ 67360.0 2


Unnamed: 0,Place,Latitude,Longitude,Median_Income,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
8,Carefree,33.822261,-111.918203,100338,2,Coffee Shop,American Restaurant,Locksmith,Bar,Italian Restaurant,Café,Shopping Mall,Irish Pub,Steakhouse,Food Court
14,Cave Creek,33.833333,-111.950833,88938,2,Coffee Shop,Mexican Restaurant,BBQ Joint,Diner,Steakhouse,Bar,Furniture / Home Store,Convenience Store,Antique Shop,American Restaurant
19,Paradise Valley,33.532429,-111.950512,109185,2,Spa,American Restaurant,Hotel,Restaurant,Hotel Pool,Mountain,State / Provincial Park,Shipping Store,Business Service,Bar
21,Tempe,33.425506,-111.940012,30582,2,Coffee Shop,Mexican Restaurant,Breakfast Spot,American Restaurant,Sandwich Place,Park,Pizza Place,Thai Restaurant,Brewery,Bar
22,Scottsdale,33.494219,-111.926018,49111,2,Coffee Shop,American Restaurant,Mexican Restaurant,Bar,New American Restaurant,Hotel,Pizza Place,Italian Restaurant,Sushi Restaurant,Taco Place
23,Phoenix,33.448437,-112.074142,26008,2,Coffee Shop,American Restaurant,Hotel,Pizza Place,Art Gallery,Music Venue,Mexican Restaurant,Bar,Salon / Barbershop,Cocktail Bar


In [18]:
# Visualizing the Cluster
map_phoenix = folium.Map(location=[locate_address.latitude, locate_address.longitude], 
                         zoom_start=9)

for _, row in cluster_3.iterrows():
    label = '{}, ${}'.format(row.Place, row.Median_Income)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker([row.Latitude, row.Longitude], radius=5, popup=label, 
                        color='#252191', fill=True, fill_color='#6ecedb', 
                        fill_opacity=0.95, parse_html=False).add_to(map_phoenix) 

map_phoenix

#### Cluster 4

In [19]:
cluster_4 = phoenix_merged.loc[phoenix_merged['Cluster Labels'] == 3]
print('Average Median Income for Cluster 4: $', round(cluster_4['Median_Income'].mean()), 2)
display(cluster_4)

Average Median Income for Cluster 4: $ 68839.0 2


Unnamed: 0,Place,Latitude,Longitude,Median_Income,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
7,Buckeye,33.370275,-112.583867,68839,3,Mexican Restaurant,Pizza Place,Discount Store,Park,Fast Food Restaurant,Spa,Bank,Business Service,Gas Station,Sandwich Place


In [20]:
# Visualizing the Cluster
map_phoenix = folium.Map(location=[locate_address.latitude, locate_address.longitude], 
                         zoom_start=9)

for _, row in cluster_4.iterrows():
    label = '{}, ${}'.format(row.Place, row.Median_Income)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker([row.Latitude, row.Longitude], radius=5, popup=label, 
                        color='#252191', fill=True, fill_color='#6ecedb', 
                        fill_opacity=0.95, parse_html=False).add_to(map_phoenix) 

map_phoenix

#### Cluster 5

In [21]:
cluster_5 = phoenix_merged.loc[phoenix_merged['Cluster Labels'] == 4]
print('Average Median Income for Cluster 5: $', round(cluster_5['Median_Income'].mean()), 2)
display(cluster_5)

Average Median Income for Cluster 5: $ 68906.0 2


Unnamed: 0,Place,Latitude,Longitude,Median_Income,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
4,Surprise,33.629227,-112.368019,70302,4,Fast Food Restaurant,Sandwich Place,Baseball Field,Mobile Phone Shop,Sushi Restaurant,Italian Restaurant,Mexican Restaurant,Coffee Shop,Hardware Store,Big Box Store
9,Fountain Hills,33.611711,-111.717361,73608,4,Pizza Place,Coffee Shop,Italian Restaurant,Bank,Park,Pharmacy,Grocery Store,Mexican Restaurant,Shipping Store,Japanese Restaurant
12,Gilbert,33.352763,-111.789037,75365,4,Sandwich Place,Mexican Restaurant,Park,Breakfast Spot,Bank,Cosmetics Shop,Coffee Shop,Sushi Restaurant,Brewery,American Restaurant
13,Queen Creek,33.248386,-111.634158,73367,4,Pizza Place,Mexican Restaurant,Gym,Furniture / Home Store,Fast Food Restaurant,Home Service,Pharmacy,Coffee Shop,Park,Cosmetics Shop
17,Litchfield Park,33.49338,-112.358124,64383,4,Coffee Shop,Grocery Store,Mexican Restaurant,Italian Restaurant,Golf Course,Chinese Restaurant,Pizza Place,American Restaurant,Sandwich Place,Pharmacy
18,Chandler,33.30616,-111.84125,56414,4,Mexican Restaurant,Convenience Store,Discount Store,Bank,Taco Place,Fast Food Restaurant,Italian Restaurant,Sandwich Place,Pizza Place,Video Store


In [22]:
# Visualizing the Cluster
map_phoenix = folium.Map(location=[locate_address.latitude, locate_address.longitude], 
                         zoom_start=9)

for _, row in cluster_5.iterrows():
    label = '{}, ${}'.format(row.Place, row.Median_Income)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker([row.Latitude, row.Longitude], radius=5, popup=label, 
                        color='#252191', fill=True, fill_color='#6ecedb', 
                        fill_opacity=0.95, parse_html=False).add_to(map_phoenix) 

map_phoenix