Step 1: Import Packages

In [1]:
%matplotlib inline
!pip install --user pandas==1.0.3
import pandas as pd

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

!pip install --user numpy==1.16.4
import numpy as np # library to handle data in a vectorized manner

import requests
from bs4 import BeautifulSoup
import matplotlib.pyplot as plt

!python3 -m pip install folium
import folium # map rendering library

import json # library to handle JSON files
from pandas.io.json import json_normalize

from sklearn.cluster import KMeans

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors



Step 2: Scrape Zip Code, Name, Lat/Long, Population, Median Household Income Data

In [2]:
data_url='https://raw.githubusercontent.com/hmiles1225/Coursera_Capstone/master/houston_coreData.htm'
data_page= requests.get(data_url).text
data = BeautifulSoup(data_page,'xml')

#extract data
table = data.find('table')
zipCode = []
latitude = []
longitude = []
population = []
income = []
mainCount=0

# organize the table
for tr_cell in table.find_all('tr'):
    count = 0
    
    for td_cell in tr_cell.find_all('td'):
        if count == 0: 
            zipCode_var = td_cell.text
        if count == 1: 
            longitude_var = td_cell.text
        if count == 2: 
            latitude_var = td_cell.text
        if count == 3: 
            population_var = int(td_cell.text)
        if count == 4: 
            income_var = int(td_cell.text)
            
            zipCode.append(zipCode_var)
            latitude.append(latitude_var)
            longitude.append(longitude_var)
            population.append(population_var)   
            income.append(income_var)  
        
        mainCount+=1
        count +=1

In [3]:
#create core dataframe, sort the dataframe
houston_data={'ZIP Code':zipCode, 'Longitude':longitude, 'Latitude':latitude, 'Population':population, 'Median HH Income':income}
houston_df = pd.DataFrame.from_dict(houston_data)
houston_df=houston_df.sort_values(by=['ZIP Code'])
houston_df=houston_df.reset_index(drop=True)
print(houston_df)

   ZIP Code  Longitude    Latitude  Population  Median HH Income
0     77002  29.756845  -95.365652       13289             35588
1     77003  29.749563  -95.346265        9195             19252
2     77004  29.724893  -95.363752       30379             20840
3     77005  29.718435  -95.423555       23338            104035
4     77006  29.741003  -95.391271       18875             41746
5     77007   29.77153  -95.414883       22497             37785
6     77008  29.798249  -95.416933       28661             39926
7     77009  29.795344   -95.36759       42380             32995
8     77010   29.75431  -95.361109          76            200000
9     77011  29.742988  -95.307114       23052             24612
10    77012  29.717567  -95.275115       25174             23910
11    77013  29.791808  -95.228991       18378             31834
12    77014  29.981209  -95.463971       20706             40040
13    77015  29.765809  -95.175116       50569             40032
14    77016  29.863166  -

Step 3: Import names of the neighborhoods

In [4]:
data_url='https://raw.githubusercontent.com/hmiles1225/Coursera_Capstone/master/houstonNeighborhoods.htm'
data_page= requests.get(data_url).text
data = BeautifulSoup(data_page,'xml')

#extract data
table = data.find('table')
zipCode2 = []
neighborhood = []
county = []

# organize the table
for tr_cell in table.find_all('tr'):
    count = 0
    
    for td_cell in tr_cell.find_all('td'):
        if count == 0: 
            zipCode2_var = td_cell.text
        if count == 1: 
            neighborhood_var = td_cell.text
        if count == 2:
            county_var = td_cell.text
                
            zipCode2.append(zipCode2_var)
            neighborhood.append(neighborhood_var)
            county.append(county_var)
            mainCount+=1
        count +=1      

In [5]:
#create neighborhood dataframe, sort the dataframe
neighborhood_data={'ZIP Code':zipCode2, 'Neighborhood':neighborhood, 'County':county}
neighborhood_df = pd.DataFrame.from_dict(neighborhood_data)
#We only want neighborhoods in Harris County, Not the Greater Houston Area
neighborhood_df = neighborhood_df[neighborhood_df.County == 'HARRIS']
neighborhood_df=neighborhood_df.sort_values(by=['ZIP Code'])
neighborhood_df=neighborhood_df.reset_index(drop=True)
print(neighborhood_df)

    ZIP Code                                       Neighborhood  County
0      77002         Houston - Inner Loop - HOUSTON,CLUTCH CITY  HARRIS
1      77003                     Houston - Inner Loop - HOUSTON  HARRIS
2      77004                     Houston - Inner Loop - HOUSTON  HARRIS
3      77005  Houston - Inner Loop - HOUSTON,SOUTHSIDE PLACE...  HARRIS
4      77006                     Houston - Inner Loop - HOUSTON  HARRIS
5      77007                     Houston - Inner Loop - HOUSTON  HARRIS
6      77008  Houston - Inner Loop - HEIGHTS,HOUSTON,HOUSTON...  HARRIS
7      77009                     Houston - Inner Loop - HOUSTON  HARRIS
8      77010                     Houston - Inner Loop - HOUSTON  HARRIS
9      77011                     Houston - Inner Loop - HOUSTON  HARRIS
10     77012                     Houston - Inner Loop - HOUSTON  HARRIS
11     77013                      Houston - Northeast - HOUSTON  HARRIS
12     77014                      Houston - Northwest - HOUSTON 

Step 4: Merge Neighborhood Names into Core Data

In [6]:
houston_merged_df = pd.merge(houston_df, neighborhood_df, on='ZIP Code')
houston_merged_df

Unnamed: 0,ZIP Code,Longitude,Latitude,Population,Median HH Income,Neighborhood,County
0,77002,29.756845,-95.365652,13289,35588,"Houston - Inner Loop - HOUSTON,CLUTCH CITY",HARRIS
1,77003,29.749563,-95.346265,9195,19252,Houston - Inner Loop - HOUSTON,HARRIS
2,77004,29.724893,-95.363752,30379,20840,Houston - Inner Loop - HOUSTON,HARRIS
3,77005,29.718435,-95.423555,23338,104035,"Houston - Inner Loop - HOUSTON,SOUTHSIDE PLACE...",HARRIS
4,77006,29.741003,-95.391271,18875,41746,Houston - Inner Loop - HOUSTON,HARRIS
5,77007,29.77153,-95.414883,22497,37785,Houston - Inner Loop - HOUSTON,HARRIS
6,77008,29.798249,-95.416933,28661,39926,"Houston - Inner Loop - HEIGHTS,HOUSTON,HOUSTON...",HARRIS
7,77009,29.795344,-95.36759,42380,32995,Houston - Inner Loop - HOUSTON,HARRIS
8,77010,29.75431,-95.361109,76,200000,Houston - Inner Loop - HOUSTON,HARRIS
9,77011,29.742988,-95.307114,23052,24612,Houston - Inner Loop - HOUSTON,HARRIS


Step 5: Sort by Median HH Income and Population, Drop other Neighborhoods

In [7]:
#sort by median hh income
houston_merged_df=houston_merged_df.sort_values(by=['Median HH Income'], ascending=False)
houston_merged_df
#drop neighborhoods outside the top 30
houston_merged_df= houston_merged_df.drop(houston_merged_df.index[30:95])
houston_merged_df

Unnamed: 0,ZIP Code,Longitude,Latitude,Population,Median HH Income,Neighborhood,County
8,77010,29.75431,-95.361109,76,200000,Houston - Inner Loop - HOUSTON,HARRIS
90,77094,29.769285,-95.681292,7779,123244,Houston - Southwest - HOUSTON,HARRIS
44,77046,29.733084,-95.430659,471,105863,"Houston - Inner Loop - GREENWAY PLAZA,HOUSTON",HARRIS
55,77059,29.615219,-95.13496,16690,104844,Houston - Southeast - HOUSTON,HARRIS
3,77005,29.718435,-95.423555,23338,104035,"Houston - Inner Loop - HOUSTON,SOUTHSIDE PLACE...",HARRIS
22,77024,29.771991,-95.515453,32746,82620,"Houston - Southwest - BUNKER HILL VILLAGE,HEDW...",HARRIS
64,77068,30.00883,-95.487234,9505,77724,Houston - Northwest - HOUSTON,HARRIS
91,77095,29.916055,-95.663077,39275,76814,Houston - Northwest - HOUSTON,HARRIS
58,77062,29.575781,-95.134334,26978,75689,Houston - Southeast - HOUSTON,HARRIS
52,77056,29.749035,-95.469021,14031,71926,Houston - Southwest - HOUSTON,HARRIS


In [8]:
#sort by population
houston_merged_df=houston_merged_df.sort_values(by=['Population'], ascending=False)
#keep the top 20
houston_merged_df= houston_merged_df.drop(houston_merged_df.index[20:30])
houston_merged_df

Unnamed: 0,ZIP Code,Longitude,Latitude,Population,Median HH Income,Neighborhood,County
80,77084,29.82686,-95.65086,63557,53075,"Houston - Northwest - ADDICKS BARKER,HOUSTON",HARRIS
79,77083,29.693991,-95.64783,52061,52931,Houston - Southwest - HOUSTON,HARRIS
73,77077,29.749158,-95.620344,42416,52667,Houston - Southwest - HOUSTON,HARRIS
38,77040,29.876755,-95.532143,39520,50183,"Houston - Northwest - BAMMEL,HOUSTON,JERSEY VI...",HARRIS
91,77095,29.916055,-95.663077,39275,76814,Houston - Northwest - HOUSTON,HARRIS
85,77089,29.588469,-95.218012,36450,53123,Houston - Southeast - HOUSTON,HARRIS
60,77064,29.924216,-95.537411,35966,60945,Houston - Northwest - HOUSTON,HARRIS
53,77057,29.744068,-95.48921,35491,46563,Houston - Southwest - HOUSTON,HARRIS
92,77096,29.674133,-95.478354,33987,46140,Houston - Southwest - HOUSTON,HARRIS
22,77024,29.771991,-95.515453,32746,82620,"Houston - Southwest - BUNKER HILL VILLAGE,HEDW...",HARRIS


Step 5: Connect to FourSquare

In [9]:
CLIENT_ID = '14O12ED3ZS0ZQJ3G1DAOB0B4RAOHOH4FZPGDVZFC0UU1NX35' # your Foursquare ID
CLIENT_SECRET = 'V0K0HTYKF1SY1BLBTPDGHJLBCIFSP1XJSFUHNNRBWE42ZHVD' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

#Lat/Long of Houston is 29.7604° N, 95.3698° W
latitude = 29.7604
longitude= -95.3698
LIMIT = 100 # limit of number of venues returned by Foursquare API
radius = 500 # define radius
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    latitude, 
    longitude, 
    radius, 
    LIMIT)
url # display URL

'https://api.foursquare.com/v2/venues/explore?&client_id=14O12ED3ZS0ZQJ3G1DAOB0B4RAOHOH4FZPGDVZFC0UU1NX35&client_secret=V0K0HTYKF1SY1BLBTPDGHJLBCIFSP1XJSFUHNNRBWE42ZHVD&v=20180605&ll=29.7604,-95.3698&radius=500&limit=100'

In [10]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5ed9507d47e0d6001b29c339'},
 'response': {'suggestedFilters': {'header': 'Tap to show:',
   'filters': [{'name': '$-$$$$', 'key': 'price'},
    {'name': 'Open now', 'key': 'openNow'}]},
  'headerLocation': 'Downtown Houston',
  'headerFullLocation': 'Downtown Houston, Houston',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 66,
  'suggestedBounds': {'ne': {'lat': 29.764900004500007,
    'lng': -95.36462599088988},
   'sw': {'lat': 29.755899995499995, 'lng': -95.37497400911012}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4ae4e065f964a5200d9f21e3',
       'name': 'Hobby Center for the Performing Arts',
       'location': {'address': '800 Bagby St',
        'crossStreet': 'btwn Rusk St & Walker St',
        'lat':

Step 6: Get Venue Categories

In [11]:
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

Step 7: Clean the JSON and convert into a Pandas Dataframe

In [12]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [13]:
houston_venues = getNearbyVenues(names=houston_merged_df['Neighborhood'], latitudes=houston_merged_df['Longitude'], longitudes=houston_merged_df['Latitude'])

Houston - Northwest - ADDICKS BARKER,HOUSTON
Houston - Southwest - HOUSTON
Houston - Southwest - HOUSTON
Houston - Northwest - BAMMEL,HOUSTON,JERSEY VILLAGE,KOHRVILLE,SATSUMA,JERSEY VLG
Houston - Northwest - HOUSTON
Houston - Southeast - HOUSTON
Houston - Northwest - HOUSTON
Houston - Southwest - HOUSTON
Houston - Southwest - HOUSTON
Houston - Southwest - BUNKER HILL VILLAGE,HEDWIG VILLAGE,HOUSTON,HUNTERS CREEK VILLAGE,MEMORIAL PARK,PINEY POINT,SPRING VALLEY
Houston - Northwest - HOUSTON
Houston - Southwest - ADDICKS,HOUSTON
Houston - Northwest - HOUSTON
Houston - Northwest - HOUSTON,JERSEY VILLAGE,JERSEY VLG
Houston - Southeast - HOUSTON
Houston - Northwest - HOUSTON,JERSEY VILLAGE,JERSEY VLG
Houston - Inner Loop - ASTRODOME,ASTROWORLD,HOUSTON
Houston - Inner Loop - HOUSTON,SOUTHSIDE PLACE,WEST UNIVERSITY PLACE,W UNIV PL
Houston - Southeast - HOUSTON
Houston - Inner Loop - HOUSTON


In [14]:
print(houston_venues.shape)
houston_venues.head()

(169, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Houston - Northwest - ADDICKS BARKER,HOUSTON",29.82686,-95.65086,The Prop,29.827831,-95.6521,Brewery
1,Houston - Southwest - HOUSTON,29.693991,-95.64783,Whataburger,29.694935,-95.644489,Burger Joint
2,Houston - Southwest - HOUSTON,29.693991,-95.64783,Subway,29.694618,-95.644408,Sandwich Place
3,Houston - Southwest - HOUSTON,29.693991,-95.64783,Enterprise Rent-A-Car,29.69441,-95.644462,Rental Car Location
4,Houston - Southwest - HOUSTON,29.693991,-95.64783,Popeyes Louisiana Kitchen,29.692129,-95.644251,Fried Chicken Joint


In [15]:
houston_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Houston - Inner Loop - ASTRODOME,ASTROWORLD,HOUSTON",5,5,5,5,5,5
Houston - Inner Loop - HOUSTON,50,50,50,50,50,50
"Houston - Inner Loop - HOUSTON,SOUTHSIDE PLACE,WEST UNIVERSITY PLACE,W UNIV PL",5,5,5,5,5,5
"Houston - Northwest - ADDICKS BARKER,HOUSTON",1,1,1,1,1,1
"Houston - Northwest - BAMMEL,HOUSTON,JERSEY VILLAGE,KOHRVILLE,SATSUMA,JERSEY VLG",1,1,1,1,1,1
Houston - Northwest - HOUSTON,16,16,16,16,16,16
"Houston - Northwest - HOUSTON,JERSEY VILLAGE,JERSEY VLG",34,34,34,34,34,34
Houston - Southeast - HOUSTON,5,5,5,5,5,5
"Houston - Southwest - ADDICKS,HOUSTON",13,13,13,13,13,13
"Houston - Southwest - BUNKER HILL VILLAGE,HEDWIG VILLAGE,HOUSTON,HUNTERS CREEK VILLAGE,MEMORIAL PARK,PINEY POINT,SPRING VALLEY",2,2,2,2,2,2


Step 7: Analyze Each Neighborhood

In [16]:
# one hot encoding
houston_onehot = pd.get_dummies(houston_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
houston_onehot['Neighborhood'] = houston_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [houston_onehot.columns[-1]] + list(houston_onehot.columns[:-1])
houston_onehot = houston_onehot[fixed_columns]

houston_onehot.head(10)

Unnamed: 0,Neighborhood,Accessories Store,American Restaurant,Art Gallery,BBQ Joint,Bakery,Bank,Basketball Court,Beach,Bookstore,Brewery,Burger Joint,Business Service,Café,Cajun / Creole Restaurant,Clothing Store,Coffee Shop,Construction & Landscaping,Convenience Store,Cosmetics Shop,Cupcake Shop,Cycle Studio,Department Store,Dessert Shop,Eastern European Restaurant,Farmers Market,Fast Food Restaurant,Food & Drink Shop,Food Truck,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop,Fruit & Vegetable Store,Furniture / Home Store,Gift Shop,Grocery Store,Gun Range,Gym,Gym / Fitness Center,Gymnastics Gym,Hawaiian Restaurant,Health Food Store,Hookah Bar,Ice Cream Shop,Indian Chinese Restaurant,Indian Restaurant,Indie Movie Theater,Intersection,Italian Restaurant,Japanese Restaurant,Juice Bar,Kids Store,Kitchen Supply Store,Latin American Restaurant,Liquor Store,Martial Arts Dojo,Massage Studio,Mediterranean Restaurant,Men's Store,Mexican Restaurant,Mobile Phone Shop,New American Restaurant,Non-Profit,Optical Shop,Outdoors & Recreation,Paper / Office Supplies Store,Park,Pet Store,Pharmacy,Pizza Place,Playground,Pub,Recreation Center,Rental Car Location,Restaurant,Salon / Barbershop,Sandwich Place,Shoe Store,Shop & Service,Shopping Mall,Soccer Field,South American Restaurant,Spa,Speakeasy,Steakhouse,Supermarket,Supplement Shop,Sushi Restaurant,Trail,Video Game Store,Video Store,Vietnamese Restaurant
0,"Houston - Northwest - ADDICKS BARKER,HOUSTON",0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Houston - Southwest - HOUSTON,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Houston - Southwest - HOUSTON,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,Houston - Southwest - HOUSTON,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,Houston - Southwest - HOUSTON,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
5,Houston - Southwest - HOUSTON,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
6,Houston - Southwest - HOUSTON,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
7,Houston - Southwest - HOUSTON,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
8,Houston - Southwest - HOUSTON,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
9,Houston - Southwest - HOUSTON,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [17]:
houston_onehot.shape

(169, 92)

Step 8: Group rows by neighborhood and take the mean of the frequency of occurrence of each category

In [18]:
houston_grouped = houston_onehot.groupby('Neighborhood').mean().reset_index()
houston_grouped

Unnamed: 0,Neighborhood,Accessories Store,American Restaurant,Art Gallery,BBQ Joint,Bakery,Bank,Basketball Court,Beach,Bookstore,Brewery,Burger Joint,Business Service,Café,Cajun / Creole Restaurant,Clothing Store,Coffee Shop,Construction & Landscaping,Convenience Store,Cosmetics Shop,Cupcake Shop,Cycle Studio,Department Store,Dessert Shop,Eastern European Restaurant,Farmers Market,Fast Food Restaurant,Food & Drink Shop,Food Truck,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop,Fruit & Vegetable Store,Furniture / Home Store,Gift Shop,Grocery Store,Gun Range,Gym,Gym / Fitness Center,Gymnastics Gym,Hawaiian Restaurant,Health Food Store,Hookah Bar,Ice Cream Shop,Indian Chinese Restaurant,Indian Restaurant,Indie Movie Theater,Intersection,Italian Restaurant,Japanese Restaurant,Juice Bar,Kids Store,Kitchen Supply Store,Latin American Restaurant,Liquor Store,Martial Arts Dojo,Massage Studio,Mediterranean Restaurant,Men's Store,Mexican Restaurant,Mobile Phone Shop,New American Restaurant,Non-Profit,Optical Shop,Outdoors & Recreation,Paper / Office Supplies Store,Park,Pet Store,Pharmacy,Pizza Place,Playground,Pub,Recreation Center,Rental Car Location,Restaurant,Salon / Barbershop,Sandwich Place,Shoe Store,Shop & Service,Shopping Mall,Soccer Field,South American Restaurant,Spa,Speakeasy,Steakhouse,Supermarket,Supplement Shop,Sushi Restaurant,Trail,Video Game Store,Video Store,Vietnamese Restaurant
0,"Houston - Inner Loop - ASTRODOME,ASTROWORLD,HO...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Houston - Inner Loop - HOUSTON,0.02,0.02,0.02,0.0,0.0,0.02,0.0,0.0,0.02,0.0,0.0,0.0,0.04,0.0,0.04,0.06,0.0,0.0,0.06,0.0,0.02,0.0,0.02,0.0,0.0,0.02,0.0,0.0,0.04,0.0,0.02,0.0,0.02,0.02,0.0,0.0,0.04,0.02,0.0,0.0,0.02,0.0,0.02,0.02,0.0,0.02,0.0,0.02,0.0,0.0,0.04,0.02,0.0,0.0,0.0,0.0,0.0,0.06,0.0,0.02,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.02,0.0,0.02,0.0,0.02,0.02,0.02,0.02,0.02,0.0,0.0,0.0,0.0,0.02,0.02
2,"Houston - Inner Loop - HOUSTON,SOUTHSIDE PLACE...",0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,"Houston - Northwest - ADDICKS BARKER,HOUSTON",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,"Houston - Northwest - BAMMEL,HOUSTON,JERSEY VI...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,Houston - Northwest - HOUSTON,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1875,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0625,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0625,0.0,0.0,0.0,0.0,0.0625,0.125,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0
6,"Houston - Northwest - HOUSTON,JERSEY VILLAGE,J...",0.0,0.0,0.0,0.0,0.029412,0.058824,0.0,0.0,0.0,0.0,0.029412,0.0,0.0,0.0,0.0,0.0,0.029412,0.0,0.029412,0.029412,0.0,0.029412,0.0,0.0,0.0,0.117647,0.0,0.0,0.0,0.029412,0.029412,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.029412,0.0,0.0,0.0,0.0,0.0,0.0,0.029412,0.0,0.0,0.0,0.0,0.0,0.0,0.029412,0.0,0.0,0.029412,0.0,0.0,0.058824,0.0,0.0,0.0,0.0,0.0,0.0,0.029412,0.0,0.029412,0.0,0.0,0.0,0.0,0.058824,0.0,0.058824,0.0,0.0,0.0,0.0,0.0,0.088235,0.0,0.0,0.029412,0.029412,0.029412,0.0,0.029412,0.029412,0.0
7,Houston - Southeast - HOUSTON,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0
8,"Houston - Southwest - ADDICKS,HOUSTON",0.0,0.0,0.0,0.0,0.076923,0.076923,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.076923,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.076923,0.0,0.076923,0.0,0.076923,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.076923,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.076923,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.076923,0.0,0.0,0.076923,0.0,0.0,0.0,0.0,0.0,0.076923,0.0,0.0,0.0,0.0,0.076923,0.0,0.0,0.0,0.0,0.076923,0.0,0.0,0.0,0.0,0.0,0.0
9,"Houston - Southwest - BUNKER HILL VILLAGE,HEDW...",0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [19]:
houston_grouped.shape

(11, 92)

Step 9: Print each neighborhood along with the top 5 most common venues

In [20]:
num_top_venues = 5

for hood in houston_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = houston_grouped[houston_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Houston - Inner Loop - ASTRODOME,ASTROWORLD,HOUSTON----
                  venue  freq
0                   Gym   0.2
1                  Park   0.2
2     Recreation Center   0.2
3               Brewery   0.2
4  Gym / Fitness Center   0.2


----Houston - Inner Loop - HOUSTON----
               venue  freq
0        Coffee Shop  0.06
1     Cosmetics Shop  0.06
2        Men's Store  0.06
3  French Restaurant  0.04
4                Gym  0.04


----Houston - Inner Loop - HOUSTON,SOUTHSIDE PLACE,WEST UNIVERSITY PLACE,W UNIV PL----
                         venue  freq
0  Eastern European Restaurant   0.2
1                       Bakery   0.2
2                        Beach   0.2
3        Outdoors & Recreation   0.2
4                 Optical Shop   0.2


----Houston - Northwest - ADDICKS BARKER,HOUSTON----
               venue  freq
0            Brewery   1.0
1  Accessories Store   0.0
2        Men's Store   0.0
3          Pet Store   0.0
4               Park   0.0


----Houston - Northwest - B

In [21]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [22]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = houston_grouped['Neighborhood']

for ind in np.arange(houston_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(houston_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Houston - Inner Loop - ASTRODOME,ASTROWORLD,HO...",Park,Recreation Center,Gym / Fitness Center,Gym,Brewery,Frozen Yogurt Shop,Eastern European Restaurant,Farmers Market,Fast Food Restaurant,Food & Drink Shop
1,Houston - Inner Loop - HOUSTON,Men's Store,Coffee Shop,Cosmetics Shop,Gym,Kids Store,Café,Clothing Store,French Restaurant,Vietnamese Restaurant,Kitchen Supply Store
2,"Houston - Inner Loop - HOUSTON,SOUTHSIDE PLACE...",Outdoors & Recreation,Optical Shop,Bakery,Beach,Eastern European Restaurant,Vietnamese Restaurant,Fruit & Vegetable Store,Fast Food Restaurant,Food & Drink Shop,Food Truck
3,"Houston - Northwest - ADDICKS BARKER,HOUSTON",Brewery,Vietnamese Restaurant,Gift Shop,Farmers Market,Fast Food Restaurant,Food & Drink Shop,Food Truck,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop
4,"Houston - Northwest - BAMMEL,HOUSTON,JERSEY VI...",Optical Shop,Vietnamese Restaurant,Furniture / Home Store,Farmers Market,Fast Food Restaurant,Food & Drink Shop,Food Truck,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop


Step 10: Cluster Neighborhoods

In [23]:
# set number of clusters
kclusters = 5

houston_grouped_clustering = houston_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(houston_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([1, 1, 1, 2, 0, 1, 1, 3, 1, 4], dtype=int32)

In [24]:
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)
houston_merged = houston_merged_df.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')
houston_merged

Unnamed: 0,ZIP Code,Longitude,Latitude,Population,Median HH Income,Neighborhood,County,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
80,77084,29.82686,-95.65086,63557,53075,"Houston - Northwest - ADDICKS BARKER,HOUSTON",HARRIS,2,Brewery,Vietnamese Restaurant,Gift Shop,Farmers Market,Fast Food Restaurant,Food & Drink Shop,Food Truck,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop
79,77083,29.693991,-95.64783,52061,52931,Houston - Southwest - HOUSTON,HARRIS,1,Burger Joint,Gym / Fitness Center,Sandwich Place,Liquor Store,Business Service,Dessert Shop,BBQ Joint,Video Store,Grocery Store,Non-Profit
73,77077,29.749158,-95.620344,42416,52667,Houston - Southwest - HOUSTON,HARRIS,1,Burger Joint,Gym / Fitness Center,Sandwich Place,Liquor Store,Business Service,Dessert Shop,BBQ Joint,Video Store,Grocery Store,Non-Profit
38,77040,29.876755,-95.532143,39520,50183,"Houston - Northwest - BAMMEL,HOUSTON,JERSEY VI...",HARRIS,0,Optical Shop,Vietnamese Restaurant,Furniture / Home Store,Farmers Market,Fast Food Restaurant,Food & Drink Shop,Food Truck,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop
91,77095,29.916055,-95.663077,39275,76814,Houston - Northwest - HOUSTON,HARRIS,1,Fast Food Restaurant,Pharmacy,Video Store,Park,Paper / Office Supplies Store,Gymnastics Gym,Gym,Gun Range,Mobile Phone Shop,Mexican Restaurant
85,77089,29.588469,-95.218012,36450,53123,Houston - Southeast - HOUSTON,HARRIS,3,Shop & Service,Business Service,Trail,Playground,Cajun / Creole Restaurant,Farmers Market,Fast Food Restaurant,Food & Drink Shop,Food Truck,French Restaurant
60,77064,29.924216,-95.537411,35966,60945,Houston - Northwest - HOUSTON,HARRIS,1,Fast Food Restaurant,Pharmacy,Video Store,Park,Paper / Office Supplies Store,Gymnastics Gym,Gym,Gun Range,Mobile Phone Shop,Mexican Restaurant
53,77057,29.744068,-95.48921,35491,46563,Houston - Southwest - HOUSTON,HARRIS,1,Burger Joint,Gym / Fitness Center,Sandwich Place,Liquor Store,Business Service,Dessert Shop,BBQ Joint,Video Store,Grocery Store,Non-Profit
92,77096,29.674133,-95.478354,33987,46140,Houston - Southwest - HOUSTON,HARRIS,1,Burger Joint,Gym / Fitness Center,Sandwich Place,Liquor Store,Business Service,Dessert Shop,BBQ Joint,Video Store,Grocery Store,Non-Profit
22,77024,29.771991,-95.515453,32746,82620,"Houston - Southwest - BUNKER HILL VILLAGE,HEDW...",HARRIS,4,Basketball Court,Clothing Store,Vietnamese Restaurant,Gift Shop,Fast Food Restaurant,Food & Drink Shop,Food Truck,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop


In [25]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(houston_merged['Longitude'], houston_merged['Latitude'], houston_merged['Neighborhood'], houston_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

Step 11: Examine Clusters

Cluster 1

In [39]:
houston_merged.loc[houston_merged['Cluster Labels'] == 0, houston_merged.columns[[1] + list(range(5, houston_merged.shape[1]))]]

Unnamed: 0,Longitude,Neighborhood,County,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
38,29.876755,"Houston - Northwest - BAMMEL,HOUSTON,JERSEY VI...",HARRIS,0,Optical Shop,Vietnamese Restaurant,Furniture / Home Store,Farmers Market,Fast Food Restaurant,Food & Drink Shop,Food Truck,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop


In [107]:
ZIPs=[]
totalPop=0
totalInc=0     
count=0    
for label, row in houston_merged.iterrows():
    if row['Cluster Labels']==0:
        totalPop+=row['Population']
        totalInc+=row['Median HH Income']
        ZIPs.append(row['ZIP Code'])
        count+=1
        
avgPop = totalPop/count
avgInc = totalInc/count
avgInc=format(avgInc, '.2f')

print('Cluster 1 includes the following ZIP Codes:', ZIPs)
print('The Median Annual Household Income for this Cluster is $' + avgInc)
print('The Average Population for this Cluster is', avgPop)

Cluster 1 includes the following ZIP Codes: ['77040']
The Median Annual Household Income for this Cluster is $50183.00
The Average Population for this Cluster is 39520.0


Cluster 2

In [27]:
houston_merged.loc[houston_merged['Cluster Labels'] == 1, houston_merged.columns[[1] + list(range(5, houston_merged.shape[1]))]]

Unnamed: 0,Longitude,Neighborhood,County,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
79,29.693991,Houston - Southwest - HOUSTON,HARRIS,1,Burger Joint,Gym / Fitness Center,Sandwich Place,Liquor Store,Business Service,Dessert Shop,BBQ Joint,Video Store,Grocery Store,Non-Profit
73,29.749158,Houston - Southwest - HOUSTON,HARRIS,1,Burger Joint,Gym / Fitness Center,Sandwich Place,Liquor Store,Business Service,Dessert Shop,BBQ Joint,Video Store,Grocery Store,Non-Profit
91,29.916055,Houston - Northwest - HOUSTON,HARRIS,1,Fast Food Restaurant,Pharmacy,Video Store,Park,Paper / Office Supplies Store,Gymnastics Gym,Gym,Gun Range,Mobile Phone Shop,Mexican Restaurant
60,29.924216,Houston - Northwest - HOUSTON,HARRIS,1,Fast Food Restaurant,Pharmacy,Video Store,Park,Paper / Office Supplies Store,Gymnastics Gym,Gym,Gun Range,Mobile Phone Shop,Mexican Restaurant
53,29.744068,Houston - Southwest - HOUSTON,HARRIS,1,Burger Joint,Gym / Fitness Center,Sandwich Place,Liquor Store,Business Service,Dessert Shop,BBQ Joint,Video Store,Grocery Store,Non-Profit
92,29.674133,Houston - Southwest - HOUSTON,HARRIS,1,Burger Joint,Gym / Fitness Center,Sandwich Place,Liquor Store,Business Service,Dessert Shop,BBQ Joint,Video Store,Grocery Store,Non-Profit
66,29.97967,Houston - Northwest - HOUSTON,HARRIS,1,Fast Food Restaurant,Pharmacy,Video Store,Park,Paper / Office Supplies Store,Gymnastics Gym,Gym,Gun Range,Mobile Phone Shop,Mexican Restaurant
75,29.774523,"Houston - Southwest - ADDICKS,HOUSTON",HARRIS,1,Salon / Barbershop,Fast Food Restaurant,Park,Hookah Bar,Bakery,Supermarket,Bank,Fried Chicken Joint,Pizza Place,Juice Bar
62,29.958498,Houston - Northwest - HOUSTON,HARRIS,1,Fast Food Restaurant,Pharmacy,Video Store,Park,Paper / Office Supplies Store,Gymnastics Gym,Gym,Gun Range,Mobile Phone Shop,Mexican Restaurant
61,29.926556,"Houston - Northwest - HOUSTON,JERSEY VILLAGE,J...",HARRIS,1,Fast Food Restaurant,Spa,Restaurant,Mobile Phone Shop,Bank,Sandwich Place,Burger Joint,Department Store,Liquor Store,Cupcake Shop


In [108]:
ZIPs=[]
totalPop=0
totalInc=0     
count=0    
for label, row in houston_merged.iterrows():
    if row['Cluster Labels']==1:
        totalPop+=row['Population']
        totalInc+=row['Median HH Income']
        ZIPs.append(row['ZIP Code'])
        count+=1
        
avgPop = totalPop/count
avgInc = totalInc/count
avgInc=format(avgInc, '.2f')

print('Cluster 2 includes the following ZIP Codes:', ZIPs)
print('The Median Annual Household Income for this Cluster is $' + avgInc)
print('The Average Population for this Cluster is', avgPop)

Cluster 2 includes the following ZIP Codes: ['77083', '77077', '77095', '77064', '77057', '77096', '77070', '77079', '77066', '77065', '77041', '77025', '77005', '77019']
The Median Annual Household Income for this Cluster is $60902.29
The Average Population for this Cluster is 31786.428571428572


Cluster 3

In [28]:
houston_merged.loc[houston_merged['Cluster Labels'] == 2, houston_merged.columns[[1] + list(range(5, houston_merged.shape[1]))]]

Unnamed: 0,Longitude,Neighborhood,County,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
80,29.82686,"Houston - Northwest - ADDICKS BARKER,HOUSTON",HARRIS,2,Brewery,Vietnamese Restaurant,Gift Shop,Farmers Market,Fast Food Restaurant,Food & Drink Shop,Food Truck,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop


In [109]:
ZIPs=[]
totalPop=0
totalInc=0     
count=0    
for label, row in houston_merged.iterrows():
    if row['Cluster Labels']==2:
        totalPop+=row['Population']
        totalInc+=row['Median HH Income']
        ZIPs.append(row['ZIP Code'])
        count+=1
        
avgPop = totalPop/count
avgInc = totalInc/count
avgInc=format(avgInc, '.2f')

print('Cluster 3 includes the following ZIP Codes:', ZIPs)
print('The Median Annual Household Income for this Cluster is $' + avgInc)
print('The Average Population for this Cluster is', avgPop)

Cluster 3 includes the following ZIP Codes: ['77084']
The Median Annual Household Income for this Cluster is $53075.00
The Average Population for this Cluster is 63557.0


Cluster 4

In [29]:
houston_merged.loc[houston_merged['Cluster Labels'] == 3, houston_merged.columns[[1] + list(range(5, houston_merged.shape[1]))]]

Unnamed: 0,Longitude,Neighborhood,County,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
85,29.588469,Houston - Southeast - HOUSTON,HARRIS,3,Shop & Service,Business Service,Trail,Playground,Cajun / Creole Restaurant,Farmers Market,Fast Food Restaurant,Food & Drink Shop,Food Truck,French Restaurant
58,29.575781,Houston - Southeast - HOUSTON,HARRIS,3,Shop & Service,Business Service,Trail,Playground,Cajun / Creole Restaurant,Farmers Market,Fast Food Restaurant,Food & Drink Shop,Food Truck,French Restaurant
55,29.615219,Houston - Southeast - HOUSTON,HARRIS,3,Shop & Service,Business Service,Trail,Playground,Cajun / Creole Restaurant,Farmers Market,Fast Food Restaurant,Food & Drink Shop,Food Truck,French Restaurant


In [110]:
ZIPs=[]
totalPop=0
totalInc=0     
count=0    
for label, row in houston_merged.iterrows():
    if row['Cluster Labels']==3:
        totalPop+=row['Population']
        totalInc+=row['Median HH Income']
        ZIPs.append(row['ZIP Code'])
        count+=1
        
avgPop = totalPop/count
avgInc = totalInc/count
avgInc=format(avgInc, '.2f')

print('Cluster 4 includes the following ZIP Codes:', ZIPs)
print('The Median Annual Household Income for this Cluster is $' + avgInc)
print('The Average Population for this Cluster is', avgPop)

Cluster 4 includes the following ZIP Codes: ['77089', '77062', '77059']
The Median Annual Household Income for this Cluster is $77885.33
The Average Population for this Cluster is 26706.0


Cluster 5

In [30]:
houston_merged.loc[houston_merged['Cluster Labels'] == 4, houston_merged.columns[[1] + list(range(5, houston_merged.shape[1]))]]

Unnamed: 0,Longitude,Neighborhood,County,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
22,29.771991,"Houston - Southwest - BUNKER HILL VILLAGE,HEDW...",HARRIS,4,Basketball Court,Clothing Store,Vietnamese Restaurant,Gift Shop,Fast Food Restaurant,Food & Drink Shop,Food Truck,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop


In [111]:
ZIPs=[]
totalPop=0
totalInc=0     
count=0    
for label, row in houston_merged.iterrows():
    if row['Cluster Labels']==4:
        totalPop+=row['Population']
        totalInc+=row['Median HH Income']
        ZIPs.append(row['ZIP Code'])
        count+=1
        
avgPop = totalPop/count
avgInc = totalInc/count
avgInc=format(avgInc, '.2f')

print('Cluster 5 includes the following ZIP Codes:', ZIPs)
print('The Median Annual Household Income for this Cluster is $' + avgInc)
print('The Average Population for this Cluster is', avgPop)

Cluster 5 includes the following ZIP Codes: ['77024']
The Median Annual Household Income for this Cluster is $82620.00
The Average Population for this Cluster is 32746.0
