In [1]:
import requests
import pandas as pd
import json # library to handle JSON files
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

In [2]:
# Read json data into pandas dataframe
# Houston area zip code data courtesy of City of Houston GIS website https://cohgis-mycity.opendata.arcgis.com/datasets/zip-codes/data

!wget -q -O 'Zip Codes.geojson' https://opendata.arcgis.com/datasets/7237db114eeb416cb481f4450d8a0fa6_7.geojson
print('Data downloaded!')

Data downloaded!


In [3]:
with open('Zip Codes.geojson') as json_data:
    hou_gal_data = json.load(json_data)

In [4]:
# check of imported data

hou_gal_data

{'features': [{'geometry': {'coordinates': [[[-95.00407694575257,
       30.58014636874117],
      [-95.00457493912025, 30.579549377243954],
      [-95.02023596325114, 30.563996366423346],
      [-95.0221289443768, 30.559186365251403],
      [-95.0293999610586, 30.546740376243086],
      [-95.03162896463884, 30.541534378602503],
      [-95.05712997044603, 30.554325372567426],
      [-95.0608509655312, 30.577301382626104],
      [-95.0663719619234, 30.58092138271742],
      [-95.066692964328, 30.580865379899524],
      [-95.06686497239683, 30.5809513710502],
      [-95.06686895568251, 30.581154374598874],
      [-95.06720396564194, 30.58148436491588],
      [-95.06883197289164, 30.582643380180382],
      [-95.06833696394236, 30.583083380309326],
      [-95.06784797438429, 30.583023365107938],
      [-95.0672329580046, 30.583180369920854],
      [-95.07236496942544, 30.594580366252224],
      [-95.10960896790911, 30.570729363990534],
      [-95.08016395899833, 30.55229537312473],
      [

#### Relevant data is in the features key, which is basically a list of the Houston-Galveston area zip codes.
Let’s define a new variable for this data.

In [5]:
zip_data = hou_gal_data['features']

#### Take a look at the first item in this list.

In [6]:
zip_data[0]

{'geometry': {'coordinates': [[[-95.00407694575257, 30.58014636874117],
    [-95.00457493912025, 30.579549377243954],
    [-95.02023596325114, 30.563996366423346],
    [-95.0221289443768, 30.559186365251403],
    [-95.0293999610586, 30.546740376243086],
    [-95.03162896463884, 30.541534378602503],
    [-95.05712997044603, 30.554325372567426],
    [-95.0608509655312, 30.577301382626104],
    [-95.0663719619234, 30.58092138271742],
    [-95.066692964328, 30.580865379899524],
    [-95.06686497239683, 30.5809513710502],
    [-95.06686895568251, 30.581154374598874],
    [-95.06720396564194, 30.58148436491588],
    [-95.06883197289164, 30.582643380180382],
    [-95.06833696394236, 30.583083380309326],
    [-95.06784797438429, 30.583023365107938],
    [-95.0672329580046, 30.583180369920854],
    [-95.07236496942544, 30.594580366252224],
    [-95.10960896790911, 30.570729363990534],
    [-95.08016395899833, 30.55229537312473],
    [-95.06515296396417, 30.52281235434486],
    [-95.103039961486

#### Transform the data into a pandas dataframe
The next task is transforming this data of nested Python dictionaries into a pandas dataframe.  Start by creating an empty dataframe.

In [7]:
# define the dataframe columns
column_names = ['Zip_Code']

# instantiate the dataframe
zips = pd.DataFrame(columns=column_names)

#### Then loop through the data and fill the dataframe one row at a time.

In [8]:
for data in zip_data:
    zipcode = data['properties']['ZIP_CODE']
    
    zips = zips.append({'Zip_Code': zipcode}, ignore_index=True)

zips.head(3)

Unnamed: 0,Zip_Code
0,77371
1,77331
2,77358


In [9]:
# check the dataframe size vs source datafile of 213 qty zip codes.

zips.shape

(213, 1)

In [11]:
# Install geocoder module.  Conda install-line needs to be run 1st time of the day for the notebook.
!conda install -c conda-forge geocoder
import geocoder

Fetching package metadata .............
Solving package specifications: .

Package plan for installation in environment /opt/conda/envs/DSX-Python35:

The following NEW packages will be INSTALLED:

    geocoder:   1.38.1-py_0  conda-forge
    orderedset: 2.0-py35_0   conda-forge
    ratelim:    0.1.6-py35_0 conda-forge

orderedset-2.0 100% |################################| Time: 0:00:00  54.11 MB/s
ratelim-0.1.6- 100% |################################| Time: 0:00:00   8.89 MB/s
geocoder-1.38. 100% |################################| Time: 0:00:00  28.33 MB/s


In [12]:
# Add column to postal_df dataframe with column elements equal to 'None'

zips['lat_lng'] = None
zips.head(3)

Unnamed: 0,Zip_Code,lat_lng
0,77371,
1,77331,
2,77358,


In [13]:
# iter through the Pandas dataframe pulling geocode latitude-longitude per zip code.
# Had to run it a second time once to complete the dataframe.  The recommended while loop helped a lot in this case.

for i in range(0,len(zips)):
    while (zips.lat_lng[i] is None):
        zips.lat_lng[i] = geocoder.arcgis('{}'.format(zips.Zip_Code[i])).latlng

In [14]:
# Double-check the end of the dataframe is geocoded.

zips.tail()

Unnamed: 0,Zip_Code,lat_lng
208,77048,"[29.634120000000053, -95.34548999999998]"
209,77053,"[29.593410000000063, -95.47196999999994]"
210,77515,"[29.16663470000003, -95.43169929999999]"
211,77377,"[30.026500300000066, -95.62929969999999]"
212,77433,"[29.886905100000035, -95.70235429999997]"


In [15]:
# Split the lat_lon_coords to 2 new columns in the same dataframe

zips[['Latitude', 'Longitude']] = pd.DataFrame(zips.lat_lng.values.tolist(), index = zips.index)
zips.head()

Unnamed: 0,Zip_Code,lat_lng,Latitude,Longitude
0,77371,"[30.49649000000005, -94.99831999999998]",30.49649,-94.99832
1,77331,"[30.65462000000008, -95.12403989999996]",30.65462,-95.12404
2,77358,"[30.539850000000058, -95.47808999999995]",30.53985,-95.47809
3,77378,"[30.425131400000055, -95.47994559999995]",30.425131,-95.479946
4,77868,"[30.39310420000004, -96.08774149999994]",30.393104,-96.087741


In [16]:
# Drop the lat_lng_coords column for final dataframe

zips.drop('lat_lng', axis = 1, inplace = True)
zips.head(3)

Unnamed: 0,Zip_Code,Latitude,Longitude
0,77371,30.49649,-94.99832
1,77331,30.65462,-95.12404
2,77358,30.53985,-95.47809


In [40]:
#image_name_data['id'] = image_name_data['id'].astype(int).astype('str')    surveys_df['sex'].dtype
# check datatype for Zip_Code column

zips['Zip_Code'].dtype

dtype('O')

In [41]:
zips['Zip_Code'] = zips['Zip_Code'].astype('str')

In [17]:
import numpy as np # library to handle data in a vectorized manner
#import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
import json # library to handle JSON files
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe
# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors
# import k-means from clustering stage
from sklearn.cluster import KMeans
!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed import folium # map rendering library
import folium # map rendering library
print('Libraries imported.')

Fetching package metadata .............
Solving package specifications: .

Package plan for installation in environment /opt/conda/envs/DSX-Python35:

The following NEW packages will be INSTALLED:

    altair:  2.2.2-py35_1 conda-forge
    branca:  0.3.1-py_0   conda-forge
    folium:  0.5.0-py_0   conda-forge
    vincent: 0.4.4-py_1   conda-forge

altair-2.2.2-p 100% |################################| Time: 0:00:00  23.83 MB/s
branca-0.3.1-p 100% |################################| Time: 0:00:00  29.66 MB/s
vincent-0.4.4- 100% |################################| Time: 0:00:00   8.07 MB/s
folium-0.5.0-p 100% |################################| Time: 0:00:00  40.27 MB/s
Libraries imported.


In [18]:
# Get the geographic coordinates for Houston

address = 'Houston, Texas'
geolocator = geocoder.arcgis('Houston, Texas').latlng
latitude = geolocator[0]
longitude = geolocator[1]
print('The geograpical coordinate of Houston are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Houston are 29.76058000000006, -95.36967999999996.


In [19]:
# create map of Houston using latitude and longitude values
map_houston = folium.Map(location=[latitude, longitude], zoom_start=9)

# add markers to map for Houston area zip codes
for lat, lng, label in zip(zips['Latitude'], zips['Longitude'], zips['Zip_Code']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='red',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_houston)  
    
map_houston

In [20]:
# The code was removed by Watson Studio for sharing.

In [21]:
# To explore zipcodes in the Houston metroplex, get the geographic data ready to pass to Foursquare

zipcode_latitude = zips.loc[0, 'Latitude'] # zipcode latitude value
zipcode_longitude = zips.loc[0, 'Longitude'] # zipcode longitude value

zipcode_number = zips.loc[0, 'Zip_Code'] # zipcode number

print('Latitude and longitude values of {} are {}, {}.'.format(zipcode_number, 
                                                               zipcode_latitude, 
                                                               zipcode_longitude))

Latitude and longitude values of 77371 are 30.49649000000005, -94.99831999999998.


In [22]:
# Get the top 100 venues that are in 77371 within a radius of 3219 meters, the "2 mile radius"

LIMIT = 100 # limit of number of venues returned by Foursquare API
radius = 3219 # define radius

# create URL
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    zipcode_latitude, 
    zipcode_longitude, 
    radius, 
    LIMIT)
url # display URL

'https://api.foursquare.com/v2/venues/explore?&client_id=BF0XAFGTVULX1NEUNNSQBXRJ15GGGDRJ2ALMMW0PWPEIINQY&client_secret=GMRQ4FAK5IG0OIITG3EYTNNAQULI3ECDWX1V0G0XHXYGK42N&v=20180605&ll=30.49649000000005,-94.99831999999998&radius=3219&limit=100'

In [23]:
# Send the GET request and check the results

results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5c59a69e9fb6b7681cc0f700'},
 'response': {'groups': [{'items': [{'reasons': {'count': 0,
       'items': [{'reasonName': 'globalInteractionReason',
         'summary': 'This spot is popular',
         'type': 'general'}]},
      'referralId': 'e-0-4db8a73f93a0f369e85eeee0-0',
      'venue': {'categories': [{'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/food/deli_',
          'suffix': '.png'},
         'id': '4bf58dd8d48988d1c5941735',
         'name': 'Sandwich Place',
         'pluralName': 'Sandwich Places',
         'primary': True,
         'shortName': 'Sandwiches'}],
       'id': '4db8a73f93a0f369e85eeee0',
       'location': {'cc': 'US',
        'city': 'Shepherd',
        'country': 'United States',
        'distance': 1873,
        'formattedAddress': ['Shepherd, TX', 'United States'],
        'labeledLatLngs': [{'label': 'display',
          'lat': 30.510733123409672,
          'lng': -94.98791966989346}],
        'lat': 30.5

In [24]:
# From the Foursquare lab in the previous module, we know that all the information is in the items key. 
# Borrow the get_category_type function from the Foursquare lab.

# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [25]:
# Now we are ready to clean the json and structure it into a pandas dataframe.

venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head(3)

Unnamed: 0,name,categories,lat,lng
0,SUBWAY,Sandwich Place,30.510733,-94.98792
1,Church's Chicken,Fried Chicken Joint,30.489171,-94.995199
2,SONIC Drive In,Fast Food Restaurant,30.488055,-94.996168


In [26]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

4 venues were returned by Foursquare.


## 2. Explore Zipcodes in the Houston metroplex
#### Let's replicate the function to repeat the same process to all the zipcodes in the greater Houston area

In [30]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Zipcode', 
                  'Zipcode Latitude', 
                  'Zipcode Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

#### Now run the above function on each neighborhood and create a new dataframe called *toronto_venues*.

In [31]:
houston_venues = getNearbyVenues(names=zips['Zip_Code'],
                                   latitudes=zips['Latitude'],
                                   longitudes=zips['Longitude']
                                  )

77371
77331
77358
77378
77868
77327
77303
77880
77426
77301
77306
77372
77575
77568
77564
77510
77357
77535
77445
77085
77015
77336
77418
77339
77034
77090
77068
77014
77069
77066
77060
77067
77064
77065
77086
77038
77037
77474
77088
77093
77016
77514
77597
77076
77091
77028
77022
77043
77055
77026
77008
77013
77562
77078
77532
77049
77530
77365
77345
77554
77029
77020
77079
77007
77493
77094
77077
77057
77056
77002
77027
77003
77536
77019
77547
77010
77011
77006
77012
77506
77503
77023
77004
77098
77046
77005
77485
77401
77081
77441
77030
77074
77036
77072
77087
77021
77083
77017
77092
77025
77054
77033
77502
77096
77099
77051
77061
77071
77031
77035
77045
77587
77505
77504
77346
77471
77362
77598
77062
77082
77546
77565
77573
77417
77018
77650
77591
77590
77517
77563
77444
77550
77577
77041
77541
77080
77084
77070
77586
77507
77058
77059
77044
77396
77338
77009
77040
77024
77551
77450
77073
77461
77089
77423
77494
77479
77518
77539
77042
77486
77063
77534
77430
77511
77459
77477
7737

In [32]:
print(houston_venues.shape)
houston_venues.head()

(1993, 7)


Unnamed: 0,Zipcode,Zipcode Latitude,Zipcode Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,77371,30.49649,-94.99832,El Taquito,30.49744,-94.996809,Mexican Restaurant
1,77371,30.49649,-94.99832,Action Termite & Pest Control LLC,30.492542,-94.999037,Home Service
2,77331,30.65462,-95.12404,Cape Royal Marina,30.653005,-95.121911,Harbor / Marina
3,77358,30.53985,-95.47809,New Waverly Supermarket,30.53886,-95.482885,Grocery Store
4,77358,30.53985,-95.47809,Down Home Pizza,30.537173,-95.481061,Pizza Place


In [33]:
#Let's check how many venues were returned for each zipcode

houston_venues.groupby('Zipcode').count()

Unnamed: 0_level_0,Zipcode Latitude,Zipcode Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Zipcode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
77002,89,89,89,89,89,89
77003,8,8,8,8,8,8
77004,29,29,29,29,29,29
77005,35,35,35,35,35,35
77006,38,38,38,38,38,38
77007,30,30,30,30,30,30
77008,21,21,21,21,21,21
77009,12,12,12,12,12,12
77010,78,78,78,78,78,78
77011,6,6,6,6,6,6


#### Let's find out how many unique categories can be curated from all the returned venues

In [34]:
print('There are {} uniques categories.'.format(len(houston_venues['Venue Category'].unique())))

There are 266 uniques categories.


## 3. Analyze Each Zipcode

In [35]:
# one hot encoding
houston_onehot = pd.get_dummies(houston_venues[['Venue Category']], prefix="", prefix_sep="")

# add zipcode column back to dataframe
houston_onehot['Zipcode'] = houston_venues['Zipcode'] 

# move zipcode column to the first column
fixed_columns = [houston_onehot.columns[-1]] + list(houston_onehot.columns[:-1])
houston_onehot = houston_onehot[fixed_columns]

houston_onehot.head(3)

Unnamed: 0,Zipcode,ATM,Accessories Store,American Restaurant,Antique Shop,Arcade,Art Gallery,Arts & Crafts Store,Arts & Entertainment,Asian Restaurant,Assisted Living,Athletics & Sports,Auto Garage,Automotive Shop,BBQ Joint,Bagel Shop,Bakery,Bank,Bar,Baseball Field,Basketball Court,Basketball Stadium,Beer Bar,Beer Garden,Big Box Store,Bike Shop,Bistro,Boat or Ferry,Bookstore,Boutique,Bowling Alley,Breakfast Spot,Brewery,Bridal Shop,Bubble Tea Shop,Buffet,Building,Burger Joint,Burrito Place,Bus Station,Bus Stop,Business Service,Café,Cajun / Creole Restaurant,Camera Store,Caribbean Restaurant,Carpet Store,Casino,Check Cashing Service,Chinese Restaurant,Churrascaria,Clothing Store,Cocktail Bar,Coffee Shop,Comfort Food Restaurant,Comic Shop,Community Center,Concert Hall,Construction & Landscaping,Convenience Store,Cosmetics Shop,Costume Shop,Creperie,Cupcake Shop,Dance Studio,Deli / Bodega,Department Store,Dessert Shop,Diner,Discount Store,Dive Bar,Doctor's Office,Donut Shop,Dry Cleaner,Dumpling Restaurant,Electronics Store,Event Space,Fabric Shop,Factory,Farm,Farmers Market,Fast Food Restaurant,Financial or Legal Service,Flea Market,Flower Shop,Food,Food & Drink Shop,Food Court,Food Truck,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop,Fruit & Vegetable Store,Furniture / Home Store,Garden,Garden Center,Gas Station,Gastropub,Gay Bar,German Restaurant,Gift Shop,Golf Course,Gourmet Shop,Greek Restaurant,Grocery Store,Gun Range,Gym,Gym / Fitness Center,Gym Pool,Gymnastics Gym,Harbor / Marina,Hardware Store,Health & Beauty Service,Health Food Store,Historic Site,History Museum,Hobby Shop,Home Service,Hookah Bar,Hospital,Hostel,Hot Dog Joint,Hotel,Hotel Bar,Hotpot Restaurant,Hunan Restaurant,Hunting Supply,IT Services,Ice Cream Shop,Indian Restaurant,Insurance Office,Internet Cafe,Intersection,Italian Restaurant,Japanese Restaurant,Jewelry Store,Juice Bar,Karaoke Bar,Kids Store,Kitchen Supply Store,Korean Restaurant,Lake,Latin American Restaurant,Laundromat,Lawyer,Library,Light Rail Station,Lighthouse,Lingerie Store,Liquor Store,Locksmith,Lounge,Mac & Cheese Joint,Market,Martial Arts Dojo,Massage Studio,Mattress Store,Medical Center,Mediterranean Restaurant,Men's Store,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Mobile Phone Shop,Molecular Gastronomy Restaurant,Mongolian Restaurant,Motel,Motorcycle Shop,Movie Theater,Moving Target,Museum,Music Store,Music Venue,Nail Salon,New American Restaurant,Nightclub,Non-Profit,Noodle House,Opera House,Optical Shop,Other Nightlife,Other Repair Shop,Pakistani Restaurant,Paper / Office Supplies Store,Park,Pawn Shop,Performing Arts Venue,Peruvian Restaurant,Pet Store,Pharmacy,Photography Studio,Pilates Studio,Pizza Place,Playground,Plaza,Pool,Pool Hall,Print Shop,Pub,Public Art,Ramen Restaurant,Record Shop,Recreation Center,Rental Car Location,Rental Service,Residential Building (Apartment / Condo),Restaurant,Rock Club,Roller Rink,Salad Place,Salon / Barbershop,Sandwich Place,Scenic Lookout,Science Museum,Seafood Restaurant,Shipping Store,Shoe Store,Shop & Service,Shopping Mall,Skating Rink,Ski Area,Smoke Shop,Smoothie Shop,Snack Place,Soccer Field,Southern / Soul Food Restaurant,Spa,Spanish Restaurant,Speakeasy,Sporting Goods Shop,Sports Bar,Steakhouse,Storage Facility,Supermarket,Supplement Shop,Surf Spot,Sushi Restaurant,Szechuan Restaurant,Taco Place,Tailor Shop,Tanning Salon,Tapas Restaurant,Tea Room,Tennis Court,Tex-Mex Restaurant,Thai Restaurant,Theater,Thrift / Vintage Store,Tourist Information Center,Toy / Game Store,Track,Trail,Travel & Transport,Tree,Turkish Restaurant,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Vineyard,Warehouse Store,Water Park,Whisky Bar,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,77371,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,77371,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,77331,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [36]:
# Check the new dataframe size.

houston_onehot.shape

(1993, 267)

#### Next, group rows by neighborhood and by taking the mean of the frequency of occurrence of each category

In [37]:
houston_grouped = houston_onehot.groupby('Zipcode').mean().reset_index()
houston_grouped

Unnamed: 0,Zipcode,ATM,Accessories Store,American Restaurant,Antique Shop,Arcade,Art Gallery,Arts & Crafts Store,Arts & Entertainment,Asian Restaurant,Assisted Living,Athletics & Sports,Auto Garage,Automotive Shop,BBQ Joint,Bagel Shop,Bakery,Bank,Bar,Baseball Field,Basketball Court,Basketball Stadium,Beer Bar,Beer Garden,Big Box Store,Bike Shop,Bistro,Boat or Ferry,Bookstore,Boutique,Bowling Alley,Breakfast Spot,Brewery,Bridal Shop,Bubble Tea Shop,Buffet,Building,Burger Joint,Burrito Place,Bus Station,Bus Stop,Business Service,Café,Cajun / Creole Restaurant,Camera Store,Caribbean Restaurant,Carpet Store,Casino,Check Cashing Service,Chinese Restaurant,Churrascaria,Clothing Store,Cocktail Bar,Coffee Shop,Comfort Food Restaurant,Comic Shop,Community Center,Concert Hall,Construction & Landscaping,Convenience Store,Cosmetics Shop,Costume Shop,Creperie,Cupcake Shop,Dance Studio,Deli / Bodega,Department Store,Dessert Shop,Diner,Discount Store,Dive Bar,Doctor's Office,Donut Shop,Dry Cleaner,Dumpling Restaurant,Electronics Store,Event Space,Fabric Shop,Factory,Farm,Farmers Market,Fast Food Restaurant,Financial or Legal Service,Flea Market,Flower Shop,Food,Food & Drink Shop,Food Court,Food Truck,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop,Fruit & Vegetable Store,Furniture / Home Store,Garden,Garden Center,Gas Station,Gastropub,Gay Bar,German Restaurant,Gift Shop,Golf Course,Gourmet Shop,Greek Restaurant,Grocery Store,Gun Range,Gym,Gym / Fitness Center,Gym Pool,Gymnastics Gym,Harbor / Marina,Hardware Store,Health & Beauty Service,Health Food Store,Historic Site,History Museum,Hobby Shop,Home Service,Hookah Bar,Hospital,Hostel,Hot Dog Joint,Hotel,Hotel Bar,Hotpot Restaurant,Hunan Restaurant,Hunting Supply,IT Services,Ice Cream Shop,Indian Restaurant,Insurance Office,Internet Cafe,Intersection,Italian Restaurant,Japanese Restaurant,Jewelry Store,Juice Bar,Karaoke Bar,Kids Store,Kitchen Supply Store,Korean Restaurant,Lake,Latin American Restaurant,Laundromat,Lawyer,Library,Light Rail Station,Lighthouse,Lingerie Store,Liquor Store,Locksmith,Lounge,Mac & Cheese Joint,Market,Martial Arts Dojo,Massage Studio,Mattress Store,Medical Center,Mediterranean Restaurant,Men's Store,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Mobile Phone Shop,Molecular Gastronomy Restaurant,Mongolian Restaurant,Motel,Motorcycle Shop,Movie Theater,Moving Target,Museum,Music Store,Music Venue,Nail Salon,New American Restaurant,Nightclub,Non-Profit,Noodle House,Opera House,Optical Shop,Other Nightlife,Other Repair Shop,Pakistani Restaurant,Paper / Office Supplies Store,Park,Pawn Shop,Performing Arts Venue,Peruvian Restaurant,Pet Store,Pharmacy,Photography Studio,Pilates Studio,Pizza Place,Playground,Plaza,Pool,Pool Hall,Print Shop,Pub,Public Art,Ramen Restaurant,Record Shop,Recreation Center,Rental Car Location,Rental Service,Residential Building (Apartment / Condo),Restaurant,Rock Club,Roller Rink,Salad Place,Salon / Barbershop,Sandwich Place,Scenic Lookout,Science Museum,Seafood Restaurant,Shipping Store,Shoe Store,Shop & Service,Shopping Mall,Skating Rink,Ski Area,Smoke Shop,Smoothie Shop,Snack Place,Soccer Field,Southern / Soul Food Restaurant,Spa,Spanish Restaurant,Speakeasy,Sporting Goods Shop,Sports Bar,Steakhouse,Storage Facility,Supermarket,Supplement Shop,Surf Spot,Sushi Restaurant,Szechuan Restaurant,Taco Place,Tailor Shop,Tanning Salon,Tapas Restaurant,Tea Room,Tennis Court,Tex-Mex Restaurant,Thai Restaurant,Theater,Thrift / Vintage Store,Tourist Information Center,Toy / Game Store,Track,Trail,Travel & Transport,Tree,Turkish Restaurant,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Vineyard,Warehouse Store,Water Park,Whisky Bar,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,77002,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011236,0.0,0.011236,0.011236,0.044944,0.0,0.0,0.0,0.011236,0.011236,0.0,0.0,0.011236,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05618,0.011236,0.0,0.0,0.0,0.011236,0.011236,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05618,0.0,0.0,0.0,0.011236,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.022472,0.0,0.0,0.0,0.0,0.0,0.011236,0.0,0.0,0.022472,0.0,0.0,0.0,0.0,0.0,0.011236,0.022472,0.0,0.0,0.0,0.0,0.0,0.011236,0.0,0.011236,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.022472,0.0,0.0,0.011236,0.0,0.0,0.0,0.0,0.0,0.011236,0.011236,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.089888,0.022472,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.022472,0.011236,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.022472,0.0,0.011236,0.0,0.0,0.0,0.0,0.0,0.0,0.022472,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011236,0.0,0.022472,0.0,0.0,0.0,0.0,0.011236,0.0,0.0,0.0,0.0,0.022472,0.0,0.011236,0.0,0.0,0.0,0.0,0.0,0.011236,0.0,0.0,0.0,0.0,0.0,0.011236,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011236,0.0,0.05618,0.0,0.0,0.022472,0.0,0.0,0.0,0.011236,0.0,0.0,0.0,0.0,0.0,0.0,0.033708,0.011236,0.011236,0.0,0.0,0.0,0.033708,0.0,0.0,0.011236,0.0,0.0,0.0,0.011236,0.0,0.0,0.0,0.0,0.0,0.0,0.022472,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011236,0.0,0.0,0.0,0.011236,0.0,0.0,0.0,0.0
1,77003,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,77004,0.0,0.0,0.034483,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.034483,0.034483,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.034483,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.034483,0.0,0.0,0.0,0.0,0.0,0.0,0.034483,0.0,0.0,0.0,0.0,0.0,0.0,0.034483,0.0,0.034483,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.034483,0.0,0.0,0.034483,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.034483,0.0,0.0,0.034483,0.0,0.0,0.0,0.0,0.0,0.0,0.034483,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.034483,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.034483,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.034483,0.0,0.0,0.0,0.0,0.0,0.034483,0.0,0.0,0.0,0.0,0.034483,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.034483,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.034483,0.0,0.034483,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.034483,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.034483,0.0,0.0,0.034483,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.103448,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.034483,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.034483,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,77005,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.028571,0.028571,0.0,0.028571,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.028571,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.028571,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.028571,0.028571,0.0,0.0,0.0,0.0,0.0,0.0,0.028571,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.057143,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.028571,0.0,0.0,0.0,0.0,0.028571,0.028571,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.028571,0.0,0.028571,0.0,0.085714,0.0,0.0,0.028571,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.028571,0.0,0.0,0.0,0.0,0.0,0.0,0.028571,0.0,0.028571,0.0,0.0,0.0,0.0,0.0,0.028571,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.028571,0.0,0.0,0.0,0.0,0.028571,0.0,0.0,0.0,0.028571,0.0,0.0,0.0,0.0,0.0,0.0,0.028571,0.0,0.0,0.028571,0.057143,0.0,0.0,0.028571,0.0,0.0,0.0,0.0,0.028571,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.028571,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.028571,0.0,0.0,0.028571
4,77006,0.0,0.0,0.026316,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.078947,0.0,0.0,0.0,0.0,0.026316,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.026316,0.0,0.0,0.0,0.0,0.0,0.026316,0.0,0.0,0.0,0.0,0.026316,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.026316,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.026316,0.0,0.0,0.0,0.0,0.0,0.0,0.026316,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.105263,0.0,0.0,0.0,0.0,0.026316,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.026316,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.026316,0.0,0.0,0.0,0.078947,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.026316,0.0,0.0,0.0,0.0,0.0,0.026316,0.0,0.078947,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.026316,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.026316,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.026316,0.0,0.0,0.026316,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.026316,0.0,0.026316,0.0,0.0,0.0,0.0,0.026316,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.026316,0.0,0.0,0.0
5,77007,0.0,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.266667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.033333,0.033333,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,77008,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.047619,0.0,0.0,0.0,0.0,0.0,0.047619,0.0,0.0,0.0,0.0,0.0,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.047619,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.047619,0.0,0.0,0.0,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.047619,0.0,0.0,0.0,0.0,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.047619,0.0,0.0,0.0
7,77009,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,77010,0.0,0.0,0.012821,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012821,0.0,0.025641,0.0,0.012821,0.012821,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012821,0.0,0.0,0.0,0.0,0.0,0.0,0.012821,0.0,0.0,0.0,0.0,0.012821,0.025641,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025641,0.0,0.012821,0.0,0.012821,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012821,0.0,0.0,0.025641,0.012821,0.0,0.0,0.0,0.0,0.0,0.012821,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012821,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012821,0.0,0.0,0.012821,0.012821,0.0,0.0,0.0,0.0,0.0,0.012821,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.153846,0.012821,0.0,0.0,0.0,0.0,0.0,0.012821,0.0,0.0,0.0,0.051282,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025641,0.0,0.012821,0.0,0.0,0.0,0.0,0.0,0.0,0.051282,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012821,0.0,0.025641,0.0,0.0,0.0,0.0,0.012821,0.0,0.0,0.0,0.0,0.012821,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012821,0.012821,0.0,0.012821,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.076923,0.0,0.0,0.025641,0.0,0.0,0.0,0.012821,0.0,0.0,0.0,0.012821,0.0,0.0,0.0,0.012821,0.025641,0.0,0.0,0.012821,0.051282,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012821,0.0,0.0,0.012821,0.0
9,77011,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


#### Let's confirm the new size

In [38]:
houston_grouped.shape

(190, 267)

In [42]:
# zips['Zip_Code'] = zips['Zip_Code'].astype('str')

houston_grouped['Zipcode'] = houston_grouped['Zipcode'].astype('str')

#### Let's print each zipcode along with the top 5 most common venues

In [43]:
num_top_venues = 5

for zipcode in houston_grouped['Zipcode']:
    print("----"+zipcode+"----")
    temp = houston_grouped[houston_grouped['Zipcode'] == zipcode].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----77002----
            venue  freq
0           Hotel  0.09
1     Coffee Shop  0.06
2  Sandwich Place  0.06
3    Burger Joint  0.06
4             Bar  0.04


----77003----
            venue  freq
0  Discount Store  0.12
1     Gas Station  0.12
2     Coffee Shop  0.12
3             Bar  0.12
4       BBQ Joint  0.12


----77004----
                             venue  freq
0  Southern / Soul Food Restaurant  0.10
1                      Music Venue  0.03
2                   Cosmetics Shop  0.03
3                       Food Truck  0.03
4                            Track  0.03


----77005----
                  venue  freq
0    Mexican Restaurant  0.09
1  Gym / Fitness Center  0.06
2                   Spa  0.06
3   Japanese Restaurant  0.03
4     Other Repair Shop  0.03


----77006----
                 venue  freq
0              Gay Bar  0.11
1   Italian Restaurant  0.08
2                  Bar  0.08
3   Mexican Restaurant  0.08
4  Japanese Restaurant  0.05


----77007----
                 v

#### Let's put that into a *pandas* dataframe

In [44]:
# First, let's use the function to sort the venues in descending order.

def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

Now let's create the new dataframe and display the top 10 venues for each neighborhood.

In [56]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Zipcode']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
zipcodes_venues_sorted = pd.DataFrame(columns=columns)
zipcodes_venues_sorted['Zipcode'] = houston_grouped['Zipcode']

for ind in np.arange(houston_grouped.shape[0]):
    zipcodes_venues_sorted.iloc[ind, 1:] = return_most_common_venues(houston_grouped.iloc[ind, :], num_top_venues)

zipcodes_venues_sorted.head()

Unnamed: 0,Zipcode,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,77002,Hotel,Coffee Shop,Burger Joint,Sandwich Place,Bar,Steakhouse,Southern / Soul Food Restaurant,Deli / Bodega,Gastropub,Thai Restaurant
1,77003,Bar,Gas Station,Beer Garden,BBQ Joint,Fast Food Restaurant,Coffee Shop,Discount Store,American Restaurant,Gastropub,Garden Center
2,77004,Southern / Soul Food Restaurant,Mexican Restaurant,Food Truck,Lounge,Sushi Restaurant,Gift Shop,Gastropub,Caribbean Restaurant,Museum,Music Venue
3,77005,Mexican Restaurant,Spa,Gym / Fitness Center,Yoga Studio,Southern / Soul Food Restaurant,Smoothie Shop,Shipping Store,Sandwich Place,Restaurant,Pharmacy
4,77006,Gay Bar,Italian Restaurant,Bar,Mexican Restaurant,Japanese Restaurant,Coffee Shop,Nightclub,Mediterranean Restaurant,Pizza Place,Taco Place


## 4. Cluster Neighborhoods
Run *k*-means to cluster the neighborhood into 5 clusters.

In [57]:
# set number of clusters
kclusters = 5

houston_grouped_clustering = houston_grouped.drop('Zipcode', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(houston_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int32)

Let's create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood.

In [58]:
# add clustering labels
zipcodes_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

houston_merged = zips

# merge houston_grouped with zips to add latitude/longitude for each neighborhood
houston_merged = houston_merged.join(zipcodes_venues_sorted.set_index('Zipcode'), on='Zip_Code')

houston_merged.head() # check the last columns!

# add clustering labels
#neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

#toronto_merged = toronto_data

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
#toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

#toronto_merged.head() # check the last columns!

Unnamed: 0,Zip_Code,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,77371,30.49649,-94.99832,4.0,Home Service,Mexican Restaurant,Yoga Studio,Frozen Yogurt Shop,Food & Drink Shop,Food Court,Food Truck,French Restaurant,Fried Chicken Joint,Fruit & Vegetable Store
1,77331,30.65462,-95.12404,0.0,Harbor / Marina,Yoga Studio,Flower Shop,Gastropub,Gas Station,Garden Center,Garden,Furniture / Home Store,Fruit & Vegetable Store,Frozen Yogurt Shop
2,77358,30.53985,-95.47809,0.0,Donut Shop,Grocery Store,Pizza Place,Yoga Studio,Frozen Yogurt Shop,Food & Drink Shop,Food Court,Food Truck,French Restaurant,Fried Chicken Joint
3,77378,30.425131,-95.479946,0.0,Ice Cream Shop,Home Service,Pizza Place,Pet Store,Restaurant,Seafood Restaurant,Shipping Store,Mexican Restaurant,Liquor Store,Hardware Store
4,77868,30.393104,-96.087741,0.0,Discount Store,Grocery Store,Fast Food Restaurant,Convenience Store,Asian Restaurant,Garden Center,Garden,Furniture / Home Store,Gas Station,Food


In [59]:
# zips['Zip_Code'] = zips['Zip_Code'].astype('str')     zips['Zip_Code'].dtype

houston_merged['Cluster Labels'].dtype

dtype('float64')

In [61]:
#df[list("ABCD")] = df[list("ABCD")].fillna(0.0).astype(int)

houston_merged['Cluster Labels'] = houston_merged['Cluster Labels'].fillna(0.0).astype(int)

In [62]:
houston_merged.head()

Unnamed: 0,Zip_Code,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,77371,30.49649,-94.99832,4,Home Service,Mexican Restaurant,Yoga Studio,Frozen Yogurt Shop,Food & Drink Shop,Food Court,Food Truck,French Restaurant,Fried Chicken Joint,Fruit & Vegetable Store
1,77331,30.65462,-95.12404,0,Harbor / Marina,Yoga Studio,Flower Shop,Gastropub,Gas Station,Garden Center,Garden,Furniture / Home Store,Fruit & Vegetable Store,Frozen Yogurt Shop
2,77358,30.53985,-95.47809,0,Donut Shop,Grocery Store,Pizza Place,Yoga Studio,Frozen Yogurt Shop,Food & Drink Shop,Food Court,Food Truck,French Restaurant,Fried Chicken Joint
3,77378,30.425131,-95.479946,0,Ice Cream Shop,Home Service,Pizza Place,Pet Store,Restaurant,Seafood Restaurant,Shipping Store,Mexican Restaurant,Liquor Store,Hardware Store
4,77868,30.393104,-96.087741,0,Discount Store,Grocery Store,Fast Food Restaurant,Convenience Store,Asian Restaurant,Garden Center,Garden,Furniture / Home Store,Gas Station,Food


Finally, let's visualize the resulting clusters

In [63]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(houston_merged['Latitude'], houston_merged['Longitude'], houston_merged['Zip_Code'], houston_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

## 5. Examine Clusters
Now, you can examine each cluster and determine the discriminating venue categories that distinguish each cluster. Based on the defining categories, you can then assign a name to each cluster. I will leave this exercise to you.

#### Cluster 1

In [67]:
houston_merged.loc[houston_merged['Cluster Labels'] == 0, houston_merged.columns[[0] + list(range(4, houston_merged.shape[1]))]]

Unnamed: 0,Zip_Code,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,77331,Harbor / Marina,Yoga Studio,Flower Shop,Gastropub,Gas Station,Garden Center,Garden,Furniture / Home Store,Fruit & Vegetable Store,Frozen Yogurt Shop
2,77358,Donut Shop,Grocery Store,Pizza Place,Yoga Studio,Frozen Yogurt Shop,Food & Drink Shop,Food Court,Food Truck,French Restaurant,Fried Chicken Joint
3,77378,Ice Cream Shop,Home Service,Pizza Place,Pet Store,Restaurant,Seafood Restaurant,Shipping Store,Mexican Restaurant,Liquor Store,Hardware Store
4,77868,Discount Store,Grocery Store,Fast Food Restaurant,Convenience Store,Asian Restaurant,Garden Center,Garden,Furniture / Home Store,Gas Station,Food
5,77327,Discount Store,Grocery Store,Chinese Restaurant,Yoga Studio,Frozen Yogurt Shop,Food & Drink Shop,Food Court,Food Truck,French Restaurant,Fried Chicken Joint
6,77303,,,,,,,,,,
7,77880,,,,,,,,,,
8,77426,Bar,Plaza,Antique Shop,Museum,Café,Historic Site,Fast Food Restaurant,Fried Chicken Joint,Food & Drink Shop,Food Court
9,77301,Home Service,Grocery Store,Beer Garden,Lawyer,Yoga Studio,French Restaurant,Food & Drink Shop,Food Court,Food Truck,Fried Chicken Joint
11,77372,BBQ Joint,Skating Rink,Roller Rink,Yoga Studio,Flea Market,Food,Food & Drink Shop,Food Court,Food Truck,French Restaurant


#### Cluster 2

In [66]:
houston_merged.loc[houston_merged['Cluster Labels'] == 1, houston_merged.columns[[0] + list(range(4, houston_merged.shape[1]))]]

Unnamed: 0,Zip_Code,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
34,77086,Park,Playground,Bus Station,Yoga Studio,Fried Chicken Joint,Food & Drink Shop,Food Court,Food Truck,French Restaurant,Fruit & Vegetable Store
56,77530,Park,Yoga Studio,Fried Chicken Joint,Food,Food & Drink Shop,Food Court,Food Truck,French Restaurant,Frozen Yogurt Shop,Flea Market
74,77547,Park,Business Service,Yoga Studio,Fried Chicken Joint,Food,Food & Drink Shop,Food Court,Food Truck,French Restaurant,Frozen Yogurt Shop
99,77025,Park,Yoga Studio,Fried Chicken Joint,Food,Food & Drink Shop,Food Court,Food Truck,French Restaurant,Frozen Yogurt Shop,Flea Market
105,77051,Park,Burger Joint,Yoga Studio,Fried Chicken Joint,Food,Food & Drink Shop,Food Court,Food Truck,French Restaurant,Frozen Yogurt Shop
126,77591,Park,Basketball Court,Yoga Studio,Fried Chicken Joint,Food & Drink Shop,Food Court,Food Truck,French Restaurant,Frozen Yogurt Shop,Flower Shop
135,77080,Park,Furniture / Home Store,Yoga Studio,Fried Chicken Joint,Food,Food & Drink Shop,Food Court,Food Truck,French Restaurant,Frozen Yogurt Shop
142,77044,Park,Water Park,Yoga Studio,Fried Chicken Joint,Food,Food & Drink Shop,Food Court,Food Truck,French Restaurant,Frozen Yogurt Shop
175,77385,Park,Playground,Yoga Studio,Fried Chicken Joint,Food,Food & Drink Shop,Food Court,Food Truck,French Restaurant,Frozen Yogurt Shop
189,77050,Clothing Store,Park,BBQ Joint,Flea Market,Food,Food & Drink Shop,Food Court,Food Truck,French Restaurant,Fried Chicken Joint


#### Cluster 3

In [69]:
houston_merged.loc[houston_merged['Cluster Labels'] == 2, houston_merged.columns[[0] + list(range(4, houston_merged.shape[1]))]]

Unnamed: 0,Zip_Code,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
150,77073,Trail,Juice Bar,Yoga Studio,Fried Chicken Joint,Food,Food & Drink Shop,Food Court,Food Truck,French Restaurant,Frozen Yogurt Shop
202,77545,Trail,Yoga Studio,Fried Chicken Joint,Food,Food & Drink Shop,Food Court,Food Truck,French Restaurant,Frozen Yogurt Shop,Flea Market


#### Cluster 4

In [71]:
houston_merged.loc[houston_merged['Cluster Labels'] == 3, houston_merged.columns[[0] + list(range(4, houston_merged.shape[1]))]]

Unnamed: 0,Zip_Code,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
10,77306,Construction & Landscaping,Yoga Studio,Food,Food & Drink Shop,Food Court,Food Truck,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop,Flea Market
16,77357,Construction & Landscaping,Yoga Studio,Food,Food & Drink Shop,Food Court,Food Truck,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop,Flea Market
109,77035,Chinese Restaurant,Construction & Landscaping,Food,Food & Drink Shop,Food Court,Food Truck,French Restaurant,Fried Chicken Joint,Yoga Studio,Flower Shop
110,77045,Construction & Landscaping,Yoga Studio,Food,Food & Drink Shop,Food Court,Food Truck,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop,Flea Market


#### Cluster 5

In [72]:
houston_merged.loc[houston_merged['Cluster Labels'] == 4, houston_merged.columns[[0] + list(range(4, houston_merged.shape[1]))]]

Unnamed: 0,Zip_Code,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,77371,Home Service,Mexican Restaurant,Yoga Studio,Frozen Yogurt Shop,Food & Drink Shop,Food Court,Food Truck,French Restaurant,Fried Chicken Joint,Fruit & Vegetable Store
33,77065,Mexican Restaurant,Yoga Studio,Flower Shop,Gastropub,Gas Station,Garden Center,Garden,Furniture / Home Store,Fruit & Vegetable Store,Frozen Yogurt Shop
102,77502,Mexican Restaurant,Yoga Studio,Flower Shop,Gastropub,Gas Station,Garden Center,Garden,Furniture / Home Store,Fruit & Vegetable Store,Frozen Yogurt Shop
178,77571,Mexican Restaurant,Chinese Restaurant,BBQ Joint,Food Truck,Frozen Yogurt Shop,Food & Drink Shop,Food Court,French Restaurant,Fried Chicken Joint,Yoga Studio
