# Applied Data Science Capstone Project

### A comparison of Neighborhoods in Downtown Chicago, Illinois & Houston, Texas

## Part 1 - Data Import & Cleaning 

In [None]:
# Import Libraries
! conda install lxml --yes
! conda install html5lib  --yes
! conda install BeautifulSoup4  --yes
import html5lib
import lxml
import pandas as pd
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
import numpy as np
import json # library to handle JSON files
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe
import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe
print('Libraries imported.')

#### Chicago

In [3]:
# Load the US zip-code table (';'-delimited CSV) that carries the coordinates of every zip code.
url = pd.read_csv(
    'https://public.opendatasoft.com/explore/dataset/us-zip-code-latitude-and-longitude/download/?format=csv&timezone=America/Chicago&use_labels_for_header=true',
    sep=';',
)
# Discard the columns this analysis never reads.
url = url.drop(columns=['Timezone', 'Daylight savings time flag', 'geopoint'])
# Keep the Chicago, IL rows only and rename the zip column in the same chain.
is_chicago = url['City'].eq('Chicago') & url['State'].eq('IL')
chi_zip = (
    url.loc[is_chicago]
    .reset_index(drop=True)
    .rename(columns={'Zip': 'Zip_Code'})
)
chi_zip.head()

Unnamed: 0,Zip_Code,City,State,Latitude,Longitude
0,60651,Chicago,IL,41.901485,-87.74055
1,60697,Chicago,IL,41.811929,-87.68732
2,60667,Chicago,IL,41.811929,-87.68732
3,60694,Chicago,IL,41.811929,-87.68732
4,60684,Chicago,IL,41.811929,-87.68732


In [4]:
# Import a data set that has the names of neighborhoods along with the zip codes for Chicago
# pd.read_html returns a list of DataFrames; index 3 picks the fourth entry of that list.
# NOTE(review): which list entry holds the wanted table depends on the page layout — confirm if the page changes.
zipcode = pd.read_html('https://www.chicagotribune.com/chi-community-areas-htmlstory.html')[3] # using pandas
chi_column_names = ['ZipCode', 'AreaName'] # rename columns
# Overwrite the scraped headers; this assignment only succeeds when the table has exactly two columns.
zipcode.columns = chi_column_names

In [5]:
# Inner-join the coordinates with the neighbourhood names on the zip code.
chi_data = chi_zip.merge(zipcode, left_on='Zip_Code', right_on='ZipCode')
# Report, per column, whether any value is missing (isna and isnull are printed separately).
print(chi_data.isna().any())
print(chi_data.isnull().any())
# The join leaves two zip-code columns; keep 'Zip_Code' and drop the duplicate.
chi_data = chi_data.drop(columns='ZipCode')

Zip_Code     False
City         False
State        False
Latitude     False
Longitude    False
ZipCode      False
AreaName     False
dtype: bool
Zip_Code     False
City         False
State        False
Latitude     False
Longitude    False
ZipCode      False
AreaName     False
dtype: bool


In [6]:
# Check the data 
chi_data.head()

Unnamed: 0,Zip_Code,City,State,Latitude,Longitude,AreaName
0,60651,Chicago,IL,41.901485,-87.74055,"Austin, Humboldt Park"
1,60644,Chicago,IL,41.881331,-87.75671,Austin
2,60646,Chicago,IL,41.995331,-87.7601,"Forest Glen, Jefferson Park, North Park, Norwo..."
3,60616,Chicago,IL,41.8474,-87.63126,"Armour Square, Bridgeport, Douglas,Lower West ..."
4,60647,Chicago,IL,41.921126,-87.70085,"Hermosa, Humboldt Park, Logan Square, West Town"


#### Houston

In [7]:
# Use the same data set from above to extract the zip code and coordinates for Houston
houst_zip = url[(url['City'] == 'Houston') & (url['State'] == 'TX')].reset_index(drop=True)
houst_zip.head()

Unnamed: 0,Zip,City,State,Latitude,Longitude
0,77046,Houston,TX,29.733181,-95.43131
1,77015,Houston,TX,29.778526,-95.18118
2,77289,Houston,TX,29.83399,-95.434241
3,77072,Houston,TX,29.700898,-95.59002
4,77216,Houston,TX,29.83399,-95.434241


In [8]:
# Import a data set that has the names of neighborhoods along with the zip codes for Houston
# Data was uploaded to a csv file from 'https://web.har.com/zipcode'
zipcode_houston = pd.read_csv('Houston_Zip.csv')
zipcode_houston.head()

Unnamed: 0,Zip Code,City (City Alias Name(s))
0,77002,"Houston - Inner Loop (HOUSTON,CLUTCH CITY,)"
1,77003,"Houston - Inner Loop (HOUSTON,)"
2,77004,"Houston - Inner Loop (HOUSTON,)"
3,77005,"Houston - Inner Loop (HOUSTON,SOUTHSIDE PLACE,..."
4,77006,"Houston - Inner Loop (HOUSTON,)"


In [9]:
# Inner-join the Houston coordinates with the neighbourhood names and
# give the descriptive column the same name the Chicago frame uses.
houst_data = (
    houst_zip
    .merge(zipcode_houston, left_on='Zip', right_on='Zip Code')
    .rename(columns={'City (City Alias Name(s))': 'AreaName'})
)
# Report, per column, whether any value is missing (isna and isnull are printed separately).
print(houst_data.isna().any())
print(houst_data.isnull().any())
# The join leaves two zip-code columns; keep 'Zip' and drop the duplicate.
houst_data = houst_data.drop(columns='Zip Code')

Zip          False
City         False
State        False
Latitude     False
Longitude    False
Zip Code     False
AreaName     False
dtype: bool
Zip          False
City         False
State        False
Latitude     False
Longitude    False
Zip Code     False
AreaName     False
dtype: bool


In [10]:
# Check the data 
houst_data.head()

Unnamed: 0,Zip,City,State,Latitude,Longitude,AreaName
0,77046,Houston,TX,29.733181,-95.43131,"Houston - Inner Loop (GREENWAY PLAZA,HOUSTON,)"
1,77015,Houston,TX,29.778526,-95.18118,"Houston - Northeast (CLOVERLEAF,GREENS BAYOU,H..."
2,77072,Houston,TX,29.700898,-95.59002,"Houston - Southwest (HOUSTON,)"
3,77034,Houston,TX,29.63643,-95.21789,"Houston - Southeast (HOUSTON,)"
4,77003,Houston,TX,29.749278,-95.34741,"Houston - Inner Loop (HOUSTON,)"


## Part 2 - Segmentation & Clustering

### Segmentation & Clustering - Downtown Chicago

In [None]:
# import libraries 

!conda install -c conda-forge geopy --yes 
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

!conda install -c conda-forge folium=0.5.0 --yes 
import folium # map rendering library

print('Libraries imported.')

##### Create a map for Chicago

In [12]:
# Resolve the city name to the coordinates used to centre the Chicago map.
address = 'Chicago, Illinois'
geolocator = Nominatim(user_agent="chi_explorer")
location = geolocator.geocode(address)
latitude, longitude = location.latitude, location.longitude
print('The geograpical coordinate of Chicago are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Chicago are 41.8755616, -87.6244212.


In [13]:
# Base map centred on the geocoded Chicago coordinates.
map_chicago = folium.Map(location=[latitude, longitude], zoom_start=10)

# Appearance shared by every marker drawn below.
marker_style = dict(
    radius=5,
    color='blue',
    fill=True,
    fill_color='#3186cc',
    fill_opacity=0.7,
    parse_html=False,
)

# One circle per row of chi_data, with a "City, AreaName" popup.
for lat, lng, city, areaName in zip(chi_data['Latitude'], chi_data['Longitude'], chi_data['City'], chi_data['AreaName']):
    label = folium.Popup('{}, {}'.format(city, areaName), parse_html=True)
    folium.CircleMarker([lat, lng], popup=label, **marker_style).add_to(map_chicago)

map_chicago

In [14]:
# Create a dataframe for neighborhoods in Downtown Chicago
chi_downtown_data = chi_data[chi_data['AreaName'].str.contains('Loop')].reset_index(drop=True)
chi_downtown_data.head()

Unnamed: 0,Zip_Code,City,State,Latitude,Longitude,AreaName
0,60606,Chicago,IL,41.882582,-87.6376,"Loop, Near West Side"
1,60607,Chicago,IL,41.875882,-87.65114,"Loop, Near West Side, Near South Side"
2,60601,Chicago,IL,41.886456,-87.62325,Loop
3,60605,Chicago,IL,41.860019,-87.6187,"Loop, Near South Side"
4,60603,Chicago,IL,41.880446,-87.63014,Loop


In [15]:
# Resolve the Loop to the coordinates used to centre the Downtown Chicago map.
address = 'Loop, Chicago'
geolocator = Nominatim(user_agent="chi_explorer")
location = geolocator.geocode(address)
latitude, longitude = location.latitude, location.longitude
print('The geograpical coordinate of Downtown Chicago are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Downtown Chicago are 41.8755616, -87.6244212.


In [16]:
# Base map centred on the geocoded Downtown Chicago coordinates.
map_chi_downtown = folium.Map(location=[latitude, longitude], zoom_start=11)

# Appearance shared by every marker drawn below.
marker_style = dict(
    radius=5,
    color='blue',
    fill=True,
    fill_color='#3186cc',
    fill_opacity=0.7,
    parse_html=False,
)

# One circle per row of chi_downtown_data; the popup shows the area name.
for lat, lng, label in zip(chi_downtown_data['Latitude'], chi_downtown_data['Longitude'], chi_downtown_data['AreaName']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker([lat, lng], popup=label, **marker_style).add_to(map_chi_downtown)

map_chi_downtown

##### Define Foursquare credentials and version

In [None]:
import os

# Foursquare credentials are read from the environment so they are never saved
# in the notebook. Set FOURSQUARE_CLIENT_ID and FOURSQUARE_CLIENT_SECRET before
# starting the kernel; both fall back to an empty string when unset.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '') # your Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '') # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

# Report only whether each credential is present: printing the values would
# store them in the cell output of the saved notebook.
print('Your credentails:')
print('CLIENT_ID: ' + ('set' if CLIENT_ID else 'missing'))
print('CLIENT_SECRET:' + ('set' if CLIENT_SECRET else 'missing'))

In [19]:
# Explore the first area
chi_downtown_data.loc[0, 'AreaName']

'Loop, Near West Side'

##### Get the coordinates for the first area

In [20]:
# Scalar lookups on the first row (label 0) of the Downtown Chicago frame.
neighbourhood_name = chi_downtown_data.at[0, 'AreaName'] # neighborhood name
neighbourhood_latitude = chi_downtown_data.at[0, 'Latitude'] # neighborhood latitude value
neighbourhood_longitude = chi_downtown_data.at[0, 'Longitude'] # neighborhood longitude value

summary = 'Latitude and longitude values of {} are {}, {}.'.format(
    neighbourhood_name, neighbourhood_latitude, neighbourhood_longitude)
print(summary)

Latitude and longitude values of Loop, Near West Side are 41.882582, -87.6376.


##### Get the top 100 venues of Loop, Near West Side within a 500 meter radius

In [21]:
# Build the Foursquare "explore" request for the first area.
radius = 500  # search radius passed to the API
LIMIT = 100   # maximum number of venues requested; also read by getNearbyVenues
url_template = 'https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&ll={},{}&v={}&radius={}&limit={}'
# `url` keeps its name because the next cell sends the request with it; note that it
# rebinds the name that held the zip-code DataFrame loaded at the top of the notebook.
url = url_template.format(CLIENT_ID, CLIENT_SECRET, neighbourhood_latitude, neighbourhood_longitude, VERSION, radius, LIMIT)
# Display the request with the credentials masked so they are not stored in the cell output.
url_template.format('<CLIENT_ID>', '<CLIENT_SECRET>', neighbourhood_latitude, neighbourhood_longitude, VERSION, radius, LIMIT)

'https://api.foursquare.com/v2/venues/explore?client_id=<REDACTED>&client_secret=<REDACTED>&ll=41.882582,-87.6376&v=20180605&radius=500&limit=100'

In [None]:
# Send the GET request
results = requests.get(url).json()
results

In [23]:
# Function to extract the category of the venues
def get_category_type(row):
    """Return the name of the first category of a venue record.

    Parameters
    ----------
    row : mapping-like (e.g. dict or pandas Series)
        Must expose the category list under the key 'categories' or,
        failing that, under 'venue.categories'.

    Returns
    -------
    The 'name' of the first category, or None when the category list is empty.

    Raises
    ------
    KeyError
        If neither 'categories' nor 'venue.categories' is present.
    """
    try:
        categories_list = row['categories']
    except KeyError:
        # Only a missing key triggers the fallback; any other error is a real
        # problem with the row and is allowed to propagate.
        categories_list = row['venue.categories']

    if len(categories_list) == 0:
        return None
    return categories_list[0]['name']

In [24]:
# Venue records of the first group in the "explore" response.
venues = results['response']['groups'][0]['items']

# Flatten the nested JSON into dotted column names. pd.json_normalize is the
# public entry point; the pandas.io.json import path is deprecated.
nearby_venues = pd.json_normalize(venues)

# filter columns (copy so the column assignment below works on an independent frame)
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues = nearby_venues.loc[:, filtered_columns].copy()

# replace each row's category list with the name of its first category
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns: keep only the last dotted component ('venue.location.lat' -> 'lat')
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Civic Opera House,Opera House,41.882626,-87.637067
1,Naf Naf Grill,Middle Eastern Restaurant,41.883276,-87.635563
2,The Doughnut Vault,Donut Shop,41.884019,-87.639744
3,Garrett Popcorn Shops - Citigroup Center,Snack Place,41.882227,-87.640505
4,Small Cheval,Burger Joint,41.884801,-87.638482


In [25]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

100 venues were returned by Foursquare.


##### Explore neighborhoods in Downtown Chicago

In [26]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    """Query Foursquare's "explore" endpoint once per location and tabulate the venues.

    Parameters
    ----------
    names, latitudes, longitudes : iterables of equal length
        Label and coordinates of each location; they are consumed in parallel.
    radius : int, default 500
        Search radius forwarded to the API.

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with the columns 'Neighbourhood',
        'Neighbourhood Latitude', 'Neighbourhood Longitude', 'Venue',
        'Venue Latitude', 'Venue Longitude' and 'Venue Category'.
        The frame is empty (but keeps these columns) when nothing is returned.

    Notes
    -----
    Reads the module-level CLIENT_ID, CLIENT_SECRET, VERSION and LIMIT, and
    prints each location name as it is processed.
    """
    columns = ['Neighbourhood',
               'Neighbourhood Latitude',
               'Neighbourhood Longitude',
               'Venue',
               'Venue Latitude',
               'Venue Longitude',
               'Venue Category']

    rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)

        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID,
            CLIENT_SECRET,
            VERSION,
            lat,
            lng,
            radius,
            LIMIT)

        # make the GET request; the timeout keeps a stalled connection from hanging the notebook
        results = requests.get(url, timeout=30).json()["response"]['groups'][0]['items']

        # keep only the relevant information for each nearby venue
        for v in results:
            venue = v['venue']
            categories = venue['categories']
            rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                # a venue without any category gets None instead of raising IndexError
                categories[0]['name'] if categories else None))

    # Passing the column names to the constructor keeps the schema intact even
    # when no rows were collected (assigning .columns afterwards would fail).
    return pd.DataFrame(rows, columns=columns)

In [27]:
# Create a dataframe for the Downtown Chicago Venues
chi_downtown_venues = getNearbyVenues(names=chi_downtown_data['AreaName'],
                                   latitudes=chi_downtown_data['Latitude'],
                                   longitudes=chi_downtown_data['Longitude']
                                  )

Loop, Near West Side
Loop, Near West Side, Near South Side
Loop
Loop, Near South Side
Loop
Loop
Loop
Loop, Near West Side


In [28]:
# Check size of dataframe
print(chi_downtown_venues.shape)
chi_downtown_venues.head()

(685, 7)


Unnamed: 0,Neighbourhood,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Loop, Near West Side",41.882582,-87.6376,Civic Opera House,41.882626,-87.637067,Opera House
1,"Loop, Near West Side",41.882582,-87.6376,Naf Naf Grill,41.883276,-87.635563,Middle Eastern Restaurant
2,"Loop, Near West Side",41.882582,-87.6376,The Doughnut Vault,41.884019,-87.639744,Donut Shop
3,"Loop, Near West Side",41.882582,-87.6376,Garrett Popcorn Shops - Citigroup Center,41.882227,-87.640505,Snack Place
4,"Loop, Near West Side",41.882582,-87.6376,Small Cheval,41.884801,-87.638482,Burger Joint


In [29]:
# Number of venues per neighbourhood
chi_downtown_venues.groupby('Neighbourhood').count()

Unnamed: 0_level_0,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighbourhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Loop,400,400,400,400,400,400
"Loop, Near South Side",21,21,21,21,21,21
"Loop, Near West Side",200,200,200,200,200,200
"Loop, Near West Side, Near South Side",64,64,64,64,64,64


In [30]:
# Number of unique venue categories
print('There are {} uniques categories.'.format(len(chi_downtown_venues['Venue Category'].unique())))

There are 156 uniques categories.


#### Analyze each neighborhood

In [31]:
# One indicator column per venue category, named exactly after the category.
venue_categories = chi_downtown_venues[['Venue Category']]
chi_downtown_onehot = pd.get_dummies(venue_categories, prefix="", prefix_sep="")

# Attach the neighbourhood of each venue row.
chi_downtown_onehot['Neighbourhood'] = chi_downtown_venues['Neighbourhood']

# Rotate the last column to the front so the neighbourhood reads first.
last_column = chi_downtown_onehot.columns[-1]
other_columns = list(chi_downtown_onehot.columns[:-1])
fixed_columns = [last_column] + other_columns
chi_downtown_onehot = chi_downtown_onehot[fixed_columns]

chi_downtown_onehot.head()

Unnamed: 0,Neighbourhood,American Restaurant,Amphitheater,Arepa Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,BBQ Joint,Bakery,Bank,Bar,Beer Garden,Big Box Store,Bike Rental / Bike Share,Bistro,Boat or Ferry,Bookstore,Boutique,Boxing Gym,Breakfast Spot,Brewery,Bubble Tea Shop,Building,Burger Joint,Burrito Place,Bus Line,Bus Station,Cafeteria,Café,Cajun / Creole Restaurant,Camera Store,Cheese Shop,Chinese Restaurant,Chocolate Shop,Climbing Gym,Clothing Store,Cocktail Bar,Coffee Shop,College Stadium,Concert Hall,Convenience Store,Cosmetics Shop,Creperie,Cuban Restaurant,Dance Studio,Deli / Bodega,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Dog Run,Donut Shop,Dumpling Restaurant,Electronics Store,Exhibit,Eye Doctor,Falafel Restaurant,Farmers Market,Fast Food Restaurant,Food Court,Food Truck,Football Stadium,French Restaurant,Fried Chicken Joint,Garden,Gas Station,Gastropub,General Entertainment,General Travel,German Restaurant,Gift Shop,Gourmet Shop,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Harbor / Marina,Historic Site,History Museum,Hostel,Hot Dog Joint,Hotel,Hotel Bar,Indian Restaurant,Indie Movie Theater,Italian Restaurant,Japanese Restaurant,Jewelry Store,Juice Bar,Kebab Restaurant,Korean Restaurant,Lounge,Market,Mediterranean Restaurant,Men's Store,Mexican Restaurant,Middle Eastern Restaurant,Mobile Phone Shop,Monument / Landmark,Movie Theater,Museum,Music Venue,New American Restaurant,Noodle House,Opera House,Optical Shop,Outdoor Sculpture,Paper / Office Supplies Store,Park,Parking,Performing Arts Venue,Pharmacy,Pizza Place,Plaza,Poke Place,Polish Restaurant,Portuguese Restaurant,Pub,Public Art,Record Shop,Rental Car Location,Restaurant,Salad Place,Salon / Barbershop,Sandwich Place,Scenic Lookout,Seafood Restaurant,Shipping Store,Shoe Repair,Shoe Store,Shopping Mall,Skating Rink,Smoke Shop,Snack Place,Soccer Field,South American Restaurant,Spa,Sporting Goods Shop,Sports Bar,Sports Club,Stadium,Stationery 
Store,Steakhouse,Sushi Restaurant,Taco Place,Tea Room,Theater,Tiki Bar,Tour Provider,Toy / Game Store,Train Station,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Waterfront,Whisky Bar,Women's Store
0,"Loop, Near West Side",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,"Loop, Near West Side",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,"Loop, Near West Side",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,"Loop, Near West Side",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,"Loop, Near West Side",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [32]:
chi_downtown_onehot.shape

(685, 157)

##### Group each neighborhood

In [33]:
chi_downtown_grouped = chi_downtown_onehot.groupby('Neighbourhood').mean().reset_index()
chi_downtown_grouped

Unnamed: 0,Neighbourhood,American Restaurant,Amphitheater,Arepa Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,BBQ Joint,Bakery,Bank,Bar,Beer Garden,Big Box Store,Bike Rental / Bike Share,Bistro,Boat or Ferry,Bookstore,Boutique,Boxing Gym,Breakfast Spot,Brewery,Bubble Tea Shop,Building,Burger Joint,Burrito Place,Bus Line,Bus Station,Cafeteria,Café,Cajun / Creole Restaurant,Camera Store,Cheese Shop,Chinese Restaurant,Chocolate Shop,Climbing Gym,Clothing Store,Cocktail Bar,Coffee Shop,College Stadium,Concert Hall,Convenience Store,Cosmetics Shop,Creperie,Cuban Restaurant,Dance Studio,Deli / Bodega,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Dog Run,Donut Shop,Dumpling Restaurant,Electronics Store,Exhibit,Eye Doctor,Falafel Restaurant,Farmers Market,Fast Food Restaurant,Food Court,Food Truck,Football Stadium,French Restaurant,Fried Chicken Joint,Garden,Gas Station,Gastropub,General Entertainment,General Travel,German Restaurant,Gift Shop,Gourmet Shop,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Harbor / Marina,Historic Site,History Museum,Hostel,Hot Dog Joint,Hotel,Hotel Bar,Indian Restaurant,Indie Movie Theater,Italian Restaurant,Japanese Restaurant,Jewelry Store,Juice Bar,Kebab Restaurant,Korean Restaurant,Lounge,Market,Mediterranean Restaurant,Men's Store,Mexican Restaurant,Middle Eastern Restaurant,Mobile Phone Shop,Monument / Landmark,Movie Theater,Museum,Music Venue,New American Restaurant,Noodle House,Opera House,Optical Shop,Outdoor Sculpture,Paper / Office Supplies Store,Park,Parking,Performing Arts Venue,Pharmacy,Pizza Place,Plaza,Poke Place,Polish Restaurant,Portuguese Restaurant,Pub,Public Art,Record Shop,Rental Car Location,Restaurant,Salad Place,Salon / Barbershop,Sandwich Place,Scenic Lookout,Seafood Restaurant,Shipping Store,Shoe Repair,Shoe Store,Shopping Mall,Skating Rink,Smoke Shop,Snack Place,Soccer Field,South American Restaurant,Spa,Sporting Goods Shop,Sports Bar,Sports Club,Stadium,Stationery 
Store,Steakhouse,Sushi Restaurant,Taco Place,Tea Room,Theater,Tiki Bar,Tour Provider,Toy / Game Store,Train Station,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Waterfront,Whisky Bar,Women's Store
0,Loop,0.0175,0.0025,0.005,0.0,0.0025,0.01,0.01,0.0075,0.0175,0.0025,0.015,0.0025,0.005,0.0,0.0,0.0025,0.0025,0.005,0.0,0.0025,0.0,0.0025,0.0025,0.0125,0.0025,0.0,0.0,0.005,0.0125,0.0025,0.0025,0.0,0.0,0.0075,0.0,0.0075,0.005,0.06,0.0,0.0125,0.0025,0.015,0.0,0.005,0.0075,0.0025,0.0075,0.0125,0.0025,0.005,0.0025,0.01,0.0025,0.0025,0.005,0.005,0.005,0.01,0.005,0.01,0.0,0.0,0.0025,0.005,0.005,0.0,0.0075,0.0025,0.0025,0.005,0.0,0.005,0.0,0.005,0.01,0.0025,0.0025,0.0025,0.0,0.0025,0.005,0.07,0.01,0.0,0.005,0.0325,0.0,0.0025,0.0025,0.0,0.0,0.005,0.005,0.0125,0.005,0.0075,0.0175,0.0025,0.0025,0.0025,0.02,0.0025,0.01,0.0,0.0,0.0025,0.0025,0.0,0.005,0.0,0.005,0.005,0.015,0.0225,0.005,0.0,0.01,0.01,0.0175,0.005,0.0,0.0025,0.015,0.0025,0.0525,0.0025,0.0175,0.0,0.0,0.0125,0.0025,0.0025,0.01,0.015,0.0,0.005,0.0025,0.0,0.0,0.0,0.0,0.005,0.01,0.0025,0.0075,0.01,0.0375,0.0025,0.0025,0.005,0.0025,0.0175,0.0,0.0025,0.0025,0.0025
1,"Loop, Near South Side",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.047619,0.0,0.047619,0.0,0.0,0.0,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.047619,0.0,0.0,0.047619,0.095238,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.095238,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.095238,0.0,0.047619,0.0,0.0,0.0,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Loop, Near West Side",0.005,0.0,0.0,0.005,0.0,0.0,0.0,0.025,0.015,0.0,0.025,0.0,0.0,0.0,0.0,0.0,0.005,0.005,0.0,0.01,0.005,0.0,0.0,0.015,0.01,0.0,0.0,0.0,0.005,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.02,0.075,0.0,0.0,0.015,0.005,0.01,0.005,0.0,0.0,0.0,0.005,0.0,0.01,0.0,0.025,0.0,0.0,0.0,0.0,0.015,0.0,0.02,0.0,0.005,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.005,0.01,0.015,0.025,0.015,0.01,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.01,0.0,0.025,0.005,0.0,0.015,0.01,0.005,0.0,0.015,0.045,0.0,0.035,0.005,0.0,0.0,0.0,0.0,0.0,0.05,0.01,0.005,0.0,0.0,0.005,0.01,0.0,0.0,0.0,0.015,0.0,0.01,0.005,0.0,0.005,0.0,0.0,0.0,0.025,0.005,0.005,0.08,0.005,0.005,0.0,0.005,0.0,0.0,0.0,0.0,0.02,0.0,0.005,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.015,0.01,0.005,0.015,0.0,0.0,0.0,0.005,0.025,0.015,0.0,0.0,0.0
3,"Loop, Near West Side, Near South Side",0.0,0.0,0.0,0.015625,0.0,0.0,0.0,0.0,0.0,0.0,0.03125,0.0,0.015625,0.015625,0.0,0.0,0.0,0.0,0.015625,0.015625,0.0,0.0,0.0,0.0,0.0,0.015625,0.046875,0.0,0.0625,0.0,0.0,0.0,0.015625,0.0,0.015625,0.0,0.0,0.046875,0.015625,0.015625,0.015625,0.0,0.0,0.0,0.03125,0.0,0.0,0.0,0.0,0.0,0.015625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.015625,0.0,0.0,0.0,0.0,0.0,0.0,0.015625,0.0,0.015625,0.0,0.0,0.0,0.0,0.140625,0.0,0.015625,0.0,0.0,0.0,0.015625,0.015625,0.0,0.0,0.0,0.0,0.0,0.015625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.015625,0.0,0.0,0.0,0.0,0.0,0.015625,0.0,0.0,0.015625,0.0,0.0,0.015625,0.046875,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.015625,0.0,0.0,0.0,0.078125,0.0,0.0,0.015625,0.0,0.0,0.0,0.0,0.0,0.0,0.015625,0.0,0.015625,0.0,0.03125,0.0,0.015625,0.0,0.0,0.0,0.015625,0.015625,0.0,0.0,0.0,0.0,0.015625,0.0,0.0,0.0,0.0,0.0


In [34]:
# Confirm new size
chi_downtown_grouped.shape

(4, 157)

#### Top 5 Venues in each neighborhood

In [35]:
num_top_venues = 5

# For every neighbourhood, print its most frequent venue categories.
for hood in chi_downtown_grouped['Neighbourhood']:
    print("----"+hood+"----")
    hood_rows = chi_downtown_grouped['Neighbourhood'] == hood
    # Transpose the single matching row into (venue, freq) pairs.
    temp = chi_downtown_grouped[hood_rows].T.reset_index()
    temp.columns = ['venue','freq']
    # The first transposed row is the 'Neighbourhood' label itself, so skip it.
    temp = (
        temp.iloc[1:]
        .assign(freq=lambda frame: frame['freq'].astype(float))
        .round({'freq': 2})
    )
    top = temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues)
    print(top)
    print('\n')

----Loop----
                venue  freq
0               Hotel  0.07
1         Coffee Shop  0.06
2      Sandwich Place  0.05
3             Theater  0.04
4  Italian Restaurant  0.03


----Loop, Near South Side----
                 venue  freq
0     Football Stadium  0.14
1  Sporting Goods Shop  0.10
2        Historic Site  0.10
3                 Park  0.10
4         Cocktail Bar  0.05


----Loop, Near West Side----
                      venue  freq
0            Sandwich Place  0.08
1               Coffee Shop  0.08
2   New American Restaurant  0.05
3  Mediterranean Restaurant  0.04
4        Mexican Restaurant  0.04


----Loop, Near West Side, Near South Side----
              venue  freq
0  Greek Restaurant  0.14
1    Sandwich Place  0.08
2              Café  0.06
3       Bus Station  0.05
4       Coffee Shop  0.05




In [36]:
# function to sort the venues in descending order
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the `num_top_venues` largest entries of `row`.

    The first element of `row` is skipped before ranking; the remaining
    entries are ordered from largest to smallest value and the index labels
    of the leading ones are returned as an array.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    return ranked.index.values[:num_top_venues]

#### New Dataframe with Top 10 venues in each Neighborhood

In [37]:
num_top_venues = 10

# ordinal suffixes for ranks 1-3; every later rank uses 'th'
indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighbourhood']
for ind in np.arange(num_top_venues):
    # explicit bounds check instead of catching every exception from the list lookup
    suffix = indicators[ind] if ind < len(indicators) else 'th'
    columns.append('{}{} Most Common Venue'.format(ind+1, suffix))

# create a new dataframe with one row per grouped neighbourhood
neighbourhoods_venues_sorted = pd.DataFrame(columns=columns)
neighbourhoods_venues_sorted['Neighbourhood'] = chi_downtown_grouped['Neighbourhood']

# fill the ranking columns row by row
for ind in np.arange(chi_downtown_grouped.shape[0]):
    neighbourhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(chi_downtown_grouped.iloc[ind, :], num_top_venues)

neighbourhoods_venues_sorted.head()

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Loop,Hotel,Coffee Shop,Sandwich Place,Theater,Italian Restaurant,Plaza,Museum,American Restaurant,Bakery,Public Art
1,"Loop, Near South Side",Football Stadium,Historic Site,Park,Sporting Goods Shop,Sports Club,Harbor / Marina,Donut Shop,Museum,Parking,Grocery Store
2,"Loop, Near West Side",Sandwich Place,Coffee Shop,New American Restaurant,Mediterranean Restaurant,Mexican Restaurant,Vegetarian / Vegan Restaurant,Donut Shop,Italian Restaurant,Grocery Store,BBQ Joint
3,"Loop, Near West Side, Near South Side",Greek Restaurant,Sandwich Place,Café,Coffee Shop,Bus Station,Pizza Place,Dance Studio,Bar,Sports Bar,Shipping Store


#### Cluster Neighborhoods

###### Since there are only 4 grouped neighborhoods, the number of clusters is limited to 2

In [38]:
# set number of clusters
kclusters = 2

# Feature matrix: the per-neighbourhood category frequencies without the label column.
# The axis is named by keyword because pandas 2.x no longer accepts it positionally.
chi_downtown_grouped_clustering = chi_downtown_grouped.drop(columns='Neighbourhood')

# run k-means clustering; n_init is pinned so the number of restarts does not
# depend on the installed scikit-learn version's default
kmeans = KMeans(n_clusters=kclusters, n_init=10, random_state=0).fit(chi_downtown_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([0, 1, 0, 0], dtype=int32)

In [39]:
# add clustering labels as the first column; when the cell is re-run the column
# already exists, so overwrite it instead of calling insert() (which would raise)
if 'Cluster Labels' in neighbourhoods_venues_sorted.columns:
    neighbourhoods_venues_sorted['Cluster Labels'] = kmeans.labels_
else:
    neighbourhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

# attach the cluster label and top venues to every Downtown Chicago row,
# matching each row's AreaName to the 'Neighbourhood' index
chi_downtown_merged = chi_downtown_data.join(
    neighbourhoods_venues_sorted.set_index('Neighbourhood'), on='AreaName')

chi_downtown_merged.head() # check the last columns!

Unnamed: 0,Zip_Code,City,State,Latitude,Longitude,AreaName,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,60606,Chicago,IL,41.882582,-87.6376,"Loop, Near West Side",0,Sandwich Place,Coffee Shop,New American Restaurant,Mediterranean Restaurant,Mexican Restaurant,Vegetarian / Vegan Restaurant,Donut Shop,Italian Restaurant,Grocery Store,BBQ Joint
1,60607,Chicago,IL,41.875882,-87.65114,"Loop, Near West Side, Near South Side",0,Greek Restaurant,Sandwich Place,Café,Coffee Shop,Bus Station,Pizza Place,Dance Studio,Bar,Sports Bar,Shipping Store
2,60601,Chicago,IL,41.886456,-87.62325,Loop,0,Hotel,Coffee Shop,Sandwich Place,Theater,Italian Restaurant,Plaza,Museum,American Restaurant,Bakery,Public Art
3,60605,Chicago,IL,41.860019,-87.6187,"Loop, Near South Side",1,Football Stadium,Historic Site,Park,Sporting Goods Shop,Sports Club,Harbor / Marina,Donut Shop,Museum,Parking,Grocery Store
4,60603,Chicago,IL,41.880446,-87.63014,Loop,0,Hotel,Coffee Shop,Sandwich Place,Theater,Italian Restaurant,Plaza,Museum,American Restaurant,Bakery,Public Art


#### Visualize the clusters

In [40]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters: one evenly spaced rainbow colour per cluster
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
for lat, lon, poi, cluster in zip(chi_downtown_merged['Latitude'], chi_downtown_merged['Longitude'], chi_downtown_merged['AreaName'], chi_downtown_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    # Cluster labels start at 0, so they index the palette directly
    # (the previous `cluster-1` relied on negative-index wraparound for cluster 0).
    cluster_colour = rainbow[int(cluster)]
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=cluster_colour,
        fill=True,
        fill_color=cluster_colour,
        fill_opacity=0.7).add_to(map_clusters)

map_clusters

### Segmentation & Clustering - Downtown Houston

#### Create a map for Houston

In [41]:
# Get the coordinates to create the map of Houston
address = 'Houston, Texas'

geolocator = Nominatim(user_agent="houst_explorer")
location = geolocator.geocode(address)
# NOTE(review): presumably geocode() yields None when the address cannot be resolved — confirm;
# the attribute reads below would then fail.
# latitude/longitude are rebound here; they previously held the Chicago coordinates.
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Houston are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Houston are 29.7589382, -95.3676974.


In [42]:
# Base map centred on the geocoded Houston coordinates.
map_houston = folium.Map(location=[latitude, longitude], zoom_start=10)

# Appearance shared by every marker drawn below.
marker_style = dict(
    radius=5,
    color='blue',
    fill=True,
    fill_color='#3186cc',
    fill_opacity=0.7,
    parse_html=False,
)

# One circle per row of houst_data, with a "City, AreaName" popup.
for lat, lng, city, areaName in zip(houst_data['Latitude'], houst_data['Longitude'], houst_data['City'], houst_data['AreaName']):
    label = folium.Popup('{}, {}'.format(city, areaName), parse_html=True)
    folium.CircleMarker([lat, lng], popup=label, **marker_style).add_to(map_houston)

map_houston

In [43]:
# Create a dataframe for neighborhoods in Downtown Houston: keep the rows whose
# AreaName contains 'Loop'. na=False treats a missing AreaName as "no match"
# rather than producing a NaN mask that pandas refuses to index with.
houst_downtown_data = houst_data[houst_data['AreaName'].str.contains('Loop', na=False)].reset_index(drop=True)
houst_downtown_data.head()

Unnamed: 0,Zip,City,State,Latitude,Longitude,AreaName
0,77046,Houston,TX,29.733181,-95.43131,"Houston - Inner Loop (GREENWAY PLAZA,HOUSTON,)"
1,77003,Houston,TX,29.749278,-95.34741,"Houston - Inner Loop (HOUSTON,)"
2,77004,Houston,TX,29.728779,-95.3657,"Houston - Inner Loop (HOUSTON,)"
3,77030,Houston,TX,29.704584,-95.40466,"Houston - Inner Loop (HOUSTON,V A HOSPITAL,)"
4,77005,Houston,TX,29.717529,-95.42821,"Houston - Inner Loop (HOUSTON,SOUTHSIDE PLACE,..."


In [44]:
# Get the coordinates to create the map of Downtown Houston
address = 'Downtown, Houston'

geolocator = Nominatim(user_agent="houst_explorer")
location = geolocator.geocode(address)
# geocode() returns None when nothing matches; fail here with a clear message
# instead of an AttributeError on the attribute access below.
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Downtown Houston are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Downtown Houston are 29.76428215, -95.3674131325204.


In [45]:
# Draw the Downtown Houston areas on a map centred on latitude/longitude.
map_houst_downtown = folium.Map(location=[latitude, longitude], zoom_start=11)

# One blue circle per area, with the area name as its popup.
downtown_points = zip(
    houst_downtown_data['Latitude'],
    houst_downtown_data['Longitude'],
    houst_downtown_data['AreaName'],
)
for point_lat, point_lng, area_name in downtown_points:
    area_popup = folium.Popup(area_name, parse_html=True)
    marker = folium.CircleMarker(
        [point_lat, point_lng],
        radius=5,
        popup=area_popup,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False)
    marker.add_to(map_houst_downtown)

map_houst_downtown

In [46]:
# Name of the first area (row label 0) in the Downtown Houston table
houst_downtown_data['AreaName'].loc[0]

'Houston - Inner Loop (GREENWAY PLAZA,HOUSTON,)'

##### Get the coordinates of the first area

In [47]:
# Name and centre coordinates of the first area (row label 0).
neighbourhood_name2 = houst_downtown_data['AreaName'].loc[0]
neighbourhood_latitude2 = houst_downtown_data['Latitude'].loc[0]
neighbourhood_longitude2 = houst_downtown_data['Longitude'].loc[0]

coordinates_message = 'Latitude and longitude values of {} are {}, {}.'
print(coordinates_message.format(neighbourhood_name2, neighbourhood_latitude2, neighbourhood_longitude2))

Latitude and longitude values of Houston - Inner Loop (GREENWAY PLAZA,HOUSTON,) are 29.733181, -95.43131.


##### Get the Top 100 Venues for Houston - Inner Loop (GREENWAY PLAZA,HOUSTON) within a 500-meter radius

In [48]:
# Build the Foursquare "explore" request URL for the first area.
radius = 500  # value sent as the API's `radius` parameter
LIMIT = 100   # value sent as the API's `limit` parameter
explore_template = 'https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&ll={},{}&v={}&radius={}&limit={}'
url2 = explore_template.format(CLIENT_ID, CLIENT_SECRET, neighbourhood_latitude2, neighbourhood_longitude2, VERSION, radius, LIMIT)

# Display the URL with the credentials masked: echoing url2 itself would write
# the API id and secret into the notebook's saved output.
explore_template.format('<CLIENT_ID>', '<CLIENT_SECRET>', neighbourhood_latitude2, neighbourhood_longitude2, VERSION, radius, LIMIT)

'https://api.foursquare.com/v2/venues/explore?client_id=<CLIENT_ID>&client_secret=<CLIENT_SECRET>&ll=29.733181,-95.43131&v=20180605&radius=500&limit=100'

In [None]:
# Send the GET request
# A timeout stops the cell hanging forever on a stalled connection, and
# raise_for_status() reports an HTTP error here rather than as a confusing
# KeyError when the payload is unpacked later.
response2 = requests.get(url2, timeout=30)
response2.raise_for_status()
results2 = response2.json()
results2

In [50]:
# The venue records sit under response -> groups[0] -> items in the payload.
houst_venues = results2['response']['groups'][0]['items']

# Flatten the nested JSON, keeping only the name, categories and coordinates.
filtered_columns2 = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
flattened_venues = json_normalize(houst_venues)
nearby_venues2 = flattened_venues.loc[:, filtered_columns2]

# Replace each row's raw categories entry with what get_category_type returns for it.
nearby_venues2['venue.categories'] = nearby_venues2.apply(get_category_type, axis=1)

# Keep only the last dotted segment of each name, e.g. 'venue.location.lat' -> 'lat'.
nearby_venues2.columns = [name.rsplit(".", 1)[-1] for name in nearby_venues2.columns]

nearby_venues2.head()

Unnamed: 0,name,categories,lat,lng
0,Greenway Coffee & Tea,Coffee Shop,29.731085,-95.431795
1,Eunice,Seafood Restaurant,29.73291,-95.428648
2,burger-chan,Burger Joint,29.731167,-95.431571
3,Pi Pizza Truck,Food Truck,29.735232,-95.429087
4,DoubleTree by Hilton Hotel Houston - Greenway ...,Hotel,29.731058,-95.432844


In [51]:
# Report how many venue rows came back for the first area.
venue_count = len(nearby_venues2)
print('{} venues were returned by Foursquare.'.format(venue_count))

15 venues were returned by Foursquare.


##### Explore neighborhoods in Downtown Houston

In [52]:
# Build the Downtown Houston venues dataframe by passing every area's name
# and coordinates to getNearbyVenues.
houst_downtown_venues = getNearbyVenues(
    names=houst_downtown_data['AreaName'],
    latitudes=houst_downtown_data['Latitude'],
    longitudes=houst_downtown_data['Longitude'],
)

Houston - Inner Loop (GREENWAY PLAZA,HOUSTON,)
Houston - Inner Loop (HOUSTON,)
Houston - Inner Loop (HOUSTON,)
Houston - Inner Loop (HOUSTON,V A HOSPITAL,)
Houston - Inner Loop (HOUSTON,SOUTHSIDE PLACE,WEST UNIVERSITY PLACE,W UNIV PL,)
Houston - Inner Loop (HOUSTON,)
Houston - Inner Loop (ASTRODOME,ASTROWORLD,HOUSTON,)
Houston - Inner Loop (HOUSTON,CLUTCH CITY,)
Houston - Inner Loop (HOUSTON,)
Houston - Inner Loop (HOUSTON,)
Houston - Inner Loop (HOUSTON,)
Houston - Inner Loop (HOUSTON,)
Houston - Inner Loop (HOUSTON,)
Houston - Inner Loop (HOUSTON,)
Houston - Inner Loop (HOUSTON,)
Houston - Inner Loop (HOUSTON,)
Houston - Inner Loop (HOUSTON,)
Houston - Inner Loop (HOUSTON,)
Houston - Inner Loop (HOUSTON,)
Houston - Inner Loop (HOUSTON,)
Houston - Inner Loop (HEIGHTS,HOUSTON,HOUSTON HEIGHTS,)
Houston - Inner Loop (HOUSTON,)


In [53]:
# Check size of the venues dataframe (rows, columns) and preview its first rows
print(houst_downtown_venues.shape)
houst_downtown_venues.head()

(504, 7)


Unnamed: 0,Neighbourhood,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Houston - Inner Loop (GREENWAY PLAZA,HOUSTON,)",29.733181,-95.43131,Greenway Coffee & Tea,29.731085,-95.431795,Coffee Shop
1,"Houston - Inner Loop (GREENWAY PLAZA,HOUSTON,)",29.733181,-95.43131,Eunice,29.73291,-95.428648,Seafood Restaurant
2,"Houston - Inner Loop (GREENWAY PLAZA,HOUSTON,)",29.733181,-95.43131,burger-chan,29.731167,-95.431571,Burger Joint
3,"Houston - Inner Loop (GREENWAY PLAZA,HOUSTON,)",29.733181,-95.43131,Pi Pizza Truck,29.735232,-95.429087,Food Truck
4,"Houston - Inner Loop (GREENWAY PLAZA,HOUSTON,)",29.733181,-95.43131,DoubleTree by Hilton Hotel Houston - Greenway ...,29.731058,-95.432844,Hotel


In [54]:
# Number of venues per neighbourhood (non-null count in each column).
# Rows are grouped by the 'Neighbourhood' text, so areas that share the same
# name are pooled into a single group.
houst_downtown_venues.groupby('Neighbourhood').count()

Unnamed: 0_level_0,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighbourhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Houston - Inner Loop (ASTRODOME,ASTROWORLD,HOUSTON,)",4,4,4,4,4,4
"Houston - Inner Loop (GREENWAY PLAZA,HOUSTON,)",15,15,15,15,15,15
"Houston - Inner Loop (HEIGHTS,HOUSTON,HOUSTON HEIGHTS,)",15,15,15,15,15,15
"Houston - Inner Loop (HOUSTON,)",384,384,384,384,384,384
"Houston - Inner Loop (HOUSTON,CLUTCH CITY,)",69,69,69,69,69,69
"Houston - Inner Loop (HOUSTON,SOUTHSIDE PLACE,WEST UNIVERSITY PLACE,W UNIV PL,)",3,3,3,3,3,3
"Houston - Inner Loop (HOUSTON,V A HOSPITAL,)",14,14,14,14,14,14


In [55]:
# Count the distinct values in the 'Venue Category' column.
distinct_categories = houst_downtown_venues['Venue Category'].unique()
print('There are {} uniques categories.'.format(len(distinct_categories)))

There are 149 uniques categories.


#### Analyze each neighborhood

In [56]:
# One indicator column per venue category, named by the bare category value.
venue_categories = houst_downtown_venues[['Venue Category']]
houst_downtown_onehot = pd.get_dummies(venue_categories, prefix="", prefix_sep="")

# Append the neighbourhood label for each venue row.
houst_downtown_onehot['Neighbourhood'] = houst_downtown_venues['Neighbourhood']

# Rotate the last column (the label just appended) to the front.
*leading_columns, last_column = houst_downtown_onehot.columns
fixed_columns = [last_column] + leading_columns
houst_downtown_onehot = houst_downtown_onehot[fixed_columns]

houst_downtown_onehot.head()

Unnamed: 0,Neighbourhood,ATM,American Restaurant,Art Gallery,Art Museum,Asian Restaurant,Athletics & Sports,Automotive Shop,BBQ Joint,Bakery,Bank,Bar,Basketball Court,Basketball Stadium,Beer Bar,Beer Garden,Big Box Store,Bistro,Bookstore,Boutique,Bowling Alley,Breakfast Spot,Brewery,Bridal Shop,Bubble Tea Shop,Burger Joint,Bus Station,Café,Cajun / Creole Restaurant,Caribbean Restaurant,Chinese Restaurant,Clothing Store,Cocktail Bar,Coffee Shop,College Technology Building,Comic Shop,Concert Hall,Convenience Store,Cosmetics Shop,Creperie,Cupcake Shop,Deli / Bodega,Department Store,Dessert Shop,Discount Store,Doctor's Office,Donut Shop,Dumpling Restaurant,Electronics Store,Eye Doctor,Fast Food Restaurant,Food,Food & Drink Shop,Food Court,Food Truck,French Restaurant,Frozen Yogurt Shop,Furniture / Home Store,Gas Station,Gastropub,Gift Shop,Gourmet Shop,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Hawaiian Restaurant,Health & Beauty Service,Health Food Store,Hobby Shop,Hostel,Hot Dog Joint,Hotel,Hotel Bar,Hunan Restaurant,Ice Cream Shop,Indian Restaurant,Intersection,Irish Pub,Italian Restaurant,Japanese Restaurant,Jewelry Store,Juice Bar,Kids Store,Kitchen Supply Store,Library,Lingerie Store,Liquor Store,Lounge,Market,Massage Studio,Mediterranean Restaurant,Men's Store,Mexican Restaurant,Middle Eastern Restaurant,Mobile Phone Shop,Music Store,Music Venue,New American Restaurant,Nightclub,Noodle House,Optical Shop,Other Repair Shop,Outdoors & Recreation,Paper / Office Supplies Store,Park,Pet Store,Pharmacy,Pizza Place,Pool,Print Shop,Pub,Record Shop,Recreation Center,Rental Car Location,Restaurant,Salad Place,Salon / Barbershop,Sandwich Place,Seafood Restaurant,Shipping Store,Shoe Store,Shopping Mall,Smoke Shop,Smoothie Shop,Snack Place,Southern / Soul Food Restaurant,Spa,Spanish Restaurant,Speakeasy,Sporting Goods Shop,Sports Bar,Steakhouse,Supplement Shop,Sushi Restaurant,Szechuan Restaurant,Taco Place,Tanning Salon,Tea Room,Thai Restaurant,Thrift / Vintage 
Store,Trail,Vegetarian / Vegan Restaurant,Video Store,Vietnamese Restaurant,Whisky Bar,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,"Houston - Inner Loop (GREENWAY PLAZA,HOUSTON,)",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,"Houston - Inner Loop (GREENWAY PLAZA,HOUSTON,)",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,"Houston - Inner Loop (GREENWAY PLAZA,HOUSTON,)",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,"Houston - Inner Loop (GREENWAY PLAZA,HOUSTON,)",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,"Houston - Inner Loop (GREENWAY PLAZA,HOUSTON,)",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [57]:
# (rows, columns): one row per venue; the neighbourhood label plus one indicator column per category
houst_downtown_onehot.shape

(504, 150)

##### Group each neighborhood

In [58]:
# Average the indicator columns within each neighbourhood, then turn the
# group label back into an ordinary column.
category_means = houst_downtown_onehot.groupby('Neighbourhood').mean()
houst_downtown_grouped = category_means.reset_index()
houst_downtown_grouped

Unnamed: 0,Neighbourhood,ATM,American Restaurant,Art Gallery,Art Museum,Asian Restaurant,Athletics & Sports,Automotive Shop,BBQ Joint,Bakery,Bank,Bar,Basketball Court,Basketball Stadium,Beer Bar,Beer Garden,Big Box Store,Bistro,Bookstore,Boutique,Bowling Alley,Breakfast Spot,Brewery,Bridal Shop,Bubble Tea Shop,Burger Joint,Bus Station,Café,Cajun / Creole Restaurant,Caribbean Restaurant,Chinese Restaurant,Clothing Store,Cocktail Bar,Coffee Shop,College Technology Building,Comic Shop,Concert Hall,Convenience Store,Cosmetics Shop,Creperie,Cupcake Shop,Deli / Bodega,Department Store,Dessert Shop,Discount Store,Doctor's Office,Donut Shop,Dumpling Restaurant,Electronics Store,Eye Doctor,Fast Food Restaurant,Food,Food & Drink Shop,Food Court,Food Truck,French Restaurant,Frozen Yogurt Shop,Furniture / Home Store,Gas Station,Gastropub,Gift Shop,Gourmet Shop,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Hawaiian Restaurant,Health & Beauty Service,Health Food Store,Hobby Shop,Hostel,Hot Dog Joint,Hotel,Hotel Bar,Hunan Restaurant,Ice Cream Shop,Indian Restaurant,Intersection,Irish Pub,Italian Restaurant,Japanese Restaurant,Jewelry Store,Juice Bar,Kids Store,Kitchen Supply Store,Library,Lingerie Store,Liquor Store,Lounge,Market,Massage Studio,Mediterranean Restaurant,Men's Store,Mexican Restaurant,Middle Eastern Restaurant,Mobile Phone Shop,Music Store,Music Venue,New American Restaurant,Nightclub,Noodle House,Optical Shop,Other Repair Shop,Outdoors & Recreation,Paper / Office Supplies Store,Park,Pet Store,Pharmacy,Pizza Place,Pool,Print Shop,Pub,Record Shop,Recreation Center,Rental Car Location,Restaurant,Salad Place,Salon / Barbershop,Sandwich Place,Seafood Restaurant,Shipping Store,Shoe Store,Shopping Mall,Smoke Shop,Smoothie Shop,Snack Place,Southern / Soul Food Restaurant,Spa,Spanish Restaurant,Speakeasy,Sporting Goods Shop,Sports Bar,Steakhouse,Supplement Shop,Sushi Restaurant,Szechuan Restaurant,Taco Place,Tanning Salon,Tea Room,Thai Restaurant,Thrift / Vintage 
Store,Trail,Vegetarian / Vegan Restaurant,Video Store,Vietnamese Restaurant,Whisky Bar,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,"Houston - Inner Loop (ASTRODOME,ASTROWORLD,HOU...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,"Houston - Inner Loop (GREENWAY PLAZA,HOUSTON,)",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.133333,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.0,0.066667,0.0,0.0,0.0,0.066667,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.133333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Houston - Inner Loop (HEIGHTS,HOUSTON,HOUSTON ...",0.0,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.0,0.0,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.0,0.0,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.0,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.0,0.0,0.0,0.0,0.066667
3,"Houston - Inner Loop (HOUSTON,)",0.0,0.007812,0.002604,0.002604,0.005208,0.002604,0.002604,0.002604,0.015625,0.020833,0.039062,0.002604,0.002604,0.005208,0.002604,0.002604,0.002604,0.002604,0.005208,0.002604,0.002604,0.002604,0.002604,0.002604,0.023438,0.002604,0.010417,0.010417,0.002604,0.005208,0.007812,0.002604,0.03125,0.002604,0.005208,0.002604,0.007812,0.018229,0.002604,0.002604,0.007812,0.002604,0.002604,0.005208,0.002604,0.002604,0.005208,0.005208,0.002604,0.013021,0.002604,0.002604,0.002604,0.005208,0.002604,0.002604,0.023438,0.007812,0.005208,0.010417,0.002604,0.002604,0.010417,0.015625,0.010417,0.002604,0.002604,0.005208,0.002604,0.002604,0.002604,0.039062,0.005208,0.002604,0.002604,0.010417,0.002604,0.002604,0.023438,0.007812,0.005208,0.005208,0.002604,0.002604,0.0,0.005208,0.007812,0.013021,0.002604,0.002604,0.018229,0.005208,0.039062,0.002604,0.005208,0.005208,0.002604,0.018229,0.007812,0.005208,0.002604,0.002604,0.0,0.002604,0.010417,0.002604,0.010417,0.028646,0.002604,0.002604,0.010417,0.002604,0.0,0.007812,0.002604,0.002604,0.005208,0.033854,0.013021,0.007812,0.002604,0.005208,0.002604,0.005208,0.007812,0.0,0.005208,0.005208,0.002604,0.005208,0.007812,0.018229,0.0,0.010417,0.002604,0.015625,0.002604,0.002604,0.002604,0.002604,0.002604,0.002604,0.005208,0.0,0.002604,0.002604,0.005208,0.013021,0.002604
4,"Houston - Inner Loop (HOUSTON,CLUTCH CITY,)",0.0,0.014493,0.0,0.0,0.0,0.0,0.0,0.014493,0.028986,0.014493,0.014493,0.0,0.0,0.014493,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.043478,0.0,0.014493,0.014493,0.0,0.014493,0.0,0.0,0.057971,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014493,0.0,0.0,0.0,0.014493,0.0,0.014493,0.0,0.0,0.028986,0.0,0.0,0.014493,0.014493,0.014493,0.0,0.0,0.0,0.014493,0.014493,0.0,0.0,0.0,0.0,0.014493,0.0,0.0,0.0,0.0,0.0,0.0,0.101449,0.0,0.0,0.0,0.0,0.0,0.0,0.014493,0.014493,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.028986,0.014493,0.0,0.014493,0.0,0.028986,0.0,0.0,0.0,0.014493,0.0,0.0,0.0,0.014493,0.0,0.0,0.0,0.0,0.0,0.0,0.014493,0.0,0.0,0.0,0.0,0.0,0.0,0.014493,0.014493,0.0,0.072464,0.043478,0.0,0.0,0.028986,0.0,0.0,0.0,0.014493,0.014493,0.014493,0.0,0.0,0.0,0.043478,0.014493,0.0,0.0,0.014493,0.0,0.0,0.028986,0.0,0.0,0.0,0.0,0.0,0.014493,0.0,0.0,0.0,0.0
5,"Houston - Inner Loop (HOUSTON,SOUTHSIDE PLACE,...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,"Houston - Inner Loop (HOUSTON,V A HOSPITAL,)",0.071429,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.071429,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.071429,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.071429,0.071429,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.071429,0.0,0.0,0.0,0.0,0.0,0.071429,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.071429,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [59]:
# Confirm new size: one row per distinct 'Neighbourhood' value after grouping
houst_downtown_grouped.shape

(7, 150)

#### Top 5 Venues in each neighborhood

In [60]:
num_top_venues = 5

# Print the highest-frequency venue categories for each neighbourhood.
for hood in houst_downtown_grouped['Neighbourhood']:
    print("----"+hood+"----")
    hood_rows = houst_downtown_grouped[houst_downtown_grouped['Neighbourhood'] == hood]
    # Transpose so each column becomes a row of (column name, value).
    freq_table = hood_rows.T.reset_index()
    freq_table.columns = ['venue','freq']
    # Skip the first row (the 'Neighbourhood' entry), then convert, round and rank.
    ranked = (
        freq_table.iloc[1:]
        .assign(freq=lambda frame: frame['freq'].astype(float))
        .round({'freq': 2})
        .sort_values('freq', ascending=False)
        .reset_index(drop=True)
    )
    print(ranked.head(num_top_venues))
    print('\n')

----Houston - Inner Loop (ASTRODOME,ASTROWORLD,HOUSTON,)----
               venue  freq
0  Recreation Center  0.25
1           Pharmacy  0.25
2               Park  0.25
3                Gym  0.25
4       Optical Shop  0.00


----Houston - Inner Loop (GREENWAY PLAZA,HOUSTON,)----
                venue  freq
0  Seafood Restaurant  0.13
1                Bank  0.13
2        Burger Joint  0.07
3  Mexican Restaurant  0.07
4           Nightclub  0.07


----Houston - Inner Loop (HEIGHTS,HOUSTON,HOUSTON HEIGHTS,)----
                  venue  freq
0           Yoga Studio  0.07
1           Beer Garden  0.07
2     Indian Restaurant  0.07
3    Mexican Restaurant  0.07
4  Gym / Fitness Center  0.07


----Houston - Inner Loop (HOUSTON,)----
                venue  freq
0               Hotel  0.04
1                 Bar  0.04
2  Mexican Restaurant  0.04
3         Pizza Place  0.03
4         Coffee Shop  0.03


----Houston - Inner Loop (HOUSTON,CLUTCH CITY,)----
                venue  freq
0             

#### New DataFrame with Top 10 venues in each Neighborhood

In [61]:
num_top_venues = 10

# create columns according to number of top venues
# The ordinal suffix is computed explicitly rather than by catching an
# IndexError with a bare `except:`, which hid unrelated errors and produced
# wrong labels past 20 (e.g. "21th").
columns = ['Neighbourhood']
for rank in range(1, num_top_venues + 1):
    if 10 <= rank % 100 <= 20:
        suffix = 'th'  # 11th, 12th, 13th are irregular
    else:
        suffix = {1: 'st', 2: 'nd', 3: 'rd'}.get(rank % 10, 'th')
    columns.append('{}{} Most Common Venue'.format(rank, suffix))

# create a new dataframe with one row per grouped neighbourhood
neighbourhoods_venues_sorted2 = pd.DataFrame(columns=columns)
neighbourhoods_venues_sorted2['Neighbourhood'] = houst_downtown_grouped['Neighbourhood']

# fill the venue columns of each row from return_most_common_venues
for ind in np.arange(houst_downtown_grouped.shape[0]):
    neighbourhoods_venues_sorted2.iloc[ind, 1:] = return_most_common_venues(houst_downtown_grouped.iloc[ind, :], num_top_venues)

neighbourhoods_venues_sorted2.head()

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Houston - Inner Loop (ASTRODOME,ASTROWORLD,HOU...",Gym,Recreation Center,Pharmacy,Park,Doctor's Office,Food,Fast Food Restaurant,Eye Doctor,Electronics Store,Dumpling Restaurant
1,"Houston - Inner Loop (GREENWAY PLAZA,HOUSTON,)",Bank,Seafood Restaurant,Bar,Burger Joint,Food Truck,Café,Sandwich Place,Nightclub,Clothing Store,Mexican Restaurant
2,"Houston - Inner Loop (HEIGHTS,HOUSTON,HOUSTON ...",Yoga Studio,Italian Restaurant,Mexican Restaurant,Breakfast Spot,Fast Food Restaurant,Beer Garden,Sushi Restaurant,Gourmet Shop,Gym / Fitness Center,Donut Shop
3,"Houston - Inner Loop (HOUSTON,)",Bar,Mexican Restaurant,Hotel,Sandwich Place,Coffee Shop,Pizza Place,Italian Restaurant,Furniture / Home Store,Burger Joint,Bank
4,"Houston - Inner Loop (HOUSTON,CLUTCH CITY,)",Hotel,Sandwich Place,Coffee Shop,Steakhouse,Seafood Restaurant,Burger Joint,Bakery,Shopping Mall,Lounge,Fast Food Restaurant


#### Cluster Neighborhoods

###### Since the number of grouped neighborhoods is 7, the number of clusters will be limited to 3

In [62]:
# set number of clusters
kclusters = 3

# Feature matrix: every column except the neighbourhood label.
# `axis` is passed by keyword because pandas 2.x rejects the positional form drop(label, 1).
houst_downtown_grouped_clustering = houst_downtown_grouped.drop('Neighbourhood', axis=1)

# run k-means clustering
# n_init is pinned to 10 (the long-standing scikit-learn default) so the labels
# do not change when a newer scikit-learn switches its default.
kmeans = KMeans(n_clusters=kclusters, random_state=0, n_init=10).fit(houst_downtown_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([2, 0, 0, 0, 0, 1, 0], dtype=int32)

In [63]:
# add clustering labels
# DataFrame.insert raises ValueError if the column already exists, so drop any
# label column left by a previous run; this keeps the cell safe to re-execute.
if 'Cluster Labels' in neighbourhoods_venues_sorted2.columns:
    neighbourhoods_venues_sorted2 = neighbourhoods_venues_sorted2.drop('Cluster Labels', axis=1)
neighbourhoods_venues_sorted2.insert(0, 'Cluster Labels', kmeans.labels_)

# add the cluster label and top venues to every Downtown Houston row,
# matching its AreaName against the 'Neighbourhood' values
houst_downtown_merged = houst_downtown_data.join(neighbourhoods_venues_sorted2.set_index('Neighbourhood'), on='AreaName')

houst_downtown_merged.head() # check the last columns!

Unnamed: 0,Zip,City,State,Latitude,Longitude,AreaName,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,77046,Houston,TX,29.733181,-95.43131,"Houston - Inner Loop (GREENWAY PLAZA,HOUSTON,)",0,Bank,Seafood Restaurant,Bar,Burger Joint,Food Truck,Café,Sandwich Place,Nightclub,Clothing Store,Mexican Restaurant
1,77003,Houston,TX,29.749278,-95.34741,"Houston - Inner Loop (HOUSTON,)",0,Bar,Mexican Restaurant,Hotel,Sandwich Place,Coffee Shop,Pizza Place,Italian Restaurant,Furniture / Home Store,Burger Joint,Bank
2,77004,Houston,TX,29.728779,-95.3657,"Houston - Inner Loop (HOUSTON,)",0,Bar,Mexican Restaurant,Hotel,Sandwich Place,Coffee Shop,Pizza Place,Italian Restaurant,Furniture / Home Store,Burger Joint,Bank
3,77030,Houston,TX,29.704584,-95.40466,"Houston - Inner Loop (HOUSTON,V A HOSPITAL,)",0,Hotel,Bank,Fast Food Restaurant,Gym / Fitness Center,Gym,Library,Mediterranean Restaurant,Deli / Bodega,Pizza Place,Café
4,77005,Houston,TX,29.717529,-95.42821,"Houston - Inner Loop (HOUSTON,SOUTHSIDE PLACE,...",1,Bakery,Outdoors & Recreation,Shopping Mall,Donut Shop,Food & Drink Shop,Food,Fast Food Restaurant,Eye Doctor,Electronics Store,Dumpling Restaurant


#### Visualize the clusters

In [64]:
# create map (centred on the current `latitude` / `longitude` globals)
map_clusters2 = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters: one evenly spaced rainbow colour per cluster
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(rgba) for rgba in colors_array]
unclustered_color = '#808080'  # grey, used when a row carries no cluster label

# add markers to the map
for lat, lon, poi, cluster in zip(houst_downtown_merged['Latitude'], houst_downtown_merged['Longitude'], houst_downtown_merged['AreaName'], houst_downtown_merged['Cluster Labels']):
    if pd.isna(cluster):
        # The join left this area without a label: still plot it, in grey.
        marker_color = unclustered_color
    else:
        # A single NaN turns the whole label column into floats, and a float
        # cannot index a list, so cast back to int first. The "- 1" offset is
        # kept so colours match earlier renders: label 0 wraps to the last colour.
        cluster = int(cluster)
        marker_color = rainbow[cluster - 1]
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=marker_color,
        fill=True,
        fill_color=marker_color,
        fill_opacity=0.7).add_to(map_clusters2)

map_clusters2

## Part 3 - Clustering Analysis / Result

#### Examine Individual Clusters

#### Chicago

##### Cluster 1

In [65]:
# Rows labelled 0, showing column 1 plus every column from position 5 onward.
in_cluster = chi_downtown_merged['Cluster Labels'] == 0
shown_columns = chi_downtown_merged.columns[[1] + list(range(5, chi_downtown_merged.shape[1]))]
chi_downtown_merged.loc[in_cluster, shown_columns]

Unnamed: 0,City,AreaName,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Chicago,"Loop, Near West Side",0,Sandwich Place,Coffee Shop,New American Restaurant,Mediterranean Restaurant,Mexican Restaurant,Vegetarian / Vegan Restaurant,Donut Shop,Italian Restaurant,Grocery Store,BBQ Joint
1,Chicago,"Loop, Near West Side, Near South Side",0,Greek Restaurant,Sandwich Place,Café,Coffee Shop,Bus Station,Pizza Place,Dance Studio,Bar,Sports Bar,Shipping Store
2,Chicago,Loop,0,Hotel,Coffee Shop,Sandwich Place,Theater,Italian Restaurant,Plaza,Museum,American Restaurant,Bakery,Public Art
4,Chicago,Loop,0,Hotel,Coffee Shop,Sandwich Place,Theater,Italian Restaurant,Plaza,Museum,American Restaurant,Bakery,Public Art
5,Chicago,Loop,0,Hotel,Coffee Shop,Sandwich Place,Theater,Italian Restaurant,Plaza,Museum,American Restaurant,Bakery,Public Art
6,Chicago,Loop,0,Hotel,Coffee Shop,Sandwich Place,Theater,Italian Restaurant,Plaza,Museum,American Restaurant,Bakery,Public Art
7,Chicago,"Loop, Near West Side",0,Sandwich Place,Coffee Shop,New American Restaurant,Mediterranean Restaurant,Mexican Restaurant,Vegetarian / Vegan Restaurant,Donut Shop,Italian Restaurant,Grocery Store,BBQ Joint


##### This cluster is mainly characterized by Hotels, Coffee and Sandwich shops as is expected of a business district. This cluster can be named "The Business District".

##### Cluster 2

In [66]:
# Rows labelled 1, showing column 1 plus every column from position 5 onward.
in_cluster = chi_downtown_merged['Cluster Labels'] == 1
shown_columns = chi_downtown_merged.columns[[1] + list(range(5, chi_downtown_merged.shape[1]))]
chi_downtown_merged.loc[in_cluster, shown_columns]

Unnamed: 0,City,AreaName,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
3,Chicago,"Loop, Near South Side",1,Football Stadium,Historic Site,Park,Sporting Goods Shop,Sports Club,Harbor / Marina,Donut Shop,Museum,Parking,Grocery Store


##### This cluster is mainly characterized by sports-related and historic venues. We can assume that a lot of leisure activities go on around this area and name it "The Home of Sports".

#### Houston

##### Cluster 1

In [68]:
# Rows labelled 0, showing column 1 plus every column from position 5 onward.
in_cluster = houst_downtown_merged['Cluster Labels'] == 0
shown_columns = houst_downtown_merged.columns[[1] + list(range(5, houst_downtown_merged.shape[1]))]
houst_downtown_merged.loc[in_cluster, shown_columns]

Unnamed: 0,City,AreaName,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Houston,"Houston - Inner Loop (GREENWAY PLAZA,HOUSTON,)",0,Bank,Seafood Restaurant,Bar,Burger Joint,Food Truck,Café,Sandwich Place,Nightclub,Clothing Store,Mexican Restaurant
1,Houston,"Houston - Inner Loop (HOUSTON,)",0,Bar,Mexican Restaurant,Hotel,Sandwich Place,Coffee Shop,Pizza Place,Italian Restaurant,Furniture / Home Store,Burger Joint,Bank
2,Houston,"Houston - Inner Loop (HOUSTON,)",0,Bar,Mexican Restaurant,Hotel,Sandwich Place,Coffee Shop,Pizza Place,Italian Restaurant,Furniture / Home Store,Burger Joint,Bank
3,Houston,"Houston - Inner Loop (HOUSTON,V A HOSPITAL,)",0,Hotel,Bank,Fast Food Restaurant,Gym / Fitness Center,Gym,Library,Mediterranean Restaurant,Deli / Bodega,Pizza Place,Café
5,Houston,"Houston - Inner Loop (HOUSTON,)",0,Bar,Mexican Restaurant,Hotel,Sandwich Place,Coffee Shop,Pizza Place,Italian Restaurant,Furniture / Home Store,Burger Joint,Bank
7,Houston,"Houston - Inner Loop (HOUSTON,CLUTCH CITY,)",0,Hotel,Sandwich Place,Coffee Shop,Steakhouse,Seafood Restaurant,Burger Joint,Bakery,Shopping Mall,Lounge,Fast Food Restaurant
8,Houston,"Houston - Inner Loop (HOUSTON,)",0,Bar,Mexican Restaurant,Hotel,Sandwich Place,Coffee Shop,Pizza Place,Italian Restaurant,Furniture / Home Store,Burger Joint,Bank
9,Houston,"Houston - Inner Loop (HOUSTON,)",0,Bar,Mexican Restaurant,Hotel,Sandwich Place,Coffee Shop,Pizza Place,Italian Restaurant,Furniture / Home Store,Burger Joint,Bank
10,Houston,"Houston - Inner Loop (HOUSTON,)",0,Bar,Mexican Restaurant,Hotel,Sandwich Place,Coffee Shop,Pizza Place,Italian Restaurant,Furniture / Home Store,Burger Joint,Bank
11,Houston,"Houston - Inner Loop (HOUSTON,)",0,Bar,Mexican Restaurant,Hotel,Sandwich Place,Coffee Shop,Pizza Place,Italian Restaurant,Furniture / Home Store,Burger Joint,Bank


##### This cluster is mainly characterized by Restaurants, Hotels, Coffee and Sandwich shops as is expected of a business district. This cluster can be named "The Business District".

##### Cluster 2

In [69]:
# Rows labelled 1, showing column 1 plus every column from position 5 onward.
in_cluster = houst_downtown_merged['Cluster Labels'] == 1
shown_columns = houst_downtown_merged.columns[[1] + list(range(5, houst_downtown_merged.shape[1]))]
houst_downtown_merged.loc[in_cluster, shown_columns]

Unnamed: 0,City,AreaName,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
4,Houston,"Houston - Inner Loop (HOUSTON,SOUTHSIDE PLACE,...",1,Bakery,Outdoors & Recreation,Shopping Mall,Donut Shop,Food & Drink Shop,Food,Fast Food Restaurant,Eye Doctor,Electronics Store,Dumpling Restaurant


##### This cluster has a combination of venues that provide leisure and fun activities and will be named "The Fun Spot".

##### Cluster 3

In [70]:
# Rows labelled 2, showing column 1 plus every column from position 5 onward.
in_cluster = houst_downtown_merged['Cluster Labels'] == 2
shown_columns = houst_downtown_merged.columns[[1] + list(range(5, houst_downtown_merged.shape[1]))]
houst_downtown_merged.loc[in_cluster, shown_columns]

Unnamed: 0,City,AreaName,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
6,Houston,"Houston - Inner Loop (ASTRODOME,ASTROWORLD,HOU...",2,Gym,Recreation Center,Pharmacy,Park,Doctor's Office,Food,Fast Food Restaurant,Eye Doctor,Electronics Store,Dumpling Restaurant


##### This cluster seems to have a lot of health-related venues and can be named "The Health Corner".

## Part 4 - Conclusion / Summary

##### Following the segmentation and clustering of the neighborhoods in both Downtown Chicago and Downtown Houston, we can observe that there are a lot of similarities between the two cities. As expected, both cities' downtown areas represent the business district.