# Final Project - Code

## Neighborhood Data from Wikipedia
https://en.wikipedia.org/wiki/Neighborhoods_in_Baton_Rouge,_Louisiana

### Scraping the Web Data

In [13]:
import requests
from bs4 import BeautifulSoup

In [14]:
url='https://en.wikipedia.org/wiki/Neighborhoods_in_Baton_Rouge,_Louisiana'
page=requests.get(url).text
soup = BeautifulSoup(page, 'html.parser')

In [15]:
n_list = soup.find('ul')

In [16]:
n_list

<ul><li><b>Downtown</b> - Baton Rouge's central business district.</li>
<li><b><a href="/wiki/Spanish_Town,_Baton_Rouge,_Louisiana" title="Spanish Town, Baton Rouge, Louisiana">Spanish Town</a></b> - Located between the <a href="/wiki/Mississippi_River" title="Mississippi River">Mississippi River</a> and <a class="mw-redirect" href="/wiki/I-110_(Louisiana)" title="I-110 (Louisiana)">I-110</a>, it is one of the city's more diverse neighborhoods and home to the State Capitol and the city's largest <a href="/wiki/Mardi_Gras" title="Mardi Gras">Mardi Gras</a> Parade.</li>
<li><b><a href="/wiki/Beauregard_Town" title="Beauregard Town">Beauregard Town</a></b> - A historic district between the downtown area and <a href="/wiki/Old_South_Baton_Rouge" title="Old South Baton Rouge">Old South Baton Rouge</a>. Many of the homes have been renovated and are used as law offices.</li>
<li><b><a class="mw-redirect" href="/wiki/Garden_District,_Baton_Rouge" title="Garden District, Baton Rouge">Garden Dis

In [17]:
neighborhoods=[]
for row in n_list.findAll('b'):
    cell = {}
    cell['Neighborhood'] = row.text
    neighborhoods.append(cell)

print(neighborhoods)

[{'Neighborhood': 'Downtown'}, {'Neighborhood': 'Spanish Town'}, {'Neighborhood': 'Beauregard Town'}, {'Neighborhood': 'Garden District'}, {'Neighborhood': 'Old South Baton Rouge'}, {'Neighborhood': 'LSU/Lakeshore'}, {'Neighborhood': 'Mid-City'}, {'Neighborhood': 'McDonald Land'}, {'Neighborhood': 'Brookstown'}, {'Neighborhood': 'Melrose Place'}, {'Neighborhood': 'Melrose East'}, {'Neighborhood': 'Inniswold'}, {'Neighborhood': 'Goodwood'}, {'Neighborhood': 'Hundred Oaks Addition'}, {'Neighborhood': 'Southdowns'}, {'Neighborhood': 'Gardere'}, {'Neighborhood': 'Westminster'}, {'Neighborhood': 'Oak Hills Place'}, {'Neighborhood': 'Broadmoor'}, {'Neighborhood': 'Scotlandville'}, {'Neighborhood': 'Shenandoah'}, {'Neighborhood': 'Sherwood Forest'}, {'Neighborhood': 'Brownfields'}, {'Neighborhood': 'Zion City'}, {'Neighborhood': 'Monticello'}, {'Neighborhood': 'Park Forest'}, {'Neighborhood': 'Glen Oaks'}, {'Neighborhood': 'University Club'}, {'Neighborhood': 'Centurion Place'}, {'Neighborhoo

### Creating a dataframe from scraped data

In [18]:
import pandas as pd
import numpy as np

In [19]:
df=pd.DataFrame(neighborhoods)
df.shape

(30, 1)

In [20]:
df.head()

Unnamed: 0,Neighborhood
0,Downtown
1,Spanish Town
2,Beauregard Town
3,Garden District
4,Old South Baton Rouge


In [21]:
#check that all text is legible and no issues (i.e. lack of spacing, etc.)
n_list = df['Neighborhood'].unique()
n_list

array(['Downtown', 'Spanish Town', 'Beauregard Town', 'Garden District',
       'Old South Baton Rouge', 'LSU/Lakeshore', 'Mid-City',
       'McDonald Land', 'Brookstown', 'Melrose Place', 'Melrose East',
       'Inniswold', 'Goodwood', 'Hundred Oaks Addition', 'Southdowns',
       'Gardere', 'Westminster', 'Oak Hills Place', 'Broadmoor',
       'Scotlandville', 'Shenandoah', 'Sherwood Forest', 'Brownfields',
       'Zion City', 'Monticello', 'Park Forest', 'Glen Oaks',
       'University Club', 'Centurion Place', 'Northdale'], dtype=object)

### Manually Gathering Latitude & Longitude (Geocoder did not work)

In [22]:
# !pip install geocoder #not used - was not gathering latitude/longitude

In [23]:
new_df = pd.read_csv('baton-rouge-neighborhoods.csv')
new_df=new_df.drop(columns='Unnamed: 3')
new_df.head()

Unnamed: 0,Neighborhood,Latitude,Longitude
0,Downtown,30.448365,-91.18606
1,Spanish Town,30.4548,-91.1835
2,Beauregard Town,30.4452,-91.1876
3,Garden District,29.9292,-90.0829
4,Old South Baton Rouge,30.350581,-91.087355


In [24]:
df = pd.merge(df, new_df, on='Neighborhood')
df

Unnamed: 0,Neighborhood,Latitude,Longitude
0,Downtown,30.448365,-91.18606
1,Spanish Town,30.4548,-91.1835
2,Beauregard Town,30.4452,-91.1876
3,Garden District,29.9292,-90.0829
4,Old South Baton Rouge,30.350581,-91.087355
5,LSU/Lakeshore,30.4116,-91.1674
6,Mid-City,30.4491,-91.1531
7,McDonald Land,30.2336,-91.0923
8,Brookstown,30.4935,-91.1284
9,Melrose Place,30.4552,-91.1348


In [25]:
df.isna().sum()

Neighborhood    0
Latitude        1
Longitude       1
dtype: int64

In [26]:
### Drop NA value
df.dropna(inplace=True)
df.shape

(29, 3)

In [27]:
df.isna().sum()

Neighborhood    0
Latitude        0
Longitude       0
dtype: int64

In [28]:
from geopy.geocoders import Nominatim

geolocator = Nominatim(user_agent="br_explorer")
location = geolocator.geocode("Baton Rouge, Louisiana")
print(location.address)
print((location.latitude, location.longitude))

Baton Rouge, East Baton Rouge Parish, Louisiana, United States
(30.4459596, -91.18738)


In [29]:
# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

In [30]:
!pip install folium
import folium # map rendering library



In [31]:
map_br = folium.Map(location=[30.4459596, -91.18738], zoom_start=10)

In [32]:
# add markers to map
for lat, lng, neighborhood in zip(df['Latitude'], df['Longitude'], df['Neighborhood']):
    label = '{}'.format(neighborhood)
    label = folium.Popup(label, parse_html=True)
    
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_br)  

map_br

### Get Venue Details from Foursquare

In [33]:
CLIENT_ID = 'NX0423ISYO5SDHOY24VMVHY0U01XWOMK34EKRADPMFDMVUX4' # your Foursquare ID
CLIENT_SECRET = 'V33FG3BRCIK3SLP2KFISD0ACOQIAHVVTZEGAPBTLEA2EOROM' # your Foursquare Secret
VERSION = '20210705' # Foursquare API version
LIMIT = 100 #Default value

In [34]:
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [35]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        # print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [36]:
br_venues = getNearbyVenues(names=df['Neighborhood'],
                                   latitudes=df['Latitude'],
                                   longitudes=df['Longitude']
                                  )
print(br_venues.shape)

(200, 7)


In [37]:
br_venues.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Downtown,30.448365,-91.18606,"Watermark Baton Rouge, Autograph Collection",30.448089,-91.187936,Hotel
1,Downtown,30.448365,-91.18606,Jolie Pearl Oyster Bar,30.447168,-91.187836,Seafood Restaurant
2,Downtown,30.448365,-91.18606,Stroubes Seafood and Steaks,30.447414,-91.188315,Steakhouse
3,Downtown,30.448365,-91.18606,Tsunami Sushi,30.447641,-91.189034,Sushi Restaurant
4,Downtown,30.448365,-91.18606,Shaw Center,30.447657,-91.189002,Performing Arts Venue


In [38]:
br_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Beauregard Town,32,32,32,32,32,32
Broadmoor,2,2,2,2,2,2
Brookstown,2,2,2,2,2,2
Brownfields,2,2,2,2,2,2
Centurion Place,2,2,2,2,2,2
Downtown,42,42,42,42,42,42
Garden District,41,41,41,41,41,41
Gardere,4,4,4,4,4,4
Glen Oaks,4,4,4,4,4,4
Goodwood,4,4,4,4,4,4


In [39]:
print('There are {} unique categories.'.format(len(br_venues['Venue Category'].unique())))

There are 93 unique categories.


### One-hot-encoding the Venue category

In [40]:
# one hot encoding
br_onehot = pd.get_dummies(br_venues[['Venue Category']], prefix="", prefix_sep="")

# add postal code column back to dataframe
br_onehot['Neighborhood'] = br_venues['Neighborhood'] 

# move postal code column to the first column
fixed_columns = [br_onehot.columns[-1]] + list(br_onehot.columns[:-1])
br_onehot = br_onehot[fixed_columns]

br_onehot.shape

(200, 93)

In [41]:
br_grouped = br_onehot.groupby('Neighborhood').mean().reset_index()
br_grouped.head()

Unnamed: 0,Neighborhood,Wine Bar,Accessories Store,Airport Terminal,American Restaurant,Antique Shop,Art Gallery,Art Museum,BBQ Joint,Bakery,Bank,Bar,Bed & Breakfast,Beer Store,Bookstore,Bowling Alley,Breakfast Spot,Burrito Place,Cafeteria,Café,Cajun / Creole Restaurant,Casino,Chinese Restaurant,Clothing Store,Coffee Shop,College Gym,Construction & Landscaping,Convenience Store,Dance Studio,Discount Store,Distillery,Donut Shop,Farmers Market,Fishing Store,Flower Shop,Food,French Restaurant,Fried Chicken Joint,Furniture / Home Store,Garden,...,Jewelry Store,Juice Bar,Kids Store,Lake,Light Rail Station,Lounge,Market,Mexican Restaurant,Middle Eastern Restaurant,Museum,Music Venue,New American Restaurant,Nightclub,Other Repair Shop,Park,Performing Arts Venue,Pharmacy,Pizza Place,Plaza,Pool,Pub,Public Art,Rental Car Location,Restaurant,Salad Place,Salon / Barbershop,Sandwich Place,Scenic Lookout,Seafood Restaurant,Shipping Store,Smoke Shop,Southern / Soul Food Restaurant,Spa,Sporting Goods Shop,Sports Bar,Steakhouse,Street Art,Sushi Restaurant,Theater,Trail
0,Beauregard Town,0.0,0.0,0.0,0.125,0.0,0.0,0.03125,0.0,0.0,0.0,0.03125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03125,0.0,0.0,0.0,0.09375,0.0,0.0,0.0,0.0,0.0,0.03125,0.0,0.0,0.0,0.0,0.0,0.0,0.03125,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0625,0.0,0.03125,0.0,0.0,0.0625,0.0,0.03125,0.03125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03125,0.0625,0.0,0.0,0.0,0.0,0.03125,0.0,0.03125,0.0,0.03125,0.0,0.0
1,Broadmoor,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Brookstown,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.5,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Brownfields,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Centurion Place,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [42]:
br_grouped.shape

(25, 93)

### Exploring the Most Common Venue Types by Neighborhood

In [43]:
num_top_venues = 5

for n in br_grouped['Neighborhood']:
    print("----"+n+"----")
    temp = br_grouped[br_grouped['Neighborhood'] == n].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Beauregard Town----
                 venue  freq
0  American Restaurant  0.12
1          Coffee Shop  0.09
2       History Museum  0.06
3   Seafood Restaurant  0.06
4          Music Venue  0.06


----Broadmoor----
                   venue  freq
0           Home Service   0.5
1           Intersection   0.5
2                 Market   0.0
3  Performing Arts Venue   0.0
4                   Park   0.0


----Brookstown----
                    venue  freq
0           Fishing Store   0.5
1  Furniture / Home Store   0.5
2                Wine Bar   0.0
3                  Market   0.0
4                    Park   0.0


----Brownfields----
                        venue  freq
0  Construction & Landscaping   0.5
1           Other Repair Shop   0.5
2                    Wine Bar   0.0
3          Mexican Restaurant   0.0
4       Performing Arts Venue   0.0


----Centurion Place----
                   venue  freq
0                   Bank   0.5
1                    Bar   0.5
2               Wine Bar  

In [44]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [45]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = br_grouped['Neighborhood']

for ind in np.arange(br_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(br_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Beauregard Town,American Restaurant,Coffee Shop,History Museum,Museum,Music Venue,Performing Arts Venue,Seafood Restaurant,Hotel,Cajun / Creole Restaurant,Nightclub
1,Broadmoor,Home Service,Intersection,Trail,Flower Shop,Construction & Landscaping,Convenience Store,Dance Studio,Discount Store,Distillery,Donut Shop
2,Brookstown,Furniture / Home Store,Fishing Store,Trail,Flower Shop,College Gym,Construction & Landscaping,Convenience Store,Dance Studio,Discount Store,Distillery
3,Brownfields,Other Repair Shop,Construction & Landscaping,Trail,Fishing Store,College Gym,Convenience Store,Dance Studio,Discount Store,Distillery,Donut Shop
4,Centurion Place,Bank,Bar,Trail,Food,Construction & Landscaping,Convenience Store,Dance Studio,Discount Store,Distillery,Donut Shop


In [46]:
neighborhoods_venues_sorted['1st Most Common Venue'].value_counts()

Home Service              5
Theater                   2
Sandwich Place            2
Other Repair Shop         2
Pharmacy                  1
Furniture / Home Store    1
Rental Car Location       1
Bank                      1
Clothing Store            1
Golf Course               1
Lake                      1
Chinese Restaurant        1
Park                      1
Hotel                     1
Gym                       1
Trail                     1
Historic Site             1
American Restaurant       1
Name: 1st Most Common Venue, dtype: int64

In [47]:
neighborhoods_venues_sorted['2nd Most Common Venue'].value_counts()

American Restaurant           3
Trail                         3
Intersection                  3
Construction & Landscaping    2
BBQ Joint                     1
Airport Terminal              1
Hawaiian Restaurant           1
Dance Studio                  1
Discount Store                1
Garden                        1
Flower Shop                   1
Fishing Store                 1
Hotel                         1
Coffee Shop                   1
Casino                        1
Nightclub                     1
Breakfast Spot                1
Bar                           1
Name: 2nd Most Common Venue, dtype: int64

In [48]:
neighborhoods_venues_sorted['3rd Most Common Venue'].value_counts()

Trail                         7
Flower Shop                   3
College Gym                   2
Shipping Store                1
Fishing Store                 1
Fried Chicken Joint           1
Furniture / Home Store        1
Cafeteria                     1
Pool                          1
Construction & Landscaping    1
Coffee Shop                   1
History Museum                1
Park                          1
Pharmacy                      1
Ice Cream Shop                1
Bowling Alley                 1
Name: 3rd Most Common Venue, dtype: int64

In [49]:
neighborhoods_venues_sorted['4th Most Common Venue'].value_counts()

Flower Shop                   5
Food                          3
College Gym                   3
Trail                         2
Fishing Store                 2
Gas Station                   1
Pizza Place                   1
Clothing Store                1
Seafood Restaurant            1
Construction & Landscaping    1
Coffee Shop                   1
Park                          1
Museum                        1
Sandwich Place                1
Juice Bar                     1
Name: 4th Most Common Venue, dtype: int64

In [50]:
neighborhoods_venues_sorted['5th Most Common Venue'].value_counts()

Construction & Landscaping    7
College Gym                   4
Fishing Store                 2
Music Venue                   2
Golf Course                   2
Farmers Market                2
Flower Shop                   1
Park                          1
Smoke Shop                    1
Convenience Store             1
Public Art                    1
American Restaurant           1
Name: 5th Most Common Venue, dtype: int64

In [51]:
neighborhoods_venues_sorted['6th Most Common Venue'].value_counts()

Convenience Store             10
College Gym                    3
Construction & Landscaping     3
Bar                            3
Performing Arts Venue          1
Gas Station                    1
Gift Shop                      1
Food                           1
Dance Studio                   1
Farmers Market                 1
Name: 6th Most Common Venue, dtype: int64

In [52]:
neighborhoods_venues_sorted['7th Most Common Venue'].value_counts()

Dance Studio                       9
Convenience Store                  4
Construction & Landscaping         3
College Gym                        2
Performing Arts Venue              1
Donut Shop                         1
Discount Store                     1
Southern / Soul Food Restaurant    1
Seafood Restaurant                 1
Farmers Market                     1
Lounge                             1
Name: 7th Most Common Venue, dtype: int64

In [53]:
neighborhoods_venues_sorted['8th Most Common Venue'].value_counts()

Discount Store                9
Dance Studio                  4
Convenience Store             3
Hotel                         1
Salon / Barbershop            1
Museum                        1
Golf Course                   1
Distillery                    1
Construction & Landscaping    1
Middle Eastern Restaurant     1
Gym                           1
Market                        1
Name: 8th Most Common Venue, dtype: int64

In [54]:
neighborhoods_venues_sorted['9th Most Common Venue'].value_counts()

Distillery                    9
Discount Store                4
Dance Studio                  3
Convenience Store             2
Burrito Place                 1
Farmers Market                1
Cajun / Creole Restaurant     1
Museum                        1
Construction & Landscaping    1
Middle Eastern Restaurant     1
Donut Shop                    1
Name: 9th Most Common Venue, dtype: int64

In [55]:
neighborhoods_venues_sorted['10th Most Common Venue'].value_counts()

Donut Shop                 9
Distillery                 5
Discount Store             2
Farmers Market             1
New American Restaurant    1
Convenience Store          1
Mexican Restaurant         1
Dance Studio               1
Clothing Store             1
College Gym                1
Grocery Store              1
Nightclub                  1
Name: 10th Most Common Venue, dtype: int64

### Clustering Neighborhoods

In [56]:
# set number of clusters
kclusters = 8

br_grouped_clustering = br_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(init="k-means++", n_clusters=kclusters, n_init=12, random_state=0) 
kmeans.fit(br_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([3, 2, 3, 1, 0, 3, 3, 3, 3, 4], dtype=int32)

In [57]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)
neighborhoods_venues_sorted.head(5)

Unnamed: 0,Cluster Labels,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,3,Beauregard Town,American Restaurant,Coffee Shop,History Museum,Museum,Music Venue,Performing Arts Venue,Seafood Restaurant,Hotel,Cajun / Creole Restaurant,Nightclub
1,2,Broadmoor,Home Service,Intersection,Trail,Flower Shop,Construction & Landscaping,Convenience Store,Dance Studio,Discount Store,Distillery,Donut Shop
2,3,Brookstown,Furniture / Home Store,Fishing Store,Trail,Flower Shop,College Gym,Construction & Landscaping,Convenience Store,Dance Studio,Discount Store,Distillery
3,1,Brownfields,Other Repair Shop,Construction & Landscaping,Trail,Fishing Store,College Gym,Convenience Store,Dance Studio,Discount Store,Distillery,Donut Shop
4,0,Centurion Place,Bank,Bar,Trail,Food,Construction & Landscaping,Convenience Store,Dance Studio,Discount Store,Distillery,Donut Shop


In [58]:
neighborhoods_venues_sorted['Cluster Labels'].value_counts()

3    12
2     5
4     2
1     2
7     1
6     1
5     1
0     1
Name: Cluster Labels, dtype: int64

In [59]:
neighborhoods_venues_sorted.shape

(25, 12)

In [60]:
df.shape

(29, 3)

In [61]:
br_merged = df
br_merged = br_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')


In [62]:
br_merged.dropna(inplace=True)

In [63]:
br_merged['Cluster Labels']=br_merged['Cluster Labels'].astype(int)
br_merged['Cluster Labels'].unique()

array([3, 1, 4, 2, 5, 6, 7, 0])

In [64]:
br_merged.head() # check the last columns!

Unnamed: 0,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Downtown,30.448365,-91.18606,3,Hotel,American Restaurant,Coffee Shop,Seafood Restaurant,Music Venue,Bar,Performing Arts Venue,Museum,Middle Eastern Restaurant,Mexican Restaurant
1,Spanish Town,30.4548,-91.1835,3,Sandwich Place,American Restaurant,Park,Food,Farmers Market,Convenience Store,Lounge,Middle Eastern Restaurant,Museum,New American Restaurant
2,Beauregard Town,30.4452,-91.1876,3,American Restaurant,Coffee Shop,History Museum,Museum,Music Venue,Performing Arts Venue,Seafood Restaurant,Hotel,Cajun / Creole Restaurant,Nightclub
3,Garden District,29.9292,-90.0829,3,Historic Site,Breakfast Spot,Furniture / Home Store,Coffee Shop,Public Art,Bar,Southern / Soul Food Restaurant,Market,Burrito Place,Clothing Store
5,LSU/Lakeshore,30.4116,-91.1674,3,Lake,Hotel,College Gym,Juice Bar,Park,Gas Station,Farmers Market,Golf Course,Construction & Landscaping,Convenience Store


In [65]:
# create map
map_clusters = folium.Map(location=[location.latitude, location.longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(br_merged['Latitude'], br_merged['Longitude'], br_merged['Neighborhood'], br_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

### Exploring the Clusters

In [66]:
br_merged['Cluster Labels'].value_counts()

3    12
2     5
4     2
1     2
7     1
6     1
5     1
0     1
Name: Cluster Labels, dtype: int64

In [98]:
c1=br_merged[br_merged['Cluster Labels'] == 3]
c1['Neighborhood'].unique()

array(['Downtown', 'Spanish Town', 'Beauregard Town', 'Garden District',
       'LSU/Lakeshore', 'Mid-City', 'Brookstown', 'Southdowns', 'Gardere',
       'Scotlandville', 'Monticello', 'Glen Oaks'], dtype=object)

In [99]:
c1['1st Most Common Venue'].value_counts() 

Sandwich Place            2
Hotel                     1
Lake                      1
Pharmacy                  1
Furniture / Home Store    1
Historic Site             1
Clothing Store            1
Rental Car Location       1
Gym                       1
Chinese Restaurant        1
American Restaurant       1
Name: 1st Most Common Venue, dtype: int64

In [100]:
c1['2nd Most Common Venue'].value_counts() 

American Restaurant    2
Hotel                  1
Breakfast Spot         1
Fishing Store          1
Casino                 1
Airport Terminal       1
Dance Studio           1
BBQ Joint              1
Discount Store         1
Hawaiian Restaurant    1
Coffee Shop            1
Name: 2nd Most Common Venue, dtype: int64

In [70]:
c1['3rd Most Common Venue'].value_counts() 

Park                          1
Pharmacy                      1
Construction & Landscaping    1
College Gym                   1
Fried Chicken Joint           1
Furniture / Home Store        1
Bowling Alley                 1
Pool                          1
Shipping Store                1
Trail                         1
Coffee Shop                   1
History Museum                1
Name: 3rd Most Common Venue, dtype: int64

In [71]:
c1['4th Most Common Venue'].value_counts() 

Food                  2
Gas Station           1
Flower Shop           1
Pizza Place           1
Museum                1
Sandwich Place        1
Seafood Restaurant    1
Juice Bar             1
Trail                 1
Coffee Shop           1
Fishing Store         1
Name: 4th Most Common Venue, dtype: int64

In [72]:
c1['5th Most Common Venue'].value_counts() 

Fishing Store     2
Music Venue       2
College Gym       2
Farmers Market    2
Park              1
Smoke Shop        1
Golf Course       1
Public Art        1
Name: 5th Most Common Venue, dtype: int64

In [74]:
c2=br_merged[br_merged['Cluster Labels'] == 2]
c2['Neighborhood'].unique()

array(['Inniswold', 'Westminster', 'Oak Hills Place', 'Broadmoor',
       'Sherwood Forest'], dtype=object)

In [75]:
c2['1st Most Common Venue'].value_counts() 

Home Service    5
Name: 1st Most Common Venue, dtype: int64

In [77]:
c2['2nd Most Common Venue'].value_counts() 

Intersection                  3
Construction & Landscaping    1
Trail                         1
Name: 2nd Most Common Venue, dtype: int64

In [78]:
c2['3rd Most Common Venue'].value_counts()

Trail          4
Flower Shop    1
Name: 3rd Most Common Venue, dtype: int64

In [79]:
c2['4th Most Common Venue'].value_counts() 

Flower Shop    4
College Gym    1
Name: 4th Most Common Venue, dtype: int64

In [80]:
c2['5th Most Common Venue'].value_counts() 

Construction & Landscaping    4
College Gym                   1
Name: 5th Most Common Venue, dtype: int64

In [81]:
c3=br_merged[br_merged['Cluster Labels'] == 4]
c3['Neighborhood'].unique()

array(['Melrose East', 'Goodwood'], dtype=object)

In [82]:
c3['1st Most Common Venue'].value_counts() 

Theater    2
Name: 1st Most Common Venue, dtype: int64

In [83]:
c3['2nd Most Common Venue'].value_counts() 

Garden       1
Nightclub    1
Name: 2nd Most Common Venue, dtype: int64

In [84]:
c3['3rd Most Common Venue'].value_counts()

Flower Shop    1
Cafeteria      1
Name: 3rd Most Common Venue, dtype: int64

In [85]:
c3['4th Most Common Venue'].value_counts() 

Clothing Store    1
Park              1
Name: 4th Most Common Venue, dtype: int64

In [86]:
c3['5th Most Common Venue'].value_counts() 

Golf Course            1
American Restaurant    1
Name: 5th Most Common Venue, dtype: int64

In [87]:
c4=br_merged[br_merged['Cluster Labels'] == 1]
c3['Neighborhood'].unique()

array(['Melrose East', 'Goodwood'], dtype=object)

In [88]:
c4['1st Most Common Venue'].value_counts() 

Other Repair Shop    2
Name: 1st Most Common Venue, dtype: int64

In [89]:
c4['2nd Most Common Venue'].value_counts() 

Construction & Landscaping    1
American Restaurant           1
Name: 2nd Most Common Venue, dtype: int64

In [90]:
c4['3rd Most Common Venue'].value_counts()

Ice Cream Shop    1
Trail             1
Name: 3rd Most Common Venue, dtype: int64

In [91]:
c4['4th Most Common Venue'].value_counts() 

Trail            1
Fishing Store    1
Name: 4th Most Common Venue, dtype: int64

In [92]:
c4['5th Most Common Venue'].value_counts() 

Flower Shop    1
College Gym    1
Name: 5th Most Common Venue, dtype: int64