## Coursera Capstone Project

### Scraping the New Orleans Wikipedia webpage and creating the dataset

In [1]:
import pandas as pd

In [2]:
import numpy as np

In [3]:
print("Hello Capstone Project Course")

Hello Capstone Project Course


In [4]:
df=pd.read_html("https://en.wikipedia.org/wiki/Neighborhoods_in_New_Orleans")[0]

In [5]:
df.head()

Unnamed: 0,Neighborhood,Longitude,Latitude
0,U.S. NAVAL BASE,-90.026093,29.946085
1,ALGIERS POINT,-90.051606,29.952462
2,WHITNEY,-90.042357,29.9472
3,AUDUBON,-90.12145,29.932994
4,OLD AURORA,-90.0,29.92444


In [6]:
df.shape

(72, 3)

In [7]:
df_sorted = df.sort_values(by=['Neighborhood'])

In [9]:
df_sorted.head()

Unnamed: 0,Neighborhood,Longitude,Latitude
1,ALGIERS POINT,-90.051606,29.952462
3,AUDUBON,-90.12145,29.932994
5,B. W. COOPER,-90.091753,29.951774
6,BAYOU ST. JOHN,-90.086517,29.976071
7,BEHRMAN,-90.026436,29.934817


In [10]:
df_sorted.reset_index(drop=True, inplace=True)
df_sorted.shape

(72, 3)

### Segmenting and clustering New Orleans neighborhoods

In [11]:
import json # library to handle JSON files

!pip install geopy
#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
!pip install folium==0.5
import folium # map rendering library

print('Libraries imported.')

Collecting folium==0.5
  Downloading folium-0.5.0.tar.gz (79 kB)
[K     |████████████████████████████████| 79 kB 7.9 MB/s  eta 0:00:01
[?25hCollecting branca
  Downloading branca-0.4.2-py3-none-any.whl (24 kB)
Building wheels for collected packages: folium
  Building wheel for folium (setup.py) ... [?25ldone
[?25h  Created wheel for folium: filename=folium-0.5.0-py3-none-any.whl size=76240 sha256=4f7eaffcc5f999a119acb980efbf38f3c98d68e71ea6cbcd1057de5efbfd1bc7
  Stored in directory: /tmp/wsuser/.cache/pip/wheels/b2/2f/2c/109e446b990d663ea5ce9b078b5e7c1a9c45cca91f377080f8
Successfully built folium
Installing collected packages: branca, folium
Successfully installed branca-0.4.2 folium-0.5.0
Libraries imported.


In [12]:
address = 'New Orleans, LA'

geolocator = Nominatim(user_agent="nola_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of New Orleans, Louisiana are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of New Orleans, Louisiana are 29.9499323, -90.0701156.


In [13]:
# create map of New Orleans using latitude and longitude values
map_newOrleans = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, neighborhood in zip(df_sorted['Latitude'], df_sorted['Longitude'], df_sorted['Neighborhood']):
    label = '{}'.format(neighborhood)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_newOrleans)  
    
map_newOrleans

In [17]:
print('The dataframe has {} neighborhoods.'.format(len(df_sorted['Neighborhood']),
        df_sorted.shape[0]
    )
)

The dataframe has 72 neighborhoods.


### Requesting information from Foursquare API

In [18]:
CLIENT_ID = 'PLVCWWLH4OLCZWPQ0XSVAOIHIBUF3ARXXT530SUHAJFKO5OJ' # your Foursquare ID
CLIENT_SECRET = '4CFUBKQ5LBVMZPZLHX1AVKAP2ANWGKO3POYMPZNARTAEUENC' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version
LIMIT = 100 # A default Foursquare API limit value

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)


Your credentails:
CLIENT_ID: PLVCWWLH4OLCZWPQ0XSVAOIHIBUF3ARXXT530SUHAJFKO5OJ
CLIENT_SECRET:4CFUBKQ5LBVMZPZLHX1AVKAP2ANWGKO3POYMPZNARTAEUENC


In [21]:
df_sorted.loc[0, 'Neighborhood']

'ALGIERS POINT'

In [23]:
neighborhood_latitude = df_sorted.loc[0, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = df_sorted.loc[0, 'Longitude'] # neighborhood longitude value

neighborhood_name = df_sorted.loc[0, 'Neighborhood'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of ALGIERS POINT are 29.95246187, -90.0516057.


In [24]:
#GET request from Foursquare API for URL
LIMIT = 100
radius = 500
url = 'https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&ll={},{}&v={}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, neighborhood_latitude, neighborhood_longitude, VERSION, radius, LIMIT)
url

'https://api.foursquare.com/v2/venues/explore?client_id=PLVCWWLH4OLCZWPQ0XSVAOIHIBUF3ARXXT530SUHAJFKO5OJ&client_secret=4CFUBKQ5LBVMZPZLHX1AVKAP2ANWGKO3POYMPZNARTAEUENC&ll=29.95246187,-90.0516057&v=20180605&radius=500&limit=100'

In [25]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '6007606ed4ffb81a7ce4a815'},
 'response': {'suggestedFilters': {'header': 'Tap to show:',
   'filters': [{'name': 'Open now', 'key': 'openNow'},
    {'name': '$-$$$$', 'key': 'price'}]},
  'headerLocation': 'Algiers Point',
  'headerFullLocation': 'Algiers Point, New Orleans',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 25,
  'suggestedBounds': {'ne': {'lat': 29.956961874500006,
    'lng': -90.04642172567442},
   'sw': {'lat': 29.947961865499995, 'lng': -90.05678967432557}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4ba101aaf964a5204a8e37e3',
       'name': 'Tout de Suite Café',
       'location': {'address': '347 Verret St',
        'crossStreet': 'Alix',
        'lat': 29.95212113610269,
        'lng': -90.

In [26]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [27]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = pd.json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Tout de Suite Café,Café,29.952121,-90.05109
1,Congregation Coffee Roasters,Coffee Shop,29.951918,-90.053395
2,The Crown & Anchor,Bar,29.951416,-90.05422
3,Levee @ Algiers Point,Scenic Lookout,29.95176,-90.048747
4,Old Point Bar,Bar,29.95494,-90.050226


In [28]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

25 venues were returned by Foursquare.


### Exploring the neighborhoods in New Orleans

In [29]:
#Exploring neighbourhoods in New Orleans
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [30]:
newOrleans_venues = getNearbyVenues(names=df_sorted['Neighborhood'],
                                  latitudes=df_sorted['Latitude'],
                                  longitudes=df_sorted['Longitude']
                                  )

ALGIERS POINT
AUDUBON
B. W. COOPER
BAYOU ST. JOHN
BEHRMAN
BLACK PEARL
BROADMOOR
BYWATER
CENTRAL BUSINESS DISTRICT
CENTRAL CITY
CITY PARK
DESIRE AREA
DILLARD
DIXON
EAST CARROLLTON
EAST RIVERSIDE
FAIRGROUNDS
FILMORE
FISCHER DEV
FLORIDA AREA
FLORIDA DEV
FRENCH QUARTER
FRERET
GARDEN DISTRICT
GENTILLY TERRACE
GENTILLY WOODS
GERT TOWN
HOLLYGROVE
HOLY CROSS
IBERVILLE
IRISH CHANNEL
LAKE CATHERINE
LAKE TERRACE & OAKS
LAKESHORE - LAKE VISTA
LAKEVIEW
LAKEWOOD
LEONIDAS
LITTLE WOODS
LOWER GARDEN DISTRICT
LOWER NINTH WARD
MARIGNY
MARLYVILLE - FONTAINEBLEAU
MID-CITY
MILAN
MILNEBURG
McDONOGH
NAVARRE
NEW AURORA - ENGLISH TURN
OLD AURORA
PINES VILLAGE
PLUM ORCHARD
PONTCHARTRAIN PARK
READ BLVD EAST
READ BLVD WEST
SEVENTH WARD
ST. ANTHONY
ST. BERNARD AREA
ST. CLAUDE
ST. ROCH
ST. THOMAS DEV
TALL TIMBERS - BRECHTEL
TOURO
TREME - LAFITTE
TULANE - GRAVIER
U.S. NAVAL BASE
UPTOWN
VIAVANT - VENETIAN ISLES
VILLAGE DE LEST
WEST END
WEST LAKE FOREST
WEST RIVERSIDE
WHITNEY


In [31]:
print(newOrleans_venues.shape)
newOrleans_venues.head()

(918, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,ALGIERS POINT,29.952462,-90.051606,Tout de Suite Café,29.952121,-90.05109,Café
1,ALGIERS POINT,29.952462,-90.051606,Congregation Coffee Roasters,29.951918,-90.053395,Coffee Shop
2,ALGIERS POINT,29.952462,-90.051606,The Crown & Anchor,29.951416,-90.05422,Bar
3,ALGIERS POINT,29.952462,-90.051606,Levee @ Algiers Point,29.95176,-90.048747,Scenic Lookout
4,ALGIERS POINT,29.952462,-90.051606,Old Point Bar,29.95494,-90.050226,Bar


In [32]:
newOrleans_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
ALGIERS POINT,25,25,25,25,25,25
AUDUBON,7,7,7,7,7,7
B. W. COOPER,4,4,4,4,4,4
BAYOU ST. JOHN,6,6,6,6,6,6
BLACK PEARL,5,5,5,5,5,5
...,...,...,...,...,...,...
UPTOWN,27,27,27,27,27,27
WEST END,3,3,3,3,3,3
WEST LAKE FOREST,10,10,10,10,10,10
WEST RIVERSIDE,25,25,25,25,25,25


In [34]:
print('There are {} unique categories.'.format(len(newOrleans_venues['Venue Category'].unique())))

There are 204 unique categories.


### Analyzing each neighborhood

In [35]:
#Analyzing each neighbourhood
# one hot encoding
newOrleans_onehot = pd.get_dummies(newOrleans_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
newOrleans_onehot['Neighborhood'] = newOrleans_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [newOrleans_onehot.columns[-1]] + list(newOrleans_onehot.columns[:-1])
newOrleans_onehot = newOrleans_onehot[fixed_columns]

newOrleans_onehot.head()

Unnamed: 0,Yoga Studio,Accessories Store,American Restaurant,Antique Shop,Art Gallery,Arts & Crafts Store,Asian Restaurant,Astrologer,Athletics & Sports,Auto Garage,...,Vegetarian / Vegan Restaurant,Vehicle Inspection Station,Video Game Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wine Shop,Winery,Wings Joint,Women's Store
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [37]:
newOrleans_onehot.shape

(918, 204)

In [38]:
#calculating the mean of frequency of occurrence of each venue
newOrleans_grouped = newOrleans_onehot.groupby('Neighborhood').mean().reset_index()
newOrleans_grouped

Unnamed: 0,Neighborhood,Yoga Studio,Accessories Store,American Restaurant,Antique Shop,Art Gallery,Arts & Crafts Store,Asian Restaurant,Astrologer,Athletics & Sports,...,Vegetarian / Vegan Restaurant,Vehicle Inspection Station,Video Game Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wine Shop,Winery,Wings Joint,Women's Store
0,ALGIERS POINT,0.000000,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,...,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.04,0.0
1,AUDUBON,0.000000,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0,...,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.00,0.0
2,B. W. COOPER,0.000000,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0,...,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.00,0.0
3,BAYOU ST. JOHN,0.000000,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0,...,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.00,0.0
4,BLACK PEARL,0.000000,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0,...,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.00,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
61,UPTOWN,0.037037,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0,...,0.0,0.0,0.0,0.037037,0.0,0.0,0.0,0.0,0.00,0.0
62,WEST END,0.000000,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0,...,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.00,0.0
63,WEST LAKE FOREST,0.000000,0.0,0.1,0.0,0.0,0.00,0.0,0.0,0.0,...,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.00,0.0
64,WEST RIVERSIDE,0.000000,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0,...,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.00,0.0


In [39]:
newOrleans_grouped.shape

(66, 204)

In [40]:
#printing out top 5 venues of each neighborhood
num_top_venues = 5

for hood in newOrleans_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = newOrleans_grouped[newOrleans_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----ALGIERS POINT----
              venue  freq
0     Boat or Ferry  0.12
1               Bar  0.12
2              Park  0.08
3    Scenic Lookout  0.08
4  Recording Studio  0.04


----AUDUBON----
                   venue  freq
0           Concert Hall  0.14
1          Smoothie Shop  0.14
2  College Arts Building  0.14
3  Outdoors & Recreation  0.14
4            Coffee Shop  0.14


----B. W. COOPER----
                     venue  freq
0        Recreation Center  0.25
1               Boxing Gym  0.25
2               Food Truck  0.25
3     Gym / Fitness Center  0.25
4  New American Restaurant  0.00


----BAYOU ST. JOHN----
                  venue  freq
0                Lounge  0.17
1           Gas Station  0.17
2            Playground  0.17
3  Other Great Outdoors  0.17
4        Sandwich Place  0.17


----BLACK PEARL----
            venue  freq
0           Plaza   0.4
1      Food Truck   0.2
2   Grocery Store   0.2
3  Farmers Market   0.2
4     Yoga Studio   0.0


----BROADMOOR----
      

In [41]:
#making a pandas dataframe out of thie venues / neighbourhood information
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [42]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = newOrleans_grouped['Neighborhood']

for ind in np.arange(newOrleans_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(newOrleans_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,ALGIERS POINT,Bar,Boat or Ferry,Scenic Lookout,Park,Recording Studio,Music Venue,Gift Shop,Coffee Shop,Grocery Store,Café
1,AUDUBON,Sandwich Place,Coffee Shop,Smoothie Shop,Outdoors & Recreation,Concert Hall,Plaza,College Arts Building,Cupcake Shop,Dance Studio,Flower Shop
2,B. W. COOPER,Food Truck,Recreation Center,Gym / Fitness Center,Boxing Gym,Convenience Store,Cosmetics Shop,Food & Drink Shop,Food,Flower Shop,Flea Market
3,BAYOU ST. JOHN,Park,Playground,Gas Station,Lounge,Sandwich Place,Other Great Outdoors,Eastern European Restaurant,Flower Shop,Flea Market,Fast Food Restaurant
4,BLACK PEARL,Plaza,Grocery Store,Farmers Market,Food Truck,Women's Store,Food & Drink Shop,Food,Flower Shop,Flea Market,Fast Food Restaurant


### Clustering the neighborhoods

In [85]:
#clustering the neighbourhoods
# set number of clusters
kclusters = 5

newOrleans_grouped_clustering = newOrleans_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(newOrleans_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 4], dtype=int32)

In [88]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

newOrleans_merged = df_sorted

# merge newOrleans_grouped with df_sorted to add latitude/longitude for each neighborhood
newOrleans_merged = newOrleans_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

newOrleans_merged.head() # check the last columns!

Unnamed: 0,Neighborhood,Longitude,Latitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,ALGIERS POINT,-90.051606,29.952462,1.0,Bar,Boat or Ferry,Scenic Lookout,Park,Recording Studio,Music Venue,Gift Shop,Coffee Shop,Grocery Store,Café
1,AUDUBON,-90.12145,29.932994,1.0,Sandwich Place,Coffee Shop,Smoothie Shop,Outdoors & Recreation,Concert Hall,Plaza,College Arts Building,Cupcake Shop,Dance Studio,Flower Shop
2,B. W. COOPER,-90.091753,29.951774,1.0,Food Truck,Recreation Center,Gym / Fitness Center,Boxing Gym,Convenience Store,Cosmetics Shop,Food & Drink Shop,Food,Flower Shop,Flea Market
3,BAYOU ST. JOHN,-90.086517,29.976071,1.0,Park,Playground,Gas Station,Lounge,Sandwich Place,Other Great Outdoors,Eastern European Restaurant,Flower Shop,Flea Market,Fast Food Restaurant
4,BEHRMAN,-90.026436,29.934817,,,,,,,,,,,


In [75]:
newOrleans_merged.shape

(72, 14)

In [106]:
newOrleans_merged.dropna(subset=["Cluster Labels"], axis=0, inplace=True)

In [107]:
newOrleans_merged.reset_index(drop=True, inplace=True)

In [108]:
newOrleans_merged.shape

(66, 14)

In [109]:
newOrleans_merged.head()

Unnamed: 0,Neighborhood,Longitude,Latitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,ALGIERS POINT,-90.051606,29.952462,1.0,Bar,Boat or Ferry,Scenic Lookout,Park,Recording Studio,Music Venue,Gift Shop,Coffee Shop,Grocery Store,Café
1,AUDUBON,-90.12145,29.932994,1.0,Sandwich Place,Coffee Shop,Smoothie Shop,Outdoors & Recreation,Concert Hall,Plaza,College Arts Building,Cupcake Shop,Dance Studio,Flower Shop
2,B. W. COOPER,-90.091753,29.951774,1.0,Food Truck,Recreation Center,Gym / Fitness Center,Boxing Gym,Convenience Store,Cosmetics Shop,Food & Drink Shop,Food,Flower Shop,Flea Market
3,BAYOU ST. JOHN,-90.086517,29.976071,1.0,Park,Playground,Gas Station,Lounge,Sandwich Place,Other Great Outdoors,Eastern European Restaurant,Flower Shop,Flea Market,Fast Food Restaurant
4,BLACK PEARL,-90.134883,29.935895,1.0,Plaza,Grocery Store,Farmers Market,Food Truck,Women's Store,Food & Drink Shop,Food,Flower Shop,Flea Market,Fast Food Restaurant


In [110]:
newOrleans_merged['Cluster Labels'] = newOrleans_merged['Cluster Labels'].astype(int)

In [111]:
newOrleans_merged.head()

Unnamed: 0,Neighborhood,Longitude,Latitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,ALGIERS POINT,-90.051606,29.952462,1,Bar,Boat or Ferry,Scenic Lookout,Park,Recording Studio,Music Venue,Gift Shop,Coffee Shop,Grocery Store,Café
1,AUDUBON,-90.12145,29.932994,1,Sandwich Place,Coffee Shop,Smoothie Shop,Outdoors & Recreation,Concert Hall,Plaza,College Arts Building,Cupcake Shop,Dance Studio,Flower Shop
2,B. W. COOPER,-90.091753,29.951774,1,Food Truck,Recreation Center,Gym / Fitness Center,Boxing Gym,Convenience Store,Cosmetics Shop,Food & Drink Shop,Food,Flower Shop,Flea Market
3,BAYOU ST. JOHN,-90.086517,29.976071,1,Park,Playground,Gas Station,Lounge,Sandwich Place,Other Great Outdoors,Eastern European Restaurant,Flower Shop,Flea Market,Fast Food Restaurant
4,BLACK PEARL,-90.134883,29.935895,1,Plaza,Grocery Store,Farmers Market,Food Truck,Women's Store,Food & Drink Shop,Food,Flower Shop,Flea Market,Fast Food Restaurant


In [112]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(newOrleans_merged['Latitude'], newOrleans_merged['Longitude'], newOrleans_merged['Neighborhood'], newOrleans_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

### Examining the individual clusters

In [113]:
#Examining the individual clusters
#Cluster 0 -- 
newOrleans_merged.loc[newOrleans_merged['Cluster Labels'] == 0, newOrleans_merged.columns[[0] + list(range(4, newOrleans_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
10,DESIRE AREA,Skate Park,Women's Store,Eastern European Restaurant,Food & Drink Shop,Food,Flower Shop,Flea Market,Fast Food Restaurant,Farmers Market,Event Space


In [114]:
#Examining the individual clusters
#Cluster 1 -- 
newOrleans_merged.loc[newOrleans_merged['Cluster Labels'] == 1, newOrleans_merged.columns[[0] + list(range(4, newOrleans_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,ALGIERS POINT,Bar,Boat or Ferry,Scenic Lookout,Park,Recording Studio,Music Venue,Gift Shop,Coffee Shop,Grocery Store,Café
1,AUDUBON,Sandwich Place,Coffee Shop,Smoothie Shop,Outdoors & Recreation,Concert Hall,Plaza,College Arts Building,Cupcake Shop,Dance Studio,Flower Shop
2,B. W. COOPER,Food Truck,Recreation Center,Gym / Fitness Center,Boxing Gym,Convenience Store,Cosmetics Shop,Food & Drink Shop,Food,Flower Shop,Flea Market
3,BAYOU ST. JOHN,Park,Playground,Gas Station,Lounge,Sandwich Place,Other Great Outdoors,Eastern European Restaurant,Flower Shop,Flea Market,Fast Food Restaurant
4,BLACK PEARL,Plaza,Grocery Store,Farmers Market,Food Truck,Women's Store,Food & Drink Shop,Food,Flower Shop,Flea Market,Fast Food Restaurant
5,BROADMOOR,Food Truck,Music Venue,Recreation Center,Coffee Shop,Taco Place,Eastern European Restaurant,Food,Flower Shop,Flea Market,Fast Food Restaurant
6,BYWATER,Music Venue,Coffee Shop,Vegetarian / Vegan Restaurant,Park,Hotel,Antique Shop,Indie Theater,Boat or Ferry,Fried Chicken Joint,Caribbean Restaurant
7,CENTRAL BUSINESS DISTRICT,Hotel,Hotel Bar,Bar,Coffee Shop,Café,Restaurant,Grocery Store,Gym / Fitness Center,Vietnamese Restaurant,Seafood Restaurant
8,CENTRAL CITY,Grocery Store,Fried Chicken Joint,Pharmacy,Seafood Restaurant,Park,Cajun / Creole Restaurant,Donut Shop,Flower Shop,Flea Market,Fast Food Restaurant
11,DILLARD,Women's Store,Mobile Phone Shop,Discount Store,Tourist Information Center,Gym / Fitness Center,Donut Shop,Entertainment Service,Food & Drink Shop,Food,Flower Shop


In [115]:
#Examining the individual clusters
#Cluster 2 -- 
newOrleans_merged.loc[newOrleans_merged['Cluster Labels'] == 2, newOrleans_merged.columns[[0] + list(range(4, newOrleans_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
17,FISCHER DEV,Nightclub,Women's Store,Eastern European Restaurant,Food & Drink Shop,Food,Flower Shop,Flea Market,Fast Food Restaurant,Farmers Market,Event Space
65,WHITNEY,Construction & Landscaping,Nightclub,Women's Store,Eastern European Restaurant,Food & Drink Shop,Food,Flower Shop,Flea Market,Fast Food Restaurant,Farmers Market


In [116]:
#Examining the individual clusters
#Cluster 3 -- 
newOrleans_merged.loc[newOrleans_merged['Cluster Labels'] == 3, newOrleans_merged.columns[[0] + list(range(4, newOrleans_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
48,PONTCHARTRAIN PARK,Park,Women's Store,Eastern European Restaurant,Food & Drink Shop,Food,Flower Shop,Flea Market,Fast Food Restaurant,Farmers Market,Event Space
54,ST. CLAUDE,Plaza,Entertainment Service,Park,Women's Store,Donut Shop,Food,Flower Shop,Flea Market,Fast Food Restaurant,Farmers Market


In [117]:
#Examining the individual clusters
#Cluster 4 -- 
newOrleans_merged.loc[newOrleans_merged['Cluster Labels'] == 4, newOrleans_merged.columns[[0] + list(range(4, newOrleans_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
9,CITY PARK,Baseball Field,Trail,Tennis Court,Park,Women's Store,Eastern European Restaurant,Food & Drink Shop,Food,Flower Shop,Flea Market
33,LAKEWOOD,Baseball Field,Food,Shop & Service,Dog Run,Spa,Women's Store,Entertainment Service,Food & Drink Shop,Flower Shop,Flea Market
45,OLD AURORA,Playground,Baseball Field,Women's Store,Entertainment Service,Food Court,Food & Drink Shop,Food,Flower Shop,Flea Market,Fast Food Restaurant
