In [1]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

%matplotlib inline

print('Libraries imported.')

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/Python36

  added / updated specs: 
    - geopy


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    ca-certificates-2020.4.5.2 |       hecda079_0         147 KB  conda-forge
    python_abi-3.6             |          1_cp36m           4 KB  conda-forge
    certifi-2020.4.5.2         |   py36h9f0ad1d_0         152 KB  conda-forge
    geopy-1.22.0               |     pyh9f0ad1d_0          63 KB  conda-forge
    openssl-1.1.1g             |       h516909a_0         2.1 MB  conda-forge
    geographiclib-1.50         |             py_0          34 KB  conda-forge
    ------------------------------------------------------------
                                           Total:         2.5 MB

The following NEW packages will be INSTALLED:

    geographiclib:   1.50-py_0           conda-forge
    geopy:          

In [52]:
address = 'New Castle, DE'

geolocator = Nominatim(user_agent="de_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of New Castle City are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of New Castle City are 39.6159851, -75.662956.


In [53]:
# create map of Newark,DE using latitude and longitude values
map_newcastle = folium.Map(location=[39.6159851, -75.662956], zoom_start=10)
map_newcastle

In [54]:
# The code was removed by Watson Studio for sharing.

Unnamed: 0,zip,type,decommissioned,primary_city,acceptable_cities,unacceptable_cities,state,county,timezone,area_codes,world_region,country,latitude,longitude,irs_estimated_population_2015
0,501,UNIQUE,0,Holtsville,,I R S Service Center,NY,Suffolk County,America/New_York,631,,US,40.81,-73.04,562
1,544,UNIQUE,0,Holtsville,,Irs Service Center,NY,Suffolk County,America/New_York,631,,US,40.81,-73.04,0
2,601,STANDARD,0,Adjuntas,,"Colinas Del Gigante, Jard De Adjuntas, Urb San...",PR,Adjuntas Municipio,America/Puerto_Rico,787939,,US,18.16,-66.72,0
3,602,STANDARD,0,Aguada,,"Alts De Aguada, Bo Guaniquilla, Comunidad Las ...",PR,Aguada Municipio,America/Puerto_Rico,787939,,US,18.38,-67.18,0
4,603,STANDARD,0,Aguadilla,Ramey,"Bda Caban, Bda Esteves, Bo Borinquen, Bo Ceiba...",PR,Aguadilla Municipio,America/Puerto_Rico,787,,US,18.43,-67.15,0


In [55]:
df=df.loc[df["state"]=="DE",["primary_city","county","latitude","longitude"]]
df.reset_index(inplace=True,drop=True)
df

Unnamed: 0,primary_city,county,latitude,longitude
0,Bear,New Castle County,39.58,-75.68
1,Newark,New Castle County,39.62,-75.73
2,Claymont,New Castle County,39.8,-75.45
3,Delaware City,New Castle County,39.57,-75.6
4,Hockessin,New Castle County,39.78,-75.68
5,Kirkwood,New Castle County,39.57,-75.63
6,Middletown,New Castle County,39.45,-75.71
7,Montchanin,New Castle County,39.8,-75.59
8,Newark,New Castle County,39.71,-75.74
9,Newark,New Castle County,39.67,-75.75


#### Checking if there any Null values

In [56]:
df.isnull().any()

primary_city    False
county          False
latitude        False
longitude       False
dtype: bool

#### Renaming the columns based on our problem statement. Pls note in the state of Delaware , "county" is equivalent to "borough". And "primary_city" is same as "Neighborhood"

In [57]:
df.rename(columns={"primary_city":"Neighborhood","county":"Borough","latitude":"Latitude","longitude":"Longitude"},inplace=True)

In [58]:
df

Unnamed: 0,Neighborhood,Borough,Latitude,Longitude
0,Bear,New Castle County,39.58,-75.68
1,Newark,New Castle County,39.62,-75.73
2,Claymont,New Castle County,39.8,-75.45
3,Delaware City,New Castle County,39.57,-75.6
4,Hockessin,New Castle County,39.78,-75.68
5,Kirkwood,New Castle County,39.57,-75.63
6,Middletown,New Castle County,39.45,-75.71
7,Montchanin,New Castle County,39.8,-75.59
8,Newark,New Castle County,39.71,-75.74
9,Newark,New Castle County,39.67,-75.75


In [59]:
# define the dataframe columns
column_names = ['Borough', 'Neighborhood', 'Latitude', 'Longitude'] 

# instantiate the dataframe
neighborhoods = pd.DataFrame(columns=column_names)

In [60]:
neighborhoods['Borough']=df['Borough']
neighborhoods['Neighborhood']=df['Neighborhood']
neighborhoods['Latitude']=df['Latitude']
neighborhoods['Longitude']=df['Longitude']


In [61]:
neighborhoods.shape

(98, 4)

#### Eliminating duplicates on "Neighborhood" col as same Neighborhood is listed multiple times due to some neighborhoods whch are mapped to multiple zip codes.

In [62]:
neighborhoods.drop_duplicates(subset="Neighborhood",inplace=True)

In [63]:
neighborhoods.reset_index(inplace=True,drop=True)
neighborhoods

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,New Castle County,Bear,39.58,-75.68
1,New Castle County,Newark,39.62,-75.73
2,New Castle County,Claymont,39.8,-75.45
3,New Castle County,Delaware City,39.57,-75.6
4,New Castle County,Hockessin,39.78,-75.68
5,New Castle County,Kirkwood,39.57,-75.63
6,New Castle County,Middletown,39.45,-75.71
7,New Castle County,Montchanin,39.8,-75.59
8,New Castle County,New Castle,39.66,-75.57
9,New Castle County,Odessa,39.45,-75.66


In [64]:
address = 'New Castle, DE'

geolocator = Nominatim(user_agent="de_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Newark City are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Newark City are 39.6159851, -75.662956.


In [65]:
# create map of Delaware using latitude and longitude values
map_delaware = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(neighborhoods['Latitude'], neighborhoods['Longitude'], neighborhoods['Borough'], neighborhoods['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_delaware)  
    
map_delaware

#### For simplicity purposes, let's simplify the above map and segment and cluster only the neighborhoods in "New Castle County". So let's slice the original dataframe and create a new dataframe of the "New Castle County" data.

In [66]:
newcastle_data = neighborhoods[neighborhoods['Borough'] == 'New Castle County'].reset_index(drop=True)
newcastle_data.head()

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,New Castle County,Bear,39.58,-75.68
1,New Castle County,Newark,39.62,-75.73
2,New Castle County,Claymont,39.8,-75.45
3,New Castle County,Delaware City,39.57,-75.6
4,New Castle County,Hockessin,39.78,-75.68


In [67]:
newcastle_data

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,New Castle County,Bear,39.58,-75.68
1,New Castle County,Newark,39.62,-75.73
2,New Castle County,Claymont,39.8,-75.45
3,New Castle County,Delaware City,39.57,-75.6
4,New Castle County,Hockessin,39.78,-75.68
5,New Castle County,Kirkwood,39.57,-75.63
6,New Castle County,Middletown,39.45,-75.71
7,New Castle County,Montchanin,39.8,-75.59
8,New Castle County,New Castle,39.66,-75.57
9,New Castle County,Odessa,39.45,-75.66


#### Let's get the geographical coordinates of New Castle county.

In [68]:
address = 'New Castle, DE'

geolocator = Nominatim(user_agent="nc_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of New Castle are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of New Castle are 39.6159851, -75.662956.


#### let's visualize New castle county and the neighborhoods in it.

In [69]:
# create map of Delaware using latitude and longitude values
map_newcastle = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(newcastle_data['Latitude'], newcastle_data['Longitude'], newcastle_data['Borough'], newcastle_data['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_newcastle)  
    
map_newcastle

#### Next, we are going to start utilizing the Foursquare API to explore the neighborhoods and segment them

#### Define Foursquare Credentials and Version

In [70]:
CLIENT_ID = 'Z5IOQ45F14ITZIEUMBVGED5AKJJCOV3KC33PDVPZSP14JPZV' # your Foursquare ID
CLIENT_SECRET = '1KCMYSZJY5QDW5TEPVQAEF5HC1RNF2TVXFLWRWHAT0V5YRPI' # your Foursquare Secret
VERSION = '20200608' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: Z5IOQ45F14ITZIEUMBVGED5AKJJCOV3KC33PDVPZSP14JPZV
CLIENT_SECRET:1KCMYSZJY5QDW5TEPVQAEF5HC1RNF2TVXFLWRWHAT0V5YRPI


#### Let's explore the first neighborhood in our dataframe

#### Get the neighborhood's name.

In [71]:
newcastle_data.loc[0, 'Neighborhood']

'Bear'

#### Get the neighborhood's latitude and longitude values.

In [72]:
neighborhood_latitude = newcastle_data.loc[0, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = newcastle_data.loc[0, 'Longitude'] # neighborhood longitude value

neighborhood_name = newcastle_data.loc[0, 'Neighborhood'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of Bear are 39.58, -75.68.


#### Now, let's get the top 100 venues that are in Bear within a radius of 2000 meters

#### First, let's create the GET request URL. Name your URL url.

In [73]:

LIMIT = 100 # limit of number of venues returned by Foursquare API

radius = 5000 # define radius

url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude, 
    neighborhood_longitude, 
    radius, 
    LIMIT)
url # display URL

'https://api.foursquare.com/v2/venues/explore?&client_id=Z5IOQ45F14ITZIEUMBVGED5AKJJCOV3KC33PDVPZSP14JPZV&client_secret=1KCMYSZJY5QDW5TEPVQAEF5HC1RNF2TVXFLWRWHAT0V5YRPI&v=20200608&ll=39.58,-75.68&radius=5000&limit=100'

#### Send the GET request and examine the results

In [74]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5ee3b68c1f420b592180c608'},
 'response': {'suggestedFilters': {'header': 'Tap to show:',
   'filters': [{'name': '$-$$$$', 'key': 'price'},
    {'name': 'Open now', 'key': 'openNow'}]},
  'headerLocation': 'Red Lion',
  'headerFullLocation': 'Red Lion',
  'headerLocationGranularity': 'city',
  'totalResults': 72,
  'suggestedBounds': {'ne': {'lat': 39.62500004500004,
    'lng': -75.62172318106171},
   'sw': {'lat': 39.534999954999954, 'lng': -75.7382768189383}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '5de8a3a6e829d100088a257c',
       'name': 'Wawa',
       'location': {'address': '3601 Wrangle Hill Rd',
        'crossStreet': 'at Red Lion Rd',
        'lat': 39.58405303955078,
        'lng': -75.6842041015625,
        'labele

#### Defining the function that extracts the category of the venue

In [75]:
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

#### Now we are ready to clean the json and structure it into a pandas dataframe.

In [76]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Wawa,Convenience Store,39.584053,-75.684204
1,Amore Pizza,Pizza Place,39.592324,-75.677772
2,Delaware Rock Gym,Gym / Fitness Center,39.611009,-75.689161
3,Oceanmart Deli,Deli / Bodega,39.570546,-75.697795
4,Lums Pond State Park,State / Provincial Park,39.561565,-75.720337


#### And how many venues were returned by Foursquare?

In [77]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

72 venues were returned by Foursquare.


In [78]:
nearby_venues

Unnamed: 0,name,categories,lat,lng
0,Wawa,Convenience Store,39.584053,-75.684204
1,Amore Pizza,Pizza Place,39.592324,-75.677772
2,Delaware Rock Gym,Gym / Fitness Center,39.611009,-75.689161
3,Oceanmart Deli,Deli / Bodega,39.570546,-75.697795
4,Lums Pond State Park,State / Provincial Park,39.561565,-75.720337
5,Go Ape! Treetop Adventure,Athletics & Sports,39.559182,-75.716021
6,Frank's Pizzeria,Pizza Place,39.612555,-75.656268
7,Saladworks,Salad Place,39.607828,-75.713209
8,Simon Eye Associates,Optical Shop,39.6064,-75.713299
9,ABC Liquors,Liquor Store,39.60649,-75.710961


## Explore Neighborhoods of New Castle county

#### Let's create a function to repeat the same process to all the neighborhoods in New Castle County

In [79]:
def getNearbyVenues(names, latitudes, longitudes, radius=5000):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

#### Now write the code to run the above function on each neighborhood and create a new dataframe called newcastle_venues.

In [82]:
newcastle_venues = getNearbyVenues(names=newcastle_data['Neighborhood'],
                                   latitudes=newcastle_data['Latitude'],
                                   longitudes=newcastle_data['Longitude']
                                  )

Bear
Newark
Claymont
Delaware City
Hockessin
Kirkwood
Middletown
Montchanin
New Castle
Odessa
Port Penn
Rockland
Saint Georges
Townsend
Winterthur
Yorklyn
Wilmington


#### Let's check the size of the resulting dataframe

In [83]:
print(newcastle_venues.shape)
newcastle_venues.head()

(1089, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Bear,39.58,-75.68,Wawa,39.584053,-75.684204,Convenience Store
1,Bear,39.58,-75.68,Amore Pizza,39.592324,-75.677772,Pizza Place
2,Bear,39.58,-75.68,Delaware Rock Gym,39.611009,-75.689161,Gym / Fitness Center
3,Bear,39.58,-75.68,Oceanmart Deli,39.570546,-75.697795,Deli / Bodega
4,Bear,39.58,-75.68,Lums Pond State Park,39.561565,-75.720337,State / Provincial Park


In [84]:
newcastle_venues

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Bear,39.58,-75.68,Wawa,39.584053,-75.684204,Convenience Store
1,Bear,39.58,-75.68,Amore Pizza,39.592324,-75.677772,Pizza Place
2,Bear,39.58,-75.68,Delaware Rock Gym,39.611009,-75.689161,Gym / Fitness Center
3,Bear,39.58,-75.68,Oceanmart Deli,39.570546,-75.697795,Deli / Bodega
4,Bear,39.58,-75.68,Lums Pond State Park,39.561565,-75.720337,State / Provincial Park
5,Bear,39.58,-75.68,Go Ape! Treetop Adventure,39.559182,-75.716021,Athletics & Sports
6,Bear,39.58,-75.68,Frank's Pizzeria,39.612555,-75.656268,Pizza Place
7,Bear,39.58,-75.68,Saladworks,39.607828,-75.713209,Salad Place
8,Bear,39.58,-75.68,Simon Eye Associates,39.6064,-75.713299,Optical Shop
9,Bear,39.58,-75.68,ABC Liquors,39.60649,-75.710961,Liquor Store


#### Let's check how many venues were returned for each neighborhood

In [85]:
newcastle_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Bear,72,72,72,72,72,72
Claymont,100,100,100,100,100,100
Delaware City,15,15,15,15,15,15
Hockessin,68,68,68,68,68,68
Kirkwood,24,24,24,24,24,24
Middletown,86,86,86,86,86,86
Montchanin,100,100,100,100,100,100
New Castle,100,100,100,100,100,100
Newark,100,100,100,100,100,100
Odessa,36,36,36,36,36,36


#### Let's find out how many unique categories can be curated from all the returned venues

In [86]:
print('There are {} uniques categories.'.format(len(newcastle_venues['Venue Category'].unique())))

There are 191 uniques categories.


## Analyze Each Neighborhood

In [87]:
# one hot encoding
newcastle_onehot = pd.get_dummies(newcastle_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
newcastle_onehot['Neighborhood'] = newcastle_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [newcastle_onehot.columns[-1]] + list(newcastle_onehot.columns[:-1])
newcastle_onehot = newcastle_onehot[fixed_columns]

newcastle_onehot.head()

Unnamed: 0,Neighborhood,ATM,American Restaurant,Arcade,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Auto Dealership,Auto Garage,Automotive Shop,BBQ Joint,Baby Store,Bagel Shop,Bakery,Bank,Bar,Baseball Stadium,Basketball Court,Beach,Big Box Store,Bistro,Boat or Ferry,Bookstore,Boutique,Bowling Alley,Breakfast Spot,Brewery,Buffet,Burger Joint,Burrito Place,Bus Stop,Business Service,Café,Cajun / Creole Restaurant,Camera Store,Canal,Chinese Restaurant,Clothing Store,Coffee Shop,College Basketball Court,Construction & Landscaping,Convenience Store,Cosmetics Shop,Deli / Bodega,Department Store,Dessert Shop,Diner,Disc Golf,Discount Store,Dive Bar,Dog Run,Donut Shop,Event Space,Eye Doctor,Farm,Farmers Market,Fast Food Restaurant,Fish Market,Flower Shop,Fondue Restaurant,Food & Drink Shop,Food Court,Football Stadium,Fried Chicken Joint,Frozen Yogurt Shop,Furniture / Home Store,Garden,Gas Station,Gastropub,General Entertainment,General Travel,Gift Shop,Golf Course,Gourmet Shop,Government Building,Grocery Store,Gym,Gym / Fitness Center,Gym Pool,Harbor / Marina,Hardware Store,Health & Beauty Service,Health Food Store,Historic Site,History Museum,Hobby Shop,Hot Dog Joint,Hotel,IT Services,Ice Cream Shop,Indian Restaurant,Insurance Office,Intersection,Italian Restaurant,Japanese Restaurant,Jazz Club,Jewelry Store,Juice Bar,Karaoke Bar,Kids Store,Kitchen Supply Store,Lake,Laundromat,Lawyer,Light Rail Station,Lighthouse,Lingerie Store,Liquor Store,Mac & Cheese Joint,Malay Restaurant,Martial Arts Dojo,Massage Studio,Mattress Store,Mediterranean Restaurant,Men's Store,Mexican Restaurant,Miscellaneous Shop,Mobile Phone Shop,Moroccan Restaurant,Movie Theater,Multiplex,Museum,Music Store,Music Venue,National Park,Nature Preserve,New American Restaurant,Nightclub,Opera House,Optical Shop,Other Great Outdoors,Paintball Field,Paper / Office Supplies Store,Park,Performing Arts Venue,Pet Store,Pharmacy,Pizza Place,Playground,Plaza,Pool,Pub,Racetrack,Record Shop,Recreation Center,Rental Car Location,Rental Service,Rest Area,Restaurant,River,Salad Place,Salon / Barbershop,Sandwich Place,Seafood Restaurant,Shipping Store,Shoe Store,Shopping Plaza,Shrine,Smoke Shop,Snack Place,Soccer Field,Spa,Sporting Goods Shop,Sports Bar,State / Provincial Park,Steakhouse,Storage Facility,Supermarket,Supplement Shop,Sushi Restaurant,Taco Place,Tea Room,Thai Restaurant,Theater,Theme Park,Thrift / Vintage Store,Toll Booth,Toll Plaza,Tourist Information Center,Trail,Train Station,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Weight Loss Center,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,Bear,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Bear,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Bear,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,Bear,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,Bear,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


#### And let's examine the new dataframe size

In [88]:
newcastle_onehot.shape

(1089, 192)

#### Next, let's group rows by neighborhood and by taking the mean of the frequency of occurrence of each category

In [89]:
newcastle_grouped = newcastle_onehot.groupby('Neighborhood').mean().reset_index()
newcastle_grouped

Unnamed: 0,Neighborhood,ATM,American Restaurant,Arcade,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Auto Dealership,Auto Garage,Automotive Shop,BBQ Joint,Baby Store,Bagel Shop,Bakery,Bank,Bar,Baseball Stadium,Basketball Court,Beach,Big Box Store,Bistro,Boat or Ferry,Bookstore,Boutique,Bowling Alley,Breakfast Spot,Brewery,Buffet,Burger Joint,Burrito Place,Bus Stop,Business Service,Café,Cajun / Creole Restaurant,Camera Store,Canal,Chinese Restaurant,Clothing Store,Coffee Shop,College Basketball Court,Construction & Landscaping,Convenience Store,Cosmetics Shop,Deli / Bodega,Department Store,Dessert Shop,Diner,Disc Golf,Discount Store,Dive Bar,Dog Run,Donut Shop,Event Space,Eye Doctor,Farm,Farmers Market,Fast Food Restaurant,Fish Market,Flower Shop,Fondue Restaurant,Food & Drink Shop,Food Court,Football Stadium,Fried Chicken Joint,Frozen Yogurt Shop,Furniture / Home Store,Garden,Gas Station,Gastropub,General Entertainment,General Travel,Gift Shop,Golf Course,Gourmet Shop,Government Building,Grocery Store,Gym,Gym / Fitness Center,Gym Pool,Harbor / Marina,Hardware Store,Health & Beauty Service,Health Food Store,Historic Site,History Museum,Hobby Shop,Hot Dog Joint,Hotel,IT Services,Ice Cream Shop,Indian Restaurant,Insurance Office,Intersection,Italian Restaurant,Japanese Restaurant,Jazz Club,Jewelry Store,Juice Bar,Karaoke Bar,Kids Store,Kitchen Supply Store,Lake,Laundromat,Lawyer,Light Rail Station,Lighthouse,Lingerie Store,Liquor Store,Mac & Cheese Joint,Malay Restaurant,Martial Arts Dojo,Massage Studio,Mattress Store,Mediterranean Restaurant,Men's Store,Mexican Restaurant,Miscellaneous Shop,Mobile Phone Shop,Moroccan Restaurant,Movie Theater,Multiplex,Museum,Music Store,Music Venue,National Park,Nature Preserve,New American Restaurant,Nightclub,Opera House,Optical Shop,Other Great Outdoors,Paintball Field,Paper / Office Supplies Store,Park,Performing Arts Venue,Pet Store,Pharmacy,Pizza Place,Playground,Plaza,Pool,Pub,Racetrack,Record Shop,Recreation Center,Rental Car Location,Rental Service,Rest Area,Restaurant,River,Salad Place,Salon / Barbershop,Sandwich Place,Seafood Restaurant,Shipping Store,Shoe Store,Shopping Plaza,Shrine,Smoke Shop,Snack Place,Soccer Field,Spa,Sporting Goods Shop,Sports Bar,State / Provincial Park,Steakhouse,Storage Facility,Supermarket,Supplement Shop,Sushi Restaurant,Taco Place,Tea Room,Thai Restaurant,Theater,Theme Park,Thrift / Vintage Store,Toll Booth,Toll Plaza,Tourist Information Center,Trail,Train Station,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Weight Loss Center,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,Bear,0.0,0.0,0.0,0.0,0.0,0.0,0.013889,0.027778,0.0,0.0,0.0,0.013889,0.0,0.013889,0.0,0.013889,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.013889,0.0,0.0,0.0,0.0,0.0,0.0,0.013889,0.0,0.0,0.0,0.013889,0.0,0.0,0.0,0.027778,0.041667,0.0,0.013889,0.0,0.0,0.0,0.013889,0.041667,0.0,0.0,0.013889,0.0,0.0,0.0,0.013889,0.0,0.0,0.0,0.0,0.0,0.0,0.013889,0.027778,0.0,0.0,0.0,0.0,0.0,0.013889,0.0,0.013889,0.0,0.0,0.0,0.0,0.0,0.027778,0.0,0.013889,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.013889,0.0,0.0,0.027778,0.0,0.0,0.0,0.013889,0.0,0.0,0.0,0.0,0.013889,0.013889,0.0,0.0,0.0,0.0,0.041667,0.0,0.0,0.0,0.0,0.027778,0.0,0.0,0.0,0.0,0.013889,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.013889,0.0,0.0,0.0,0.013889,0.0,0.013889,0.0,0.0,0.0,0.013889,0.069444,0.083333,0.0,0.0,0.0,0.013889,0.0,0.0,0.0,0.0,0.0,0.0,0.027778,0.013889,0.013889,0.0,0.013889,0.0,0.0,0.0,0.013889,0.0,0.013889,0.0,0.0,0.0,0.0,0.0,0.013889,0.0,0.0,0.027778,0.013889,0.0,0.0,0.0,0.0,0.0,0.0,0.013889,0.0,0.0,0.0,0.013889,0.0,0.0,0.013889,0.0,0.0,0.0,0.0,0.013889,0.0,0.0
1,Claymont,0.01,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.01,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.06,0.01,0.02,0.0,0.0,0.02,0.0,0.05,0.0,0.0,0.03,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.02,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.09,0.1,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.04,0.01,0.01,0.0,0.01,0.0,0.0,0.01,0.0,0.01,0.0,0.01,0.01,0.01,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.02,0.0,0.02,0.0,0.0,0.0,0.0,0.02,0.02,0.02,0.01,0.0,0.0,0.01,0.0,0.0,0.0
2,Delaware City,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.0,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.0,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.0,0.0,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.133333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.133333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Hockessin,0.0,0.044118,0.0,0.014706,0.0,0.0,0.014706,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014706,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014706,0.014706,0.0,0.029412,0.014706,0.014706,0.0,0.029412,0.0,0.0,0.0,0.0,0.014706,0.029412,0.0,0.0,0.014706,0.014706,0.014706,0.014706,0.0,0.0,0.0,0.0,0.0,0.0,0.014706,0.0,0.014706,0.0,0.0,0.0,0.014706,0.0,0.0,0.0,0.0,0.0,0.014706,0.014706,0.0,0.014706,0.0,0.014706,0.0,0.0,0.0,0.029412,0.014706,0.0,0.0,0.014706,0.029412,0.0,0.0,0.0,0.0,0.014706,0.0,0.0,0.0,0.0,0.0,0.0,0.029412,0.0,0.0,0.014706,0.014706,0.014706,0.0,0.0,0.014706,0.0,0.0,0.0,0.014706,0.0,0.0,0.0,0.0,0.0,0.014706,0.0,0.0,0.014706,0.0,0.0,0.0,0.0,0.014706,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.044118,0.0,0.0,0.029412,0.058824,0.0,0.0,0.0,0.014706,0.0,0.0,0.0,0.0,0.0,0.0,0.014706,0.0,0.0,0.029412,0.044118,0.0,0.0,0.0,0.029412,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014706,0.014706,0.0,0.014706,0.014706,0.014706,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Kirkwood,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.041667,0.0,0.041667,0.0,0.041667,0.0,0.0,0.0,0.041667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.041667,0.0,0.0,0.041667,0.0,0.0,0.0,0.0,0.0,0.041667,0.0,0.0,0.0,0.0,0.0,0.0,0.041667,0.0,0.0,0.0,0.041667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.041667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.041667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.041667,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.041667,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.041667,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.041667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.041667,0.0,0.0,0.0,0.0,0.041667,0.041667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,Middletown,0.0,0.046512,0.0,0.0,0.0,0.011628,0.0,0.0,0.0,0.011628,0.011628,0.0,0.0,0.0,0.011628,0.011628,0.011628,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011628,0.0,0.0,0.0,0.0,0.0,0.011628,0.0,0.0,0.0,0.0,0.023256,0.0,0.034884,0.0,0.0,0.023256,0.011628,0.0,0.023256,0.0,0.023256,0.0,0.0,0.0,0.011628,0.023256,0.0,0.0,0.011628,0.0,0.034884,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011628,0.0,0.0,0.0,0.0,0.011628,0.0,0.0,0.023256,0.011628,0.0,0.0,0.0,0.011628,0.0,0.0,0.011628,0.0,0.0,0.0,0.0,0.0,0.023256,0.0,0.0,0.0,0.023256,0.023256,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011628,0.0,0.0,0.0,0.023256,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.023256,0.0,0.011628,0.0,0.011628,0.0,0.0,0.011628,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011628,0.0,0.0,0.046512,0.046512,0.011628,0.0,0.011628,0.0,0.0,0.0,0.011628,0.011628,0.0,0.0,0.046512,0.0,0.011628,0.011628,0.034884,0.0,0.011628,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011628,0.0,0.0,0.011628,0.0,0.034884,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011628,0.0,0.0,0.0,0.0,0.0,0.0,0.023256,0.0,0.0,0.0,0.011628,0.011628,0.0,0.011628
6,Montchanin,0.0,0.05,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.01,0.02,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.03,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.02,0.04,0.0,0.0,0.01,0.01,0.01,0.02,0.0,0.02,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.03,0.0,0.0,0.03,0.02,0.02,0.0,0.0,0.0,0.01,0.0,0.01,0.02,0.0,0.01,0.02,0.0,0.03,0.0,0.0,0.0,0.02,0.02,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.01,0.0,0.01,0.0,0.01,0.0,0.0,0.01,0.02,0.01,0.01,0.0,0.0,0.0,0.02,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.03,0.0,0.0,0.03,0.03,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.02,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.02,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0
7,New Castle,0.0,0.05,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.01,0.0,0.0,0.01,0.01,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.06,0.0,0.0,0.0,0.01,0.03,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.02,0.01,0.01,0.01,0.0,0.01,0.0,0.0,0.01,0.01,0.01,0.01,0.02,0.0,0.02,0.0,0.01,0.01,0.02,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.03,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.05,0.04,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.03,0.01,0.0,0.02,0.0,0.0,0.0,0.04,0.02,0.01,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.01,0.0,0.0
8,Newark,0.0,0.02,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.02,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.03,0.01,0.0,0.04,0.0,0.0,0.01,0.0,0.01,0.0,0.04,0.0,0.01,0.03,0.01,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.01,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.05,0.0,0.0,0.0,0.03,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.07,0.08,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.01,0.0,0.0,0.02,0.0,0.01,0.02,0.04,0.0,0.0,0.0,0.01,0.0,0.01,0.01,0.0,0.02,0.0,0.01,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.02,0.0,0.01,0.0,0.0,0.02,0.0,0.0
9,Odessa,0.0,0.055556,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.027778,0.027778,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055556,0.0,0.027778,0.0,0.027778,0.027778,0.0,0.0,0.0,0.0,0.027778,0.0,0.0,0.0,0.0,0.055556,0.0,0.0,0.0,0.0,0.055556,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055556,0.0,0.0,0.027778,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.027778,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055556,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.027778,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.027778,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055556,0.0,0.027778,0.027778,0.027778,0.0,0.027778,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.027778,0.0,0.0,0.0,0.0,0.027778,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055556,0.0,0.0,0.0,0.027778,0.0,0.0,0.0


In [90]:
newcastle_grouped.shape

(17, 192)

#### Let's print each neighborhood along with the top 5 most common venues

In [91]:
num_top_venues = 5

for hood in newcastle_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = newcastle_grouped[newcastle_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Bear----
               venue  freq
0        Pizza Place  0.08
1           Pharmacy  0.07
2  Convenience Store  0.04
3     Discount Store  0.04
4       Liquor Store  0.04


----Claymont----
               venue  freq
0        Pizza Place  0.10
1           Pharmacy  0.09
2  Convenience Store  0.06
3     Discount Store  0.05
4     Sandwich Place  0.04


----Delaware City----
                     venue  freq
0  State / Provincial Park  0.13
1       Seafood Restaurant  0.13
2           Ice Cream Shop  0.07
3            Boat or Ferry  0.07
4                     Café  0.07


----Hockessin----
                 venue  freq
0          Pizza Place  0.06
1                 Park  0.04
2       Sandwich Place  0.04
3  American Restaurant  0.04
4   Salon / Barbershop  0.03


----Kirkwood----
                venue  freq
0         Pizza Place  0.08
1  Seafood Restaurant  0.08
2        Intersection  0.08
3     Harbor / Marina  0.04
4              Bakery  0.04


----Middletown----
                 ven

#### Let's put that into a pandas dataframe

#### First, let's write a function to sort the venues in descending order

In [92]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

#### Now let's create the new dataframe and display the top 10 venues for each neighborhood

In [93]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = newcastle_grouped['Neighborhood']

for ind in np.arange(newcastle_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(newcastle_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Bear,Pizza Place,Pharmacy,Convenience Store,Liquor Store,Discount Store,Construction & Landscaping,Athletics & Sports,Mattress Store,Fried Chicken Joint,Gym / Fitness Center
1,Claymont,Pizza Place,Pharmacy,Convenience Store,Discount Store,Sandwich Place,Liquor Store,Donut Shop,Diner,Coffee Shop,Ice Cream Shop
2,Delaware City,Seafood Restaurant,State / Provincial Park,Intersection,Harbor / Marina,Park,Event Space,Canal,Café,Boat or Ferry,Bakery
3,Hockessin,Pizza Place,Sandwich Place,American Restaurant,Park,Golf Course,Burger Joint,Shopping Plaza,Café,Coffee Shop,Salon / Barbershop
4,Kirkwood,Seafood Restaurant,Intersection,Pizza Place,Harbor / Marina,Basketball Court,State / Provincial Park,Bar,Bakery,Boat or Ferry,River


## Now Let's Cluster Neighborhoods

#### Run k-means to cluster the neighborhood into 4 clusters.

In [94]:
# set number of clusters
kclusters = 4

newcastle_grouped_clustering = newcastle_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(newcastle_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([0, 0, 2, 0, 2, 0, 0, 0, 0, 0], dtype=int32)

#### Let's create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood

In [95]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

newcastle_merged = newcastle_data

# merge newcastle_grouped with newcastle_data to add latitude/longitude for each neighborhood
newcastle_merged = newcastle_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

newcastle_merged.head()

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,New Castle County,Bear,39.58,-75.68,0,Pizza Place,Pharmacy,Convenience Store,Liquor Store,Discount Store,Construction & Landscaping,Athletics & Sports,Mattress Store,Fried Chicken Joint,Gym / Fitness Center
1,New Castle County,Newark,39.62,-75.73,0,Pizza Place,Pharmacy,Ice Cream Shop,Discount Store,Convenience Store,Sandwich Place,Coffee Shop,Donut Shop,Italian Restaurant,Spa
2,New Castle County,Claymont,39.8,-75.45,0,Pizza Place,Pharmacy,Convenience Store,Discount Store,Sandwich Place,Liquor Store,Donut Shop,Diner,Coffee Shop,Ice Cream Shop
3,New Castle County,Delaware City,39.57,-75.6,2,Seafood Restaurant,State / Provincial Park,Intersection,Harbor / Marina,Park,Event Space,Canal,Café,Boat or Ferry,Bakery
4,New Castle County,Hockessin,39.78,-75.68,0,Pizza Place,Sandwich Place,American Restaurant,Park,Golf Course,Burger Joint,Shopping Plaza,Café,Coffee Shop,Salon / Barbershop


In [96]:
newcastle_merged.sort_values('Cluster Labels')

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,New Castle County,Bear,39.58,-75.68,0,Pizza Place,Pharmacy,Convenience Store,Liquor Store,Discount Store,Construction & Landscaping,Athletics & Sports,Mattress Store,Fried Chicken Joint,Gym / Fitness Center
14,New Castle County,Winterthur,39.8,-75.6,0,American Restaurant,Coffee Shop,Golf Course,Gym / Fitness Center,Ice Cream Shop,Park,Bagel Shop,Pharmacy,Italian Restaurant,Pizza Place
11,New Castle County,Rockland,39.78,-75.57,0,Park,Pharmacy,Pizza Place,Gym / Fitness Center,Seafood Restaurant,Italian Restaurant,Golf Course,American Restaurant,Fast Food Restaurant,Coffee Shop
9,New Castle County,Odessa,39.45,-75.66,0,Pharmacy,Chinese Restaurant,American Restaurant,Donut Shop,Video Store,Italian Restaurant,Golf Course,Fast Food Restaurant,Restaurant,Salad Place
15,New Castle County,Yorklyn,39.8,-75.67,0,Park,Pizza Place,American Restaurant,Burger Joint,Sandwich Place,Café,Gym / Fitness Center,Golf Course,Fish Market,Salon / Barbershop
7,New Castle County,Montchanin,39.8,-75.59,0,American Restaurant,Coffee Shop,Park,Grocery Store,Golf Course,Burger Joint,Ice Cream Shop,Pizza Place,Pharmacy,Department Store
8,New Castle County,New Castle,39.66,-75.57,0,Donut Shop,American Restaurant,Pharmacy,Park,Sandwich Place,Pizza Place,Discount Store,Convenience Store,Rental Car Location,Fast Food Restaurant
4,New Castle County,Hockessin,39.78,-75.68,0,Pizza Place,Sandwich Place,American Restaurant,Park,Golf Course,Burger Joint,Shopping Plaza,Café,Coffee Shop,Salon / Barbershop
2,New Castle County,Claymont,39.8,-75.45,0,Pizza Place,Pharmacy,Convenience Store,Discount Store,Sandwich Place,Liquor Store,Donut Shop,Diner,Coffee Shop,Ice Cream Shop
1,New Castle County,Newark,39.62,-75.73,0,Pizza Place,Pharmacy,Ice Cream Shop,Discount Store,Convenience Store,Sandwich Place,Coffee Shop,Donut Shop,Italian Restaurant,Spa


In [97]:
newcastle_merged["Neighborhood"].count()

17

#### Finally, let's visualize the resulting clusters

In [98]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=10)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(newcastle_merged['Latitude'], newcastle_merged['Longitude'], newcastle_merged['Neighborhood'], newcastle_merged['Cluster Labels']):
#    print("processing ....:",dtoronto_merged['Latitude'], dtoronto_merged['Longitude'], dtoronto_merged['Neighborhood'], dtoronto_merged['Cluster Labels'])
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

#### Examine Clusters

#### Cluster 1

In [99]:
newcastle_merged.loc[newcastle_merged['Cluster Labels'] == 0, newcastle_merged.columns[[1] + list(range(5, newcastle_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Bear,Pizza Place,Pharmacy,Convenience Store,Liquor Store,Discount Store,Construction & Landscaping,Athletics & Sports,Mattress Store,Fried Chicken Joint,Gym / Fitness Center
1,Newark,Pizza Place,Pharmacy,Ice Cream Shop,Discount Store,Convenience Store,Sandwich Place,Coffee Shop,Donut Shop,Italian Restaurant,Spa
2,Claymont,Pizza Place,Pharmacy,Convenience Store,Discount Store,Sandwich Place,Liquor Store,Donut Shop,Diner,Coffee Shop,Ice Cream Shop
4,Hockessin,Pizza Place,Sandwich Place,American Restaurant,Park,Golf Course,Burger Joint,Shopping Plaza,Café,Coffee Shop,Salon / Barbershop
6,Middletown,American Restaurant,Restaurant,Pizza Place,Pharmacy,Supermarket,Sandwich Place,Fast Food Restaurant,Coffee Shop,Department Store,Mexican Restaurant
7,Montchanin,American Restaurant,Coffee Shop,Park,Grocery Store,Golf Course,Burger Joint,Ice Cream Shop,Pizza Place,Pharmacy,Department Store
8,New Castle,Donut Shop,American Restaurant,Pharmacy,Park,Sandwich Place,Pizza Place,Discount Store,Convenience Store,Rental Car Location,Fast Food Restaurant
9,Odessa,Pharmacy,Chinese Restaurant,American Restaurant,Donut Shop,Video Store,Italian Restaurant,Golf Course,Fast Food Restaurant,Restaurant,Salad Place
11,Rockland,Park,Pharmacy,Pizza Place,Gym / Fitness Center,Seafood Restaurant,Italian Restaurant,Golf Course,American Restaurant,Fast Food Restaurant,Coffee Shop
14,Winterthur,American Restaurant,Coffee Shop,Golf Course,Gym / Fitness Center,Ice Cream Shop,Park,Bagel Shop,Pharmacy,Italian Restaurant,Pizza Place


In [100]:
newcastle_merged.loc[newcastle_merged['Cluster Labels'] == 1, newcastle_merged.columns[[1] + list(range(5, newcastle_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
10,Port Penn,Pub,Nature Preserve,IT Services,Event Space,Beach,Trail,Bar,Canal,Garden,Furniture / Home Store


In [101]:
newcastle_merged.loc[newcastle_merged['Cluster Labels'] == 2, newcastle_merged.columns[[1] + list(range(5, newcastle_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
3,Delaware City,Seafood Restaurant,State / Provincial Park,Intersection,Harbor / Marina,Park,Event Space,Canal,Café,Boat or Ferry,Bakery
5,Kirkwood,Seafood Restaurant,Intersection,Pizza Place,Harbor / Marina,Basketball Court,State / Provincial Park,Bar,Bakery,Boat or Ferry,River
12,Saint Georges,Intersection,Pizza Place,Café,Convenience Store,Theme Park,Deli / Bodega,Restaurant,River,Football Stadium,Basketball Court


In [102]:
newcastle_merged.loc[newcastle_merged['Cluster Labels'] == 3, newcastle_merged.columns[[1] + list(range(5, newcastle_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
13,Townsend,State / Provincial Park,Lake,Park,Construction & Landscaping,Farm,Farmers Market,Fast Food Restaurant,Music Venue,Pizza Place,Liquor Store
