#### Import all of the necessary libraries and packages.

In [2]:
import numpy as np

import pandas as pd
pd.set_option('display.max_columns', 10)
pd.set_option('display.max_rows', 100)

import json # library to handle JSON files

from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

import folium # map rendering library
from folium import plugins

print('Libraries imported.')

Libraries imported.


#### Check the working directory to make sure where it is pointing to so that we can upload the csv file.

In [3]:
pwd

'/Users/dustincremascoli'

In [4]:
dfs_coord = pd.read_csv("us-zip-code-latitude-and-longitude.csv", sep=',')

In [5]:
dfs_coord.head()

Unnamed: 0,Zip,City,State,Latitude,Longitude,Timezone,Daylight savings time flag,geopoint,County
0,60803,Alsip,IL,41.675861,-87.73189,-6,1,"41.675861,-87.73189",Cook
1,60666,Amf Ohare,IL,41.968029,-87.891214,-6,1,"41.968029,-87.891214",Cook
2,60004,Arlington Heights,IL,42.108428,-87.97723,-6,1,"42.108428,-87.97723",Cook
3,60005,Arlington Heights,IL,42.069327,-87.98464,-6,1,"42.069327,-87.98464",Cook
4,60006,Arlington Heights,IL,41.811929,-87.68732,-6,1,"41.811929,-87.68732",Cook


In [6]:
dfs_coord.shape

(273, 9)

#### Clean up the pandas dataframe and reformat to just have the zip, neighborhood, state and lat and long and county name.

In [7]:
dfs_coord.rename(columns = {'City':'Neighborhood'}, inplace = True)

In [8]:
dfs_coord.head()

Unnamed: 0,Zip,Neighborhood,State,Latitude,Longitude,Timezone,Daylight savings time flag,geopoint,County
0,60803,Alsip,IL,41.675861,-87.73189,-6,1,"41.675861,-87.73189",Cook
1,60666,Amf Ohare,IL,41.968029,-87.891214,-6,1,"41.968029,-87.891214",Cook
2,60004,Arlington Heights,IL,42.108428,-87.97723,-6,1,"42.108428,-87.97723",Cook
3,60005,Arlington Heights,IL,42.069327,-87.98464,-6,1,"42.069327,-87.98464",Cook
4,60006,Arlington Heights,IL,41.811929,-87.68732,-6,1,"41.811929,-87.68732",Cook


In [9]:
dfs_coord.shape

(273, 9)

In [10]:
dfs_coord.drop(['Timezone', 'Daylight savings time flag', 'geopoint'], axis=1)

Unnamed: 0,Zip,Neighborhood,State,Latitude,Longitude,County
0,60803,Alsip,IL,41.675861,-87.731890,Cook
1,60666,Amf Ohare,IL,41.968029,-87.891214,Cook
2,60004,Arlington Heights,IL,42.108428,-87.977230,Cook
3,60005,Arlington Heights,IL,42.069327,-87.984640,Cook
4,60006,Arlington Heights,IL,41.811929,-87.687320,Cook
...,...,...,...,...,...,...
268,60072,Ringwood,IL,42.405464,-88.302740,Mchenry
269,60081,Spring Grove,IL,42.441869,-88.221670,Mchenry
270,60180,Union,IL,42.229437,-88.526060,Mchenry
271,60097,Wonder Lake,IL,42.384504,-88.349500,Mchenry


In [11]:
# Export the new Pandas Dataframe into a csv file for further exploration.

dfs_coord.to_csv('/Users/dustincremascoli/Documents/IBM Data Science Certification/Module 9/villages.csv')

In [12]:
# Define Foursquare Credentials and Version

CLIENT_ID = 'V1UGJBFWMVFH0TZ5Q2MPCMIEZYCXK3VAOTCYSSOY5VUTVCIA' # your Foursquare ID
CLIENT_SECRET = 'BCTZQABODR5J3BT3NKQYTOTJZGYX4PLYQK15IHPKRZZU2DEF' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: V1UGJBFWMVFH0TZ5Q2MPCMIEZYCXK3VAOTCYSSOY5VUTVCIA
CLIENT_SECRET:BCTZQABODR5J3BT3NKQYTOTJZGYX4PLYQK15IHPKRZZU2DEF


In [13]:
# Let's explore a familiar Village in our dataframe.
# Get the neighborhood's name.

dfs_coord.loc[69, 'Neighborhood']

'Mount Prospect'

In [14]:
# Get the neighborhood's latitude and longitude values.

neighborhood_latitude = dfs_coord.loc[69, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = dfs_coord.loc[69, 'Longitude'] # neighborhood longitude value

neighborhood_name = dfs_coord.loc[69, 'Neighborhood'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of Mount Prospect are 42.065427, -87.93621.


In [15]:
# Now, let's get the top 100 venues that are in Mount Prospect within a radius of 5000 meters.
# First, let's create the GET request URL. Name your URL url.

LIMIT = 100

radius = 5000

url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude, 
    neighborhood_longitude, 
    radius, 
    LIMIT)
url

'https://api.foursquare.com/v2/venues/explore?&client_id=V1UGJBFWMVFH0TZ5Q2MPCMIEZYCXK3VAOTCYSSOY5VUTVCIA&client_secret=BCTZQABODR5J3BT3NKQYTOTJZGYX4PLYQK15IHPKRZZU2DEF&v=20180605&ll=42.065427,-87.93621&radius=5000&limit=100'

In [16]:
# Send the GET request and examine the resutls

results = requests.get(url).json()

In [16]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [17]:
# Now we are ready to clean the json and structure it into a pandas dataframe.

venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

  """


Unnamed: 0,name,categories,lat,lng
0,Le Peep Cafe & Grill,Breakfast Spot,42.063805,-87.936496
1,Capannari Ice Cream,Ice Cream Shop,42.065709,-87.939819
2,Mt. Prospect Lions Club Farmers Market,Farmers Market,42.063679,-87.937446
3,Mrs. P & Me,Pub,42.062545,-87.935309
4,Submarine Express,Sandwich Place,42.063991,-87.936998


In [18]:
# And how many venues were returned by Foursquare?

print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

100 venues were returned by Foursquare.


In [19]:
# Export the new Pandas Dataframe into a csv file for further exploration.

nearby_venues.to_csv('/Users/dustincremascoli/Documents/IBM Data Science Certification/Module 9/nv_example.csv')

#### Explore Neighborhoods in Chicagoland

In [20]:
# Let's create a function to repeat the same process to all the neighborhoods in the five counties within Chicagoland.
# Excluding the zip codes that fall within the City of Chicago.

def getNearbyVenues(names, latitudes, longitudes, radius=5000):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [21]:
# The code to run the above function on each neighborhood and create a new dataframe called chicagoland_venues.

chicagoland_venues = getNearbyVenues(names=dfs_coord['Neighborhood'],
                                   latitudes=dfs_coord['Latitude'],

                                     longitudes=dfs_coord['Longitude'])

Alsip
Amf Ohare
Arlington Heights
Arlington Heights
Arlington Heights
Bedford Park
Bellwood
Berkeley
Berwyn
Blue Island
Bridgeview
Broadview
Brookfield
Burbank
Calumet City
Chicago Heights
Chicago Heights
Chicago Ridge
Cicero
Country Club Hills
Des Plaines
Des Plaines
Des Plaines
Des Plaines
Dolton
Elk Grove Village
Elk Grove Village
Elmwood Park
Evanston
Evanston
Evanston
Evanston
Evanston
Evanston
Evergreen Park
Flossmoor
Forest Park
Franklin Park
Glencoe
Glenview
Glenview Nas
Glenwood
Golf
Hanover Park
Harvey
Harwood Heights
Hazel Crest
Hickory Hills
Hillside
Hines
Hoffman Estates
Hometown
Homewood
Justice
Kenilworth
La Grange
La Grange Park
Lansing
Lemont
Lincolnwood
Lyons
Matteson
Maywood
McHenry
Melrose Park
Melrose Park
Melrose Park
Midlothian
Morton Grove
Mount Prospect
Niles
Northbrook
Northbrook
Oak Forest
Oak Lawn
Oak Lawn
Oak Park
Oak Park
Oak Park
Oak Park
Olympia Fields
Orland Park
Orland Park
Palatine
Palatine
Palatine
Palatine
Palatine
Palatine
Palatine
Palos Heights
Pa

In [22]:
# Let's check the size of the resulting dataframe

print(chicagoland_venues.shape)
chicagoland_venues.head()

(23991, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Alsip,41.675861,-87.73189,Burrito Jalisco,41.674929,-87.739512,Mexican Restaurant
1,Alsip,41.675861,-87.73189,LA Fitness,41.673129,-87.740504,Gym / Fitness Center
2,Alsip,41.675861,-87.73189,Cooper's Hawk,41.691086,-87.740988,American Restaurant
3,Alsip,41.675861,-87.73189,Mariano's Fresh Market,41.692853,-87.741475,Grocery Store
4,Alsip,41.675861,-87.73189,Starbucks,41.69216,-87.740454,Coffee Shop


In [24]:
# Export the new Pandas Dataframe into a csv file for further exploration.

chicagoland_venues.to_csv('/Users/dustincremascoli/Documents/IBM Data Science Certification/Module 9/detail.csv')

In [25]:
# Let's check how many venues were returned for each neighborhood

chicagoland_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Addison,100,100,100,100,100,100
Alden,81,81,81,81,81,81
Algonquin,100,100,100,100,100,100
Alsip,100,100,100,100,100,100
Amf Ohare,100,100,100,100,100,100
...,...,...,...,...,...,...
Wood Dale,100,100,100,100,100,100
Woodridge,100,100,100,100,100,100
Woodstock,81,81,81,81,81,81
Worth,100,100,100,100,100,100


In [26]:
# Let's find out how many unique categories can be curated from all the returned venues

print('There are {} uniques categories.'.format(len(chicagoland_venues['Venue Category'].unique())))

There are 371 uniques categories.


#### Analyze Each Neighborhood

In [61]:
# one hot encoding
chicagoland_onehot = pd.get_dummies(chicagoland_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
chicagoland_onehot['Neighborhood'] = chicagoland_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [chicagoland_onehot.columns[-1]] + list(chicagoland_onehot.columns[:-1])
chicagoland_onehot = chicagoland_onehot[fixed_columns]

chicagoland_onehot.head()

Unnamed: 0,Neighborhood,ATM,Accessories Store,African Restaurant,Airport,...,Wings Joint,Women's Store,Yoga Studio,Zoo,Zoo Exhibit
0,Alsip,0,0,0,0,...,0,0,0,0,0
1,Alsip,0,0,0,0,...,0,0,0,0,0
2,Alsip,0,0,0,0,...,0,0,0,0,0
3,Alsip,0,0,0,0,...,0,0,0,0,0
4,Alsip,0,0,0,0,...,0,0,0,0,0


In [62]:
chicagoland_onehot.shape

(23991, 372)

In [63]:
# Next, let's group rows by neighborhood and by taking the mean of the frequency of occurrence of each category

chicagoland_grouped = chicagoland_onehot.groupby('Neighborhood').mean().reset_index()
chicagoland_grouped

Unnamed: 0,Neighborhood,ATM,Accessories Store,African Restaurant,Airport,...,Wings Joint,Women's Store,Yoga Studio,Zoo,Zoo Exhibit
0,Addison,0.000000,0.00,0.0,0.000000,...,0.01,0.00,0.00,0.0,0.0
1,Alden,0.000000,0.00,0.0,0.000000,...,0.00,0.00,0.00,0.0,0.0
2,Algonquin,0.000000,0.00,0.0,0.000000,...,0.02,0.00,0.00,0.0,0.0
3,Alsip,0.010000,0.00,0.0,0.000000,...,0.01,0.00,0.00,0.0,0.0
4,Amf Ohare,0.000000,0.01,0.0,0.000000,...,0.00,0.01,0.01,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...
194,Wood Dale,0.000000,0.00,0.0,0.000000,...,0.02,0.00,0.00,0.0,0.0
195,Woodridge,0.000000,0.00,0.0,0.000000,...,0.00,0.00,0.00,0.0,0.0
196,Woodstock,0.000000,0.00,0.0,0.000000,...,0.00,0.00,0.00,0.0,0.0
197,Worth,0.000000,0.00,0.0,0.000000,...,0.00,0.00,0.00,0.0,0.0


In [64]:
chicagoland_grouped.shape

(199, 372)

In [65]:
# Let's print each neighborhood along with the top 5 most common venues

num_top_venues = 5

for hood in chicagoland_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = chicagoland_grouped[chicagoland_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Addison----
                venue  freq
0  Mexican Restaurant  0.07
1         Coffee Shop  0.06
2         Pizza Place  0.06
3      Sandwich Place  0.04
4       Hot Dog Joint  0.04


----Alden----
                  venue  freq
0    Mexican Restaurant  0.06
1  Fast Food Restaurant  0.06
2           Pizza Place  0.05
3        Sandwich Place  0.05
4   American Restaurant  0.04


----Algonquin----
                venue  freq
0      Sandwich Place  0.05
1         Coffee Shop  0.05
2      Breakfast Spot  0.04
3  Italian Restaurant  0.04
4          Restaurant  0.03


----Alsip----
                  venue  freq
0  Fast Food Restaurant  0.09
1           Pizza Place  0.06
2    Italian Restaurant  0.06
3    Mexican Restaurant  0.05
4   American Restaurant  0.04


----Amf Ohare----
                venue  freq
0         Coffee Shop  0.07
1               Hotel  0.06
2  Mexican Restaurant  0.05
3          Steakhouse  0.04
4     Airport Service  0.04


----Antioch----
                 venue  freq
0

4           Golf Course  0.04


----Forest Park----
                  venue  freq
0                  Park  0.07
1  Fast Food Restaurant  0.06
2         Grocery Store  0.06
3    Mexican Restaurant  0.05
4        Breakfast Spot  0.05


----Fort Sheridan----
                 venue  freq
0          Coffee Shop  0.07
1   Mexican Restaurant  0.04
2  American Restaurant  0.04
3        Hot Dog Joint  0.04
4       Breakfast Spot  0.04


----Fox Lake----
                 venue  freq
0                  Bar  0.10
1  American Restaurant  0.10
2          Pizza Place  0.09
3    Convenience Store  0.04
4                 Bank  0.04


----Fox River Grove----
                  venue  freq
0  Fast Food Restaurant  0.07
1        Sandwich Place  0.07
2    Italian Restaurant  0.05
3           Pizza Place  0.05
4           Coffee Shop  0.04


----Fox Valley----
                venue  freq
0  Mexican Restaurant  0.05
1      Ice Cream Shop  0.05
2  Italian Restaurant  0.04
3      Breakfast Spot  0.04
4         

                 venue  freq
0        Grocery Store  0.06
1          Pizza Place  0.06
2              Exhibit  0.05
3   Mexican Restaurant  0.05
4  American Restaurant  0.04


----Lafox----
           venue  freq
0    Pizza Place  0.10
1            Bar  0.10
2  Garden Center  0.05
3    Golf Course  0.05
4   Liquor Store  0.05


----Lake Bluff----
                 venue  freq
0  American Restaurant  0.09
1       Sandwich Place  0.08
2   Mexican Restaurant  0.05
3          Coffee Shop  0.05
4          Pizza Place  0.04


----Lake Forest----
                 venue  freq
0          Coffee Shop  0.09
1       Sandwich Place  0.06
2  American Restaurant  0.06
3   Mexican Restaurant  0.04
4          Pizza Place  0.04


----Lake Villa----
                  venue  freq
0    Mexican Restaurant  0.07
1           Pizza Place  0.07
2  Fast Food Restaurant  0.07
3        Sandwich Place  0.05
4  Gym / Fitness Center  0.05


----Lake Zurich----
                venue  freq
0         Pizza Place  0.04
1 

4  Grocery Store  0.07


----Plato Center----
                        venue  freq
0                        Park  0.33
1                         Bar  0.13
2          Light Rail Station  0.13
3            Volleyball Court  0.07
4  Construction & Landscaping  0.07


----Posen----
                  venue  freq
0           Pizza Place  0.07
1                   Bar  0.07
2    Mexican Restaurant  0.07
3  Fast Food Restaurant  0.06
4         Grocery Store  0.06


----Prospect Heights----
                venue  freq
0       Grocery Store  0.08
1         Pizza Place  0.07
2  Mexican Restaurant  0.06
3  Italian Restaurant  0.05
4              Bakery  0.04


----Richmond----
                 venue  freq
0                 Park  0.09
1        Grocery Store  0.06
2       Breakfast Spot  0.06
3  American Restaurant  0.06
4          Pizza Place  0.06


----Richton Park----
                  venue  freq
0  Fast Food Restaurant  0.06
1        Sandwich Place  0.05
2              Pharmacy  0.05
3        Di

                venue  freq
0  Mexican Restaurant  0.06
1                Park  0.05
2      Breakfast Spot  0.04
3      Ice Cream Shop  0.04
4         Pizza Place  0.04


----Wheeling----
                venue  freq
0         Pizza Place  0.08
1  Italian Restaurant  0.06
2  Mexican Restaurant  0.06
3                Park  0.05
4       Grocery Store  0.04


----Willow Springs----
                 venue  freq
0  American Restaurant  0.06
1          Coffee Shop  0.04
2           Donut Shop  0.04
3   Italian Restaurant  0.03
4       Clothing Store  0.03


----Wilmette----
                 venue  freq
0          Coffee Shop  0.07
1  American Restaurant  0.07
2               Bakery  0.05
3                Beach  0.04
4                 Park  0.04


----Winfield----
                  venue  freq
0           Pizza Place  0.09
1                  Park  0.06
2         Grocery Store  0.05
3  Fast Food Restaurant  0.05
4        Breakfast Spot  0.04


----Winnetka----
                 venue  freq
0     

In [66]:
# Let's put that into a pandas dataframe
# First, let's write a function to sort the venues in descending order.

def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [67]:
# Now let's create the new dataframe and display the top 10 venues for each neighborhood.

num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = chicagoland_grouped['Neighborhood']

for ind in np.arange(chicagoland_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(chicagoland_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,...,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Addison,Mexican Restaurant,Coffee Shop,Pizza Place,Hot Dog Joint,...,Breakfast Spot,Italian Restaurant,Bar,Bakery,Convenience Store
1,Alden,Mexican Restaurant,Fast Food Restaurant,Sandwich Place,Pizza Place,...,Bar,Gas Station,Construction & Landscaping,Park,Chinese Restaurant
2,Algonquin,Coffee Shop,Sandwich Place,Italian Restaurant,Breakfast Spot,...,Pet Store,Pizza Place,Grocery Store,Convenience Store,Restaurant
3,Alsip,Fast Food Restaurant,Pizza Place,Italian Restaurant,Mexican Restaurant,...,Grocery Store,Park,Pub,Donut Shop,Coffee Shop
4,Amf Ohare,Coffee Shop,Hotel,Mexican Restaurant,Steakhouse,...,American Restaurant,Snack Place,Tea Room,Clothing Store,Airport Lounge


In [68]:
# Export the new Pandas Dataframe into a csv file for further exploration.

neighborhoods_venues_sorted.to_csv('/Users/dustincremascoli/Documents/IBM Data Science Certification/Module 9/nvs.csv')

#### 4. Cluster Neighborhoods
#### Run k-means to cluster the neighborhood into 5 clusters.

In [69]:
# set number of clusters
kclusters = 5

chicagoland_grouped_clustering = chicagoland_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(chicagoland_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([4, 0, 4, 0, 4, 0, 4, 4, 4, 4], dtype=int32)

In [70]:
# Let's create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood.

# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

chicagoland_merged = dfs_coord

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
chicagoland_merged = chicagoland_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

chicagoland_merged.head() # check the last columns!

Unnamed: 0,Zip,Neighborhood,State,Latitude,Longitude,...,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,60803,Alsip,IL,41.675861,-87.73189,...,Grocery Store,Park,Pub,Donut Shop,Coffee Shop
1,60666,Amf Ohare,IL,41.968029,-87.891214,...,American Restaurant,Snack Place,Tea Room,Clothing Store,Airport Lounge
2,60004,Arlington Heights,IL,42.108428,-87.97723,...,Pizza Place,Bakery,Park,Taco Place,Sushi Restaurant
3,60005,Arlington Heights,IL,42.069327,-87.98464,...,Pizza Place,Bakery,Park,Taco Place,Sushi Restaurant
4,60006,Arlington Heights,IL,41.811929,-87.68732,...,Pizza Place,Bakery,Park,Taco Place,Sushi Restaurant


In [58]:
# Export the new Pandas Dataframe into a csv file for further exploration.

chicagoland_merged.to_csv('/Users/dustincremascoli/Documents/IBM Data Science Certification/Module 9/cluster_merged.csv')

In [49]:
address = 'Chicago, IL'

geolocator = Nominatim(user_agent="chicago_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Chicagoland are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Chicagoland are 41.8755616, -87.6244212.


In [50]:
# Finally, let's visualize the resulting clusters

# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(chicagoland_merged['Latitude'], chicagoland_merged['Longitude'], chicagoland_merged['Neighborhood'], chicagoland_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

#### Examine Clusters
#### Now, you can examine each cluster and determine the discriminating venue categories that distinguish each cluster. Based on the defining categories, you can then assign a name to each cluster. I will leave this exercise to you.

In [57]:
# Cluster 1

chicagoland_merged.loc[chicagoland_merged['Cluster Labels'] == 0, chicagoland_merged.columns[[1] + list(range(5, chicagoland_merged.shape[1]))]]

Unnamed: 0,Neighborhood,Timezone,Daylight savings time flag,geopoint,County,...,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Alsip,-6,1,"41.675861,-87.73189",Cook,...,Grocery Store,Park,Pub,Donut Shop,Coffee Shop
9,Blue Island,-6,1,"41.656592,-87.68154",Cook,...,Pizza Place,American Restaurant,Italian Restaurant,Supermarket,Breakfast Spot
10,Bridgeview,-6,1,"41.742432,-87.80678",Cook,...,Mexican Restaurant,Ice Cream Shop,Sandwich Place,Breakfast Spot,Convenience Store
14,Calumet City,-6,1,"41.614188,-87.54638",Cook,...,Grocery Store,American Restaurant,Pizza Place,Seafood Restaurant,Restaurant
19,Country Club Hills,-6,1,"41.561134,-87.72398",Cook,...,BBQ Joint,Mexican Restaurant,Breakfast Spot,Pizza Place,Donut Shop
24,Dolton,-6,1,"41.626839,-87.59865",Cook,...,Food,Pharmacy,Discount Store,Donut Shop,Caribbean Restaurant
41,Glenwood,-6,1,"41.544584,-87.61289",Cook,...,Ice Cream Shop,Pharmacy,American Restaurant,Discount Store,Fried Chicken Joint
44,Harvey,-6,1,"41.609078,-87.66264",Cook,...,Bar,Lounge,Sandwich Place,Mexican Restaurant,Hot Dog Joint
46,Hazel Crest,-6,1,"41.574034,-87.67857",Cook,...,Golf Course,Sandwich Place,Supermarket,Donut Shop,BBQ Joint
47,Hickory Hills,-6,1,"41.723782,-87.82825",Cook,...,Pizza Place,Breakfast Spot,Ice Cream Shop,BBQ Joint,Coffee Shop


In [53]:
# Cluster 2

chicagoland_merged.loc[chicagoland_merged['Cluster Labels'] == 1, chicagoland_merged.columns[[1] + list(range(5, chicagoland_merged.shape[1]))]]

Unnamed: 0,Neighborhood,Timezone,Daylight savings time flag,geopoint,County,...,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
243,Russell,-6,1,"42.322814,-87.610053",Lake,...,Falafel Restaurant,Farm,Farmers Market,Fast Food Restaurant,Field


In [54]:
# Cluster 3

chicagoland_merged.loc[chicagoland_merged['Cluster Labels'] == 2, chicagoland_merged.columns[[1] + list(range(5, chicagoland_merged.shape[1]))]]

Unnamed: 0,Neighborhood,Timezone,Daylight savings time flag,geopoint,County,...,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
151,Dement,-6,1,"41.964197,-88.951205",Dupage,...,Fish & Chips Shop,Factory,Falafel Restaurant,Farmers Market,Fast Food Restaurant


In [55]:
# Cluster 1

chicagoland_merged.loc[chicagoland_merged['Cluster Labels'] == 3, chicagoland_merged.columns[[1] + list(range(5, chicagoland_merged.shape[1]))]]

Unnamed: 0,Neighborhood,Timezone,Daylight savings time flag,geopoint,County,...,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
179,Virgil,-6,1,"41.908736,-88.59915",Dupage,...,Restaurant,Fast Food Restaurant,Construction & Landscaping,Park,Pizza Place
208,Maple Park,-6,1,"41.91842,-88.57626",Kane,...,Restaurant,American Restaurant,Farm,Event Space,Exhibit
211,Plato Center,-6,1,"42.025776,-88.425931",Kane,...,Pet Store,Construction & Landscaping,Volleyball Court,Farm,Food & Drink Shop
261,Hebron,-6,1,"42.468318,-88.43125",Mchenry,...,Border Crossing,Camera Store,Park,Home Service,Filipino Restaurant


In [56]:
# Cluster 1

chicagoland_merged.loc[chicagoland_merged['Cluster Labels'] == 4, chicagoland_merged.columns[[1] + list(range(5, chicagoland_merged.shape[1]))]]

Unnamed: 0,Neighborhood,Timezone,Daylight savings time flag,geopoint,County,...,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,Amf Ohare,-6,1,"41.968029,-87.891214",Cook,...,American Restaurant,Snack Place,Tea Room,Clothing Store,Airport Lounge
2,Arlington Heights,-6,1,"42.108428,-87.97723",Cook,...,Pizza Place,Bakery,Park,Taco Place,Sushi Restaurant
3,Arlington Heights,-6,1,"42.069327,-87.98464",Cook,...,Pizza Place,Bakery,Park,Taco Place,Sushi Restaurant
4,Arlington Heights,-6,1,"41.811929,-87.68732",Cook,...,Pizza Place,Bakery,Park,Taco Place,Sushi Restaurant
5,Bedford Park,-6,1,"41.811929,-87.68732",Cook,...,Hot Dog Joint,Italian Restaurant,Liquor Store,Seafood Restaurant,Park
...,...,...,...,...,...,...,...,...,...,...,...
257,Crystal Lake,-6,1,"42.226623,-88.33066",Mchenry,...,Grocery Store,Coffee Shop,Sandwich Place,Ice Cream Shop,Hot Dog Joint
258,Crystal Lake,-6,1,"42.324761,-88.452481",Mchenry,...,Grocery Store,Coffee Shop,Sandwich Place,Ice Cream Shop,Hot Dog Joint
263,Lake in the Hills,-6,1,"42.185733,-88.348484",Mchenry,...,Pizza Place,Grocery Store,Pet Store,Clothing Store,Sandwich Place
267,Richmond,-6,1,"42.464639,-88.3028",Mchenry,...,Baseball Field,Fast Food Restaurant,Gift Shop,Sandwich Place,German Restaurant
