In [1]:
import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup

import geocoder
import json # library to handle JSON files

from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

import folium # map rendering library
print('Libraries imported.')

Libraries imported.


### Getting Illinois map location from csv file

In [2]:
il_data = pd.read_csv('dataset/illinois.csv')

In [3]:
il_data.head()

Unnamed: 0,Suburbs,Latitude,Longitude
0,Addison,41.93407,-87.9935
1,Algonquin,42.16439,-88.32686
2,Antioch,42.47898,-88.09638
3,Arlington Heights,42.08729,-87.98044
4,Aurora,41.7571,-88.31613


In [4]:
address = 'Chicago Ridge, Illinois'
#geopy
#User_Agent is an http request header that is sent with each request.
geolocator = Nominatim(user_agent="chicago_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of {} are {}, {}.'.format(address,latitude, longitude))

The geograpical coordinate of Chicago Ridge, Illinois are 41.7014217, -87.7792196.


#### Mapping Neighborhood

In [16]:
neighbor_loc = il_data.loc[0,'Suburbs']
print(neighbor_loc)
neighbor_lat = il_data.loc[0, 'Latitude']
print(neighbor_lat)
neighbor_long = il_data.loc[0, 'Longitude']
print(neighbor_long)

Addison
41.93407000000008
-87.99349999999998


In [17]:
# create map of Illinois using latitude and longitude values
map_illinois = folium.Map(location=[latitude, longitude], zoom_start=9)

# add markers to map
for label, lat, lng in zip(il_data['Suburbs'], il_data['Latitude'], il_data['Longitude']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_illinois)  
    
map_illinois

In [18]:
CLIENT_ID = 'ZP1LBGI24V2QOLYOS25V4ECL22VWRATVFQZHFFIBFUCX1FX4' #  Foursquare ID
CLIENT_SECRET = '5TJC3YVSBFPHF40TQPIRYSY3IZ2EIR0YVIPO5NLNLJ2Z40IC' #  Foursquare Secret
VERSION = '20201303' # Foursquare API version
LIMIT = 100 # A default Foursquare API limit value

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: ZP1LBGI24V2QOLYOS25V4ECL22VWRATVFQZHFFIBFUCX1FX4
CLIENT_SECRET:5TJC3YVSBFPHF40TQPIRYSY3IZ2EIR0YVIPO5NLNLJ2Z40IC


In [19]:
LIMIT = 100
radius = 500
##using api to get data
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, VERSION, neighbor_lat, neighbor_long, radius, LIMIT)


In [20]:
url

'https://api.foursquare.com/v2/venues/explore?&client_id=ZP1LBGI24V2QOLYOS25V4ECL22VWRATVFQZHFFIBFUCX1FX4&client_secret=5TJC3YVSBFPHF40TQPIRYSY3IZ2EIR0YVIPO5NLNLJ2Z40IC&v=20201303&ll=41.93407000000008,-87.99349999999998&radius=500&limit=100'

In [21]:
results = requests.get(url).json()

In [22]:
print(results)

{'meta': {'code': 200, 'requestId': '6113f2ba8136f3203affbf1e'}, 'response': {'suggestedFilters': {'header': 'Tap to show:', 'filters': [{'name': 'Open now', 'key': 'openNow'}, {'name': '$-$$$$', 'key': 'price'}]}, 'headerLocation': 'Addison', 'headerFullLocation': 'Addison', 'headerLocationGranularity': 'city', 'totalResults': 19, 'suggestedBounds': {'ne': {'lat': 41.93857000450008, 'lng': -87.98746220440565}, 'sw': {'lat': 41.929569995500074, 'lng': -87.99953779559432}}, 'groups': [{'type': 'Recommended Places', 'name': 'recommended', 'items': [{'reasons': {'count': 0, 'items': [{'summary': 'This spot is popular', 'type': 'general', 'reasonName': 'globalInteractionReason'}]}, 'venue': {'id': '4b44c97ff964a5202cfc25e3', 'name': "Portillo's", 'location': {'address': '100 W Lake St', 'crossStreet': 'btwn Addison Rd & Lincoln Ave', 'lat': 41.93367355289031, 'lng': -87.99147069454193, 'labeledLatLngs': [{'label': 'display', 'lat': 41.93367355289031, 'lng': -87.99147069454193}, {'label': '

# get all nearby venues in Illinois

In [35]:
def get_nearby(names, latitudes, longitudes, radius = 500):
    venues_list = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        # created the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
        # make the GET request from api
        results = requests.get(url).json()["response"]['groups'][0]['items']
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'],
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])
        
    #created dataframes
    nearby_venues = pd.DataFrame([item for venue in venues_list for item in venue])
    nearby_venues.columns = ['Suburbs', 
                  'Suburb Latitude', 
                  'Suburb Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)
    

In [52]:
# type your answer here
il_venues = get_nearby(names = il_data['Suburbs'], latitudes = il_data['Latitude'], longitudes = il_data['Longitude'])

In [53]:
il_venues

Unnamed: 0,Suburbs,Suburb Latitude,Suburb Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Addison,41.93407,-87.99350,Portillo's,41.933674,-87.991471,Hot Dog Joint
1,Addison,41.93407,-87.99350,Lou Malnati's Pizzeria,41.934673,-87.991551,Pizza Place
2,Addison,41.93407,-87.99350,Pyramid Club,41.935948,-87.994557,Bar
3,Addison,41.93407,-87.99350,Uncle Harry's Old Fashioned Ice Cream & Coffee,41.933732,-87.990793,Ice Cream Shop
4,Addison,41.93407,-87.99350,La Magdalena,41.932218,-87.988934,Mexican Restaurant
...,...,...,...,...,...,...,...
2901,Winnetka,42.10581,-87.73367,Tocco,42.105038,-87.734985,Italian Restaurant
2902,Winnetka,42.10581,-87.73367,Love's Yogurt,42.105337,-87.731633,Sandwich Place
2903,Winnetka,42.10581,-87.73367,Bella's Day Spa,42.104902,-87.734817,Spa
2904,Winnetka,42.10581,-87.73367,Winnetka Fitness Center,42.108139,-87.733253,Gym / Fitness Center


#### Analysis of these venues:
##### Since we have categorical values, that needs to be converted into numerical values 
##### <br>  applying one-hot encoding on 'Venue Category'


In [54]:
#one hot encoding
il_onehot = pd.get_dummies(il_venues['Venue Category'], prefix = "", prefix_sep = "")

In [55]:
il_venues['Suburbs'].head(5)

0    Addison
1    Addison
2    Addison
3    Addison
4    Addison
Name: Suburbs, dtype: object

In [56]:
#add suburbs column back to dataframe
il_onehot['Suburbs'] = il_venues['Suburbs']


In [57]:
fixed_columns = [il_onehot.columns[-1]] + list(il_onehot.columns[:-1])
print(fixed_columns)

['Suburbs', 'ATM', 'Accessories Store', 'Adult Boutique', 'American Restaurant', 'Antique Shop', 'Arcade', 'Argentinian Restaurant', 'Art Gallery', 'Art Museum', 'Arts & Crafts Store', 'Arts & Entertainment', 'Asian Restaurant', 'Athletics & Sports', 'Automotive Shop', 'BBQ Joint', 'Baby Store', 'Bagel Shop', 'Bakery', 'Bank', 'Bar', 'Baseball Field', 'Basketball Court', 'Beach', 'Beer Bar', 'Beer Garden', 'Beer Store', 'Big Box Store', 'Bistro', 'Bookstore', 'Botanical Garden', 'Boutique', 'Bowling Alley', 'Brazilian Restaurant', 'Breakfast Spot', 'Brewery', 'Bridal Shop', 'Buffet', 'Burger Joint', 'Burrito Place', 'Bus Station', 'Bus Stop', 'Business Service', 'Cable Car', 'Café', 'Cajun / Creole Restaurant', 'Camera Store', 'Campground', 'Candy Store', 'Car Wash', 'Caribbean Restaurant', 'Casino', 'Cheese Shop', 'Chinese Restaurant', 'Chiropractor', 'Chocolate Shop', 'Church', 'City', 'City Hall', 'Clothing Store', 'Cocktail Bar', 'Coffee Shop', 'Comedy Club', 'Comfort Food Restaura

In [58]:
il_onehot = il_onehot[fixed_columns]

In [59]:
il_onehot.head()

Unnamed: 0,Suburbs,ATM,Accessories Store,Adult Boutique,American Restaurant,Antique Shop,Arcade,Argentinian Restaurant,Art Gallery,Art Museum,...,Video Store,Vietnamese Restaurant,Weight Loss Center,Whisky Bar,Wine Bar,Wine Shop,Winery,Wings Joint,Women's Store,Yoga Studio
0,Addison,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Addison,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Addison,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Addison,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Addison,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [60]:
#get dummies method
il_grouped = il_onehot.groupby('Suburbs').mean().reset_index()

In [61]:
il_grouped

Unnamed: 0,Suburbs,ATM,Accessories Store,Adult Boutique,American Restaurant,Antique Shop,Arcade,Argentinian Restaurant,Art Gallery,Art Museum,...,Video Store,Vietnamese Restaurant,Weight Loss Center,Whisky Bar,Wine Bar,Wine Shop,Winery,Wings Joint,Women's Store,Yoga Studio
0,Addison,0.0,0.0,0.0,0.052632,0.000000,0.0,0.0,0.0,0.0,...,0.052632,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0
1,Antioch,0.0,0.0,0.0,0.086957,0.000000,0.0,0.0,0.0,0.0,...,0.043478,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0
2,Arlington Heights,0.0,0.0,0.0,0.058824,0.000000,0.0,0.0,0.0,0.0,...,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0
3,Aurora,0.0,0.0,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,...,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0
4,Bannockburn,0.0,0.0,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,...,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
182,Willow Springs,0.0,0.0,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,...,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0
183,"Willowbrook, DuPage County",0.0,0.0,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,...,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0
184,Wilmette,0.0,0.0,0.0,0.045455,0.000000,0.0,0.0,0.0,0.0,...,0.022727,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0
185,Winfield,0.0,0.0,0.0,0.000000,0.090909,0.0,0.0,0.0,0.0,...,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0


In [62]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    return row_categories_sorted.index.values[0:num_top_venues]

In [66]:
num_top_venues = 10
indicators = ['st', 'nd', 'rd']
# create columns according to number of top venues
columns = ['Suburbs']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Suburbs'] = il_grouped['Suburbs']

for ind in np.arange(il_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(il_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Suburbs,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Addison,Mexican Restaurant,Pizza Place,Ice Cream Shop,Spa,Supermarket,Gift Shop,Bar,Discount Store,Video Store,Thrift / Vintage Store
1,Antioch,Bar,American Restaurant,Italian Restaurant,Asian Restaurant,Mexican Restaurant,Coffee Shop,Cocktail Bar,Clothing Store,Video Store,Donut Shop
2,Arlington Heights,Sandwich Place,Coffee Shop,Train Station,Breakfast Spot,Mexican Restaurant,Shipping Store,Thai Restaurant,Farmers Market,Park,Café
3,Aurora,Mexican Restaurant,Sandwich Place,Pub,Pizza Place,Theater,Bakery,Café,Brazilian Restaurant,Casino,Financial or Legal Service
4,Bannockburn,Asian Restaurant,ATM,Nail Salon,Optical Shop,Office,Noodle House,Nightclub,New American Restaurant,Music Venue,Other Repair Shop


In [67]:
neighborhoods_venues_sorted.describe()

Unnamed: 0,Suburbs,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
count,187,187,187,187,187,187,187,187,187,187,187
unique,187,72,84,91,89,94,83,85,83,75,85
top,Bridgeview,Mexican Restaurant,ATM,Pizza Place,Optical Shop,ATM,Office,Noodle House,Nightclub,New American Restaurant,Noodle House
freq,1,14,9,10,10,9,16,16,16,17,15


In [68]:
res_il = il_grouped[["Suburbs", "Café"]]

In [69]:
res_il

Unnamed: 0,Suburbs,Café
0,Addison,0.000000
1,Antioch,0.000000
2,Arlington Heights,0.058824
3,Aurora,0.038462
4,Bannockburn,0.000000
...,...,...
182,Willow Springs,0.000000
183,"Willowbrook, DuPage County",0.000000
184,Wilmette,0.022727
185,Winfield,0.000000


In [70]:
# set number of clusters
kclusters = 3

il_grouped_clustering = res_il.drop('Suburbs', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(il_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_

array([1, 1, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2,
       2, 1, 1, 1, 1, 1, 1, 2, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 2, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0])

In [71]:
il_data

Unnamed: 0,Suburbs,Latitude,Longitude
0,Addison,41.93407,-87.99350
1,Algonquin,42.16439,-88.32686
2,Antioch,42.47898,-88.09638
3,Arlington Heights,42.08729,-87.98044
4,Aurora,41.75710,-88.31613
...,...,...,...
188,Willow Springs,41.73633,-87.86978
189,"Willowbrook, DuPage County",41.74854,-87.94084
190,Wilmette,42.07568,-87.70765
191,Winfield,41.87050,-88.15717


In [72]:
# add clustering labels
res_il.insert(0, 'Cluster Labels', kmeans.labels_)

In [73]:
il_merged = il_data[il_data['Suburbs'].isin(res_il.Suburbs)].reset_index()

In [74]:
# merge melb_grouped with melb_data to add latitude/longitude for each neighborhood
il_merged = il_merged.join(res_il.set_index('Suburbs'), on='Suburbs')
il_merged["Cluster Labels"].dropna(inplace = True)

In [76]:
il_merged

Unnamed: 0,index,Suburbs,Latitude,Longitude,Cluster Labels,Café
0,0,Addison,41.93407,-87.99350,1,0.000000
1,2,Antioch,42.47898,-88.09638,1,0.000000
2,3,Arlington Heights,42.08729,-87.98044,2,0.058824
3,4,Aurora,41.75710,-88.31613,2,0.038462
4,5,Bannockburn,42.19420,-87.86726,1,0.000000
...,...,...,...,...,...,...
182,188,Willow Springs,41.73633,-87.86978,1,0.000000
183,189,"Willowbrook, DuPage County",41.74854,-87.94084,1,0.000000
184,190,Wilmette,42.07568,-87.70765,1,0.022727
185,191,Winfield,41.87050,-88.15717,1,0.000000


In [78]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=9)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(il_merged['Latitude'], il_merged['Longitude'], il_merged['Suburbs'], il_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[int(cluster)-1],
        fill=True,
        fill_color=rainbow[int(cluster)-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [80]:
il_merged.loc[il_merged['Cluster Labels'] == 0, il_merged.columns[[1] + list(range(5, il_merged.shape[1]))]]

Unnamed: 0,Suburbs,Café
74,Hickory Hills,0.090909
80,Hoffman Estates,0.166667
186,Winnetka,0.1


In [81]:
il_merged.loc[il_merged['Cluster Labels'] == 1, il_merged.columns[[1] + list(range(5, il_merged.shape[1]))]]

Unnamed: 0,Suburbs,Café
0,Addison,0.000000
1,Antioch,0.000000
4,Bannockburn,0.000000
5,Barrington Hills,0.000000
6,Barrington,0.000000
...,...,...
181,Wheeling,0.000000
182,Willow Springs,0.000000
183,"Willowbrook, DuPage County",0.000000
184,Wilmette,0.022727


In [82]:
il_merged.loc[il_merged['Cluster Labels'] == 2, il_merged.columns[[1] + list(range(5, il_merged.shape[1]))]]

Unnamed: 0,Suburbs,Café
2,Arlington Heights,0.058824
3,Aurora,0.038462
20,Brookfield,0.076923
44,Deerfield,0.05
64,Geneva,0.033898
65,Glen Ellyn,0.033333
66,Glencoe,0.055556
73,Harwood Heights,0.025
91,Lake Bluff,0.0625
123,Northbrook,0.05


Color code = Red  : Cluster 0 = Suburbs with low numbers of cafe<br>
Color Code = Purple : Cluster 1 = Suburbs with moderate number of cafe<br>
Color Code = Cyan : Cluster 2 = Subutbs with high concentration of cafe

### References:<br>
https://developer.foursquare.com/docs/api-reference/venues/explore/  <br>
https://developer.foursquare.com/docs/places-api/versioning/ <br>
https://www.kaggle.com/kristoft/tutorial-foursquare-api-search<br>
https://www.linkedin.com/pulse/finding-surrounding-venues-before-buying-home-tony-chow/<br>
### Api generated
https://api.foursquare.com/v2/venues/explore?&client_id=ZP1LBGI24V2QOLYOS25V4ECL22VWRATVFQZHFFIBFUCX1FX4&client_secret=5TJC3YVSBFPHF40TQPIRYSY3IZ2EIR0YVIPO5NLNLJ2Z40IC&v=20201303&ll=41.93407000000008,-87.99349999999998&radius=500&limit=100
