### 1. Import and download all the dependencies

In [1]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # transform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

Solving environment: done

## Package Plan ##

  environment location: /home/jupyterlab/conda

  added / updated specs: 
    - geopy


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    geopy-1.18.1               |             py_0          51 KB  conda-forge
    openssl-1.0.2p             |    h14c3975_1002         3.1 MB  conda-forge
    geographiclib-1.49         |             py_0          32 KB  conda-forge
    ------------------------------------------------------------
                                           Total:         3.2 MB

The following NEW packages will be INSTALLED:

    geographiclib: 1.49-py_0         conda-forge
    geopy:         1.18.1-py_0       conda-forge

The following packages will be UPDATED:

    openssl:       1.0.2p-h470a237_2 conda-forge --> 1.0.2p-h14c3975_1002 conda-forge


Downloading and Extracting Packages
geopy-1.18.1         | 51 KB     | ##########

<a id='item1'></a>

### 2. Load the data about the neighborhoods with a latitude and longitude from CSV (Toronto_PostalCode_Coord.csv)

In [3]:
# toronto_neighborhoods - dataframe with the postal code, borough, neighborhood
# name(s), latitude and longitude read from the CSV file
neighborhoods_csv_path = 'Toronto_PostalCode_Coord.csv'
toronto_neighborhoods = pd.read_csv(neighborhoods_csv_path)
toronto_neighborhoods.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Harbourfront, Regent Park",43.65426,-79.360636
3,M6A,North York,"Lawrence Heights, Lawrence Manor",43.718518,-79.464763
4,M7A,Queen's Park,Queen's Park,43.662301,-79.389494


### 3. Use geopy library to get the latitude and longitude values of Toronto

In [4]:
# Resolve the city name to coordinates; they are used to centre the maps below.
toronto_address = 'Toronto, Ontario'

toronto_geolocator = Nominatim(user_agent="my-application")
toronto_location = toronto_geolocator.geocode(toronto_address)

# geocode() returns None when the address cannot be resolved; fail here with a
# clear message instead of an AttributeError on `.latitude` below.
if toronto_location is None:
    raise ValueError('Could not geocode address: {!r}'.format(toronto_address))

toronto_latitude = toronto_location.latitude
toronto_longitude = toronto_location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(toronto_latitude, toronto_longitude))

The geograpical coordinate of Toronto are 43.653963, -79.387207.


### 4. Define Foursquare Credentials and Version

In [5]:
import os  # local to this cell: only needed to read the credentials from the environment

# Credentials are read from the environment so that real keys never have to be
# written into the notebook. The original placeholder strings remain the
# fallback when the variables are not set.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', 'your Foursquare ID') # your Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', 'your Foursquare Secret') # your Foursquare Secret
VERSION = '20190117' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
# Never echo the secret itself: cell outputs are saved and shared with the notebook.
print('CLIENT_SECRET:' + ('********' if CLIENT_SECRET else '<not set>'))
LIMIT = 1000 # limit of number of venues returned by Foursquare API
radius = 1000 # define radius

Your credentails:
CLIENT_ID: <redacted>
CLIENT_SECRET:<redacted>


### 5. A function to get a dataframe with information about venues for given coordinates and category from foursquare.com

In [6]:
def getNearbyVenues(names, latitudes, longitudes, category = "", radius=1000):
    """Query the Foursquare ``venues/explore`` endpoint once per location.

    Parameters
    ----------
    names, latitudes, longitudes : iterables
        Parallel sequences; they are zipped together, so each triple
        describes one location to search around.
    category : str, optional
        Value sent as the ``categoryId`` query parameter. When empty
        (the default) the parameter is omitted and no category filter
        is requested.
    radius : int, optional
        Value sent as the ``radius`` query parameter.

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with the columns ``Neighborhood``,
        ``Neighborhood Latitude``, ``Neighborhood Longitude``, ``Venue``,
        ``Venue Latitude``, ``Venue Longitude`` and ``Venue Category``.
        The frame is empty, but still has these columns, when nothing
        was returned.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.

    Notes
    -----
    Reads the module-level ``CLIENT_ID``, ``CLIENT_SECRET``, ``VERSION``
    and ``LIMIT``.
    """
    columns = ['Neighborhood',
               'Neighborhood Latitude',
               'Neighborhood Longitude',
               'Venue',
               'Venue Latitude',
               'Venue Longitude',
               'Venue Category']

    rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):

        # let requests build and escape the query string
        params = {
            'client_id': CLIENT_ID,
            'client_secret': CLIENT_SECRET,
            'v': VERSION,
            'll': '{},{}'.format(lat, lng),
            'radius': radius,
            'limit': LIMIT,
        }
        if category:
            params['categoryId'] = category

        # make the GET request; the timeout keeps a stalled connection from hanging the notebook
        response = requests.get('https://api.foursquare.com/v2/venues/explore',
                                params=params,
                                timeout=30)
        # surface API failures (bad credentials, quota, ...) as an HTTP error
        # rather than a KeyError while digging through the payload
        response.raise_for_status()

        # as before, only the first group of the response is used
        groups = response.json().get('response', {}).get('groups') or []
        items = groups[0].get('items', []) if groups else []

        # keep only the relevant information for each nearby venue
        for item in items:
            venue = item['venue']
            # a venue may come without any category; record None in that case
            categories = venue.get('categories') or []
            rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                categories[0]['name'] if categories else None))

    # passing `columns` here keeps the frame well-formed even with zero rows
    return pd.DataFrame(rows, columns=columns)


### 6. Get all venues in Toronto

In [7]:
# Venues around every neighborhood, with no category filter and the function's
# default radius.
toronto_all_venues = getNearbyVenues(
    names=toronto_neighborhoods['Neighborhood'],
    latitudes=toronto_neighborhoods['Latitude'],
    longitudes=toronto_neighborhoods['Longitude'],
)

### 7. Get Asian venues in Toronto

In [8]:
# Foursquare category id used as the filter for this query
# (presumably the "Asian Restaurant" category - TODO confirm against the Foursquare category list)
asia_category_id = "4bf58dd8d48988d142941735"

# Same query as for all venues, restricted to the category above.
toronto_asia_venues = getNearbyVenues(
    names=toronto_neighborhoods['Neighborhood'],
    latitudes=toronto_neighborhoods['Latitude'],
    longitudes=toronto_neighborhoods['Longitude'],
    category=asia_category_id,
)

### 8. Let's check how many venues were returned for each neighborhood

In [9]:
# all venues: number of returned venues per neighborhood
toronto_all_venues_count = (
    toronto_all_venues
    .groupby('Neighborhood')['Neighborhood Latitude']
    .count()
    .rename('Count_all_venues')
    .reset_index()
)

# Asian venues: number of returned venues per neighborhood
toronto_asia_venues_count = (
    toronto_asia_venues
    .groupby('Neighborhood')['Neighborhood Latitude']
    .count()
    .rename('Count_asia_venues')
    .reset_index()
)

# both counts side by side
# NOTE(review): this is an inner join, so a neighborhood that is missing from
# either frame (e.g. no Asian venue returned) is dropped - confirm this is intended.
toronto_venues_count = toronto_asia_venues_count.merge(toronto_all_venues_count, on='Neighborhood')
toronto_venues_count.head()


Unnamed: 0,Neighborhood,Count_asia_venues,Count_all_venues
0,"Adelaide, King, Richmond",100,100
1,Agincourt,23,47
2,"Agincourt North, L'Amoreaux East, Milliken, St...",16,30
3,"Albion Gardens, Beaumond Heights, Humbergate, ...",4,16
4,"Alderwood, Long Branch",4,26


<a id='item3'></a>

### 9. Compute the ratio of Asian venues to all venues (stored in the `Attitude` column)

In [10]:
# 'Attitude' holds the share of Asian venues among all venues of a neighborhood
toronto_venues_count['Attitude'] = (
    toronto_venues_count['Count_asia_venues']
    .div(toronto_venues_count['Count_all_venues'])
)
toronto_venues_count.head()

Unnamed: 0,Neighborhood,Count_asia_venues,Count_all_venues,Attitude
0,"Adelaide, King, Richmond",100,100,1.0
1,Agincourt,23,47,0.489362
2,"Agincourt North, L'Amoreaux East, Milliken, St...",16,30,0.533333
3,"Albion Gardens, Beaumond Heights, Humbergate, ...",4,16,0.25
4,"Alderwood, Long Branch",4,26,0.153846


### 10. Sort the dataframe in ascending order and select the first 10 values

In [11]:
# Keep the 10 rows with the smallest 'Attitude' value and renumber them 0..9.
toronto_venues_count = (
    toronto_venues_count
    .sort_values(by='Attitude')
    .head(10)
    .reset_index(drop=True)
)
toronto_venues_count

Unnamed: 0,Neighborhood,Count_asia_venues,Count_all_venues,Attitude
0,Parkwoods,1,29,0.034483
1,Thorncliffe Park,2,45,0.044444
2,"The Junction North, Runnymede",2,41,0.04878
3,"Del Ray, Keelesdale, Mount Dennis, Silverthorn",1,19,0.052632
4,York Mills West,1,16,0.0625
5,Business reply mail Processing Centre969 Eastern,3,47,0.06383
6,"The Beaches West, India Bazaar",5,78,0.064103
7,"Runnymede, Swansea",5,74,0.067568
8,"Lawrence Heights, Lawrence Manor",4,53,0.075472
9,Glencairn,3,39,0.076923


### 10. Get a dataframe with neighborhoods with coordinates for the 10 neighborhoods

In [12]:
# Names of the selected neighborhoods
list_n = toronto_venues_count['Neighborhood'].tolist()

# Rows of the full neighborhood table whose name is among the selected ones,
# renumbered from 0.
is_selected = toronto_neighborhoods['Neighborhood'].isin(list_n)
toronto_neighborhoods_top_ten_venues = toronto_neighborhoods.loc[is_selected].reset_index(drop=True)
toronto_neighborhoods_top_ten_venues

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M6A,North York,"Lawrence Heights, Lawrence Manor",43.718518,-79.464763
2,M6B,North York,Glencairn,43.709577,-79.445073
3,M4H,East York,Thorncliffe Park,43.705369,-79.349372
4,M4L,East Toronto,"The Beaches West, India Bazaar",43.668999,-79.315572
5,M6M,York,"Del Ray, Keelesdale, Mount Dennis, Silverthorn",43.691116,-79.476013
6,M6N,York,"The Junction North, Runnymede",43.673185,-79.487262
7,M2P,North York,York Mills West,43.752758,-79.400049
8,M6S,West Toronto,"Runnymede, Swansea",43.651571,-79.48445
9,M7Y,East Toronto,Business reply mail Processing Centre969 Eastern,43.662744,-79.321558


### 11. Create a map of Toronto with the top 10 neighborhoods

In [13]:
# create map using latitude and longitude values
map_toronto = folium.Map(location=[toronto_latitude, toronto_longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(toronto_neighborhoods_top_ten_venues['Latitude'], toronto_neighborhoods_top_ten_venues['Longitude'], toronto_neighborhoods_top_ten_venues['Borough'], toronto_neighborhoods_top_ten_venues['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

### 12. Get a dataframe for clustering

In [14]:
# Attach the 'Attitude' value to the coordinates of the selected neighborhoods.
toronto_venues_count_attitude = toronto_venues_count.loc[:, ['Neighborhood', 'Attitude']].copy()
toronto_top_ten = toronto_neighborhoods_top_ten_venues.merge(toronto_venues_count_attitude, on='Neighborhood')

# Only the remaining columns (Latitude, Longitude, Attitude) are fed to the clustering.
toronto_top_ten_clustering = toronto_top_ten.drop(columns=["PostalCode", "Borough", "Neighborhood"])
toronto_top_ten_clustering

Unnamed: 0,Latitude,Longitude,Attitude
0,43.753259,-79.329656,0.034483
1,43.718518,-79.464763,0.075472
2,43.709577,-79.445073,0.076923
3,43.705369,-79.349372,0.044444
4,43.668999,-79.315572,0.064103
5,43.691116,-79.476013,0.052632
6,43.673185,-79.487262,0.04878
7,43.752758,-79.400049,0.0625
8,43.651571,-79.48445,0.067568
9,43.662744,-79.321558,0.06383


### 13. Clustering

In [15]:
# number of clusters to look for
kclusters = 2

# run k-means clustering; random_state is fixed so the labels are reproducible
# NOTE(review): the columns are clustered on their raw scales - confirm that
# no standardization is intended.
toronto_kmeans = KMeans(n_clusters=kclusters, random_state=0)
toronto_kmeans.fit(toronto_top_ten_clustering)

# cluster label assigned to each row of the dataframe
toronto_kmeans.labels_

array([0, 1, 1, 0, 0, 1, 1, 0, 1, 0], dtype=int32)

### 14. Get a dataframe with the top 10 neighborhoods and cluster Labels

In [16]:
# Work on a copy: a plain assignment would only alias toronto_top_ten, and adding
# the label column would then silently modify that dataframe as well.
toronto_merged = toronto_top_ten.copy()
toronto_merged['Cluster Labels'] = toronto_kmeans.labels_
toronto_merged

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Attitude,Cluster Labels
0,M3A,North York,Parkwoods,43.753259,-79.329656,0.034483,0
1,M6A,North York,"Lawrence Heights, Lawrence Manor",43.718518,-79.464763,0.075472,1
2,M6B,North York,Glencairn,43.709577,-79.445073,0.076923,1
3,M4H,East York,Thorncliffe Park,43.705369,-79.349372,0.044444,0
4,M4L,East Toronto,"The Beaches West, India Bazaar",43.668999,-79.315572,0.064103,0
5,M6M,York,"Del Ray, Keelesdale, Mount Dennis, Silverthorn",43.691116,-79.476013,0.052632,1
6,M6N,York,"The Junction North, Runnymede",43.673185,-79.487262,0.04878,1
7,M2P,North York,York Mills West,43.752758,-79.400049,0.0625,0
8,M6S,West Toronto,"Runnymede, Swansea",43.651571,-79.48445,0.067568,1
9,M7Y,East Toronto,Business reply mail Processing Centre969 Eastern,43.662744,-79.321558,0.06383,0


### 15. Let's visualize the resulting clusters

In [18]:
# create map
toronto_map_clusters = folium.Map(location=[toronto_latitude, toronto_longitude], zoom_start=10)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighborhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(toronto_map_clusters)
       
toronto_map_clusters

<a id='item5'></a>