## Part 3

### Create map of neighborhoods in Toronto

In [27]:
import pandas as pd
import numpy as np
#!conda install -c conda-forge folium=0.5.0 --yes
import folium
# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors
import json # library to handle JSON files
#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values
# import k-means from clustering stage
from sklearn.cluster import KMeans
import requests

### Use geopy library to get the latitude and longitude values of Toronto, Canada

In [16]:
nb_data = pd.read_csv('Toronto_Neighborhoods2.csv')
nb_data.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge,Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood,Morningside,West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


In [17]:
address = 'Toronto, Canada'
geolocator = Nominatim(user_agent='ny_explorer')
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Manhattan are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Manhattan are 43.6534817, -79.3839347.


### Visualizing the group map


In [26]:
# create map of Toronto using latitude and longitude values
map_Toronto = folium.Map(location=[latitude, longitude], zoom_start=11)
# add markers to map
for lat, lng, neighborhood, borough,postalcode in zip(nb_data['Latitude'],nb_data['Longitude'],nb_data['Neighborhood'],nb_data['Borough'],nb_data['PostalCode']):
    label = '{},{},{}'.format(neighborhood, borough,postalcode)
    label = folium.Popup(label,parse_html=True)
    folium.CircleMarker(
            [lat, lng],
            radius=5,
            popup=label,
            color='blue',
            fill=True,
            fill_color='#3186cc',
            fill_opacity=0.7,
            parse_html=False).add_to(map_Toronto) 
map_Toronto

### Explore and cluster the neighborhoods in Toronto

### Define Foursquare Credentials and Version

In [19]:
CLIENT_ID = 'BXZDLEBHVSDCTCOY3DUERFGQDXPUEVRDBECETJJJ0TR2C3WL' # your Foursquare ID
CLIENT_SECRET = '2VQ3CQF3KF245LL5WDW5HWZ0BREQUQDRRE3TA0GRHUP3NFBS' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: BXZDLEBHVSDCTCOY3DUERFGQDXPUEVRDBECETJJJ0TR2C3WL
CLIENT_SECRET:2VQ3CQF3KF245LL5WDW5HWZ0BREQUQDRRE3TA0GRHUP3NFBS


In [20]:
nb_data.loc[0,"Neighborhood"]

'Rouge,Malvern'

In [21]:
neighborhood_latitude = nb_data.loc[0, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = nb_data.loc[0, 'Longitude'] # neighborhood longitude value
neighborhood_name = nb_data.loc[0, 'Neighborhood'] # neighborhood name

In [22]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']
    

#Get the nearby Venues
def getNearbyVenues(names, latitudes, longitudes, postalcodes, radius=500):
    LIMIT = 100
    venues_list=[]
    for name, lat, lng, postalcode in zip(names, latitudes, longitudes, postalcodes):
        #print(postalcode)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            postalcode,
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'PostalCode',
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [23]:
print('The dataframe has {} boroughs and {} neighborhoods.'.format(
        len(nb_data['Borough'].unique()),
        nb_data.shape[0]
    )
)

The dataframe has 11 boroughs and 103 neighborhoods.


In [24]:
print("Unique boroughs in the dataframe:")
print(nb_data['Borough'].unique())

Unique boroughs in the dataframe:
['Scarborough' 'North York' 'East York' 'East Toronto' 'Central Toronto'
 'Downtown Toronto' 'York' 'West Toronto' "Queen's Park" 'Mississauga'
 'Etobicoke']


In [28]:
Toronto_venues = getNearbyVenues(names=nb_data['Neighborhood'],
                                   latitudes=nb_data['Latitude'],
                                   longitudes=nb_data['Longitude'],
                                   postalcodes=nb_data['PostalCode']
                                  )

### check the size of the resulting dataframe

In [29]:
print(Toronto_venues.shape)
Toronto_venues.head()

(2146, 8)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,PostalCode,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Rouge,Malvern",43.806686,-79.194353,M1B,Wendy’s,43.807448,-79.199056,Fast Food Restaurant
1,"Highland Creek,Rouge Hill,Port Union",43.784535,-79.160497,M1C,Royal Canadian Legion,43.782533,-79.163085,Bar
2,"Guildwood,Morningside,West Hill",43.763573,-79.188711,M1E,RBC Royal Bank,43.76679,-79.191151,Bank
3,"Guildwood,Morningside,West Hill",43.763573,-79.188711,M1E,G & G Electronics,43.765309,-79.191537,Electronics Store
4,"Guildwood,Morningside,West Hill",43.763573,-79.188711,M1E,Sail Sushi,43.765951,-79.191275,Restaurant


### check how many venues were returned for each neighborhood

In [30]:
Toronto_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,PostalCode,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
"Adelaide,King,Richmond",100,100,100,100,100,100,100
Agincourt,4,4,4,4,4,4,4
"Agincourt North,L'Amoreaux East,Milliken,Steeles East",2,2,2,2,2,2,2
"Albion Gardens,Beaumond Heights,Humbergate,Jamestown,Mount Olive,Silverstone,South Steeles,Thistletown",8,8,8,8,8,8,8
"Alderwood,Long Branch",7,7,7,7,7,7,7
"Bathurst Manor,Downsview North,Wilson Heights",25,25,25,25,25,25,25
Bayview Village,4,4,4,4,4,4,4
"Bedford Park,Lawrence Manor East",23,23,23,23,23,23,23
Berczy Park,57,57,57,57,57,57,57
"Birch Cliff,Cliffside West",5,5,5,5,5,5,5


### find out how many unique categories can be curated from all the returned venues

In [31]:
print('There are {} uniques categories.'.format(len(Toronto_venues['Venue Category'].unique())))

There are 267 uniques categories.


## Analyze Each Neighborhood

In [32]:
# one hot encoding
Toronto_onehot = pd.get_dummies(Toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
Toronto_onehot['Neighborhood'] = Toronto_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [Toronto_onehot.columns[-1]] + list(Toronto_onehot.columns[:-1])
Toronto_onehot = Toronto_onehot[fixed_columns]

Toronto_onehot.head()

Unnamed: 0,Yoga Studio,Accessories Store,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wings Joint,Women's Store
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [33]:
Toronto_onehot.shape

(2146, 267)

### group rows by neighborhood and by taking the mean of the frequency of occurrence of each category

In [34]:
Toronto_grouped = Toronto_onehot.groupby('Neighborhood').mean().reset_index()
Toronto_grouped

Unnamed: 0,Neighborhood,Yoga Studio,Accessories Store,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,...,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wings Joint,Women's Store
0,"Adelaide,King,Richmond",0.000000,0.0,0.000000,0.0000,0.0000,0.0000,0.000,0.0000,0.0000,...,0.010000,0.00000,0.0,0.010000,0.00,0.000000,0.00,0.000000,0.0,0.01
1,Agincourt,0.000000,0.0,0.000000,0.0000,0.0000,0.0000,0.000,0.0000,0.0000,...,0.000000,0.00000,0.0,0.000000,0.00,0.000000,0.00,0.000000,0.0,0.00
2,"Agincourt North,L'Amoreaux East,Milliken,Steel...",0.000000,0.0,0.000000,0.0000,0.0000,0.0000,0.000,0.0000,0.0000,...,0.000000,0.00000,0.0,0.000000,0.00,0.000000,0.00,0.000000,0.0,0.00
3,"Albion Gardens,Beaumond Heights,Humbergate,Jam...",0.000000,0.0,0.000000,0.0000,0.0000,0.0000,0.000,0.0000,0.0000,...,0.000000,0.00000,0.0,0.000000,0.00,0.000000,0.00,0.000000,0.0,0.00
4,"Alderwood,Long Branch",0.000000,0.0,0.000000,0.0000,0.0000,0.0000,0.000,0.0000,0.0000,...,0.000000,0.00000,0.0,0.000000,0.00,0.000000,0.00,0.000000,0.0,0.00
5,"Bathurst Manor,Downsview North,Wilson Heights",0.000000,0.0,0.000000,0.0000,0.0000,0.0000,0.000,0.0000,0.0000,...,0.000000,0.00000,0.0,0.000000,0.00,0.000000,0.00,0.000000,0.0,0.00
6,Bayview Village,0.000000,0.0,0.000000,0.0000,0.0000,0.0000,0.000,0.0000,0.0000,...,0.000000,0.00000,0.0,0.000000,0.00,0.000000,0.00,0.000000,0.0,0.00
7,"Bedford Park,Lawrence Manor East",0.000000,0.0,0.000000,0.0000,0.0000,0.0000,0.000,0.0000,0.0000,...,0.043478,0.00000,0.0,0.000000,0.00,0.000000,0.00,0.000000,0.0,0.00
8,Berczy Park,0.000000,0.0,0.000000,0.0000,0.0000,0.0000,0.000,0.0000,0.0000,...,0.000000,0.00000,0.0,0.017544,0.00,0.000000,0.00,0.000000,0.0,0.00
9,"Birch Cliff,Cliffside West",0.000000,0.0,0.000000,0.0000,0.0000,0.0000,0.000,0.0000,0.0000,...,0.000000,0.00000,0.0,0.000000,0.00,0.000000,0.00,0.000000,0.0,0.00


### confirm the new size

In [35]:
Toronto_grouped.shape

(99, 267)

### print each neighborhood along with the top 5 most common venues

In [37]:
num_top_venues = 5

for hood in Toronto_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = Toronto_grouped[Toronto_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Adelaide,King,Richmond----
         venue  freq
0  Coffee Shop  0.07
1         Café  0.05
2        Hotel  0.04
3          Bar  0.04
4          Gym  0.04


----Agincourt----
                       venue  freq
0             Breakfast Spot  0.25
1               Skating Rink  0.25
2  Latin American Restaurant  0.25
3                     Lounge  0.25
4   Mediterranean Restaurant  0.00


----Agincourt North,L'Amoreaux East,Milliken,Steeles East----
                       venue  freq
0                       Park   0.5
1                 Playground   0.5
2             Medical Center   0.0
3         Miscellaneous Shop   0.0
4  Middle Eastern Restaurant   0.0


----Albion Gardens,Beaumond Heights,Humbergate,Jamestown,Mount Olive,Silverstone,South Steeles,Thistletown----
                  venue  freq
0         Grocery Store  0.25
1  Fast Food Restaurant  0.12
2            Beer Store  0.12
3              Pharmacy  0.12
4           Pizza Place  0.12


----Alderwood,Long Branch----
            ve

In [38]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

### put that into a pandas dataframe