In [1]:
import pandas as pd
import numpy as np

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors
import matplotlib.pyplot as plt

import requests

# import k-means from clustering stage
from sklearn.cluster import KMeans

from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
from IPython.display import display

from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import folium # map rendering library

In [2]:
df = pd.read_excel('uni_rental_data.xlsx')

df.head()

Unnamed: 0,University,Address,Postal Code,rental_lat,rental_long,Type,Price,Rental Name,uni_lat,uni_long,Distance (km)
0,University of Toronto,"Yonge St & Bloor St E, Toronto, M4W 0A8",M4W 0A8,43.6827,-79.373,10 Beds,$625,Rental #1,43.663462,-79.39776,3
1,University of Toronto,"St Clair Ave E & Midland Ave #Room, Toronto, M...",M1M 3E5,43.7247,-79.2312,Studio,$650,Rental #2,43.663462,-79.39776,15
2,University of Toronto,"Mosedale Crescent & Kingslake Rd, Toronto, M2J...",M2J 3A3,43.7801,-79.3479,1 Bed,$670,Rental #3,43.663462,-79.39776,14
3,University of Toronto,"Maywood Park & Garthwood Drive, Scarborough, O...",M1K 2H4,43.7298,-79.2639,Studio,$695,Rental #4,43.663462,-79.39776,13
4,University of Toronto,"135 Leeward Glenway, Toronto, M3C 2Z6",M3C 2Z6,43.7334,-79.3329,4 Beds,$750,Rental #5,43.663462,-79.39776,9


#### Use geopy library to get the latitude and longitude values of New York City.

In [3]:
address = 'Toronto, CA'

geolocator = Nominatim(user_agent="ca_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


#### Create a map of Toronto with Universities superimposed on top.

In [4]:
latitude = 43.6534817
longitude = -79.3839347

# create map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, uni in zip(df['uni_lat'], df['uni_long'], df['University']):
    label = '{}'.format(uni)
    label = folium.Popup(label, parse_html=True)
    folium.Marker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

In [5]:
uni_list = df['University'].unique()

def f(x):
    #filter the dataframe based on university chosen
    temp_df = df[df['University'] == x].reset_index()
    
    latitude = temp_df.loc[0,'uni_lat'] # get first latitude of university
    longitude = temp_df.loc[0,'uni_long'] # get first longitude of university
    
    # create map of Toronto using latitude and longitude values
    map_toronto = folium.Map(location=[latitude, longitude], zoom_start=11)
    
    for lat, lng, uni in zip(temp_df['uni_lat'], temp_df['uni_long'], temp_df['University']):
        label = '{}'.format(uni)
        label = folium.Popup(label, parse_html=True)
        folium.Marker(
            [lat, lng],
            radius=5,
            popup=label,
            color='blue',
            fill=True,
            fill_color='#3186cc',
            fill_opacity=0.7,
            parse_html=False).add_to(map_toronto)  
    
    return map_toronto

interact(f, x=uni_list);

interactive(children=(Dropdown(description='x', options=('University of Toronto', 'York University', 'Ryerson …

Perfect! Using IPython and Jupyter widget library, we are able to create a really interactive map that will change accordingly to the university that is chosen in the dropdown list. We will be building on top of this and add in other informations and locations such as the rental prices and locations.

More information on Jupyter Widgets here: https://ipywidgets.readthedocs.io/en/latest/examples/Using%20Interact.html

In [6]:
uni_list = df['University'].unique()

def f(University):
    #filter the dataframe based on university chosen
    temp_df = df[df['University'] == University].reset_index()
    
    latitude = temp_df.loc[0,'uni_lat'] # get first latitude of university
    longitude = temp_df.loc[0,'uni_long'] # get first longitude of university
    
    # create map of Toronto using latitude and longitude values
    map_toronto = folium.Map(location=[latitude, longitude], zoom_start=11.5)
    
    for lat, lng, uni in zip(temp_df['uni_lat'], temp_df['uni_long'], temp_df['University']):
        label = f'{uni}'
        label = folium.Popup(label, parse_html=True)
        folium.Marker(
            [lat, lng],
            radius=5,
            popup=label,
            color='blue',
            fill=True,
            fill_color='#3186cc',
            fill_opacity=0.7,
            parse_html=False).add_to(map_toronto)  
        
        
    # plotting every coordinates for rentals in the chosen university    
    for lat, long, price, dist, style, name in zip(temp_df['rental_lat'], temp_df['rental_long'], temp_df['Price'], temp_df['Distance (km)'], temp_df['Type'], temp_df['Rental Name']):
        label = f'Rental Name: {name}, \nRental: {price} \nType: {style} \nDistance: {dist}km'
        label = folium.Popup(label, parse_html=True)
        folium.CircleMarker(
            [lat, long],
            color='red',
            fill=True,
            fill_color='#FF0000',
            fill_opacity=0.7,
            popup= label).add_to(map_toronto)
    
    
    return map_toronto

interact(f, University=uni_list); # plot a dropdown list and update the folium map accordingly

interactive(children=(Dropdown(description='University', options=('University of Toronto', 'York University', …

#### Now, let's get the top 20 venues that are within a radius of 500 meters of the rental houses.

In [7]:
CLIENT_ID = 'RBK5NLX0IKYVBD13RVL30ORMSSMYZDKJYB12WYZCJ3VS33GQ' # your Foursquare ID
CLIENT_SECRET = 'WMYCE0MY0FE3VFVAOGFE3HFVUTZPGFNKLW3BJIILN2QN1GVU' # your Foursquare Secret
VERSION = '20200705' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

rental_latitude = 43.6827 # the chosen rental place
rental_longitude = -79.3730 # the chosen rental place

LIMIT = 20 # limit of number of venues returned by Foursquare API
radius = 500 # define radius
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    rental_latitude, 
    rental_longitude, 
    radius, 
    LIMIT)


Your credentails:
CLIENT_ID: RBK5NLX0IKYVBD13RVL30ORMSSMYZDKJYB12WYZCJ3VS33GQ
CLIENT_SECRET:WMYCE0MY0FE3VFVAOGFE3HFVUTZPGFNKLW3BJIILN2QN1GVU


In [8]:
# results['response'].keys()

## 2. Explore the Rental Place

#### Let's create a function to repeat the same process to all the rentals in Toronto

In [9]:
def getNearbyVenues(names, latitudes, longitudes, radius=500, LIMIT = 20):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Rental Name', 
                  'Rental Latitude', 
                  'Rental Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [10]:
# rental_venues = getNearbyVenues(names=df['Rental Name'],
#                                    latitudes=df['rental_lat'],
#                                    longitudes=df['rental_long']
#                                   )

Rental #1
Rental #2
Rental #3
Rental #4
Rental #5
Rental #6
Rental #7
Rental #8
Rental #9
Rental #10
Rental #11
Rental #12
Rental #13
Rental #14
Rental #15
Rental #16
Rental #17
Rental #18
Rental #19
Rental #20
Rental #21
Rental #22
Rental #23
Rental #24
Rental #25
Rental #26
Rental #27
Rental #28
Rental #29
Rental #30
Rental #31
Rental #32
Rental #33
Rental #34
Rental #35
Rental #36
Rental #37
Rental #38
Rental #39
Rental #40
Rental #41
Rental #42
Rental #43
Rental #44
Rental #45
Rental #46
Rental #47
Rental #48
Rental #49
Rental #50
Rental #51
Rental #52
Rental #53
Rental #54
Rental #55
Rental #56
Rental #57
Rental #58
Rental #59
Rental #60
Rental #61
Rental #62
Rental #63
Rental #64
Rental #65
Rental #66
Rental #67
Rental #68
Rental #69
Rental #70
Rental #71
Rental #72
Rental #73
Rental #74
Rental #75
Rental #76
Rental #77
Rental #78
Rental #79
Rental #80
Rental #81
Rental #82
Rental #83
Rental #84
Rental #85
Rental #86
Rental #87
Rental #88
Rental #89
Rental #90
Rental #91
Rental #

In [11]:
# rental_venues

# #saving what we have to an excel file
# rental_venues.to_excel('rental_cluster.xlsx', index = None)

Let's check how many venues were returned for each University

In [12]:
rental_venues.groupby('Rental Name').count()

Unnamed: 0_level_0,Rental Latitude,Rental Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Rental Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Rental #1,4,4,4,4,4,4
Rental #10,2,2,2,2,2,2
Rental #100,4,4,4,4,4,4
Rental #101,10,10,10,10,10,10
Rental #102,20,20,20,20,20,20
...,...,...,...,...,...,...
Rental #95,1,1,1,1,1,1
Rental #96,5,5,5,5,5,5
Rental #97,2,2,2,2,2,2
Rental #98,1,1,1,1,1,1


#### Let's find out how many unique categories can be curated from all the returned venues

In [13]:
print('There are {} uniques categories.'.format(len(rental_venues['Venue Category'].unique())))

There are 121 uniques categories.


## 3. Analyze Each Rental Venue

In [14]:
# one hot encoding
rental_onehot = pd.get_dummies(rental_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
rental_onehot['Rental Name'] = rental_venues['Rental Name'] 

# move neighborhood column to the first column
fixed_columns = [rental_onehot.columns[-1]] + list(rental_onehot.columns[:-1])
rental_onehot = rental_onehot[fixed_columns]

rental_onehot.head()

Unnamed: 0,Rental Name,Airport,American Restaurant,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Auto Dealership,Auto Garage,Badminton Court,Bagel Shop,...,Sushi Restaurant,Taiwanese Restaurant,Tea Room,Thai Restaurant,Theater,Toy / Game Store,Trail,Video Store,Wine Shop,Yoga Studio
0,Rental #1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Rental #1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Rental #1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Rental #1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Rental #2,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [15]:
rental_onehot.shape

(2648, 122)

#### Next, let's group rows by rental and by taking the mean of the frequency of occurrence of each category

In [16]:
rental_grouped = rental_onehot.groupby('Rental Name').mean().reset_index()
rental_grouped

Unnamed: 0,Rental Name,Airport,American Restaurant,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Auto Dealership,Auto Garage,Badminton Court,Bagel Shop,...,Sushi Restaurant,Taiwanese Restaurant,Tea Room,Thai Restaurant,Theater,Toy / Game Store,Trail,Video Store,Wine Shop,Yoga Studio
0,Rental #1,0.0,0.00,0.0,0.0,0.0000,0.0,0.0,0.0,0.0000,...,0.0,0.0,0.00,0.0,0.0000,0.00,0.0,0.0,0.0,0.0
1,Rental #10,0.0,0.00,0.0,0.0,0.0000,0.0,0.5,0.0,0.0000,...,0.0,0.0,0.00,0.0,0.0000,0.00,0.0,0.0,0.0,0.0
2,Rental #100,0.0,0.00,0.0,0.0,0.0000,0.0,0.0,0.0,0.0000,...,0.0,0.0,0.00,0.0,0.0000,0.00,0.0,0.0,0.0,0.0
3,Rental #101,0.0,0.00,0.0,0.0,0.0000,0.0,0.0,0.0,0.0000,...,0.0,0.0,0.00,0.0,0.0000,0.00,0.0,0.0,0.0,0.0
4,Rental #102,0.0,0.05,0.0,0.0,0.0000,0.0,0.0,0.0,0.0000,...,0.0,0.0,0.05,0.0,0.0500,0.05,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
247,Rental #95,0.0,0.00,0.0,0.0,0.0000,0.0,0.0,0.0,0.0000,...,0.0,0.0,0.00,0.0,0.0000,0.00,0.0,0.0,0.0,0.0
248,Rental #96,0.0,0.00,0.0,0.0,0.0000,0.0,0.0,0.0,0.0000,...,0.0,0.0,0.00,0.0,0.0000,0.00,0.2,0.0,0.0,0.0
249,Rental #97,0.0,0.00,0.0,0.0,0.0000,0.0,0.0,0.0,0.0000,...,0.0,0.0,0.00,0.0,0.0000,0.00,0.0,0.0,0.0,0.0
250,Rental #98,0.0,0.00,0.0,0.0,0.0000,0.0,0.0,0.0,0.0000,...,0.0,0.0,0.00,0.0,0.0000,0.00,0.0,0.0,0.0,0.0


#### Let's print each rental along with the top 5 most common venues

In [17]:
num_top_venues = 5

for hood in rental_grouped['Rental Name']:
    print("----"+hood+"----")
    temp = rental_grouped[rental_grouped['Rental Name'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Rental #1----
           venue  freq
0  Grocery Store  0.25
1    Candy Store  0.25
2           Park  0.25
3     Playground  0.25
4          Trail  0.00


----Rental #10----
               venue  freq
0        Auto Garage   0.5
1  Convenience Store   0.5
2            Airport   0.0
3          Locksmith   0.0
4           Pharmacy   0.0


----Rental #100----
           venue  freq
0  Grocery Store  0.25
1    Candy Store  0.25
2           Park  0.25
3     Playground  0.25
4          Trail  0.00


----Rental #101----
            venue  freq
0  Ice Cream Shop   0.2
1    Liquor Store   0.1
2            Bank   0.1
3     Coffee Shop   0.1
4  Discount Store   0.1


----Rental #102----
            venue  freq
0  Clothing Store  0.10
1       Juice Bar  0.10
2        Pharmacy  0.05
3    Burger Joint  0.05
4     Coffee Shop  0.05


----Rental #103----
                venue  freq
0         Coffee Shop  0.21
1       Grocery Store  0.07
2         Bus Station  0.07
3        Intersection  0.07
4  Ligh

#### Let's put that into a *pandas* dataframe

First, let's write a function to sort the venues in descending order.

In [18]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

Now let's create the new dataframe and display the top 10 venues for each neighborhood.

In [21]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Rental Name']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
rental_venues_sorted = pd.DataFrame(columns=columns)
rental_venues_sorted['Rental Name'] = rental_grouped['Rental Name']

for ind in np.arange(rental_grouped.shape[0]):
    rental_venues_sorted.iloc[ind, 1:] = return_most_common_venues(rental_grouped.iloc[ind, :], num_top_venues)

rental_venues_sorted.head()

Unnamed: 0,Rental Name,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Rental #1,Grocery Store,Candy Store,Playground,Park,Gaming Cafe,Dog Run,Convenience Store,Gas Station,Cosmetics Shop,Coworking Space
1,Rental #10,Convenience Store,Auto Garage,Fast Food Restaurant,Cosmetics Shop,Coworking Space,Deli / Bodega,Department Store,Dessert Shop,Diner,Discount Store
2,Rental #100,Grocery Store,Candy Store,Playground,Park,Gaming Cafe,Dog Run,Convenience Store,Gas Station,Cosmetics Shop,Coworking Space
3,Rental #101,Ice Cream Shop,Bank,Bistro,Liquor Store,Discount Store,Coffee Shop,Pizza Place,Pharmacy,Sandwich Place,Deli / Bodega
4,Rental #102,Clothing Store,Juice Bar,Electronics Store,Bakery,Movie Theater,Chocolate Shop,Liquor Store,Pharmacy,Department Store,Burger Joint


### 3. Cluster Neighborhoods

Run *k*-means to cluster the neighborhood into 5 clusters.

In [43]:
# set number of clusters
kclusters = 5

grouped_clustering = rental_grouped.drop('Rental Name', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(rental_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([0, 1, 0, 0, 0, 0, 4, 4, 2, 0], dtype=int32)

In [46]:
# add clustering labels
rental_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

rental_venues_merged = rental_venues

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
rental_venues_merged = rental_venues_merged.join(rental_venues_sorted.set_index('Rental Name'), on='Rental Name')

rental_venues_merged

Unnamed: 0,Rental Name,Rental Latitude,Rental Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Rental #1,43.6827,-79.3730,Summerhill Market,43.686265,-79.375458,Grocery Store,0,Grocery Store,Candy Store,Playground,Park,Gaming Cafe,Dog Run,Convenience Store,Gas Station,Cosmetics Shop,Coworking Space
1,Rental #1,43.6827,-79.3730,Whitney Park,43.682036,-79.373788,Park,0,Grocery Store,Candy Store,Playground,Park,Gaming Cafe,Dog Run,Convenience Store,Gas Station,Cosmetics Shop,Coworking Space
2,Rental #1,43.6827,-79.3730,Scoops Convenience Boutique,43.686148,-79.375828,Candy Store,0,Grocery Store,Candy Store,Playground,Park,Gaming Cafe,Dog Run,Convenience Store,Gas Station,Cosmetics Shop,Coworking Space
3,Rental #1,43.6827,-79.3730,Rosedale Park,43.682328,-79.378934,Playground,0,Grocery Store,Candy Store,Playground,Park,Gaming Cafe,Dog Run,Convenience Store,Gas Station,Cosmetics Shop,Coworking Space
4,Rental #2,43.7247,-79.2312,Vi Pei Bistro - Bluffs,43.727148,-79.229640,Bistro,0,Ice Cream Shop,Bank,Bistro,Liquor Store,Discount Store,Coffee Shop,Pizza Place,Pharmacy,Sandwich Place,Deli / Bodega
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2643,Rental #253,43.6803,-79.3538,Valley Farm Produce,43.677999,-79.349969,Fruit & Vegetable Store,0,Greek Restaurant,Ice Cream Shop,Italian Restaurant,Yoga Studio,Spa,Bakery,Brewery,Café,Cosmetics Shop,Fruit & Vegetable Store
2644,Rental #253,43.6803,-79.3538,Urban Nails,43.676668,-79.356602,Spa,0,Greek Restaurant,Ice Cream Shop,Italian Restaurant,Yoga Studio,Spa,Bakery,Brewery,Café,Cosmetics Shop,Fruit & Vegetable Store
2645,Rental #253,43.6803,-79.3538,Bulk Barn,43.676790,-79.355865,Grocery Store,0,Greek Restaurant,Ice Cream Shop,Italian Restaurant,Yoga Studio,Spa,Bakery,Brewery,Café,Cosmetics Shop,Fruit & Vegetable Store
2646,Rental #253,43.6803,-79.3538,Dough Bakeshop,43.676643,-79.356846,Bakery,0,Greek Restaurant,Ice Cream Shop,Italian Restaurant,Yoga Studio,Spa,Bakery,Brewery,Café,Cosmetics Shop,Fruit & Vegetable Store


Finally, let's visualize the resulting clusters

In [48]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(rental_venues_merged['Venue Latitude'], rental_venues_merged['Venue Longitude'], rental_venues_merged['Rental Name'], rental_venues_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

### 4. Examine Clusters

Now, you can examine each cluster and determine the discriminating venue categories that distinguish each cluster. Based on the defining categories, we can then assign a name to each cluster.

In [54]:
# saving the data
rental_venues_merged.to_excel('rental_clustered.xlsx', index = None)

#### Cluster 1

In [49]:
rental_venues_merged.loc[rental_venues_merged['Cluster Labels'] == 0, rental_venues_merged.columns[[1] + list(range(5, rental_venues_merged.shape[1]))]]

Unnamed: 0,Rental Latitude,Venue Longitude,Venue Category,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,43.6827,-79.375458,Grocery Store,0,Grocery Store,Candy Store,Playground,Park,Gaming Cafe,Dog Run,Convenience Store,Gas Station,Cosmetics Shop,Coworking Space
1,43.6827,-79.373788,Park,0,Grocery Store,Candy Store,Playground,Park,Gaming Cafe,Dog Run,Convenience Store,Gas Station,Cosmetics Shop,Coworking Space
2,43.6827,-79.375828,Candy Store,0,Grocery Store,Candy Store,Playground,Park,Gaming Cafe,Dog Run,Convenience Store,Gas Station,Cosmetics Shop,Coworking Space
3,43.6827,-79.378934,Playground,0,Grocery Store,Candy Store,Playground,Park,Gaming Cafe,Dog Run,Convenience Store,Gas Station,Cosmetics Shop,Coworking Space
4,43.7247,-79.229640,Bistro,0,Ice Cream Shop,Bank,Bistro,Liquor Store,Discount Store,Coffee Shop,Pizza Place,Pharmacy,Sandwich Place,Deli / Bodega
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2643,43.6803,-79.349969,Fruit & Vegetable Store,0,Greek Restaurant,Ice Cream Shop,Italian Restaurant,Yoga Studio,Spa,Bakery,Brewery,Café,Cosmetics Shop,Fruit & Vegetable Store
2644,43.6803,-79.356602,Spa,0,Greek Restaurant,Ice Cream Shop,Italian Restaurant,Yoga Studio,Spa,Bakery,Brewery,Café,Cosmetics Shop,Fruit & Vegetable Store
2645,43.6803,-79.355865,Grocery Store,0,Greek Restaurant,Ice Cream Shop,Italian Restaurant,Yoga Studio,Spa,Bakery,Brewery,Café,Cosmetics Shop,Fruit & Vegetable Store
2646,43.6803,-79.356846,Bakery,0,Greek Restaurant,Ice Cream Shop,Italian Restaurant,Yoga Studio,Spa,Bakery,Brewery,Café,Cosmetics Shop,Fruit & Vegetable Store


#### Cluster 2

In [50]:
rental_venues_merged.loc[rental_venues_merged['Cluster Labels'] == 1, rental_venues_merged.columns[[1] + list(range(5, rental_venues_merged.shape[1]))]]

Unnamed: 0,Rental Latitude,Venue Longitude,Venue Category,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
83,43.7507,-79.301508,Auto Garage,1,Convenience Store,Auto Garage,Fast Food Restaurant,Cosmetics Shop,Coworking Space,Deli / Bodega,Department Store,Dessert Shop,Diner,Discount Store
84,43.7507,-79.297899,Convenience Store,1,Convenience Store,Auto Garage,Fast Food Restaurant,Cosmetics Shop,Coworking Space,Deli / Bodega,Department Store,Dessert Shop,Diner,Discount Store
277,43.7507,-79.301508,Auto Garage,1,Convenience Store,Auto Garage,Fast Food Restaurant,Cosmetics Shop,Coworking Space,Deli / Bodega,Department Store,Dessert Shop,Diner,Discount Store
278,43.7507,-79.297899,Convenience Store,1,Convenience Store,Auto Garage,Fast Food Restaurant,Cosmetics Shop,Coworking Space,Deli / Bodega,Department Store,Dessert Shop,Diner,Discount Store
417,43.7507,-79.301508,Auto Garage,1,Convenience Store,Auto Garage,Fast Food Restaurant,Cosmetics Shop,Coworking Space,Deli / Bodega,Department Store,Dessert Shop,Diner,Discount Store
418,43.7507,-79.297899,Convenience Store,1,Convenience Store,Auto Garage,Fast Food Restaurant,Cosmetics Shop,Coworking Space,Deli / Bodega,Department Store,Dessert Shop,Diner,Discount Store
532,43.7507,-79.301508,Auto Garage,1,Convenience Store,Auto Garage,Fast Food Restaurant,Cosmetics Shop,Coworking Space,Deli / Bodega,Department Store,Dessert Shop,Diner,Discount Store
533,43.7507,-79.297899,Convenience Store,1,Convenience Store,Auto Garage,Fast Food Restaurant,Cosmetics Shop,Coworking Space,Deli / Bodega,Department Store,Dessert Shop,Diner,Discount Store
690,43.7507,-79.301508,Auto Garage,1,Convenience Store,Auto Garage,Fast Food Restaurant,Cosmetics Shop,Coworking Space,Deli / Bodega,Department Store,Dessert Shop,Diner,Discount Store
691,43.7507,-79.297899,Convenience Store,1,Convenience Store,Auto Garage,Fast Food Restaurant,Cosmetics Shop,Coworking Space,Deli / Bodega,Department Store,Dessert Shop,Diner,Discount Store


#### Cluster 3

In [51]:
rental_venues_merged.loc[rental_venues_merged['Cluster Labels'] == 2, rental_venues_merged.columns[[1] + list(range(5, rental_venues_merged.shape[1]))]]

Unnamed: 0,Rental Latitude,Venue Longitude,Venue Category,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
76,43.6872,-79.335007,Convenience Store,2,Park,Convenience Store,Construction & Landscaping,Cosmetics Shop,Coworking Space,Deli / Bodega,Department Store,Dessert Shop,Diner,Discount Store
77,43.6872,-79.335007,Park,2,Park,Convenience Store,Construction & Landscaping,Cosmetics Shop,Coworking Space,Deli / Bodega,Department Store,Dessert Shop,Diner,Discount Store
78,43.6872,-79.341091,Park,2,Park,Convenience Store,Construction & Landscaping,Cosmetics Shop,Coworking Space,Deli / Bodega,Department Store,Dessert Shop,Diner,Discount Store
261,43.6872,-79.335007,Convenience Store,2,Park,Convenience Store,Construction & Landscaping,Cosmetics Shop,Coworking Space,Deli / Bodega,Department Store,Dessert Shop,Diner,Discount Store
262,43.6872,-79.335007,Park,2,Park,Convenience Store,Construction & Landscaping,Cosmetics Shop,Coworking Space,Deli / Bodega,Department Store,Dessert Shop,Diner,Discount Store
263,43.6872,-79.341091,Park,2,Park,Convenience Store,Construction & Landscaping,Cosmetics Shop,Coworking Space,Deli / Bodega,Department Store,Dessert Shop,Diner,Discount Store
415,43.75,-79.399717,Park,2,Convenience Store,Park,Construction & Landscaping,Cosmetics Shop,Coworking Space,Deli / Bodega,Department Store,Dessert Shop,Diner,Discount Store
416,43.75,-79.401393,Convenience Store,2,Convenience Store,Park,Construction & Landscaping,Cosmetics Shop,Coworking Space,Deli / Bodega,Department Store,Dessert Shop,Diner,Discount Store
525,43.6872,-79.335007,Convenience Store,2,Park,Convenience Store,Construction & Landscaping,Cosmetics Shop,Coworking Space,Deli / Bodega,Department Store,Dessert Shop,Diner,Discount Store
526,43.6872,-79.335007,Park,2,Park,Convenience Store,Construction & Landscaping,Cosmetics Shop,Coworking Space,Deli / Bodega,Department Store,Dessert Shop,Diner,Discount Store


#### Cluster 4

In [52]:
rental_venues_merged.loc[rental_venues_merged['Cluster Labels'] == 3, rental_venues_merged.columns[[1] + list(range(5, rental_venues_merged.shape[1]))]]

Unnamed: 0,Rental Latitude,Venue Longitude,Venue Category,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
208,43.7915,-79.410474,Playground,3,Playground,Yoga Studio,Farmers Market,Convenience Store,Cosmetics Shop,Coworking Space,Deli / Bodega,Department Store,Dessert Shop,Diner
390,43.7915,-79.410474,Playground,3,Playground,Yoga Studio,Farmers Market,Convenience Store,Cosmetics Shop,Coworking Space,Deli / Bodega,Department Store,Dessert Shop,Diner
980,43.7915,-79.410474,Playground,3,Playground,Yoga Studio,Farmers Market,Convenience Store,Cosmetics Shop,Coworking Space,Deli / Bodega,Department Store,Dessert Shop,Diner
988,43.7915,-79.410474,Playground,3,Playground,Yoga Studio,Farmers Market,Convenience Store,Cosmetics Shop,Coworking Space,Deli / Bodega,Department Store,Dessert Shop,Diner
1192,43.7915,-79.410474,Playground,3,Playground,Yoga Studio,Farmers Market,Convenience Store,Cosmetics Shop,Coworking Space,Deli / Bodega,Department Store,Dessert Shop,Diner


#### Cluster 5

In [53]:
rental_venues_merged.loc[rental_venues_merged['Cluster Labels'] == 4, rental_venues_merged.columns[[1] + list(range(5, rental_venues_merged.shape[1]))]]

Unnamed: 0,Rental Latitude,Venue Longitude,Venue Category,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
48,43.7334,-79.334115,Park,4,Park,Trail,Gym,River,Dessert Shop,Dog Run,Distribution Center,Discount Store,Diner,Deli / Bodega
49,43.7334,-79.332316,Gym,4,Park,Trail,Gym,River,Dessert Shop,Dog Run,Distribution Center,Discount Store,Diner,Deli / Bodega
50,43.7334,-79.328766,Trail,4,Park,Trail,Gym,River,Dessert Shop,Dog Run,Distribution Center,Discount Store,Diner,Deli / Bodega
51,43.7334,-79.337130,River,4,Park,Trail,Gym,River,Dessert Shop,Dog Run,Distribution Center,Discount Store,Diner,Deli / Bodega
52,43.6899,-79.388133,Park,4,Grocery Store,Gym,Thai Restaurant,Park,Garden Center,Event Space,Convenience Store,Cosmetics Shop,Coworking Space,Deli / Bodega
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2523,43.7334,-79.337130,River,4,Park,Trail,Gym,River,Dessert Shop,Dog Run,Distribution Center,Discount Store,Diner,Deli / Bodega
2524,43.6899,-79.388133,Park,4,Grocery Store,Gym,Thai Restaurant,Park,Garden Center,Event Space,Convenience Store,Cosmetics Shop,Coworking Space,Deli / Bodega
2525,43.6899,-79.390769,Grocery Store,4,Grocery Store,Gym,Thai Restaurant,Park,Garden Center,Event Space,Convenience Store,Cosmetics Shop,Coworking Space,Deli / Bodega
2526,43.6899,-79.391163,Thai Restaurant,4,Grocery Store,Gym,Thai Restaurant,Park,Garden Center,Event Space,Convenience Store,Cosmetics Shop,Coworking Space,Deli / Bodega


After naming the clusters, we just have to simply merge it to our initial dataframe to include the Cluster Label. From there, we can visualise them again and we are done!

In [74]:
rental_clusters = rental_venues_merged.groupby(['Rental Name', 'Cluster Labels']).count()[['Venue']].reset_index().sort_values(by = 'Rental Name')

In [81]:
# get the relevant columns only - Rental Name and Cluster Labels
rental_clusters = rental_clusters[['Rental Name', 'Cluster Labels']]

In [85]:
final_df = df.merge(rental_clusters, on = 'Rental Name')

### 5. Final Product

In [7]:
final_df = pd.read_excel('final_data.xlsx')

uni_list = final_df['University'].unique()

def f(University):
    #filter the dataframe based on university chosen
    temp_df = final_df[final_df['University'] == University].reset_index()
    
    latitude = temp_df.loc[0,'uni_lat'] # get first latitude of university
    longitude = temp_df.loc[0,'uni_long'] # get first longitude of university
    
    # create map of Toronto using latitude and longitude values
    map_toronto = folium.Map(location=[latitude, longitude], zoom_start=11.5)
    
    for lat, lng, uni in zip(temp_df['uni_lat'], temp_df['uni_long'], temp_df['University']):
        label = f'{uni}'
        label = folium.Popup(label, parse_html=True)
        folium.Marker(
            [lat, lng],
            radius=5,
            popup=label,
            color='blue',
            fill=True,
            fill_color='#3186cc',
            fill_opacity=0.7,
            parse_html=False).add_to(map_toronto)  
        
        
    # plotting every coordinates for rentals in the chosen university    
    for lat, long, price, dist, style, name, cluster in zip(temp_df['rental_lat'], temp_df['rental_long'], temp_df['Price'], temp_df['Distance (km)'], temp_df['Type'], temp_df['Rental Name'], temp_df['Cluster Labels']):
        label = f'Rental Name: {name}, \nRental: {price} \nType: {style} \nDistance: {dist}km \nCluster: {cluster}'
        label = folium.Popup(label, parse_html=True)
        folium.CircleMarker(
            [lat, long],
            color='red',
            fill=True,
            fill_color='#FF0000',
            fill_opacity=0.7,
            popup= label).add_to(map_toronto)
    
    
    return map_toronto

interact(f, University=uni_list); # plot a dropdown list and update the folium map accordingly

interactive(children=(Dropdown(description='University', options=('University of Toronto', 'York University', …

Great! We have all the information we want from the map. When we select the university, the rental houses will be shown on the map with the following informations:

 - Rental Name
 - Rental Cost
 - Type of Room
 - Distance Away from the university
 - Cluster Type
 
With these information, they will then be able to make a more informed decision on where they would like to stay during their course of study. They will be able to know what type of amenities are available surrounding the rental area which is key in identifying if the place suits your personal preference.

#### What can be improved?

    - More rental data
    - More FourSquare premium calls - to get reviews of the neighbourhood
    - How expensive is the rental compared to others? (is it above market or not?)
    - What is the average market price based on the location
    - Pre-filter for students to fill in their preference before showing the available listings

In [90]:
#saving final output data
final_df.to_excel('final_data.xlsx', index = None)