<a href="https://colab.research.google.com/github/anushka012399/Battle_Of_Neighborhood/blob/master/Final_Capstone_Project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### **A Recommender System for expats looking for a family friendly neighbourhood in Downtown Toronto.** 

In [1]:
#importing libraries
import pandas as pd 
import numpy as np
import requests 
#BeautifulSoup for scrapping data from wikipedia
from bs4 import BeautifulSoup

In [2]:
#scarping dataset 1 for obtaining the information abour boroughs and their neighbourhoods of Canada
url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
source = requests.get(url).text

In [3]:
soup = BeautifulSoup(source, 'xml')
table=soup.find('table')

In [4]:

column_names = ['PostalCode', 'Borough', 'Neighbourhood']

In [5]:

df = pd.DataFrame(columns=column_names)
for tr_cell in table.find_all('tr'):
    row_data=[]
    for td_cell in tr_cell.find_all('td'):
        row_data.append(td_cell.text.strip())
    if len(row_data)==3:
        df.loc[len(df)] = row_data
df

Unnamed: 0,PostalCode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
...,...,...,...
175,M5Z,Not assigned,Not assigned
176,M6Z,Not assigned,Not assigned
177,M7Z,Not assigned,Not assigned
178,M8Z,Etobicoke,"Mimico NW, The Queensway West, South of Bloor,..."


In [6]:
#cleaning the data and removing the neighbouhoods not falling in any boruoghs
df=df[df['Borough']!='Not assigned']
df

Unnamed: 0,PostalCode,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
...,...,...,...
160,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North"
165,M4Y,Downtown Toronto,Church and Wellesley
168,M7Y,East Toronto,"Business reply mail Processing Centre, South C..."
169,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu..."


In [7]:
#importing dataset 2 which contaings the locational data, that is thet latitude and longitude corresponding to different postal codes 
geo_df=pd.read_csv('http://cocl.us/Geospatial_data')
geo_df

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476
...,...,...,...
98,M9N,43.706876,-79.518188
99,M9P,43.696319,-79.532242
100,M9R,43.688905,-79.554724
101,M9V,43.739416,-79.588437


In [8]:
geo_df.rename(columns={'Postal Code':'PostalCode'},inplace=True)
geo_df

Unnamed: 0,PostalCode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476
...,...,...,...
98,M9N,43.706876,-79.518188
99,M9P,43.696319,-79.532242
100,M9R,43.688905,-79.554724
101,M9V,43.739416,-79.588437


In [9]:
#merging dataset 1 and dataset 2 to form a complete understading of neighbourhoods and their location in a borough
geo_aftermerge = pd.merge(geo_df, df, on='PostalCode')
geo_aftermerge

Unnamed: 0,PostalCode,Latitude,Longitude,Borough,Neighbourhood
0,M1B,43.806686,-79.194353,Scarborough,"Malvern, Rouge"
1,M1C,43.784535,-79.160497,Scarborough,"Rouge Hill, Port Union, Highland Creek"
2,M1E,43.763573,-79.188711,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,43.770992,-79.216917,Scarborough,Woburn
4,M1H,43.773136,-79.239476,Scarborough,Cedarbrae
...,...,...,...,...,...
98,M9N,43.706876,-79.518188,York,Weston
99,M9P,43.696319,-79.532242,Etobicoke,Westmount
100,M9R,43.688905,-79.554724,Etobicoke,"Kingsview Village, St. Phillips, Martin Grove ..."
101,M9V,43.739416,-79.588437,Etobicoke,"South Steeles, Silverstone, Humbergate, Jamest..."


In [10]:
#importing floium to render maps and visualize data
import folium

In [11]:
#identifying all the neighbourhoods of Canada on the map
toronto_latitude = 43.6932; toronto_longitude = -79.3832
map_toronto = folium.Map(location = [toronto_latitude, toronto_longitude], zoom_start = 10.7)

# add markers to map
for lat, lng, borough, neighbourhood in zip(geo_aftermerge['Latitude'], geo_aftermerge['Longitude'], geo_aftermerge['Borough'], geo_aftermerge['Neighbourhood']):
    label = '{}, {}'.format(neighbourhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_toronto)  
    

map_toronto

In [12]:
#the map might not be visible on Github, hence open it in colaboratory or refer to the report

In [13]:
#focusing on the specific borough, "Downtown Toronto" in our case and hence forming a new dataframe
downtown_data = geo_aftermerge[geo_aftermerge['Borough'] == 'Downtown Toronto']
downtown_data

Unnamed: 0,PostalCode,Latitude,Longitude,Borough,Neighbourhood
50,M4W,43.679563,-79.377529,Downtown Toronto,Rosedale
51,M4X,43.667967,-79.367675,Downtown Toronto,"St. James Town, Cabbagetown"
52,M4Y,43.66586,-79.38316,Downtown Toronto,Church and Wellesley
53,M5A,43.65426,-79.360636,Downtown Toronto,"Regent Park, Harbourfront"
54,M5B,43.657162,-79.378937,Downtown Toronto,"Garden District, Ryerson"
55,M5C,43.651494,-79.375418,Downtown Toronto,St. James Town
56,M5E,43.644771,-79.373306,Downtown Toronto,Berczy Park
57,M5G,43.657952,-79.387383,Downtown Toronto,Central Bay Street
58,M5H,43.650571,-79.384568,Downtown Toronto,"Richmond, Adelaide, King"
59,M5J,43.640816,-79.381752,Downtown Toronto,"Harbourfront East, Union Station, Toronto Islands"


In [14]:
#visualizing the neighbourhoods of Downtown Toronto only
address_dt = 'Downtown, Toronto'
latitude_dt = 43.6548
longitude_dt = -79.3883

map_Downtown = folium.Map(location=[latitude_dt, longitude_dt], zoom_start=11.5)

# add markers to map
for lat, lng, label in zip(downtown_data['Latitude'], downtown_data['Longitude'], downtown_data['Neighbourhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius = 10,
        popup = label,
        color ='blue',
        fill = True,
        fill_color = '#3186cc',
        fill_opacity = 0.7).add_to(map_Downtown)  
    
map_Downtown

In [15]:
#the map might not be visible on Github, hence open it in colaboratory or refer to the report

In [16]:
#defining the crawler function to import dataset 3, which will contain details about the venues in the neighbourhood of Downtown Toronto
#notice the endpoint 'explore' being used to search for nearby venues
def foursquare_crawler (postal_code_list, neighbourhood_list, lat_list, lng_list, LIMIT = 500, radius = 1000):
    result_ds = []
    counter = 0
    for postal_code, neighbourhood, lat, lng in zip(postal_code_list, neighbourhood_list, lat_list, lng_list):
         
        #creating the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, CLIENT_SECRET, VERSION, 
            lat, lng, radius, LIMIT)
            
        #the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        tmp_dict = {}
        tmp_dict['Postal Code'] = postal_code; tmp_dict['Neighbourhood(s)'] = neighbourhood; 
        tmp_dict['Latitude'] = lat; tmp_dict['Longitude'] = lng;
        tmp_dict['Crawling_result'] = results;
        result_ds.append(tmp_dict)
        counter += 1
        print('{}.'.format(counter))
        print('Data obtained for the Postal Code {} (and Neighbourhoods {}).'.format(postal_code, neighbourhood))
    return result_ds;

In [17]:
#obtain you FourSqaure ID and Secret by creating an app on FourSquare
CLIENT_ID = 'GZ5JIMZNOQCBYRYYJ5M315PQYLQSFJV5ILK4VAKRZR1121EQ' 
CLIENT_SECRET = 'FPIBJI54KDD2JOJYQSMA2C15GBQ0YNZZPN2RLVH3MV10NPJZ' 
VERSION = '20180605'

In [18]:
Downtown_foursquare_dataset = foursquare_crawler(list(downtown_data['PostalCode']),list(downtown_data['Neighbourhood']),list(downtown_data['Latitude']),list(downtown_data['Longitude']),)

1.
Data obtained for the Postal Code M4W (and Neighbourhoods Rosedale).
2.
Data obtained for the Postal Code M4X (and Neighbourhoods St. James Town, Cabbagetown).
3.
Data obtained for the Postal Code M4Y (and Neighbourhoods Church and Wellesley).
4.
Data obtained for the Postal Code M5A (and Neighbourhoods Regent Park, Harbourfront).
5.
Data obtained for the Postal Code M5B (and Neighbourhoods Garden District, Ryerson).
6.
Data obtained for the Postal Code M5C (and Neighbourhoods St. James Town).
7.
Data obtained for the Postal Code M5E (and Neighbourhoods Berczy Park).
8.
Data obtained for the Postal Code M5G (and Neighbourhoods Central Bay Street).
9.
Data obtained for the Postal Code M5H (and Neighbourhoods Richmond, Adelaide, King).
10.
Data obtained for the Postal Code M5J (and Neighbourhoods Harbourfront East, Union Station, Toronto Islands).
11.
Data obtained for the Postal Code M5K (and Neighbourhoods Toronto Dominion Centre, Design Exchange).
12.
Data obtained for the Postal C

In [33]:
import pickle
with open("Downtown_foursquare_dataset.txt", "wb") as fp:   #Pickling
    pickle.dump(Downtown_foursquare_dataset, fp)

In [34]:
with open("Downtown_foursquare_dataset.txt", "rb") as fp:   # Unpickling
    Downtown_foursquare_dataset = pickle.load(fp)

In [35]:
#function for finding the number of venues in different neighbourhoods and creating a dataframe from the results ontained by the foursquare crawler
def venues(dataset):
    df = pd.DataFrame(columns = ['Postal Code', 'Neighbourhood', 
                                           'Neighbourhood Latitude', 'Neighbourhood Longitude',
                                          'Venue', 'Venue Summary', 'Venue Category', 'Distance'])
    
    for nb in dataset:
        postal_code = nb['Postal Code']; neighbourhood = nb['Neighbourhood(s)']
        latitude = nb['Latitude']; longitude = nb['Longitude']
        print('No. of Venues in "{}" Posal Code and "{}" Negihbourhood is:'.format(postal_code, neighbourhood))
        print(len(nb['Crawling_result']))
        
        for v in nb['Crawling_result']:
            summary = v['reasons']['items'][0]['summary']
            name = v['venue']['name']
            distance = v['venue']['location']['distance']
            category =  v['venue']['categories'][0]['name']
            
            df = df.append({'Postal Code': postal_code, 'Neighbourhood': neighbourhood, 
                              'Neighbourhood Latitude': latitude, 'Neighbourhood Longitude':longitude,
                              'Venue': name, 'Venue Summary': summary, 
                              'Venue Category': category, 'Distance': distance}, ignore_index = True)
    
    return(df)

In [36]:
downtown_venues = venues(Downtown_foursquare_dataset)

No. of Venues in "M4W" Posal Code and "Rosedale" Negihbourhood is:
21
No. of Venues in "M4X" Posal Code and "St. James Town, Cabbagetown" Negihbourhood is:
34
No. of Venues in "M4Y" Posal Code and "Church and Wellesley" Negihbourhood is:
100
No. of Venues in "M5A" Posal Code and "Regent Park, Harbourfront" Negihbourhood is:
100
No. of Venues in "M5B" Posal Code and "Garden District, Ryerson" Negihbourhood is:
100
No. of Venues in "M5C" Posal Code and "St. James Town" Negihbourhood is:
100
No. of Venues in "M5E" Posal Code and "Berczy Park" Negihbourhood is:
100
No. of Venues in "M5G" Posal Code and "Central Bay Street" Negihbourhood is:
100
No. of Venues in "M5H" Posal Code and "Richmond, Adelaide, King" Negihbourhood is:
100
No. of Venues in "M5J" Posal Code and "Harbourfront East, Union Station, Toronto Islands" Negihbourhood is:
100
No. of Venues in "M5K" Posal Code and "Toronto Dominion Centre, Design Exchange" Negihbourhood is:
100
No. of Venues in "M5L" Posal Code and "Commerce C

In [37]:
downtown_venues

Unnamed: 0,Postal Code,Neighbourhood,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Summary,Venue Category,Distance
0,M4W,Rosedale,43.679563,-79.377529,Summerhill Market,This spot is popular,Grocery Store,764
1,M4W,Rosedale,43.679563,-79.377529,Black Camel,This spot is popular,BBQ Joint,994
2,M4W,Rosedale,43.679563,-79.377529,Craigleigh Gardens,This spot is popular,Park,505
3,M4W,Rosedale,43.679563,-79.377529,Toronto Lawn Tennis Club,This spot is popular,Athletics & Sports,896
4,M4W,Rosedale,43.679563,-79.377529,Pie Squared,This spot is popular,Pie Shop,826
...,...,...,...,...,...,...,...,...
1664,M7A,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494,Canadian National Ballet School,This spot is popular,Dance Studio,998
1665,M7A,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494,The Men's Room,This spot is popular,Men's Store,795
1666,M7A,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494,MUJI,This spot is popular,Miscellaneous Shop,859
1667,M7A,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494,Cafe Plenty,This spot is popular,Café,860


In [38]:
#to find a list of unique neighbourhoods in Downtown Toronto
neighbourhood_list = list(downtown_venues['Neighbourhood'].unique())
print('List of Neighbourhoods in downtown Toronto:')
neighbourhood_list

List of Neighbourhoods in downtown Toronto:


['Rosedale',
 'St. James Town, Cabbagetown',
 'Church and Wellesley',
 'Regent Park, Harbourfront',
 'Garden District, Ryerson',
 'St. James Town',
 'Berczy Park',
 'Central Bay Street',
 'Richmond, Adelaide, King',
 'Harbourfront East, Union Station, Toronto Islands',
 'Toronto Dominion Centre, Design Exchange',
 'Commerce Court, Victoria Hotel',
 'University of Toronto, Harbord',
 'Kensington Market, Chinatown, Grange Park',
 'CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport',
 'Stn A PO Boxes',
 'First Canadian Place, Underground city',
 'Christie',
 "Queen's Park, Ontario Provincial Government"]

In [39]:
#listing the unique categories of venue present in downtown toronto
print('There are {} uniques categories.'.format(len(downtown_venues['Venue Category'].unique())))
list(downtown_venues['Venue Category'].unique())


There are 200 uniques categories.


['Grocery Store',
 'BBQ Joint',
 'Park',
 'Athletics & Sports',
 'Pie Shop',
 'Filipino Restaurant',
 'Bistro',
 'Coffee Shop',
 'Japanese Restaurant',
 'Bank',
 'Breakfast Spot',
 'Playground',
 'Sandwich Place',
 'Candy Store',
 'Office',
 'Convenience Store',
 'Metro Station',
 'Diner',
 'Italian Restaurant',
 'Restaurant',
 'Café',
 'Indian Restaurant',
 'Gastropub',
 'Jewelry Store',
 'Bakery',
 'Pub',
 'Caribbean Restaurant',
 'Deli / Bodega',
 'Farm',
 'Pet Store',
 'Gift Shop',
 'Taiwanese Restaurant',
 'Steakhouse',
 'Thai Restaurant',
 'Botanical Garden',
 'Sushi Restaurant',
 'Garden',
 'Dance Studio',
 'Pool',
 'Theater',
 'American Restaurant',
 'Performing Arts Venue',
 'Trail',
 'Theme Restaurant',
 'Bubble Tea Shop',
 'Beer Bar',
 'Bookstore',
 'Juice Bar',
 'Ramen Restaurant',
 'Mexican Restaurant',
 'Pizza Place',
 'Salon / Barbershop',
 'Burger Joint',
 'General Entertainment',
 'Creperie',
 'Gay Bar',
 'Smoke Shop',
 'Arts & Crafts Store',
 'Ice Cream Shop',
 'Wine 

In [40]:
# onehot encoding the categories so that it can be used readily
downtown_after_onehot = pd.get_dummies(data = downtown_venues, drop_first  = False, prefix = "", prefix_sep = "", columns = ['Venue Category'])
downtown_after_onehot

Unnamed: 0,Postal Code,Neighbourhood,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Summary,Distance,Airport,American Restaurant,Animal Shelter,Aquarium,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Auto Dealership,BBQ Joint,Bagel Shop,Bakery,Bank,Bar,Baseball Stadium,Basketball Stadium,Beach,Beer Bar,Beer Store,Belgian Restaurant,Bistro,Bookstore,Botanical Garden,Brazilian Restaurant,Breakfast Spot,Brewery,Bridal Shop,Bubble Tea Shop,Burger Joint,Burrito Place,Café,Camera Store,...,Sandwich Place,Scenic Lookout,School,Sculpture Garden,Seafood Restaurant,Shoe Store,Shopping Mall,Skating Rink,Smoke Shop,South American Restaurant,Souvlaki Shop,Spa,Speakeasy,Sporting Goods Shop,Sports Bar,Steakhouse,Street Art,Supermarket,Sushi Restaurant,Taco Place,Tailor Shop,Taiwanese Restaurant,Tanning Salon,Tapas Restaurant,Tea Room,Tech Startup,Thai Restaurant,Theater,Theme Restaurant,Track,Trail,Train Station,University,Vegetarian / Vegan Restaurant,Video Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Women's Store,Yoga Studio
0,M4W,Rosedale,43.679563,-79.377529,Summerhill Market,This spot is popular,764,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,M4W,Rosedale,43.679563,-79.377529,Black Camel,This spot is popular,994,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,M4W,Rosedale,43.679563,-79.377529,Craigleigh Gardens,This spot is popular,505,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,M4W,Rosedale,43.679563,-79.377529,Toronto Lawn Tennis Club,This spot is popular,896,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,M4W,Rosedale,43.679563,-79.377529,Pie Squared,This spot is popular,826,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1664,M7A,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494,Canadian National Ballet School,This spot is popular,998,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1665,M7A,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494,The Men's Room,This spot is popular,795,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1666,M7A,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494,MUJI,This spot is popular,859,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1667,M7A,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494,Cafe Plenty,This spot is popular,860,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [47]:
# To create a list of categories manually, to select categories considered relevant to a family-friendly neighbourhood 
#how i chose these categories is explained in the business_problem_description
family_friendly_categories = [
 'Neighbourhood',
 'Neighbourhood Latitude',
 'Neighbourhood Longitude',
 'Grocery Store',
 'Park',
 'Bank',
 'Playground',
 'Sandwich Place',
 'Candy Store',
 'Metro Station',
 'Diner',
 'Restaurant',
 'Bakery',
 'Farm',
 'Pet Store',
 'Gift Shop',
 'Garden',
 'Dance Studio',
 'Pool',
 'Theater',
 'Performing Arts Venue',
 'Bookstore',
 'Salon / Barbershop',
 'Arts & Crafts Store',
 'Ice Cream Shop',
 'Historic Site',
 'Supermarket',
 'Yoga Studio',
 'Health & Beauty Service',
 'Furniture / Home Store',
 'Video Store',
 'Comic Shop',
 'Clothing Store',
 'Shopping Mall',
 'Cosmetics Shop',
 'Gym',
 'Dog Run',
 'Museum',
 'Farmers Market',
 'Chocolate Shop',
 'Dessert Shop',
 'Spa',
 'Gym / Fitness Center',
 'Shoe Store',
 'Event Space',
 'Food Truck',
 'Gym Pool',
 'Electronics Store',
 'Skating Rink',
 'Pharmacy',
 'Music Venue',
 'Department Store',
 'Monument / Landmark',
 'Art Museum',
 'Poutine Place',
 'Concert Hall',
 'Church',
 'Fountain',
 'Tailor Shop',
 'Basketball Stadium',
 'Sporting Goods Shop',
 'Beach',
 'Lake',
 'Train Station',
 'University',
 'Movie Theater',
 'Aquarium',
 'Baseball Stadium',
 'Indie Movie Theater',
 'Organic Grocery',
 'Health Food Store',
 'Music Store',
 "Women's Store",
 'Food Court',
 'Optical Shop',
 'Airport',
 'Harbor / Marina',
 'Sculpture Garden',
 'Cupcake Shop',
 'Rock Climbing Spot']


In [48]:
#dropping the columns corresponding to unwanted venues
downtown_after_onehot = downtown_after_onehot[family_friendly_categories].drop(columns = ['Neighbourhood Latitude', 'Neighbourhood Longitude']).groupby('Neighbourhood').sum()
downtown_after_onehot

Unnamed: 0_level_0,Grocery Store,Park,Bank,Playground,Sandwich Place,Candy Store,Metro Station,Diner,Restaurant,Bakery,Farm,Pet Store,Gift Shop,Garden,Dance Studio,Pool,Theater,Performing Arts Venue,Bookstore,Salon / Barbershop,Arts & Crafts Store,Ice Cream Shop,Historic Site,Supermarket,Yoga Studio,Health & Beauty Service,Furniture / Home Store,Video Store,Comic Shop,Clothing Store,Shopping Mall,Cosmetics Shop,Gym,Dog Run,Museum,Farmers Market,Chocolate Shop,Dessert Shop,Spa,Gym / Fitness Center,Shoe Store,Event Space,Food Truck,Gym Pool,Electronics Store,Skating Rink,Pharmacy,Music Venue,Department Store,Monument / Landmark,Art Museum,Poutine Place,Concert Hall,Church,Fountain,Tailor Shop,Basketball Stadium,Sporting Goods Shop,Beach,Lake,Train Station,University,Movie Theater,Aquarium,Baseball Stadium,Indie Movie Theater,Organic Grocery,Health Food Store,Music Store,Women's Store,Food Court,Optical Shop,Airport,Harbor / Marina,Sculpture Garden,Cupcake Shop,Rock Climbing Spot
Neighbourhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1
Berczy Park,2,4,0,0,1,0,0,0,4,3,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,1,0,0,1,0,1,2,0,1,1,0,1,0,1,0,0,1,0,0,0,0,0,1,0,0,0,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
"CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport",0,1,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2,1,0,0
Central Bay Street,1,3,0,0,1,0,0,2,1,0,0,0,0,0,0,0,2,0,1,1,2,0,1,1,2,0,2,0,1,2,1,2,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,1,1,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
Christie,5,2,1,1,1,1,0,2,1,1,0,0,1,0,0,0,1,0,0,0,0,2,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,1
Church and Wellesley,1,3,0,0,1,0,0,3,2,0,0,0,0,1,2,0,1,0,2,1,1,2,1,1,2,1,1,1,1,2,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
"Commerce Court, Victoria Hotel",0,1,0,0,1,0,0,0,6,2,0,0,0,0,0,0,2,0,1,0,0,1,0,0,0,0,1,0,0,0,1,2,2,0,1,1,0,0,1,1,0,0,1,0,0,0,0,0,1,1,0,0,3,1,1,1,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
"First Canadian Place, Underground city",0,2,0,0,1,0,0,0,5,1,0,0,0,0,0,0,3,0,1,1,0,1,0,0,0,0,1,0,0,0,0,1,2,0,1,0,0,0,1,1,0,0,1,0,0,0,0,0,1,2,0,0,3,0,1,1,1,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0
"Garden District, Ryerson",1,1,0,0,1,0,0,2,2,0,0,0,0,1,1,0,2,0,1,0,1,1,1,1,1,0,1,0,1,2,1,2,2,0,0,0,0,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
"Harbourfront East, Union Station, Toronto Islands",0,4,0,0,1,0,0,0,2,1,0,0,0,0,1,0,3,1,0,0,0,1,0,1,1,0,0,0,0,0,0,0,3,0,1,0,0,1,1,1,0,1,0,0,0,1,0,0,0,1,0,0,2,0,0,0,1,1,0,1,1,0,1,1,2,0,0,0,0,0,0,0,0,0,0,0,0
"Kensington Market, Chinatown, Grange Park",1,3,0,0,1,0,0,1,1,3,0,0,0,0,1,0,0,0,1,0,2,1,0,1,3,0,0,0,1,1,0,1,2,0,0,1,0,2,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,1,0,0,0,0,0


In [49]:
#importing libraries to use K-Means clustering
from sklearn.cluster import KMeans

In [50]:
# k=4
kmeans = KMeans(n_clusters = 4, random_state = 0).fit(downtown_after_onehot)

In [51]:
#creating groups and printing them in the order of having maximum total (that is maximum number of family frineldy venues nearby)
means_df = pd.DataFrame(kmeans.cluster_centers_)
means_df.columns = downtown_after_onehot.columns
means_df.index = ['Group No. 1','Group No. 2','Group No. 3','Group No. 4']
means_df['Total Sum'] = means_df.sum(axis = 1)
means_df.sort_values(axis = 0, by = ['Total Sum'], ascending=False)

Unnamed: 0,Grocery Store,Park,Bank,Playground,Sandwich Place,Candy Store,Metro Station,Diner,Restaurant,Bakery,Farm,Pet Store,Gift Shop,Garden,Dance Studio,Pool,Theater,Performing Arts Venue,Bookstore,Salon / Barbershop,Arts & Crafts Store,Ice Cream Shop,Historic Site,Supermarket,Yoga Studio,Health & Beauty Service,Furniture / Home Store,Video Store,Comic Shop,Clothing Store,Shopping Mall,Cosmetics Shop,Gym,Dog Run,Museum,Farmers Market,Chocolate Shop,Dessert Shop,Spa,Gym / Fitness Center,Shoe Store,Event Space,Food Truck,Gym Pool,Electronics Store,Skating Rink,Pharmacy,Music Venue,Department Store,Monument / Landmark,Art Museum,Poutine Place,Concert Hall,Church,Fountain,Tailor Shop,Basketball Stadium,Sporting Goods Shop,Beach,Lake,Train Station,University,Movie Theater,Aquarium,Baseball Stadium,Indie Movie Theater,Organic Grocery,Health Food Store,Music Store,Women's Store,Food Court,Optical Shop,Airport,Harbor / Marina,Sculpture Garden,Cupcake Shop,Rock Climbing Spot,Total Sum
Group No. 2,1.0,3.0,0.2,0.0,1.0,0.0,0.0,2.6,2.0,0.6,0.0,0.2,0.2,0.4,1.2,0.2,2.0,0.4,1.2,0.6,1.0,1.2,1.0,0.8,1.6,0.4,1.0,0.4,0.8,1.6,0.8,1.6,0.8,0.2,0.2,0.2,0.2,0.6,0.2,0.6,0.2,0.2,0.4,0.2,0.2,0.2,0.2,0.4,0.2,0.4,0.4,0.2,0.6,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,36.2
Group No. 3,0.5,2.125,-2.775558e-17,0.0,1.0,0.0,0.0,0.25,4.5,1.625,0.0,0.0,0.0,0.0,0.125,0.0,2.25,0.125,0.625,0.25,0.25,1.0,0.0,0.5,0.25,0.0,1.0,0.0,0.25,0.625,0.5,1.0,2.25,0.0,1.0,0.625,0.0,0.25,0.625,1.0,0.0,0.125,0.875,0.0,0.0,0.125,0.0,0.125,0.875,1.0,0.125,0.0,2.0,0.5,0.75,0.75,0.75,0.75,0.25,0.25,0.875,0.125,0.625,0.375,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,36.0
Group No. 4,1.5,2.5,0.0,0.0,0.5,0.0,0.0,0.5,2.0,4.5,0.0,0.0,0.5,0.0,0.5,0.0,0.5,0.0,2.0,0.0,2.0,1.0,0.0,1.0,2.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,0.5,1.0,0.0,2.0,0.5,0.0,0.5,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.5,1.0,0.5,0.5,0.0,0.5,0.5,0.0,0.0,0.0,0.0,0.0,34.5
Group No. 1,1.75,1.75,0.5,0.5,0.5,0.5,0.25,1.0,0.5,0.5,0.25,0.25,0.5,0.5,0.5,0.25,0.5,0.25,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.25,0.25,0.0,0.0,0.0,0.25,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.25,0.0,0.0,0.0,0.25,0.5,0.25,0.25,0.25,14.5


In [52]:
#the above dataframe tells that neighbourhoods belonging to Group 2 are the ones having maximum family-friendly venues nearby
#adding group labels to the list of neighbourhoods
neighbourhood_summary = pd.DataFrame([neighbourhood_list, 1 + kmeans.labels_]).T
neighbourhood_summary.columns = ['Neighbourhood', 'Group']
neighbourhood_summary

Unnamed: 0,Neighbourhood,Group
0,Rosedale,3
1,"St. James Town, Cabbagetown",1
2,Church and Wellesley,2
3,"Regent Park, Harbourfront",1
4,"Garden District, Ryerson",2
5,St. James Town,3
6,Berczy Park,3
7,Central Bay Street,2
8,"Richmond, Adelaide, King",3
9,"Harbourfront East, Union Station, Toronto Islands",4


In [53]:
#printing out the result (best neighbourhoods)
result_df = neighbourhood_summary[neighbourhood_summary['Group'] == 2]
result_df

Unnamed: 0,Neighbourhood,Group
2,Church and Wellesley,2
4,"Garden District, Ryerson",2
7,Central Bay Street,2
10,"Toronto Dominion Centre, Design Exchange",2
11,"Commerce Court, Victoria Hotel",2


In [54]:
final_df = pd.merge(geo_aftermerge, result_df, on='Neighbourhood')
final_df

Unnamed: 0,PostalCode,Latitude,Longitude,Borough,Neighbourhood,Group
0,M4Y,43.66586,-79.38316,Downtown Toronto,Church and Wellesley,2
1,M5B,43.657162,-79.378937,Downtown Toronto,"Garden District, Ryerson",2
2,M5G,43.657952,-79.387383,Downtown Toronto,Central Bay Street,2
3,M5K,43.647177,-79.381576,Downtown Toronto,"Toronto Dominion Centre, Design Exchange",2
4,M5L,43.648198,-79.379817,Downtown Toronto,"Commerce Court, Victoria Hotel",2


In [55]:
#visualizing the resultant family-friendly neighbpuhoods of downtown toronto on the map
address_dt = 'Downtown, Toronto'
latitude_dt = 43.6548
longitude_dt = -79.3883

map_Downtown = folium.Map(location=[latitude_dt, longitude_dt], zoom_start=11.5)

# add markers to map
for lat, lng, label in zip(final_df['Latitude'], final_df['Longitude'], final_df['Neighbourhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius = 10,
        popup = label,
        color ='blue',
        fill = True,
        fill_color = '#3186cc',
        fill_opacity = 0.7).add_to(map_Downtown)  
    
map_Downtown

In [56]:
#the map might not be visible on Github, hence open it in colaboratory or refer to the report
#Thank You!