## A project to identify suitable location(s) for constructing warehouse (Recommender System)

In [8]:
# importing required libraries 
import numpy as np  
import pandas as pd  
from bs4 import BeautifulSoup
import requests  
import json  
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

# !conda install -c conda-forge geopy --yes # uncomment this line if the library is already installed
import geopy.geocoders # convert an address into latitude and longitude values

from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

# !conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if the library is already installed
import folium # map rendering library

print('Required Libraries are imported.')

Required Libraries are imported.


### Toronto Postal Codes details. The same is available in a CSV file.

In [9]:
df_toronto = pd.read_csv('toronto_base.csv')
df_toronto.head()

Unnamed: 0.1,Unnamed: 0,Postal code,Borough,Neighborhood,Latitude,Longitude
0,0,M3A,North York,Parkwoods,43.753259,-79.329656
1,1,M4A,North York,Victoria Village,43.725882,-79.315572
2,2,M5A,Downtown Toronto,Regent Park / Harbourfront,43.65426,-79.360636
3,3,M6A,North York,Lawrence Manor / Lawrence Heights,43.718518,-79.464763
4,4,M7A,Downtown Toronto,Queen's Park / Ontario Provincial Government,43.662301,-79.389494


###  Map of Toronto City with its Postal Codes as regions.

In [16]:
#toronto_latitude = 43.6932; toronto_longitude = -79.3832
address = 'Toronto'
geolocator = Nominatim(user_agent="Toronto_explorer")
location = geolocator.geocode(address)
toronto_latitude = location.latitude
toronto_longitude = location.longitude

Toronto_map = folium.Map(location=[toronto_latitude, longitude], zoom_start=10)

for lat, lng, borough, neighborhood in zip(df_toronto['Latitude'], df_toronto['Longitude'],df_toronto['Borough'], df_toronto['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(Toronto_map)  
    
Toronto_map

### We select  "Scarorough" borough from Toronto city and its neighborhoods

In [18]:
# Filering only neighborhoods belongs to "Scarborough" borough.
scarborough_data = df_toronto[df_toronto['Borough'] == 'Scarborough']
scarborough_data = scarborough_data.reset_index(drop=True).drop(columns = 'Unnamed: 0')
scarborough_data.head()

Unnamed: 0,Postal code,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,Malvern / Rouge,43.806686,-79.194353
1,M1C,Scarborough,Rouge Hill / Port Union / Highland Creek,43.784535,-79.160497
2,M1E,Scarborough,Guildwood / Morningside / West Hill,43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


### Making a map of Scarborough and Its neighbourhoods

In [19]:
address_scar = 'Scarborough, Toronto'
latitude_scar = 43.773077
longitude_scar = -79.257774
#print('The geograpical coordinate of "Scarborough" are: {}, {}.'.format(latitude_scar, longitude_scar))

map_Scarborough = folium.Map(location=[latitude_scar, longitude_scar], zoom_start=11.5)

# add markers to map
for lat, lng, label in zip(scarborough_data['Latitude'], scarborough_data['Longitude'], scarborough_data['Neighborhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius = 10,
        popup = label,
        color ='blue',
        fill = True,
        fill_color = '#3186cc',
        fill_opacity = 0.7,
        parse_html=False).add_to(map_Scarborough)  
    
map_Scarborough



The geograpical coordinate of "Scarborough" are: 43.773077, -79.257774.


In [22]:
def foursquare_search (postal_code_list, neighborhood_list, lat_list, lng_list, LIMIT = 500, radius = 1000):
    result_ds = []
    counter = 0
    for postal_code, neighborhood, lat, lng in zip(postal_code_list, neighborhood_list, lat_list, lng_list):
         
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, CLIENT_SECRET, VERSION, 
            lat, lng, radius, LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        tmp_dict = {}
        tmp_dict['Postal Code'] = postal_code; tmp_dict['Neighborhood(s)'] = neighborhood; 
        tmp_dict['Latitude'] = lat; tmp_dict['Longitude'] = lng;
        tmp_dict['Crawling_result'] = results;
        result_ds.append(tmp_dict)
        counter += 1
        print('{}.'.format(counter))
        print('Data is Received, for the Postal Code {} (and Neighborhoods {}) SUCCESSFULLY.'.format(postal_code, neighborhood))
    return result_ds;


In [23]:
#CLIENT_ID = '0MJA3NYYG3U2ZY1LTZN2OYEHS3Y3WVSON2GBSO3IL4EDYVIR' # Sample Foursquare ID
#CLIENT_SECRET = 'WGWSAF2TKVUQPE3PD0N3EOITFVBY5EYP1VCZI3BMUG0ROUS5' # Sample Foursquare Secret
CLIENT_ID = 'QHUMOXGWQVJDC2CHA35N4R03TDDW2DT5DZXZ1D40BZQR4JG1' #  Foursquare ID
CLIENT_SECRET = 'NYOPDDLYL40X4XFRNDLBS0TKRYRPEKLRECZUTA2CTALWWV5Q' # Foursquar Secret
VERSION = '20180605' # Foursquare API version

### Accessing 'Foursquare' and obtaining venues in the neighborhoods inside 'Scarborough'

In [29]:
print('Start accessing different neighborhoods inside "Scarborough"')
Scarborough_foursquare_dataset = foursquare_search(list(scarborough_data['Postal code']),
                                                   list(scarborough_data['Neighborhood']),
                                                   list(scarborough_data['Latitude']),
                                                   list(scarborough_data['Longitude']),)
print('')
print('Finished accessing different neighborhoods inside "Scarborough"')

# To avoid connecting to 'Foursquare' everytime the recived data saved to a local file. This can help us to avoid accessing 
# iternet repeatedly.
import pickle
with open("Scarborough_foursquare_dataset.txt", "wb") as fp:   #Pickling
    pickle.dump(Scarborough_foursquare_dataset, fp)
    
print('')
print("Received Data from 'Foursquare' is Saved to a file.")  

with open("Scarborough_foursquare_dataset.txt", "rb") as fp:   # Unpickling
    Scarborough_foursquare_dataset = pickle.load(fp)


Start accessing different neighborhoods inside "Scarborough"
1.
Data is Received, for the Postal Code M1B (and Neighborhoods Malvern / Rouge) SUCCESSFULLY.
2.
Data is Received, for the Postal Code M1C (and Neighborhoods Rouge Hill / Port Union / Highland Creek) SUCCESSFULLY.
3.
Data is Received, for the Postal Code M1E (and Neighborhoods Guildwood / Morningside / West Hill) SUCCESSFULLY.
4.
Data is Received, for the Postal Code M1G (and Neighborhoods Woburn) SUCCESSFULLY.
5.
Data is Received, for the Postal Code M1H (and Neighborhoods Cedarbrae) SUCCESSFULLY.
6.
Data is Received, for the Postal Code M1J (and Neighborhoods Scarborough Village) SUCCESSFULLY.
7.
Data is Received, for the Postal Code M1K (and Neighborhoods Kennedy Park / Ionview / East Birchmount Park) SUCCESSFULLY.
8.
Data is Received, for the Postal Code M1L (and Neighborhoods Golden Mile / Clairlea / Oakridge) SUCCESSFULLY.
9.
Data is Received, for the Postal Code M1M (and Neighborhoods Cliffside / Cliffcrest / Scarboro

### Cleaning the data received from 'Foursquare' is performed here 

In [35]:
# This function is created to clean every neighborhood inside the database.

def get_venue_dataset(foursquare_dataset):
    result_df = pd.DataFrame(columns = ['Postal Code', 'Neighborhood', 
                                           'Neighborhood Latitude', 'Neighborhood Longitude',
                                          'Venue', 'Venue Summary', 'Venue Category', 'Distance'])
    
    for neigh_dict in foursquare_dataset:
        postal_code = neigh_dict['Postal Code']; neigh = neigh_dict['Neighborhood(s)']
        lat = neigh_dict['Latitude']; lng = neigh_dict['Longitude']
        #print('Number of Venues "{}" Posal Code and "{}" Negihborhood(s) is:'.format(postal_code, neigh))
        #print(len(neigh_dict['Crawling_result']))
        
        for venue_dict in neigh_dict['Crawling_result']:
            summary = venue_dict['reasons']['items'][0]['summary']
            name = venue_dict['venue']['name']
            dist = venue_dict['venue']['location']['distance']
            cat =  venue_dict['venue']['categories'][0]['name']
            
            result_df = result_df.append({'Postal Code': postal_code, 'Neighborhood': neigh, 
                              'Neighborhood Latitude': lat, 'Neighborhood Longitude':lng,
                              'Venue': name, 'Venue Summary': summary, 
                              'Venue Category': cat, 'Distance': dist}, ignore_index = True)
            
    
    return(result_df)

In [36]:
scarborough_venues = get_venue_dataset(Scarborough_foursquare_dataset)
print ("Cleaning of 'Sacrborough' data received from 'Foursquare' finished here")

Cleaning of 'Sacrborough' data received from 'Foursquare' finished here


### Showing Venues for Each Neighborhood in Scarborough

In [14]:
scarborough_venues.head()

Unnamed: 0,Postal Code,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Summary,Venue Category,Distance
0,M1B,Malvern / Rouge,43.806686,-79.194353,Harvey's,This spot is popular,Restaurant,807
1,M1B,Malvern / Rouge,43.806686,-79.194353,Wendy's,This spot is popular,Fast Food Restaurant,600
2,M1B,Malvern / Rouge,43.806686,-79.194353,Wendy’s,This spot is popular,Fast Food Restaurant,387
3,M1B,Malvern / Rouge,43.806686,-79.194353,RBC Royal Bank,This spot is popular,Bank,906
4,M1B,Malvern / Rouge,43.806686,-79.194353,Caribbean Wave,This spot is popular,Caribbean Restaurant,912


In [15]:
scarborough_venues.tail()

Unnamed: 0,Postal Code,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Summary,Venue Category,Distance
385,M1W,Steeles West / L'Amoreaux West,43.799525,-79.318389,Divine Wok Restaurant,This spot is popular,Chinese Restaurant,957
386,M1W,Steeles West / L'Amoreaux West,43.799525,-79.318389,Buddy Cafe,This spot is popular,Chinese Restaurant,973
387,M1W,Steeles West / L'Amoreaux West,43.799525,-79.318389,Birchwood Plaza,This spot is popular,Shopping Mall,977
388,M1W,Steeles West / L'Amoreaux West,43.799525,-79.318389,Olympian Swimming,This spot is popular,Gym Pool,978
389,M1W,Steeles West / L'Amoreaux West,43.799525,-79.318389,Dumpling & Szechuan Cuisine（川流不息店）,This spot is popular,Chinese Restaurant,989


## Saving a cleaned final version of  dataframe to a csv file (data from Foursquare) 
## This will be stored in a local file can be used as and when required.
## The same is again read and loaded

In [46]:
scarborough_venues.to_csv('scarborough_venues.csv')
scarborough_venues = pd.read_csv('scarborough_venues.csv')

### Some Summary Information about Neighborhoods inside "Scarborough"

In [3]:
neigh_list = list(scarborough_venues['Neighborhood'].unique())
print('Number of Neighborhoods inside Scarborough:')
print(len(neigh_list))
print('List of Neighborhoods inside Scarborough:')
neigh_list

NameError: name 'scarborough_venues' is not defined

In [None]:
neigh_venue_summary = scarborough_venues.groupby('Neighborhood').count()
neigh_venue_summary.drop(columns = ['Unnamed: 0']).head()

In [None]:
print('There are {} uniques categories.'.format(len(scarborough_venues['Venue Category'].unique())))

print('Here is the list of different categories:')
list(scarborough_venues['Venue Category'].unique())

### One-hot Encoding the "categroies" Column into Every Unique Categorical Feature.

In [50]:
# one hot encoding
scarborough_onehot = pd.get_dummies(data = scarborough_venues, drop_first  = False, 
                              prefix = "", prefix_sep = "", columns = ['Venue Category'])
scarborough_onehot.head()

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,Postal Code,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Summary,Distance,Accessories Store,African Restaurant,American Restaurant,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Auto Garage,BBQ Joint,Badminton Court,Bakery,Bank,Beach,Beer Store,Bowling Alley,Breakfast Spot,Bubble Tea Shop,Burger Joint,Bus Line,Bus Station,Business Service,Café,Cajun / Creole Restaurant,Camera Store,Cantonese Restaurant,Caribbean Restaurant,Chinese Restaurant,Clothing Store,Coffee Shop,College Stadium,Convenience Store,Deli / Bodega,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Electronics Store,Event Space,Fast Food Restaurant,Fish Market,Flea Market,Food & Drink Shop,Fried Chicken Joint,Furniture / Home Store,Gas Station,General Entertainment,Golf Course,Greek Restaurant,Grocery Store,Gym,Gym Pool,Hakka Restaurant,Hardware Store,Hobby Shop,Hockey Arena,Hookah Bar,Hotpot Restaurant,Ice Cream Shop,Indian Restaurant,Intersection,Italian Restaurant,Japanese Restaurant,Juice Bar,Korean Restaurant,Latin American Restaurant,Light Rail Station,Liquor Store,Lounge,Malay Restaurant,Mediterranean Restaurant,Metro Station,Mexican Restaurant,Middle Eastern Restaurant,Mobile Phone Shop,Motorcycle Shop,Music Store,Noodle House,Other Great Outdoors,Paper / Office Supplies Store,Park,Pet Store,Pharmacy,Pizza Place,Playground,Plaza,Pool,Pool Hall,Pub,Rental Car Location,Restaurant,Sandwich Place,Seafood Restaurant,Shoe Store,Shop & Service,Shopping Mall,Skating Rink,Soccer Field,Sporting Goods Shop,Sports Bar,Sri Lankan Restaurant,Supermarket,Sushi Restaurant,Taiwanese Restaurant,Tennis Court,Thai Restaurant,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wings Joint,Yoga Studio
0,0,0,M1B,Malvern / Rouge,43.806686,-79.194353,Harvey's,This spot is popular,807,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,1,1,M1B,Malvern / Rouge,43.806686,-79.194353,Wendy's,This spot is popular,600,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,2,2,M1B,Malvern / Rouge,43.806686,-79.194353,Wendy’s,This spot is popular,387,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,3,3,M1B,Malvern / Rouge,43.806686,-79.194353,RBC Royal Bank,This spot is popular,906,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,4,4,M1B,Malvern / Rouge,43.806686,-79.194353,Caribbean Wave,This spot is popular,912,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


## Features relevent to the contractor is selected manually.

In [53]:
important_list_of_features = [
 
 'Neighborhood',
 'Neighborhood Latitude',
 'Neighborhood Longitude',

 'American Restaurant',
 'Asian Restaurant',
 'BBQ Joint',
 'Bakery',
 'Breakfast Spot',
 'Burger Joint',
 'Cajun / Creole Restaurant',
 'Cantonese Restaurant',
 'Caribbean Restaurant',
 'Chinese Restaurant',
 'Diner',
 'Fast Food Restaurant',
 'Fish Market',
 'Food & Drink Shop',
 'Fried Chicken Joint',
 'Greek Restaurant',
 'Grocery Store',
 'Hakka Restaurant',
 'Hotpot Restaurant',
 'Indian Restaurant',
 'Italian Restaurant',
 'Japanese Restaurant',
 'Korean Restaurant',
 'Latin American Restaurant',
 'Malay Restaurant',
 'Mediterranean Restaurant',
 'Mexican Restaurant',
 'Middle Eastern Restaurant',
 'Noodle House',
 'Pizza Place',
 'Restaurant',
 'Sandwich Place',
 'Seafood Restaurant',
 'Sushi Restaurant',
 'Taiwanese Restaurant',
 'Thai Restaurant',
 'Vegetarian / Vegan Restaurant',
 'Vietnamese Restaurant',
 'Wings Joint']

### Updating the One-hot Encoded DataFrame and
### Grouping the Data by Neighborhoods

In [54]:
scarborough_onehot = scarborough_onehot[important_list_of_features].drop(
    columns = ['Neighborhood Latitude', 'Neighborhood Longitude']).groupby(
    'Neighborhood').sum()


scarborough_onehot.head()

Unnamed: 0_level_0,American Restaurant,Asian Restaurant,BBQ Joint,Bakery,Breakfast Spot,Burger Joint,Cajun / Creole Restaurant,Cantonese Restaurant,Caribbean Restaurant,Chinese Restaurant,Diner,Fast Food Restaurant,Fish Market,Food & Drink Shop,Fried Chicken Joint,Greek Restaurant,Grocery Store,Hakka Restaurant,Hotpot Restaurant,Indian Restaurant,Italian Restaurant,Japanese Restaurant,Korean Restaurant,Latin American Restaurant,Malay Restaurant,Mediterranean Restaurant,Mexican Restaurant,Middle Eastern Restaurant,Noodle House,Pizza Place,Restaurant,Sandwich Place,Seafood Restaurant,Sushi Restaurant,Taiwanese Restaurant,Thai Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Wings Joint
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1
Agincourt,0,0,0,2,1,0,0,1,2,7,0,0,0,0,0,0,0,0,0,1,0,1,0,1,1,1,0,0,1,1,1,1,1,1,0,0,0,1,0
Birch Cliff / Cliffside West,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0
Cedarbrae,0,0,0,3,0,1,0,0,1,1,0,1,0,0,1,0,1,1,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1
Clarks Corners / Tam O'Shanter / Sullivan,0,0,0,0,0,0,0,1,1,1,0,2,0,0,1,1,1,0,0,0,1,0,0,0,0,0,1,0,1,2,0,2,1,0,1,1,0,1,0
Cliffside / Cliffcrest / Scarborough Village West,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0


## Integrating Different Restaurants and Different Joints
### (Assuming Different Resaturants Use the Same Raw Materials)
#### This Assumption is made for simplicity and due to not having very large dataset about neighborhoods.

In [55]:
feat_name_list = list(scarborough_onehot.columns)
restaurant_list = []


for counter, value in enumerate(feat_name_list):
    if value.find('Restaurant') != (-1):
        restaurant_list.append(value)
        
scarborough_onehot['Total Restaurants'] = scarborough_onehot[restaurant_list].sum(axis = 1)
scarborough_onehot = scarborough_onehot.drop(columns = restaurant_list)


feat_name_list = list(scarborough_onehot.columns)
joint_list = []


for counter, value in enumerate(feat_name_list):
    if value.find('Joint') != (-1):
        joint_list.append(value)
        
scarborough_onehot['Total Joints'] = scarborough_onehot[joint_list].sum(axis = 1)
scarborough_onehot = scarborough_onehot.drop(columns = joint_list)

### Showing the Fully-Processed DataFrame about Neighborhoods inside Scarborrough.
### This Dataset is Ready for any Machine Learning Algorithm.

In [56]:
scarborough_onehot

Unnamed: 0_level_0,Bakery,Breakfast Spot,Diner,Fish Market,Food & Drink Shop,Grocery Store,Noodle House,Pizza Place,Sandwich Place,Total Restaurants,Total Joints
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Agincourt,2,1,0,0,0,0,1,1,1,19,0
Birch Cliff / Cliffside West,0,0,1,0,0,0,0,0,0,2,0
Cedarbrae,3,0,0,0,0,1,0,1,0,7,3
Clarks Corners / Tam O'Shanter / Sullivan,0,0,0,0,0,1,1,2,2,12,1
Cliffside / Cliffcrest / Scarborough Village West,0,0,0,0,0,0,0,3,0,1,1
Dorset Park / Wexford Heights / Scarborough Town Centre,2,0,0,0,0,0,0,1,1,14,2
Golden Mile / Clairlea / Oakridge,2,0,2,0,0,1,0,1,1,3,0
Guildwood / Morningside / West Hill,0,0,0,0,1,1,0,3,1,5,1
Kennedy Park / Ionview / East Birchmount Park,0,0,0,0,0,2,0,1,1,7,1
Malvern / Rouge,1,0,0,0,0,0,0,0,1,6,0


# Run k-means to Cluster Neighborhoods into 5 Clusters

In [57]:
# import k-means from clustering stage
from sklearn.cluster import KMeans

# run k-means clustering
kmeans = KMeans(n_clusters = 5, random_state = 0).fit(scarborough_onehot)


## Showing Centers of Each Cluster

In [58]:
means_df = pd.DataFrame(kmeans.cluster_centers_)
means_df.columns = scarborough_onehot.columns
means_df.index = ['G1','G2','G3','G4','G5']
means_df['Total Sum'] = means_df.sum(axis = 1)
means_df.sort_values(axis = 0, by = ['Total Sum'], ascending=False)

Unnamed: 0,Bakery,Breakfast Spot,Diner,Fish Market,Food & Drink Shop,Grocery Store,Noodle House,Pizza Place,Sandwich Place,Total Restaurants,Total Joints,Total Sum
G3,2.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,19.0,0.0,25.0
G4,1.0,0.0,0.0,0.0,0.0,0.666667,1.0,1.666667,1.0,12.666667,1.333333,19.333333
G2,2.0,0.5,0.0,0.5,0.0,2.0,0.0,1.5,0.0,8.0,2.5,17.0
G5,0.75,0.25,0.0,0.0,0.25,1.25,0.25,1.5,1.0,6.25,0.5,12.0
G1,0.333333,0.166667,0.5,0.0,0.0,0.333333,0.0,0.833333,0.333333,2.166667,0.333333,5.0


## Result:
### Best Group is G5;
### Second Best Group is G1;
### Third Best Group is G4;

### Inserting "kmeans.labels_" into the Original Scarborough DataFrame
#### Finding the Corresponding Group for Each Neighborhood.

In [59]:
#neigh_summary = pd.DataFrame([scar_ds.index, 1 + kmeans.labels_]).T
neigh_summary = pd.DataFrame([scarborough_onehot.index, 1 + kmeans.labels_]).T
neigh_summary.columns = ['Neighborhood', 'Group']
neigh_summary

Unnamed: 0,Neighborhood,Group
0,Agincourt,3
1,Birch Cliff / Cliffside West,1
2,Cedarbrae,2
3,Clarks Corners / Tam O'Shanter / Sullivan,4
4,Cliffside / Cliffcrest / Scarborough Village West,1
5,Dorset Park / Wexford Heights / Scarborough To...,4
6,Golden Mile / Clairlea / Oakridge,1
7,Guildwood / Morningside / West Hill,5
8,Kennedy Park / Ionview / East Birchmount Park,5
9,Malvern / Rouge,5


# Deducing Results:
## Best Neighborhood Are...

In [60]:
neigh_summary[neigh_summary['Group'] == 5]

Unnamed: 0,Neighborhood,Group
7,Guildwood / Morningside / West Hill,5
8,Kennedy Park / Ionview / East Birchmount Park,5
9,Malvern / Rouge,5
13,Steeles West / L'Amoreaux West,5


In [61]:
name_of_neigh = list(neigh_summary[neigh_summary['Group'] == 5]['Neighborhood'])[0]
scarborough_venues[scarborough_venues['Neighborhood'] == name_of_neigh].iloc[0,1:5].to_dict()

{'Unnamed: 0.1': 22,
 'Postal Code': 'M1E',
 'Neighborhood': 'Guildwood / Morningside / West Hill',
 'Neighborhood Latitude': 43.7635726}

## Second Best Neighborhoods

In [62]:
neigh_summary[neigh_summary['Group'] == 1]

Unnamed: 0,Neighborhood,Group
1,Birch Cliff / Cliffside West,1
4,Cliffside / Cliffcrest / Scarborough Village West,1
6,Golden Mile / Clairlea / Oakridge,1
11,Rouge Hill / Port Union / Highland Creek,1
12,Scarborough Village,1
15,Woburn,1


## Third Best Neighborhood

In [63]:
neigh_summary[neigh_summary['Group'] == 4]

Unnamed: 0,Neighborhood,Group
3,Clarks Corners / Tam O'Shanter / Sullivan,4
5,Dorset Park / Wexford Heights / Scarborough To...,4
10,Milliken / Agincourt North / Steeles East / L'...,4


In [42]:
name_of_neigh = list(neigh_summary[neigh_summary['Group'] == 4]['Neighborhood'])[0]
scarborough_venues[scarborough_venues['Neighborhood'] == name_of_neigh].iloc[0,1:5].to_dict()

{'Postal Code': 'M1T',
 'Neighborhood': "Clarks Corners / Tam O'Shanter / Sullivan",
 'Neighborhood Latitude': 43.7816375,
 'Neighborhood Longitude': -79.3043021}

### Thank You So Much for Reading this Notebook. 
#### Mohammad Ali Dastgheib - jdastgheib@gmail.com