# A Recommender System for Grocery Contractor

In [1]:
# importing libraries
import numpy as np # library to handle data in a vectorized manner
import pandas as pd # library for data analsysis
from bs4 import BeautifulSoup
import requests # library to handle requests
import json # library to handle JSON files
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

# !conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
import geopy.geocoders # convert an address into latitude and longitude values

# !conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries are imported.')

Libraries are imported.


# Postal Codes in Toronto

In [2]:
# Loading the dataset which is about postal codes in Toronto
# This dataset was created in week 3. 
df_toronto = pd.read_csv('toronto_base.csv')
df_toronto.head()

Unnamed: 0,Postcode,Borough,Neighborhood,Postal Code,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",M1B,43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",M1C,43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",M1E,43.763573,-79.188711
3,M1G,Scarborough,Woburn,M1G,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,M1H,43.773136,-79.239476


# Create a Map of Toronto City (with its Postal Codes' Regions)

In [3]:
# for the city Toronto, latitude and longtitude are manually extracted via google search
toronto_latitude = 43.6543; toronto_longitude = -79.3860
map_toronto = folium.Map(location = [toronto_latitude, toronto_longitude], zoom_start = 10.7)

# add markers to map
for lat, lng, borough, neighborhood in zip(df_toronto['Latitude'], df_toronto['Longitude'], df_toronto['Borough'], df_toronto['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_toronto)  
    

map_toronto

# Focusing on the "Downtown Toronto" Borough in Toronto (its neighborhoods)

In [4]:
df_toronto['Borough'] == 'Downtown Toronto'

# selecting only neighborhoods regarding to "Scarborough" borough.
dt_data = df_toronto[df_toronto['Borough'] == 'Downtown Toronto']
dt_data = dt_data.reset_index(drop=True)#.drop(columns = 'Unnamed: 0')
dt_data

Unnamed: 0,Postcode,Borough,Neighborhood,Postal Code,Latitude,Longitude
0,M4W,Downtown Toronto,Rosedale,M4W,43.679563,-79.377529
1,M4X,Downtown Toronto,"Cabbagetown, St. James Town",M4X,43.667967,-79.367675
2,M4Y,Downtown Toronto,Church and Wellesley,M4Y,43.66586,-79.38316
3,M5A,Downtown Toronto,"Harbourfront, Regent Park",M5A,43.65426,-79.360636
4,M5B,Downtown Toronto,"Ryerson, Garden District",M5B,43.657162,-79.378937
5,M5C,Downtown Toronto,St. James Town,M5C,43.651494,-79.375418
6,M5E,Downtown Toronto,Berczy Park,M5E,43.644771,-79.373306
7,M5G,Downtown Toronto,Central Bay Street,M5G,43.657952,-79.387383
8,M5H,Downtown Toronto,"Adelaide, King, Richmond",M5H,43.650571,-79.384568
9,M5J,Downtown Toronto,"Harbourfront East, Toronto Islands, Union Station",M5J,43.640816,-79.381752


# Create a Map of Downtown Toronto and Its Neighbourhoods

In [5]:
address_scar = 'Downtown Toronto, Toronto'
latitude_scar = 43.6543
longitude_scar = -79.3860
print('The geograpical coordinate of "Downtown Toronto" are: {}, {}.'.format(latitude_scar, longitude_scar))

map_DowntownToronto = folium.Map(location=[latitude_scar, longitude_scar], zoom_start=11.5)

# add markers to map
for lat, lng, label in zip(dt_data['Latitude'], dt_data['Longitude'], dt_data['Neighborhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius = 10,
        popup = label,
        color ='blue',
        fill = True,
        fill_color = '#3186cc',
        fill_opacity = 0.7).add_to(map_DowntownToronto)  
    
map_DowntownToronto

The geograpical coordinate of "Downtown Toronto" are: 43.6543, -79.386.


In [6]:
def foursquare_crawler (postal_code_list, neighborhood_list, lat_list, lng_list, LIMIT = 500, radius = 1000):
    result_ds = []
    counter = 0
    for postal_code, neighborhood, lat, lng in zip(postal_code_list, neighborhood_list, lat_list, lng_list):
         
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, CLIENT_SECRET, VERSION, 
            lat, lng, radius, LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        tmp_dict = {}
        tmp_dict['Postal Code'] = postal_code; tmp_dict['Neighborhood(s)'] = neighborhood; 
        tmp_dict['Latitude'] = lat; tmp_dict['Longitude'] = lng;
        tmp_dict['Crawling_result'] = results;
        result_ds.append(tmp_dict)
        counter += 1
        print('{}.'.format(counter))
        print('Data is Obtained, for the Postal Code {} (and Neighborhoods {}) SUCCESSFULLY.'.format(postal_code, neighborhood))
    return result_ds;

In [7]:
# @hiddel_cell
CLIENT_ID = 'POXL4O3TH0RPS0ZN1LL4IT4JLVKXOWAPOISEDXJ3FBB5HOU0' # your Foursquare ID
CLIENT_SECRET = 'AC2HM0D5AJVSGNBSAQTEPMXIEI2MXE0AV3DP1YS3XESGHBTO' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

# Crawling Internet (in fact only Foursquare database) for Venues in the Neighborhoods inside "Downtown Toronto"

In [8]:
print('Crawling different neighborhoods inside "Downtown Toronto"')
dt_foursquare_dataset = foursquare_crawler(list(dt_data['Postcode']),
                                                   list(dt_data['Neighborhood']),
                                                   list(dt_data['Latitude']),
                                                   list(dt_data['Longitude']),)

Crawling different neighborhoods inside "Downtown Toronto"
1.
Data is Obtained, for the Postal Code M4W (and Neighborhoods Rosedale) SUCCESSFULLY.
2.
Data is Obtained, for the Postal Code M4X (and Neighborhoods Cabbagetown, St. James Town) SUCCESSFULLY.
3.
Data is Obtained, for the Postal Code M4Y (and Neighborhoods Church and Wellesley) SUCCESSFULLY.
4.
Data is Obtained, for the Postal Code M5A (and Neighborhoods Harbourfront, Regent Park) SUCCESSFULLY.
5.
Data is Obtained, for the Postal Code M5B (and Neighborhoods Ryerson, Garden District) SUCCESSFULLY.
6.
Data is Obtained, for the Postal Code M5C (and Neighborhoods St. James Town) SUCCESSFULLY.
7.
Data is Obtained, for the Postal Code M5E (and Neighborhoods Berczy Park) SUCCESSFULLY.
8.
Data is Obtained, for the Postal Code M5G (and Neighborhoods Central Bay Street) SUCCESSFULLY.
9.
Data is Obtained, for the Postal Code M5H (and Neighborhoods Adelaide, King, Richmond) SUCCESSFULLY.
10.
Data is Obtained, for the Postal Code M5J (and

# Breakpoint:
# Saving results of Foursquare, so that we would not need to connect every time to Foursquare (and use our portions) .

In [9]:
import pickle
with open("dt_foursquare_dataset.txt", "wb") as fp:   #Pickling
    pickle.dump(dt_foursquare_dataset, fp)
print('Received Data from Internet is Saved to Computer.')

Received Data from Internet is Saved to Computer.


In [10]:
with open("dt_foursquare_dataset.txt", "rb") as fp:   # Unpickling
    dt_foursquare_dataset = pickle.load(fp)

# Cleaning the RAW Data Received from Foursquare Database

In [11]:
# This function is created to connect to the saved list which is the received database. It will extract each venue 
# for every neighborhood inside the database

def get_venue_dataset(foursquare_dataset):
    result_df = pd.DataFrame(columns = ['Postal Code', 'Neighborhood', 
                                           'Neighborhood Latitude', 'Neighborhood Longitude',
                                          'Venue', 'Venue Summary', 'Venue Category', 'Distance'])
    # print(result_df)
    
    for neigh_dict in foursquare_dataset:
        postal_code = neigh_dict['Postal Code']; neigh = neigh_dict['Neighborhood(s)']
        lat = neigh_dict['Latitude']; lng = neigh_dict['Longitude']
        print('Number of Venuse in Coordination "{}" Posal Code and "{}" Negihborhood(s) is:'.format(postal_code, neigh))
        print(len(neigh_dict['Crawling_result']))
        
        for venue_dict in neigh_dict['Crawling_result']:
            summary = venue_dict['reasons']['items'][0]['summary']
            name = venue_dict['venue']['name']
            dist = venue_dict['venue']['location']['distance']
            cat =  venue_dict['venue']['categories'][0]['name']
            
            
            # print({'Postal Code': postal_code, 'Neighborhood': neigh, 
            #                   'Neighborhood Latitude': lat, 'Neighborhood Longitude':lng,
            #                   'Venue': name, 'Venue Summary': summary, 
            #                   'Venue Category': cat, 'Distance': dist})
            
            result_df = result_df.append({'Postal Code': postal_code, 'Neighborhood': neigh, 
                              'Neighborhood Latitude': lat, 'Neighborhood Longitude':lng,
                              'Venue': name, 'Venue Summary': summary, 
                              'Venue Category': cat, 'Distance': dist}, ignore_index = True)
            # print(result_df)
    
    return(result_df)

In [12]:
dt_venues = get_venue_dataset(dt_foursquare_dataset)

Number of Venuse in Coordination "M4W" Posal Code and "Rosedale" Negihborhood(s) is:
23
Number of Venuse in Coordination "M4X" Posal Code and "Cabbagetown, St. James Town" Negihborhood(s) is:
38
Number of Venuse in Coordination "M4Y" Posal Code and "Church and Wellesley" Negihborhood(s) is:
100
Number of Venuse in Coordination "M5A" Posal Code and "Harbourfront, Regent Park" Negihborhood(s) is:
100
Number of Venuse in Coordination "M5B" Posal Code and "Ryerson, Garden District" Negihborhood(s) is:
100
Number of Venuse in Coordination "M5C" Posal Code and "St. James Town" Negihborhood(s) is:
100
Number of Venuse in Coordination "M5E" Posal Code and "Berczy Park" Negihborhood(s) is:
100
Number of Venuse in Coordination "M5G" Posal Code and "Central Bay Street" Negihborhood(s) is:
100
Number of Venuse in Coordination "M5H" Posal Code and "Adelaide, King, Richmond" Negihborhood(s) is:
100
Number of Venuse in Coordination "M5J" Posal Code and "Harbourfront East, Toronto Islands, Union Stati

# Showing Venues for Each Neighborhood in Downtown Toronto

In [13]:
dt_venues.head()

Unnamed: 0,Postal Code,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Summary,Venue Category,Distance
0,M4W,Rosedale,43.679563,-79.377529,Summerhill Market,This spot is popular,Grocery Store,764
1,M4W,Rosedale,43.679563,-79.377529,Black Camel,This spot is popular,BBQ Joint,994
2,M4W,Rosedale,43.679563,-79.377529,Toronto Lawn Tennis Club,This spot is popular,Athletics & Sports,896
3,M4W,Rosedale,43.679563,-79.377529,Tinuno,This spot is popular,Filipino Restaurant,945
4,M4W,Rosedale,43.679563,-79.377529,Pie Squared,This spot is popular,Pie Shop,826


In [14]:
dt_venues = dt_venues.replace({'Neighborhood': 'Neighborhood_Venue'})

# Breakpoint:
# End of Processing the Retrieved Information from Foursquare
# Saving a Cleaned Version of DataFrame as the Results from Foursquare

In [15]:
dt_venues.to_csv('dt_venues.csv')

# Loading Data from File (Saved "Foursquare " DataFrame for Venues)

In [16]:
dt_venues = pd.read_csv('dt_venues.csv')

# Some Summary Information about Neighborhoods inside "Downtown Toronto"

In [17]:
neigh_list = list(dt_venues['Neighborhood'].unique())
print('Number of Neighborhoods inside Downtown Toronto:')
print(len(neigh_list))
print('List of Neighborhoods inside Downtown Toronto:')
neigh_list

Number of Neighborhoods inside Downtown Toronto:
18
List of Neighborhoods inside Downtown Toronto:


['Rosedale',
 'Cabbagetown, St. James Town',
 'Church and Wellesley',
 'Harbourfront, Regent Park',
 'Ryerson, Garden District',
 'St. James Town',
 'Berczy Park',
 'Central Bay Street',
 'Adelaide, King, Richmond',
 'Harbourfront East, Toronto Islands, Union Station',
 'Design Exchange, Toronto Dominion Centre',
 'Commerce Court, Victoria Hotel',
 'Harbord, University of Toronto',
 'Chinatown, Grange Park, Kensington Market',
 'CN Tower, Bathurst Quay, Island airport, Harbourfront West, King and Spadina, Railway Lands, South Niagara',
 'Stn A PO Boxes 25 The Esplanade',
 'First Canadian Place, Underground city',
 'Christie']

# Some Summary Information about Neighborhoods inside "Downtown Toronto" Cont'd

In [18]:
neigh_venue_summary = dt_venues.groupby('Neighborhood').count()
neigh_venue_summary.drop(columns = ['Unnamed: 0']).head()

Unnamed: 0_level_0,Postal Code,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Summary,Venue Category,Distance
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
"Adelaide, King, Richmond",100,100,100,100,100,100,100
Berczy Park,100,100,100,100,100,100,100
"CN Tower, Bathurst Quay, Island airport, Harbourfront West, King and Spadina, Railway Lands, South Niagara",15,15,15,15,15,15,15
"Cabbagetown, St. James Town",38,38,38,38,38,38,38
Central Bay Street,100,100,100,100,100,100,100


In [19]:
print('There are {} uniques categories.'.format(len(dt_venues['Venue Category'].unique())))

print('Here is the list of different categories:')
list(dt_venues['Venue Category'].unique())


There are 206 uniques categories.
Here is the list of different categories:


['Grocery Store',
 'BBQ Joint',
 'Athletics & Sports',
 'Filipino Restaurant',
 'Pie Shop',
 'Office',
 'Park',
 'Smoothie Shop',
 'Japanese Restaurant',
 'Coffee Shop',
 'Breakfast Spot',
 'Bank',
 'Sandwich Place',
 'Playground',
 'Trail',
 'Candy Store',
 'Hostel',
 'Convenience Store',
 'Metro Station',
 'Diner',
 'Indian Restaurant',
 'Italian Restaurant',
 'Restaurant',
 'Café',
 'Jewelry Store',
 'Gastropub',
 'Pub',
 'Farm',
 'Pet Store',
 'Caribbean Restaurant',
 'Deli / Bodega',
 'Taiwanese Restaurant',
 'Gift Shop',
 'Thai Restaurant',
 'Steakhouse',
 'Garden',
 'Rock Club',
 'Dance Studio',
 'Pool',
 'American Restaurant',
 'Theater',
 'Performing Arts Venue',
 'Beer Store',
 'Bubble Tea Shop',
 'Theme Restaurant',
 'Bookstore',
 'Ramen Restaurant',
 'Tea Room',
 'Salon / Barbershop',
 'Pizza Place',
 'Juice Bar',
 'Burger Joint',
 'Mexican Restaurant',
 'Hobby Shop',
 'Gay Bar',
 'Ice Cream Shop',
 'Ethiopian Restaurant',
 'General Entertainment',
 'Creperie',
 "Men's Stor

In [20]:
# Just for fun and deeper understanding
print(type(dt_venues[['Venue Category']]))

print(type(dt_venues['Venue Category']))


<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.series.Series'>


# One-hot Encoding the "categroies" Column into Every Unique Categorical Feature.

In [21]:
# one hot encoding
dt_onehot = pd.get_dummies(data = dt_venues, drop_first  = False, 
                              prefix = "", prefix_sep = "", columns = ['Venue Category'])
# dt_onehot.head()
dt_onehot.describe()

Unnamed: 0.1,Unnamed: 0,Neighborhood Latitude,Neighborhood Longitude,Distance,Accessories Store,Afghan Restaurant,Airport,Airport Lounge,American Restaurant,Animal Shelter,Aquarium,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Auto Dealership,BBQ Joint,Bagel Shop,Bakery,Bank,Bar,Baseball Stadium,Basketball Stadium,Beach,Beer Bar,Beer Store,Belgian Restaurant,Bistro,Bookstore,Brazilian Restaurant,Breakfast Spot,Brewery,Bridal Shop,Bubble Tea Shop,Burger Joint,Burrito Place,Café,Candy Store,Caribbean Restaurant,Cheese Shop,Chinese Restaurant,Chocolate Shop,Church,Clothing Store,Cocktail Bar,Coffee Shop,College Gym,Comedy Club,Comfort Food Restaurant,Comic Shop,Concert Hall,Convenience Store,Cosmetics Shop,Creperie,Cupcake Shop,Dance Studio,Deli / Bodega,Department Store,Design Studio,Dessert Shop,Diner,Dog Run,Doner Restaurant,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farm,Farmers Market,Fast Food Restaurant,Filipino Restaurant,Fish & Chips Shop,Fish Market,Food Court,Food Truck,Fountain,French Restaurant,Fried Chicken Joint,Furniture / Home Store,Gaming Cafe,Garden,Gastropub,Gay Bar,General Entertainment,General Travel,German Restaurant,Gift Shop,Gourmet Shop,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Gym Pool,Harbor / Marina,Health & Beauty Service,Health Food Store,Historic Site,History Museum,Hobby Shop,Hostel,Hot Dog Joint,Hotel,Hotel Bar,Ice Cream Shop,Indian Restaurant,Indie Movie Theater,Italian Restaurant,Japanese Restaurant,Jazz Club,Jewelry Store,Jewish Restaurant,Juice Bar,Karaoke Bar,Korean Restaurant,Lake,Latin American Restaurant,Liquor Store,Lounge,Martial Arts Dojo,Mediterranean Restaurant,Men's Store,Metro Station,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Monument / Landmark,Movie Theater,Museum,Music School,Music Store,Music Venue,Neighborhood_Venue,New American Restaurant,Nightclub,Noodle House,Office,Opera House,Organic Grocery,Paper / Office Supplies Store,Park,Pastry Shop,Performing Arts Venue,Persian Restaurant,Pet Store,Pharmacy,Pie Shop,Pizza Place,Playground,Plaza,Poke Place,Pool,Portuguese Restaurant,Poutine Place,Pub,Ramen Restaurant,Record Shop,Restaurant,Rock Climbing Spot,Rock Club,Sake Bar,Salad Place,Salon / Barbershop,Sandwich Place,Scenic Lookout,School,Sculpture Garden,Seafood Restaurant,Shoe Store,Shopping Mall,Skating Rink,Smoke Shop,Smoothie Shop,Snack Place,South American Restaurant,Souvlaki Shop,Spa,Spanish Restaurant,Speakeasy,Sporting Goods Shop,Sports Bar,Steakhouse,Supermarket,Sushi Restaurant,Taco Place,Tailor Shop,Taiwanese Restaurant,Tapas Restaurant,Tea Room,Tech Startup,Thai Restaurant,Theater,Theme Restaurant,Thrift / Vintage Store,Toy / Game Store,Track,Trail,Train Station,University,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Wings Joint,Yoga Studio
count,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0,1576.0
mean,787.5,43.653746,-79.384014,486.595178,0.000635,0.000635,0.000635,0.000635,0.015863,0.000635,0.001904,0.010152,0.001904,0.004442,0.004442,0.000635,0.000635,0.003173,0.002538,0.022208,0.003173,0.017766,0.001269,0.003807,0.000635,0.013959,0.001269,0.001904,0.003173,0.00698,0.001904,0.01269,0.003173,0.000635,0.007614,0.008249,0.005076,0.060914,0.000635,0.003807,0.003173,0.005076,0.000635,0.002538,0.008883,0.009518,0.077411,0.000635,0.001269,0.002538,0.001904,0.012056,0.000635,0.011421,0.005076,0.000635,0.005076,0.008249,0.005076,0.000635,0.005711,0.010152,0.001269,0.001269,0.001269,0.001904,0.001269,0.004442,0.001904,0.001269,0.001269,0.000635,0.00698,0.002538,0.001904,0.000635,0.001904,0.005076,0.004442,0.001269,0.004442,0.004442,0.004442,0.001904,0.002538,0.019036,0.001904,0.000635,0.002538,0.001269,0.002538,0.000635,0.004442,0.009518,0.013325,0.005076,0.000635,0.001269,0.000635,0.000635,0.002538,0.000635,0.001269,0.002538,0.000635,0.038071,0.000635,0.012056,0.005076,0.000635,0.020939,0.024746,0.003173,0.001269,0.000635,0.001904,0.001269,0.007614,0.001269,0.002538,0.003807,0.004442,0.000635,0.004442,0.001904,0.000635,0.00698,0.005076,0.001269,0.005711,0.005076,0.006345,0.000635,0.000635,0.001904,0.003807,0.005076,0.000635,0.003173,0.003807,0.001269,0.001269,0.000635,0.019036,0.000635,0.003173,0.000635,0.000635,0.000635,0.001269,0.014594,0.001269,0.008249,0.002538,0.001269,0.001269,0.000635,0.011421,0.008883,0.001269,0.027919,0.000635,0.000635,0.000635,0.003807,0.001904,0.003807,0.003173,0.000635,0.000635,0.00698,0.001904,0.003807,0.001269,0.002538,0.002538,0.000635,0.000635,0.000635,0.005076,0.000635,0.003173,0.003807,0.004442,0.013325,0.005076,0.008249,0.001904,0.003173,0.000635,0.000635,0.008883,0.000635,0.013325,0.017766,0.000635,0.000635,0.000635,0.001269,0.001269,0.004442,0.001269,0.015228,0.000635,0.000635,0.002538,0.002538,0.000635,0.000635,0.004442
std,455.096327,0.008899,0.01384,224.894546,0.02519,0.02519,0.02519,0.02519,0.124985,0.02519,0.043602,0.100278,0.043602,0.066518,0.066518,0.02519,0.02519,0.056254,0.050331,0.147407,0.056254,0.132144,0.035612,0.061604,0.02519,0.11736,0.035612,0.043602,0.056254,0.083279,0.043602,0.11197,0.056254,0.02519,0.086954,0.090476,0.071089,0.239248,0.02519,0.061604,0.056254,0.071089,0.02519,0.050331,0.093861,0.097124,0.267328,0.02519,0.035612,0.050331,0.043602,0.10917,0.02519,0.106292,0.071089,0.02519,0.071089,0.090476,0.071089,0.02519,0.075377,0.100278,0.035612,0.035612,0.035612,0.043602,0.035612,0.066518,0.043602,0.035612,0.035612,0.02519,0.083279,0.050331,0.043602,0.02519,0.043602,0.071089,0.066518,0.035612,0.066518,0.066518,0.066518,0.043602,0.050331,0.136693,0.043602,0.02519,0.050331,0.035612,0.050331,0.02519,0.066518,0.097124,0.114698,0.071089,0.02519,0.035612,0.02519,0.02519,0.050331,0.02519,0.035612,0.050331,0.02519,0.191429,0.02519,0.10917,0.071089,0.02519,0.143226,0.1554,0.056254,0.035612,0.02519,0.043602,0.035612,0.086954,0.035612,0.050331,0.061604,0.066518,0.02519,0.066518,0.043602,0.02519,0.083279,0.071089,0.035612,0.075377,0.071089,0.079429,0.02519,0.02519,0.043602,0.061604,0.071089,0.02519,0.056254,0.061604,0.035612,0.035612,0.02519,0.136693,0.02519,0.056254,0.02519,0.02519,0.02519,0.035612,0.119959,0.035612,0.090476,0.050331,0.035612,0.035612,0.02519,0.106292,0.093861,0.035612,0.164792,0.02519,0.02519,0.02519,0.061604,0.043602,0.061604,0.056254,0.02519,0.02519,0.083279,0.043602,0.061604,0.035612,0.050331,0.050331,0.02519,0.02519,0.02519,0.071089,0.02519,0.056254,0.061604,0.066518,0.114698,0.071089,0.090476,0.043602,0.056254,0.02519,0.02519,0.093861,0.02519,0.114698,0.132144,0.02519,0.02519,0.02519,0.035612,0.035612,0.066518,0.035612,0.122499,0.02519,0.02519,0.050331,0.050331,0.02519,0.02519,0.066518
min,0.0,43.628947,-79.422564,9.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,393.75,43.647177,-79.387383,319.75,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,787.5,43.651494,-79.381752,463.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,1181.25,43.657952,-79.375418,639.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
max,1575.0,43.679563,-79.360636,999.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


# Manually Selecting (Subsetting) Related Features for the Grocery Contractor

In [22]:
important_list_of_features = ['Neighborhood',
 'Neighborhood Latitude',
 'Neighborhood Longitude',
 'Grocery Store',
 'BBQ Joint',
 'Filipino Restaurant',
 'Pie Shop',
 'Japanese Restaurant',
 'Breakfast Spot',
 'Sandwich Place',
 'Diner',
 'Indian Restaurant',
 'Italian Restaurant',
 'Restaurant',
 'Caribbean Restaurant',
 'Deli / Bodega',
 'Taiwanese Restaurant',
 'Thai Restaurant',
 'Steakhouse',
 'American Restaurant',
 'Theme Restaurant',
 'Ramen Restaurant',
 'Pizza Place',
 'Burger Joint',
 'Mexican Restaurant',
 'Ethiopian Restaurant',
 'General Entertainment',
 'Creperie',
 'Sushi Restaurant',
 'Wings Joint',
 'Supermarket',
 'Vietnamese Restaurant',
 'Afghan Restaurant',
 'Mediterranean Restaurant',
 'Portuguese Restaurant',
 'Chinese Restaurant',
 'Seafood Restaurant',
 'Middle Eastern Restaurant',
 'Falafel Restaurant',
 'Vegetarian / Vegan Restaurant',
 'Bakery',
 'Farmers Market',
 'French Restaurant',
 'Spanish Restaurant',
 'German Restaurant',
 'Food Truck',
 'Fast Food Restaurant',
 'Burrito Place',
 'Taco Place',
 'Food Court',
 'Latin American Restaurant',
 'Bistro',
 'Fish Market',
 'Belgian Restaurant',
 'Greek Restaurant',
 'Salad Place',
 'Fried Chicken Joint',
 'Comfort Food Restaurant',
 'Tapas Restaurant',
 'Asian Restaurant',
 'Noodle House',
 'Brazilian Restaurant',
 'Dumpling Restaurant',
 'Doner Restaurant',
 'Pastry Shop',
 'Eastern European Restaurant',
 'Hot Dog Joint',
 'Organic Grocery',
 'Persian Restaurant',
 'Donut Shop',
 'Gourmet Shop',
 'Fish & Chips Shop',
 'Korean Restaurant',
 'South American Restaurant',
 'Cupcake Shop',
 'Health Food Store',
 'Jewish Restaurant']

# Updating the One-hot Encoded DataFrame and
# Grouping the Data by Neighborhoods

In [23]:
dt_onehot = dt_onehot[important_list_of_features].drop(columns = ['Neighborhood Latitude', 'Neighborhood Longitude'])
dt_onehot = dt_onehot.groupby('Neighborhood').sum()



In [24]:
dt_onehot.head()

Unnamed: 0_level_0,Grocery Store,BBQ Joint,Filipino Restaurant,Pie Shop,Japanese Restaurant,Breakfast Spot,Sandwich Place,Diner,Indian Restaurant,Italian Restaurant,Restaurant,Caribbean Restaurant,Deli / Bodega,Taiwanese Restaurant,Thai Restaurant,Steakhouse,American Restaurant,Theme Restaurant,Ramen Restaurant,Pizza Place,Burger Joint,Mexican Restaurant,Ethiopian Restaurant,General Entertainment,Creperie,Sushi Restaurant,Wings Joint,Supermarket,Vietnamese Restaurant,Afghan Restaurant,Mediterranean Restaurant,Portuguese Restaurant,Chinese Restaurant,Seafood Restaurant,Middle Eastern Restaurant,Falafel Restaurant,Vegetarian / Vegan Restaurant,Bakery,Farmers Market,French Restaurant,Spanish Restaurant,German Restaurant,Food Truck,Fast Food Restaurant,Burrito Place,Taco Place,Food Court,Latin American Restaurant,Bistro,Fish Market,Belgian Restaurant,Greek Restaurant,Salad Place,Fried Chicken Joint,Comfort Food Restaurant,Tapas Restaurant,Asian Restaurant,Noodle House,Brazilian Restaurant,Dumpling Restaurant,Doner Restaurant,Pastry Shop,Eastern European Restaurant,Hot Dog Joint,Organic Grocery,Persian Restaurant,Donut Shop,Gourmet Shop,Fish & Chips Shop,Korean Restaurant,South American Restaurant,Cupcake Shop,Health Food Store,Jewish Restaurant
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1
"Adelaide, King, Richmond",0,0,0,0,3,1,0,0,0,0,2,0,1,0,2,2,3,0,1,2,1,0,0,0,0,2,0,0,0,0,1,0,0,1,0,0,2,2,0,0,0,0,0,0,1,1,2,0,0,0,0,1,0,0,0,0,2,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
Berczy Park,1,1,0,0,3,3,0,1,0,2,3,0,1,0,1,2,1,0,0,1,0,0,0,0,1,0,0,1,0,0,0,0,0,1,1,0,0,4,2,1,0,0,1,0,0,0,0,0,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
"CN Tower, Bathurst Quay, Island airport, Harbourfront West, King and Spadina, Railway Lands, South Niagara",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
"Cabbagetown, St. James Town",0,0,1,1,2,0,0,2,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
Central Bay Street,1,0,0,0,2,2,1,1,0,2,0,0,0,0,2,1,1,0,4,2,2,1,0,0,1,2,0,1,0,0,0,1,2,1,1,1,2,1,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


# Integrating Different Restaurants and Different Joints
# (Assuming Different Resaturants Use the Same Raw Groceries)
# This Assumption is made for simplicity and due to not having very large dataset about neighborhoods.

In [25]:
feat_name_list = list(dt_onehot.columns)
restaurant_list = []


for counter, value in enumerate(feat_name_list):
    if value.find('Restaurant') != (-1):
        restaurant_list.append(value)
        
dt_onehot['Total Restaurants'] = dt_onehot[restaurant_list].sum(axis = 1)
dt_onehot = dt_onehot.drop(columns = restaurant_list)


feat_name_list = list(dt_onehot.columns)
joint_list = []


for counter, value in enumerate(feat_name_list):
    if value.find('Joint') != (-1):
        joint_list.append(value)
        
dt_onehot['Total Joints'] = dt_onehot[joint_list].sum(axis = 1)
dt_onehot = dt_onehot.drop(columns = joint_list)

In [26]:
dt_onehot.head()

Unnamed: 0_level_0,Grocery Store,Pie Shop,Breakfast Spot,Sandwich Place,Diner,Deli / Bodega,Steakhouse,Pizza Place,General Entertainment,Creperie,Supermarket,Bakery,Farmers Market,Food Truck,Burrito Place,Taco Place,Food Court,Bistro,Fish Market,Salad Place,Noodle House,Pastry Shop,Organic Grocery,Donut Shop,Gourmet Shop,Fish & Chips Shop,Cupcake Shop,Health Food Store,Total Restaurants,Total Joints
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1
"Adelaide, King, Richmond",0,0,1,0,0,1,2,2,0,0,0,2,0,0,1,1,2,0,0,0,1,0,0,0,0,0,0,0,21,1
Berczy Park,1,0,3,0,1,1,2,1,0,1,1,4,2,1,0,0,0,1,1,1,0,0,0,0,0,0,0,0,16,2
"CN Tower, Bathurst Quay, Island airport, Harbourfront West, King and Spadina, Railway Lands, South Niagara",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
"Cabbagetown, St. James Town",0,1,0,0,2,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,10,0
Central Bay Street,1,0,2,1,1,0,1,2,0,1,1,1,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,23,2


# Run k-means to Cluster Neighborhoods into 5 Clusters

In [27]:
# import k-means from clustering stage
from sklearn.cluster import KMeans

# run k-means clustering
kmeans = KMeans(n_clusters = 5, random_state = 0).fit(dt_onehot)


# Showing Centers of Each Cluster

In [28]:
means_df = pd.DataFrame(kmeans.cluster_centers_)
means_df.columns = dt_onehot.columns
means_df.index = ['G1','G2','G3','G4','G5']
means_df['Total Sum'] = means_df.sum(axis = 1)
means_df.sort_values(axis = 0, by = ['Total Sum'], ascending=False)

Unnamed: 0,Grocery Store,Pie Shop,Breakfast Spot,Sandwich Place,Diner,Deli / Bodega,Steakhouse,Pizza Place,General Entertainment,Creperie,Supermarket,Bakery,Farmers Market,Food Truck,Burrito Place,Taco Place,Food Court,Bistro,Fish Market,Salad Place,Noodle House,Pastry Shop,Organic Grocery,Donut Shop,Gourmet Shop,Fish & Chips Shop,Cupcake Shop,Health Food Store,Total Restaurants,Total Joints,Total Sum
G5,3.5,0.0,0.5,1.0,2.0,0.5,0.5,2.0,0.5,0.5,0.5,0.5,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.5,32.0,2.5,48.0
G3,0.5,0.0,0.5,0.5,0.0,0.0,0.0,1.5,0.0,0.5,1.0,5.0,1.0,0.0,1.0,0.0,0.5,0.0,0.5,0.0,0.5,0.5,1.0,1.0,0.5,0.5,0.0,0.0,28.0,2.0,46.5
G4,0.375,-2.775558e-17,1.5,0.25,1.0,0.875,1.75,1.5,-1.387779e-17,0.5,0.375,2.0,0.625,0.625,0.75,0.25,0.875,0.25,0.0,0.375,0.5,-1.387779e-17,-2.775558e-17,-2.775558e-17,-1.387779e-17,-1.387779e-17,-1.387779e-17,-1.387779e-17,21.875,1.25,37.5
G1,0.5,0.25,1.25,0.0,1.0,1.25,1.5,1.0,0.0,0.5,0.5,2.0,1.0,0.5,0.0,0.0,0.0,0.75,0.5,0.75,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,14.25,1.75,29.25
G2,1.0,0.5,0.5,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.5,4.0


# Result:
# Best Group is G5;
# Second Best Group is G3;
# Third Best Group is G4;
# Inserting "kmeans.labels_" into the Original Scarborough DataFrame
# Finding the Corresponding Group for Each Neighborhood.

In [29]:
neigh_summary = pd.DataFrame([dt_data['Neighborhood'], 1 + kmeans.labels_]).T
neigh_summary.columns = ['Neighborhood', 'Group']
neigh_summary

Unnamed: 0,Neighborhood,Group
0,Rosedale,4
1,"Cabbagetown, St. James Town",1
2,Church and Wellesley,2
3,"Harbourfront, Regent Park",1
4,"Ryerson, Garden District",4
5,St. James Town,3
6,Berczy Park,5
7,Central Bay Street,5
8,"Adelaide, King, Richmond",4
9,"Harbourfront East, Toronto Islands, Union Station",4


# Deducing Results:

# Best Neighborhood Are...

In [30]:
neigh_summary[neigh_summary['Group'] == 5]

Unnamed: 0,Neighborhood,Group
6,Berczy Park,5
7,Central Bay Street,5


In [36]:
name_of_neigh = list(neigh_summary[neigh_summary['Group'] == 5]['Neighborhood'])[0]
dt_venues[dt_venues['Neighborhood'] == name_of_neigh].iloc[0,1:5].to_dict()

{'Postal Code': 'M5E',
 'Neighborhood': 'Berczy Park',
 'Neighborhood Latitude': 43.644770799999996,
 'Neighborhood Longitude': -79.3733064}

In [37]:
name_of_neigh = list(neigh_summary[neigh_summary['Group'] == 5]['Neighborhood'])[1]
dt_venues[dt_venues['Neighborhood'] == name_of_neigh].iloc[0,1:5].to_dict()

{'Postal Code': 'M5G',
 'Neighborhood': 'Central Bay Street',
 'Neighborhood Latitude': 43.6579524,
 'Neighborhood Longitude': -79.3873826}

# Second Best Neighborhoods

In [32]:
neigh_summary[neigh_summary['Group'] == 3]

Unnamed: 0,Neighborhood,Group
5,St. James Town,3
11,"Commerce Court, Victoria Hotel",3


# Third Best Neighborhood

In [33]:
neigh_summary[neigh_summary['Group'] == 4]

Unnamed: 0,Neighborhood,Group
0,Rosedale,4
4,"Ryerson, Garden District",4
8,"Adelaide, King, Richmond",4
9,"Harbourfront East, Toronto Islands, Union Station",4
10,"Design Exchange, Toronto Dominion Centre",4
13,"Chinatown, Grange Park, Kensington Market",4
15,Stn A PO Boxes 25 The Esplanade,4
16,"First Canadian Place, Underground city",4


In [34]:
name_of_neigh = list(neigh_summary[neigh_summary['Group'] == 4]['Neighborhood'])[0]
dt_venues[dt_venues['Neighborhood'] == name_of_neigh].iloc[0,1:5].to_dict()

{'Postal Code': 'M4W',
 'Neighborhood': 'Rosedale',
 'Neighborhood Latitude': 43.6795626,
 'Neighborhood Longitude': -79.37752940000001}