# installing/importing necessary libraries for assignment

In [None]:
pip install geopy

In [None]:
!pip install lxml

In [1]:
import lxml
import pandas as pd
import numpy as np
from pandas.io.json import json_normalize
import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans

import json
from geopy.geocoders import Nominatim 

# Download & Explore Data Set into a Pandas Dataframe

In [2]:
read= 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
ds = pd.read_html(read, index_col=[0])
Ctable = ds[0]
Ctable

Unnamed: 0_level_0,Borough,Neighbourhood
Postal Code,Unnamed: 1_level_1,Unnamed: 2_level_1
M1A,Not assigned,Not assigned
M2A,Not assigned,Not assigned
M3A,North York,Parkwoods
M4A,North York,Victoria Village
M5A,Downtown Toronto,"Regent Park, Harbourfront"
...,...,...
M5Z,Not assigned,Not assigned
M6Z,Not assigned,Not assigned
M7Z,Not assigned,Not assigned
M8Z,Etobicoke,"Mimico NW, The Queensway West, South of Bloor,..."


# processes the cells that have an assigned borough. Ignore cells with a borough that is Not assigned. This shows how many rows have a not assigned value.

In [3]:
Ctable.Borough.value_counts()

Not assigned        77
North York          24
Downtown Toronto    19
Scarborough         17
Etobicoke           12
Central Toronto      9
West Toronto         6
East York            5
East Toronto         5
York                 5
Mississauga          1
Name: Borough, dtype: int64

# replaces 'Not assigned' with another value

In [4]:
Ctable.Borough.replace("Not assigned", np.nan, inplace = True) 
Ctable.head()

Unnamed: 0_level_0,Borough,Neighbourhood
Postal Code,Unnamed: 1_level_1,Unnamed: 2_level_1
M1A,,Not assigned
M2A,,Not assigned
M3A,North York,Parkwoods
M4A,North York,Victoria Village
M5A,Downtown Toronto,"Regent Park, Harbourfront"


In [5]:
Ctable.Borough.value_counts 

<bound method IndexOpsMixin.value_counts of Postal Code
M1A                 NaN
M2A                 NaN
M3A          North York
M4A          North York
M5A    Downtown Toronto
             ...       
M5Z                 NaN
M6Z                 NaN
M7Z                 NaN
M8Z           Etobicoke
M9Z                 NaN
Name: Borough, Length: 180, dtype: object>

# drops nan from the table

In [11]:
Ctable.dropna(axis=0, inplace=True)
Ctable = Ctable.reset_index()
Ctable = Ctable.drop(['index'], axis=1)
Ctable.head(20)

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
5,M9A,Etobicoke,"Islington Avenue, Humber Valley Village"
6,M1B,Scarborough,"Malvern, Rouge"
7,M3B,North York,Don Mills
8,M4B,East York,"Parkview Hill, Woodbine Gardens"
9,M5B,Downtown Toronto,"Garden District, Ryerson"


# Deletes the first column called level_0

In [12]:
del Ctable['level_0']

KeyError: 'level_0'

In [13]:
Ctable

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North"
99,M4Y,Downtown Toronto,Church and Wellesley
100,M7Y,East Toronto,"Business reply mail Processing Centre, South C..."
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu..."


# look at the first 12 rows

In [14]:
Ctable.head(12)

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
5,M9A,Etobicoke,"Islington Avenue, Humber Valley Village"
6,M1B,Scarborough,"Malvern, Rouge"
7,M3B,North York,Don Mills
8,M4B,East York,"Parkview Hill, Woodbine Gardens"
9,M5B,Downtown Toronto,"Garden District, Ryerson"


# This is where the selections are grouped by postal code, borough and neighborhood

In [15]:
Ctable = Ctable.groupby(['Postal Code', 'Borough'])['Neighbourhood'].apply(lambda x: "%s" % ', '.join(x))
Ctable = Ctable.reset_index()
Ctable.head(50)

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"Kennedy Park, Ionview, East Birchmount Park"
7,M1L,Scarborough,"Golden Mile, Clairlea, Oakridge"
8,M1M,Scarborough,"Cliffside, Cliffcrest, Scarborough Village West"
9,M1N,Scarborough,"Birch Cliff, Cliffside West"


# Obtaining the shape of the table: columns and rows

In [16]:
Ctable.shape

(103, 3)

# Read in the second file

In [37]:
read2 = 'http://cocl.us/Geospatial_data'
Geof = pd.read_csv(read2, index_col=[0])
Geof

Unnamed: 0_level_0,Latitude,Longitude
Postal Code,Unnamed: 1_level_1,Unnamed: 2_level_1
M1B,43.806686,-79.194353
M1C,43.784535,-79.160497
M1E,43.763573,-79.188711
M1G,43.770992,-79.216917
M1H,43.773136,-79.239476
...,...,...
M9N,43.706876,-79.518188
M9P,43.696319,-79.532242
M9R,43.688905,-79.554724
M9V,43.739416,-79.588437


# Merge first dataframe with newly read dataframe

In [45]:
GeoTable=pd.merge(Ctable,Geof, on='Postal Code')
GeoTable

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
...,...,...,...,...,...
98,M9N,York,Weston,43.706876,-79.518188
99,M9P,Etobicoke,Westmount,43.696319,-79.532242
100,M9R,Etobicoke,"Kingsview Village, St. Phillips, Martin Grove ...",43.688905,-79.554724
101,M9V,Etobicoke,"South Steeles, Silverstone, Humbergate, Jamest...",43.739416,-79.588437


# Install and import folium for map visualization

In [50]:
pip install folium

Collecting folium
  Downloading folium-0.11.0-py2.py3-none-any.whl (93 kB)
Collecting branca>=0.3.0
  Downloading branca-0.4.1-py3-none-any.whl (24 kB)
Installing collected packages: branca, folium
Successfully installed branca-0.4.1 folium-0.11.0
Note: you may need to restart the kernel to use updated packages.


In [51]:
import folium

# determine Toronto, Canada's coordinates

In [229]:
Add = 'Toronto, Canada'
geoL = Nominatim(user_agent="to_explorer")
location = geoL.geocode(Add)
Lat = location.latitude
Long = location.longitude
print('The geograpical coordinates of Toronto, Canada are the following: {}, {}.'.format(Lat, Long))

The geograpical coordinates of Toronto, Canada are the following: 43.6534817, -79.3839347.


# Visualize Toronto with folium maps

In [92]:
Tmap = folium.Map(location=[43.6534817, -79.3839347], tiles='stamenterrain', zoom_start=12)
print("This is a map of Toronto and surrounding areas ")
Tmap

This is a map of Toronto and surrounding areas 


# Adding labels and markers to Toronto Map

In [98]:
for lat, lng, borough, neighborhood in zip(GeoTable['Latitude'], GeoTable['Longitude'], GeoTable['Borough'], GeoTable['Neighbourhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=7,
        popup=label,
        color='purple',
        fill=True,
        fill_color='#232c43',
        fill_opacity=0.7,
        parse_html=False).add_to(Tmap) 
print(Tmap)

## Peer-graded Assignment: Capstone Project - The Battle of Neighborhoods Code & Write-up

## idea or problem to analyze: 
What are the most popular soul food/carribean eateries in Cedarbrae and if not, what neignborhood would be the best location for a soul food or west-indian caribbean eatery?


## set up foursquare API with a limit of 100 and a radius of 300m.

In [153]:
LIMIT = 100
radius= 500
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, VERSION, neighbourhood_latitude, neighbourhood_longitude, radius, LIMIT)

print('Your credentials:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)
print(url)

Your credentials:
CLIENT_ID: I4M2KNRBWYSOJSGHRGE2JW2UU5IOHHLNTPS3DO4NQ3RIPUUN
CLIENT_SECRET:WYB0HR2B4TKFAVSHXNMX3N253I4NMHL1OI5ZZQZDO1RFM3W5
https://api.foursquare.com/v2/venues/explore?&client_id=I4M2KNRBWYSOJSGHRGE2JW2UU5IOHHLNTPS3DO4NQ3RIPUUN&client_secret=WYB0HR2B4TKFAVSHXNMX3N253I4NMHL1OI5ZZQZDO1RFM3W5&v=20180605&ll=43.773136,-79.23947609999999&radius=500&limit=100


## import the proper libraries to convert from JSON to pandas df and to handle API requests

In [126]:
import requests 
from pandas.io.json import json_normalize 

In [154]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5f9a1066dd96685c64badbef'},
 'response': {'headerLocation': 'Woburn',
  'headerFullLocation': 'Woburn, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 8,
  'suggestedBounds': {'ne': {'lat': 43.7776360045, 'lng': -79.2332557734104},
   'sw': {'lat': 43.7686359955, 'lng': -79.24569642658957}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4e261f261f6eb1ae13930699',
       'name': "Drupati's Roti & Doubles",
       'location': {'address': '1085 Bellamy Rd N',
        'crossStreet': 'Bellamy & Ellesmere',
        'lat': 43.775222138791534,
        'lng': -79.24167761001029,
        'labeledLatLngs': [{'label': 'display',
          'lat': 43.775222138791534,
          'lng': -79.24167761001029}],
        'dista

## Explore Toronto Neighborhoods, finding the coodinates for each location or just the location of choice: Cedarbrae

In [112]:
GeoTable.loc[4, 'Neighbourhood']
neighbourhood_latitude = GeoTable.loc[4, 'Latitude'] 
neighbourhood_longitude = GeoTable.loc[4, 'Longitude']

neighbourhood_name = GeoTable.loc[4, 'Neighbourhood'] 

print('Latitude and longitude values of {} are {}, {}.'.format(neighbourhood_name, 
                                                               neighbourhood_latitude, 
                                                               neighbourhood_longitude))

Latitude and longitude values of Cedarbrae are 43.773136, -79.23947609999999.


## Send the GET request  function and examine the query

In [155]:

def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

## clean and structure the data into a pd

In [162]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) 


filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]


nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)


nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

8 venues were returned by Foursquare.


  This is separate from the ipykernel package so we can avoid doing imports until


In [163]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
       
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

## Running get function on each Toronto Neighborhood & obtaining results

In [164]:
SFCvenues = getNearbyVenues(names=GeoTable['Neighbourhood'],
                                   latitudes=GeoTable['Latitude'],
                                   longitudes=GeoTable['Longitude']
                                  )

Malvern, Rouge
Rouge Hill, Port Union, Highland Creek
Guildwood, Morningside, West Hill
Woburn
Cedarbrae
Scarborough Village
Kennedy Park, Ionview, East Birchmount Park
Golden Mile, Clairlea, Oakridge
Cliffside, Cliffcrest, Scarborough Village West
Birch Cliff, Cliffside West
Dorset Park, Wexford Heights, Scarborough Town Centre
Wexford, Maryvale
Agincourt
Clarks Corners, Tam O'Shanter, Sullivan
Milliken, Agincourt North, Steeles East, L'Amoreaux East
Steeles West, L'Amoreaux West
Upper Rouge
Hillcrest Village
Fairview, Henry Farm, Oriole
Bayview Village
York Mills, Silver Hills
Willowdale, Newtonbrook
Willowdale, Willowdale East
York Mills West
Willowdale, Willowdale West
Parkwoods
Don Mills
Don Mills
Bathurst Manor, Wilson Heights, Downsview North
Northwood Park, York University
Downsview
Downsview
Downsview
Downsview
Victoria Village
Parkview Hill, Woodbine Gardens
Woodbine Heights
The Beaches
Leaside
Thorncliffe Park
East Toronto, Broadview North (Old East York)
The Danforth West, 

## Obtain table shape and first 200 rows from new df

In [165]:
print(SFCvenues.shape)
SFCvenues.head(200)

(2136, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Malvern, Rouge",43.806686,-79.194353,Wendy’s,43.807448,-79.199056,Fast Food Restaurant
1,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497,Royal Canadian Legion,43.782533,-79.163085,Bar
2,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497,SEBS Engineering Inc. (Sustainable Energy and ...,43.782371,-79.156820,Construction & Landscaping
3,"Guildwood, Morningside, West Hill",43.763573,-79.188711,RBC Royal Bank,43.766790,-79.191151,Bank
4,"Guildwood, Morningside, West Hill",43.763573,-79.188711,G & G Electronics,43.765309,-79.191537,Electronics Store
...,...,...,...,...,...,...,...
195,"Willowdale, Willowdale East",43.770120,-79.408493,Subway,43.771401,-79.413457,Sandwich Place
196,"Willowdale, Willowdale East",43.770120,-79.408493,Pho 88 Vietnamese Cuisine,43.770456,-79.413064,Vietnamese Restaurant
197,"Willowdale, Willowdale East",43.770120,-79.408493,Freshslice Pizza,43.771353,-79.413487,Pizza Place
198,"Willowdale, Willowdale East",43.770120,-79.408493,Ten Ren's Tea Time 喫茶新饌,43.769575,-79.412597,Bubble Tea Shop


## Find unique categories

In [169]:
SFCvenues.groupby('Neighborhood').count()
print('There are {} unique categories.'.format(len(SFCvenues['Venue Category'].unique())))

There are 273 unique categories.


## Analyze each Toronto Neighborhood after shaping data

In [171]:
SFCvenuesOH = pd.get_dummies(SFCvenues[['Venue Category']], prefix="", prefix_sep="")


SFCvenuesOH['Neighborhood'] = SFCvenues['Neighborhood'] 

fixed_columns = [SFCvenuesOH.columns[-1]] + list(SFCvenuesOH.columns[:-1])
SFCvenuesOH = SFCvenuesOH[fixed_columns]

## obtain shape of new df

In [173]:
SFCvenues.shape
SFCvenues

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Malvern, Rouge",43.806686,-79.194353,Wendy’s,43.807448,-79.199056,Fast Food Restaurant
1,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497,Royal Canadian Legion,43.782533,-79.163085,Bar
2,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497,SEBS Engineering Inc. (Sustainable Energy and ...,43.782371,-79.156820,Construction & Landscaping
3,"Guildwood, Morningside, West Hill",43.763573,-79.188711,RBC Royal Bank,43.766790,-79.191151,Bank
4,"Guildwood, Morningside, West Hill",43.763573,-79.188711,G & G Electronics,43.765309,-79.191537,Electronics Store
...,...,...,...,...,...,...,...
2131,"South Steeles, Silverstone, Humbergate, Jamest...",43.739416,-79.588437,LCBO,43.741508,-79.584501,Liquor Store
2132,"Northwest, West Humber - Clairville",43.706748,-79.594054,Economy Rent A Car,43.708471,-79.589943,Rental Car Location
2133,"Northwest, West Humber - Clairville",43.706748,-79.594054,Logistics Distribution,43.707554,-79.589252,Bar
2134,"Northwest, West Humber - Clairville",43.706748,-79.594054,Saand Rexdale,43.705072,-79.598725,Drugstore


In [174]:
SFCgrouped = SFCvenuesOH.groupby('Neighborhood').mean().reset_index()
SFCgrouped

Unnamed: 0,Neighborhood,Yoga Studio,Accessories Store,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,...,Trail,Train Station,Turkish Restaurant,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wings Joint,Women's Store
0,Agincourt,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,"Alderwood, Long Branch",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Bathurst Manor, Wilson Heights, Downsview North",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Bayview Village,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,"Bedford Park, Lawrence Manor East",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
91,"Willowdale, Willowdale West",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
92,Woburn,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
93,Woodbine Heights,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
94,York Mills West,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


## New Group Shape

In [175]:
SFCgrouped.shape

(96, 273)

## Displays the top 5 eateries in each Toronto neighborhood

In [183]:
num_top_venues = 5

for hood in SFCgrouped['Neighborhood']:
    print("----"+hood+"----")
    temp = SFCgrouped[SFCgrouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = [' Venue','Frequency']
    temp = temp.iloc[1:]
    temp['Frequency'] = temp['Frequency'].astype(float)
    temp = temp.round({'Frequency': 2})
    print(temp.sort_values('Frequency', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Agincourt----
                       Venue  Frequency
0  Latin American Restaurant        0.2
1             Breakfast Spot        0.2
2                     Lounge        0.2
3               Skating Rink        0.2
4             Clothing Store        0.2


----Alderwood, Long Branch----
            Venue  Frequency
0     Pizza Place       0.29
1             Gym       0.14
2  Sandwich Place       0.14
3             Pub       0.14
4     Coffee Shop       0.14


----Bathurst Manor, Wilson Heights, Downsview North----
                       Venue  Frequency
0                Coffee Shop       0.10
1                       Bank       0.10
2                   Pharmacy       0.05
3              Shopping Mall       0.05
4  Middle Eastern Restaurant       0.05


----Bayview Village----
                 Venue  Frequency
0  Japanese Restaurant       0.25
1                 Café       0.25
2   Chinese Restaurant       0.25
3                 Bank       0.25
4        Movie Theater       0.00


----B

## Function to sort df in descending order

In [184]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [186]:
indicators = ['st', 'nd', 'rd']


columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))


NVsorted = pd.DataFrame(columns=columns)
NVsorted['Neighborhood'] = SFCgrouped['Neighborhood']

for ind in np.arange(SFCgrouped.shape[0]):
   NVsorted.iloc[ind, 1:] = return_most_common_venues(SFCgrouped.iloc[ind, :], num_top_venues)

NVsorted

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,Agincourt,Lounge,Skating Rink,Latin American Restaurant,Breakfast Spot,Clothing Store
1,"Alderwood, Long Branch",Pizza Place,Pharmacy,Gym,Sandwich Place,Coffee Shop
2,"Bathurst Manor, Wilson Heights, Downsview North",Coffee Shop,Bank,Frozen Yogurt Shop,Shopping Mall,Bridal Shop
3,Bayview Village,Café,Japanese Restaurant,Chinese Restaurant,Bank,Women's Store
4,"Bedford Park, Lawrence Manor East",Coffee Shop,Sandwich Place,Italian Restaurant,Greek Restaurant,Sushi Restaurant
...,...,...,...,...,...,...
91,"Willowdale, Willowdale West",Pizza Place,Grocery Store,Coffee Shop,Butcher,Pharmacy
92,Woburn,Coffee Shop,Mexican Restaurant,Korean Restaurant,Women's Store,Drugstore
93,Woodbine Heights,Skating Rink,Park,Athletics & Sports,Beer Store,Dance Studio
94,York Mills West,Park,Convenience Store,Women's Store,Drugstore,Diner


## The above table is now converted to a df

In [188]:
NVsorted = pd.DataFrame(columns=columns)
NVsorted['Neighborhood'] = SFCgrouped['Neighborhood']

for ind in np.arange(SFCgrouped.shape[0]):
    NVsorted.iloc[ind, 1:] = return_most_common_venues(SFCgrouped.iloc[ind, :], num_top_venues)

NVsorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,Agincourt,Lounge,Skating Rink,Latin American Restaurant,Breakfast Spot,Clothing Store
1,"Alderwood, Long Branch",Pizza Place,Pharmacy,Gym,Sandwich Place,Coffee Shop
2,"Bathurst Manor, Wilson Heights, Downsview North",Coffee Shop,Bank,Frozen Yogurt Shop,Shopping Mall,Bridal Shop
3,Bayview Village,Café,Japanese Restaurant,Chinese Restaurant,Bank,Women's Store
4,"Bedford Park, Lawrence Manor East",Coffee Shop,Sandwich Place,Italian Restaurant,Greek Restaurant,Sushi Restaurant


## k-means clustering

In [196]:
kclusters = 4

SFC_clustering = SFCgrouped.drop('Neighborhood', 1)


SFCkmeans = KMeans(n_clusters=kclusters, random_state=0).fit(SFC_clustering)


SFCkmeans.labels_[0:10]

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1])

# add clustering labels and merge dfs to add latitude and longitude data for each neighborhood

In [218]:
GeoTableMerged = GeoTable

GeoTableMerged = GeoTableMerged.join(NVsorted.set_index('Neighborhood'), on='Neighborhood')

GeoTableMerged.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353,3.0,Fast Food Restaurant,Drugstore,Diner,Discount Store,Distribution Center
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497,1.0,Construction & Landscaping,Bar,Women's Store,Dumpling Restaurant,Distribution Center
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711,1.0,Intersection,Mexican Restaurant,Electronics Store,Breakfast Spot,Restaurant
3,M1G,Scarborough,Woburn,43.770992,-79.216917,1.0,Coffee Shop,Mexican Restaurant,Korean Restaurant,Women's Store,Drugstore
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476,1.0,Hakka Restaurant,Athletics & Sports,Bakery,Gas Station,Caribbean Restaurant


## Now we have both dfs merged into one showing the top five most common venues, coordinates and cluster labels.

In [219]:
GeoTableMerged

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353,3.0,Fast Food Restaurant,Drugstore,Diner,Discount Store,Distribution Center
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497,1.0,Construction & Landscaping,Bar,Women's Store,Dumpling Restaurant,Distribution Center
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711,1.0,Intersection,Mexican Restaurant,Electronics Store,Breakfast Spot,Restaurant
3,M1G,Scarborough,Woburn,43.770992,-79.216917,1.0,Coffee Shop,Mexican Restaurant,Korean Restaurant,Women's Store,Drugstore
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476,1.0,Hakka Restaurant,Athletics & Sports,Bakery,Gas Station,Caribbean Restaurant
...,...,...,...,...,...,...,...,...,...,...,...
98,M9N,York,Weston,43.706876,-79.518188,0.0,Park,Women's Store,Drugstore,Diner,Discount Store
99,M9P,Etobicoke,Westmount,43.696319,-79.532242,1.0,Pizza Place,Sandwich Place,Discount Store,Coffee Shop,Chinese Restaurant
100,M9R,Etobicoke,"Kingsview Village, St. Phillips, Martin Grove ...",43.688905,-79.554724,1.0,Pizza Place,Park,Sandwich Place,Bus Line,Doner Restaurant
101,M9V,Etobicoke,"South Steeles, Silverstone, Humbergate, Jamest...",43.739416,-79.588437,1.0,Pizza Place,Grocery Store,Pharmacy,Fried Chicken Joint,Sandwich Place


## Create Cluster map of the following dataframe

In [237]:
MClusters = folium.Map(location=[Lat, Long], zoom_start=11)


x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]


markers_colors = []
for lat, lon, poi, cluster in zip(GeoTableMerged['Latitude'], GeoTableMerged['Longitude'], GeoTableMerged['Neighborhood'], GeoTableMerged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[kclusters-1],
        fill=True,
        fill_color=rainbow[kclusters-1],
        fill_opacity=0.7).add_to(MClusters)
       
MClusters

## Time to examine clusters!

## Cluster 1

In [238]:
GeoTableMerged.loc[GeoTableMerged['Cluster Labels'] == 0, GeoTableMerged.columns[[1] + list(range(5, GeoTableMerged.shape[1]))]]


Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
14,Scarborough,0.0,Park,Playground,Bakery,Women's Store,Drugstore
23,North York,0.0,Park,Convenience Store,Women's Store,Drugstore,Diner
25,North York,0.0,Food & Drink Shop,Park,Drugstore,Diner,Discount Store
40,East York,0.0,Intersection,Park,Convenience Store,Drugstore,Diner
44,Central Toronto,0.0,Park,Swim School,Bus Line,Women's Store,Donut Shop
50,Downtown Toronto,0.0,Park,Trail,Playground,Donut Shop,Dim Sum Restaurant
64,Central Toronto,0.0,Trail,Park,Sushi Restaurant,Jewelry Store,Donut Shop
74,York,0.0,Park,Women's Store,Pool,Donut Shop,Dim Sum Restaurant
79,North York,0.0,Park,Construction & Landscaping,Bakery,Women's Store,Drugstore
98,York,0.0,Park,Women's Store,Drugstore,Diner,Discount Store


## Cluster 2

In [239]:
GeoTableMerged.loc[GeoTableMerged['Cluster Labels'] == 1, GeoTableMerged.columns[[1] + list(range(5, GeoTableMerged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
1,Scarborough,1.0,Construction & Landscaping,Bar,Women's Store,Dumpling Restaurant,Distribution Center
2,Scarborough,1.0,Intersection,Mexican Restaurant,Electronics Store,Breakfast Spot,Restaurant
3,Scarborough,1.0,Coffee Shop,Mexican Restaurant,Korean Restaurant,Women's Store,Drugstore
4,Scarborough,1.0,Hakka Restaurant,Athletics & Sports,Bakery,Gas Station,Caribbean Restaurant
5,Scarborough,1.0,Playground,Smoke Shop,Jewelry Store,Women's Store,Donut Shop
...,...,...,...,...,...,...,...
96,North York,1.0,Pizza Place,Furniture / Home Store,Doner Restaurant,Dim Sum Restaurant,Diner
99,Etobicoke,1.0,Pizza Place,Sandwich Place,Discount Store,Coffee Shop,Chinese Restaurant
100,Etobicoke,1.0,Pizza Place,Park,Sandwich Place,Bus Line,Doner Restaurant
101,Etobicoke,1.0,Pizza Place,Grocery Store,Pharmacy,Fried Chicken Joint,Sandwich Place


## Cluster 3

In [240]:
GeoTableMerged.loc[GeoTableMerged['Cluster Labels'] == 2, GeoTableMerged.columns[[1] + list(range(5, GeoTableMerged.shape[1]))]]


Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
91,Etobicoke,2.0,Baseball Field,Construction & Landscaping,Women's Store,Distribution Center,Dog Run
97,North York,2.0,Baseball Field,Dumpling Restaurant,Discount Store,Distribution Center,Dog Run


## Cluster 4

In [241]:
GeoTableMerged.loc[GeoTableMerged['Cluster Labels'] == 3, GeoTableMerged.columns[[1] + list(range(5,GeoTableMerged.shape[1]))]]


Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,Scarborough,3.0,Fast Food Restaurant,Drugstore,Diner,Discount Store,Distribution Center


## Cluster 5: Empty

In [242]:
GeoTableMerged.loc[GeoTableMerged['Cluster Labels'] == 4, GeoTableMerged.columns[[1] + list(range(5, GeoTableMerged.shape[1]))]]


Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue


## Assessment:
    Besides eateries, the most common visited venues are public recreational ones such as the park or trail. Food-wise, pizza shops are the most visited across the board. There aren't too many soul food/ west indian restaurants however, some can be placed anywhere the indian restaurants are located such as Scarborough, which already has an Indian eatery in Cedarbrae. In Cedarbrae, there is a caribbean restaurant that is the 5th most common venue. In Scarborough, the first cluster table shows that most people frequent Construction and Landscaping businesses. In North York, East York and York, the most frequented location is the park. In Etobicoke, the most frequented venue is the pizza spot and the baseball park! In the first cluster, people spend the most time at venues where simple errands can be conducted as well as public places to exercise and enjoy outdoors.

## To learn more about Scarborough and surrounding areas

In [247]:
SBMerged = list(GeoTableMerged.columns.values)

dfScar = pd.DataFrame(GeoTableMerged, columns=SBMerged)


SBCount = len(SBMerged)


print('The dataframe {} has {} total boroughs and {} neighborhood listings.'.format(
        "dfScar",
        len(dfScar['Borough'].unique()),
        dfScar.shape[0]
    )
)

dfScar.sort_values(['Postal Code','Borough','Latitude','Longitude'], inplace=True)
dfScar.head(45)

The dataframe dfScar has 10 total boroughs and 103 neighborhood listings.


Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353,3.0,Fast Food Restaurant,Drugstore,Diner,Discount Store,Distribution Center
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497,1.0,Construction & Landscaping,Bar,Women's Store,Dumpling Restaurant,Distribution Center
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711,1.0,Intersection,Mexican Restaurant,Electronics Store,Breakfast Spot,Restaurant
3,M1G,Scarborough,Woburn,43.770992,-79.216917,1.0,Coffee Shop,Mexican Restaurant,Korean Restaurant,Women's Store,Drugstore
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476,1.0,Hakka Restaurant,Athletics & Sports,Bakery,Gas Station,Caribbean Restaurant
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476,1.0,Playground,Smoke Shop,Jewelry Store,Women's Store,Donut Shop
6,M1K,Scarborough,"Kennedy Park, Ionview, East Birchmount Park",43.727929,-79.262029,1.0,Department Store,Coffee Shop,Hobby Shop,Train Station,Drugstore
7,M1L,Scarborough,"Golden Mile, Clairlea, Oakridge",43.711112,-79.284577,1.0,Bus Line,Bakery,Park,Ice Cream Shop,Metro Station
8,M1M,Scarborough,"Cliffside, Cliffcrest, Scarborough Village West",43.716316,-79.239476,1.0,Motel,American Restaurant,Women's Store,Dessert Shop,Diner
9,M1N,Scarborough,"Birch Cliff, Cliffside West",43.692657,-79.264848,1.0,College Stadium,Skating Rink,General Entertainment,Café,Doner Restaurant


## Create function to obtain surrounding venue information

In [248]:
import requests 
from pandas.io.json import json_normalize 

CLIENT_ID  
CLIENT_SECRET 
VERSION 
LIMIT = 100 

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

def getNearbyVenues(boroughs, names, postcodes, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for borough, name, postcode, lat, lng in zip(boroughs, names, postcodes, latitudes, longitudes):
        
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        
        venues_list.append([(
            borough,
            name,
            postcode,
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Borough',
                  'Neighborhood',
                  'PostalCode',
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

Your credentails:
CLIENT_ID: I4M2KNRBWYSOJSGHRGE2JW2UU5IOHHLNTPS3DO4NQ3RIPUUN
CLIENT_SECRET:WYB0HR2B4TKFAVSHXNMX3N253I4NMHL1OI5ZZQZDO1RFM3W5


In [249]:
city_venues = getNearbyVenues(boroughs=dfScar['Borough'],
                              names=dfScar['Neighborhood'],
                              postcodes=dfScar['Postal Code'],
                              latitudes=dfScar['Latitude'],
                              longitudes=dfScar['Longitude']
                             )
print(city_venues.shape)
print(city_venues['Borough'].unique())
print(len(city_venues['Neighborhood'].unique()))
print(len(city_venues['Venue Category'].unique()))

city_venues.head(1625)

(2136, 9)
['Scarborough' 'North York' 'East York' 'East Toronto' 'Central Toronto'
 'Downtown Toronto' 'York' 'West Toronto' 'Mississauga' 'Etobicoke']
96
273


Unnamed: 0,Borough,Neighborhood,PostalCode,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Scarborough,"Malvern, Rouge",M1B,43.806686,-79.194353,Wendy’s,43.807448,-79.199056,Fast Food Restaurant
1,Scarborough,"Rouge Hill, Port Union, Highland Creek",M1C,43.784535,-79.160497,Royal Canadian Legion,43.782533,-79.163085,Bar
2,Scarborough,"Rouge Hill, Port Union, Highland Creek",M1C,43.784535,-79.160497,SEBS Engineering Inc. (Sustainable Energy and ...,43.782371,-79.156820,Construction & Landscaping
3,Scarborough,"Guildwood, Morningside, West Hill",M1E,43.763573,-79.188711,RBC Royal Bank,43.766790,-79.191151,Bank
4,Scarborough,"Guildwood, Morningside, West Hill",M1E,43.763573,-79.188711,G & G Electronics,43.765309,-79.191537,Electronics Store
...,...,...,...,...,...,...,...,...,...
1620,Downtown Toronto,Stn A PO Boxes,M5W,43.646435,-79.374846,Loaded Pierogi,43.647965,-79.373427,Comfort Food Restaurant
1621,Downtown Toronto,Stn A PO Boxes,M5W,43.646435,-79.374846,Winners,43.647748,-79.374551,Department Store
1622,Downtown Toronto,Stn A PO Boxes,M5W,43.646435,-79.374846,Seafront Fish Market,43.648479,-79.371489,Fish Market
1623,Downtown Toronto,Stn A PO Boxes,M5W,43.646435,-79.374846,Olympic Cheese,43.648702,-79.371541,Cheese Shop


## Grouping selections by borough

In [250]:
one = city_venues.groupby(['Borough']).count() 
one.head()

Unnamed: 0_level_0,Neighborhood,PostalCode,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Borough,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Central Toronto,104,104,104,104,104,104,104,104
Downtown Toronto,1248,1248,1248,1248,1248,1248,1248,1248
East Toronto,119,119,119,119,119,119,119,119
East York,77,77,77,77,77,77,77,77
Etobicoke,74,74,74,74,74,74,74,74


## Venue categories per borough

In [251]:
one = city_venues.groupby(['Borough','Neighborhood','Venue Category'], as_index=False).count() 
one.head(2000)

Unnamed: 0,Borough,Neighborhood,Venue Category,PostalCode,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude
0,Central Toronto,Davisville,Brewery,1,1,1,1,1,1
1,Central Toronto,Davisville,Café,2,2,2,2,2,2
2,Central Toronto,Davisville,Coffee Shop,2,2,2,2,2,2
3,Central Toronto,Davisville,Dessert Shop,3,3,3,3,3,3
4,Central Toronto,Davisville,Diner,1,1,1,1,1,1
...,...,...,...,...,...,...,...,...,...
1516,York,"Runnymede, The Junction North",Breakfast Spot,1,1,1,1,1,1
1517,York,"Runnymede, The Junction North",Brewery,1,1,1,1,1,1
1518,York,"Runnymede, The Junction North",Bus Line,1,1,1,1,1,1
1519,York,"Runnymede, The Junction North",Convenience Store,1,1,1,1,1,1


## Clean up the data

In [254]:
COH = pd.get_dummies(city_venues[['Venue Category']], prefix="", prefix_sep="")


COH['Borough'] = city_venues['Borough']
COH['Neighborhood'] = city_venues['Neighborhood']

fixed_columns = [COH.columns[-1]] + list(COH.columns[:-1])
COH = COH[fixed_columns]

COH

Unnamed: 0,Borough,Accessories Store,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Train Station,Turkish Restaurant,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,Scarborough,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Scarborough,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Scarborough,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Scarborough,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Scarborough,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2131,Etobicoke,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2132,Etobicoke,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2133,Etobicoke,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2134,Etobicoke,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


## cleaning data and moving rows to columns to count

In [256]:

FOH = pd.get_dummies(one[['Venue Category']], prefix="", prefix_sep="")


FOH['Borough'] = one['Borough']
FOH['Neighborhood'] = one['Neighborhood']


fixed_columns = [FOH.columns[-1]] + list(FOH.columns[:-1])
FOH = FOH[fixed_columns]

print(FOH.shape)
print(FOH['Neighborhood'])
FOH

(1521, 274)
0                          Davisville
1                          Davisville
2                          Davisville
3                          Davisville
4                          Davisville
                    ...              
1516    Runnymede, The Junction North
1517    Runnymede, The Junction North
1518    Runnymede, The Junction North
1519    Runnymede, The Junction North
1520                           Weston
Name: Neighborhood, Length: 1521, dtype: object


Unnamed: 0,Borough,Accessories Store,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Train Station,Turkish Restaurant,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,Central Toronto,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Central Toronto,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Central Toronto,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Central Toronto,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Central Toronto,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1516,York,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1517,York,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1518,York,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1519,York,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


## group rows by neighborhood

In [257]:
COHM = FOH.groupby('Neighborhood').mean().reset_index()
COHM.head(100)

Unnamed: 0,Neighborhood,Accessories Store,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Train Station,Turkish Restaurant,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,Agincourt,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,"Alderwood, Long Branch",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Bathurst Manor, Wilson Heights, Downsview North",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Bayview Village,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,"Bedford Park, Lawrence Manor East",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.052632,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
91,"Willowdale, Willowdale West",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
92,Woburn,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
93,Woodbine Heights,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
94,York Mills West,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [259]:
COHM.shape

(96, 273)

In [260]:
num_top_venues = 5

for hood in COHM['Neighborhood']:
    print("----"+hood+"----")
    temp = COHM[COHM['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 6})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Agincourt----
                       venue  freq
0             Clothing Store   0.2
1             Breakfast Spot   0.2
2                     Lounge   0.2
3               Skating Rink   0.2
4  Latin American Restaurant   0.2


----Alderwood, Long Branch----
         venue      freq
0  Pizza Place  0.166667
1          Gym  0.166667
2  Coffee Shop  0.166667
3          Pub  0.166667
4     Pharmacy  0.166667


----Bathurst Manor, Wilson Heights, Downsview North----
                       venue      freq
0                Pizza Place  0.052632
1                Coffee Shop  0.052632
2                Bridal Shop  0.052632
3  Middle Eastern Restaurant  0.052632
4                   Pharmacy  0.052632


----Bayview Village----
                 venue  freq
0  Japanese Restaurant  0.25
1   Chinese Restaurant  0.25
2                 Bank  0.25
3                 Café  0.25
4    Accessories Store  0.00


----Bedford Park, Lawrence Manor East----
                     venue      freq
0  Comfort Food 

## Function for merging and creating new df with top venues by neighborhood

In [261]:
merged_nf = pd.merge(left=COHM, right=GeoTableMerged, left_on='Neighborhood', right_on='Neighborhood')

fixed_columns = [merged_nf.columns[-1]] + list(merged_nf.columns[:-1])
merged_nf = merged_nf[fixed_columns]
fixed_columns = [merged_nf.columns[-1]] + list(merged_nf.columns[:-1])
merged_nf = merged_nf[fixed_columns]
fixed_columns = [merged_nf.columns[-1]] + list(merged_nf.columns[:-1])
merged_nf = merged_nf[fixed_columns]

In [262]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [265]:

num_top_venues = 10

indicators = ['st', 'nd', 'rd']

columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))


neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = COHM['Neighborhood']

for ind in np.arange(COHM.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(COHM.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head(40)

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Agincourt,Clothing Store,Lounge,Breakfast Spot,Skating Rink,Latin American Restaurant,Eastern European Restaurant,Dog Run,Doner Restaurant,Donut Shop,Drugstore
1,"Alderwood, Long Branch",Pizza Place,Sandwich Place,Coffee Shop,Pub,Pharmacy,Gym,Greek Restaurant,Discount Store,Department Store,Dessert Shop
2,"Bathurst Manor, Wilson Heights, Downsview North",Pharmacy,Chinese Restaurant,Bridal Shop,Shopping Mall,Sandwich Place,Diner,Restaurant,Middle Eastern Restaurant,Supermarket,Bank
3,Bayview Village,Café,Japanese Restaurant,Chinese Restaurant,Bank,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Yoga Studio
4,"Bedford Park, Lawrence Manor East",Greek Restaurant,Sushi Restaurant,Juice Bar,Café,Thai Restaurant,Sandwich Place,Restaurant,Indian Restaurant,Coffee Shop,American Restaurant
5,Berczy Park,Park,Restaurant,Basketball Stadium,Beach,Fish Market,Beer Bar,Bistro,Farmers Market,Shopping Mall,Japanese Restaurant
6,"Birch Cliff, Cliffside West",College Stadium,Skating Rink,General Entertainment,Café,Escape Room,Electronics Store,Ethiopian Restaurant,Eastern European Restaurant,Dumpling Restaurant,Dim Sum Restaurant
7,"Brockton, Parkdale Village, Exhibition Place",Gym,Coffee Shop,Burrito Place,Café,Stadium,Restaurant,Bar,Italian Restaurant,Bakery,Furniture / Home Store
8,"Business reply mail Processing Centre, South C...",Pizza Place,Auto Workshop,Garden Center,Garden,Light Rail Station,Fast Food Restaurant,Farmers Market,Comic Shop,Park,Recording Studio
9,"CN Tower, King and Spadina, Railway Lands, Har...",Plane,Airport Terminal,Harbor / Marina,Coffee Shop,Rental Car Location,Sculpture Garden,Boat or Ferry,Bar,Boutique,Airport Food Court
