# Introduction/Business Problem

#### The problem we will solve is: which of the neighbourhoods of Toronto are more suitable to open there a restaurant?

#### To solve this problem we will use the dataframe from the previous ptactical work. The dataframe is the following

In [1]:
import pandas as pd 
import numpy as np
import requests
from pandas.io.json import json_normalize

csv_path='/resources/data/Toronto_neighbourhoods_2.csv'
df=pd.read_csv(csv_path, sep=",", encoding='cp1252')
df.drop(['Unnamed: 0'], axis=1,inplace=True)

#### The dataframe df contains all Toronto postcodes and  boroughs as well as Toronto neighbourhoods grouped by their postcodes. Two last columns of the dataframe contain latitudes and longitudes of these neighbourhood groups.

In [2]:
df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


#### The dataframe df contains 103 rows and 5 columns

In [3]:
df.shape

(103, 5)

## Define Foursquare Credentials and Version

In [4]:
CLIENT_ID = 'OTQDBGJPMXHTIMHHNO5OPVE1VJBEOZ3NHSWWMRMUB1N0MW5H' # your Foursquare ID
CLIENT_SECRET = 'QHV23BYJTV2BATLWELNUMSXG1VTPU4UV2MAYW4MIZMP2UN55' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version
LIMIT=30

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: OTQDBGJPMXHTIMHHNO5OPVE1VJBEOZ3NHSWWMRMUB1N0MW5H
CLIENT_SECRET:QHV23BYJTV2BATLWELNUMSXG1VTPU4UV2MAYW4MIZMP2UN55


#### For each group of neighbourhoods in df we will be interested in number of restaurants within 1000 meters from the geographical position of that group (which is given by two last columns of df in the corresponding row). So, first put radius = 1000 and search_query='restaurant'

In [5]:
radius=1000
search_query='restaurant'

### Add to df a column 'sum', which is exactly the number of restaurants within 1000 meters from a neighbourhood 

In [6]:
df['sum']=""
for k, row in df.iterrows():
    (lat,lng)= (row["Latitude"], row["Longitude"])
    url = 'https://api.foursquare.com/v2/venues/search?client_id={}&client_secret={}&ll={},{}&v={}&query={}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, lat, lng, VERSION, search_query, radius, LIMIT)
    results = requests.get(url).json()
    df.set_value(index=k,col='sum',value=len(results['response']['venues']))
    
# Url = Url.append({'Italian food': url}, ignore_index=True)


  


In [7]:
df.head(10)

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude,sum
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353,0
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497,1
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711,5
3,M1G,Scarborough,Woburn,43.770992,-79.216917,3
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476,8
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476,7
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park",43.727929,-79.262029,5
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge",43.711112,-79.284577,3
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West",43.716316,-79.239476,1
9,M1N,Scarborough,"Birch Cliff, Cliffside West",43.692657,-79.264848,4


### Cluster neighbourhoods by parameter 'sum'

In [8]:
from sklearn.cluster import KMeans

In [9]:
X = df[['sum','Longitude','Latitude']]
X.head()

Unnamed: 0,sum,Longitude,Latitude
0,0,-79.194353,43.806686
1,1,-79.160497,43.784535
2,5,-79.188711,43.763573
3,3,-79.216917,43.770992
4,8,-79.239476,43.773136


In [10]:
# set number of clusters
N = 3

# run k-means clustering
kmeans = KMeans(n_clusters=N, random_state=0).fit(X)

## The results of clustering

In [11]:
# check cluster labels generated for each row in the dataframe
kmeans.labels_ 

array([0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 2, 1, 2, 2, 0, 0, 0, 2, 0, 0, 2,
       1, 0, 0, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 2, 0, 2, 1, 1, 2, 2,
       0, 1, 2, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 0, 2, 1,
       1, 1, 0, 1, 1, 0, 0, 2, 2, 1, 2, 1, 2, 0, 0, 0, 0, 2, 2, 1, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0], dtype=int32)

#### Add cluster labels as the last column to df

In [15]:
df["Cluster Labels"] = kmeans.labels_ 
df

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude,sum,Cluster Labels
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353,0,0
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497,1,0
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711,5,0
3,M1G,Scarborough,Woburn,43.770992,-79.216917,3,0
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476,8,2
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476,7,2
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park",43.727929,-79.262029,5,0
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge",43.711112,-79.284577,3,0
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West",43.716316,-79.239476,1,0
9,M1N,Scarborough,"Birch Cliff, Cliffside West",43.692657,-79.264848,4,0
