# Toronto Chinese Restaurant 

#### We need to install webscraping packages; bs4 and requests, and import BeautifulSoup.

#### and also pandas to clean the dataframe.

In [112]:
!pip install bs4
import pandas as pd
from bs4 import BeautifulSoup
import requests



Because we want to check if there are any odd data in the dataframe, thus we set the display function to call on all rows.

In [113]:
pd.set_option("display.max_rows", None)

## 1.  Getting the data.

In [114]:
url = "https://www.zipcodesonline.com/2020/06/postal-code-of-toronto-in-2020.html"
data = requests.get(url).text

In [115]:
soup = BeautifulSoup(data, 'html5lib')

#### There's two tables in the url.

#### We need the one that contains the columns postalcode, borough, neighbourhood.

In [116]:
tables = soup.find_all('table')
len(tables)

2

There are two tables in the website.

In [117]:
for index,table in enumerate(tables):
    if ("POSTAL CODE" in str(table)):
        table_index = index
print(table_index)

1


#### We now know that its the second table index 1 that contains the data. So now, we use pandas to create dataframe from the html.

In [118]:
toronto1 = pd.DataFrame(columns=["no","PostalCode","Borough","Neighbourhood"])

for row in tables[1].tbody.find_all("tr"):
    col = row.find_all("td")
    if (col != []):
        sl_no = col[0].text
        neighbourhood = col[1].text
        postalcode = col[2].text
        district = col[3].text
        toronto1 = toronto1.append({"no":sl_no, "PostalCode":postalcode, "Borough":district, "Neighbourhood":neighbourhood}, ignore_index=True)   

In [119]:
toronto1.head(10)

Unnamed: 0,no,PostalCode,Borough,Neighbourhood
0,\n \nSL. NO. \n,\n \nPOSTAL CODES\n,\n \nDISTRICT\n,\n \nNEIGHBOURHOOD\n
1,,,,
2,\n \n1\n,\n \nM5H\n,\n \nDowntown Toronto\n,\n \n Adelaide\n
3,\n \n2\n,\n \nM1V \n,\n \nScarborough \n,\n \nAgincourt North\n
4,\n \n3\n,\n \nM1S \n,\n \nScarborough\n,\n \nAgincourt\n
5,\n \n4\n,\n \nM9V \n,\n \nEtobicoke\n,\n \nAlbion Gardens\n
6,\n \n5\n,\n \nM8W \n,\n \nEtobicoke\n,\n \nAlderwood\n
7,\n \n6\n,\n \nM3H\n,\n \nNorth York\n,\n \nBathurst Manor\n
8,\n \n7\n,\n \nM5V\n,\n \nDowntown Toronto\n,\n \nBathurst Quay \n
9,\n \n8\n,\n \nM2K\n,\n \nNorth York\n,\n \nBayview Village\n


Now we notice that, the data is very uncleaned. The most obvious one - "\n " in every string. So now, we proceed to data cleaning.

<h2/> 2. Cleaning Data I. </h2>

#### We want to remove the first and second row because its irrelevant.

In [120]:
df = toronto1.iloc[2:,:]
df.reset_index(drop=True, inplace=True)
df.head()

Unnamed: 0,no,PostalCode,Borough,Neighbourhood
0,\n \n1\n,\n \nM5H\n,\n \nDowntown Toronto\n,\n \n Adelaide\n
1,\n \n2\n,\n \nM1V \n,\n \nScarborough \n,\n \nAgincourt North\n
2,\n \n3\n,\n \nM1S \n,\n \nScarborough\n,\n \nAgincourt\n
3,\n \n4\n,\n \nM9V \n,\n \nEtobicoke\n,\n \nAlbion Gardens\n
4,\n \n5\n,\n \nM8W \n,\n \nEtobicoke\n,\n \nAlderwood\n


#### We need to remove the annoying \n and space in the data.

In [121]:
df = df.replace(('\n',' ','\xa0'),'', regex=True)
df.head()

Unnamed: 0,no,PostalCode,Borough,Neighbourhood
0,1,M5H,DowntownToronto,Adelaide
1,2,M1V,Scarborough,AgincourtNorth
2,3,M1S,Scarborough,Agincourt
3,4,M9V,Etobicoke,AlbionGardens
4,5,M8W,Etobicoke,Alderwood


#### Just checking what types of data each columns is.

In [122]:
df.columns 

Index(['no', 'PostalCode', 'Borough', 'Neighbourhood'], dtype='object')

#### Just renaming the postal code with adding space to make it neater. Also, we will sort this by the postal code and resetting the index.

In [123]:
# and renaming postal code
df.rename(columns={'PostalCode': 'Postal Code'}, inplace=True)
# removing no 
df.drop(columns=['no'],inplace=True)
# and also sorting based on postal code
df.sort_values(by=['Postal Code'],ignore_index = True,inplace=True)
df.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1B,Scarborough,Rouge
1,M1B,Scarborough,Malvern
2,M1C,Scarborough,HighlandCreek
3,M1C,Scarborough,RougeHill
4,M1C,Scarborough,PortUnion


In [124]:
df.shape

(205, 3)

## 3. Retrieving Latitude and Longitude.

We just take the given dataframe the website provided by Coursera.

In [125]:
hi = pd.read_csv("https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-DS0701EN-SkillsNetwork/labs_v1/Geospatial_Coordinates.csv")
hi.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [126]:
hi.shape

(103, 3)

#### Because we want the neighbourhood to have the latitude and longitude we need to merge this two dataframe: hi and df. 

But, we notice that the length of two dataframe is different; hi is 103 and df is 205.

One obvious reason after inspecting hi is because each postal code only appear once in the column.

#### We now move on to check the the Postal Code column in df.

## 4. Cleaning Data II.

We slice the Postal Code column and change it to list so that we can we view it in the rawest form.

In [127]:
postcode = list(df['Postal Code'])
postcode 

['M1B',
 'M1B',
 'M1C',
 'M1C',
 'M1C',
 'M1E',
 'M1E',
 'M1E',
 'M1G',
 'M1H',
 'M1J',
 'M1K',
 'M1K',
 'M1K',
 'M1L',
 'M1L',
 'M1L',
 'M1M',
 'M1M',
 'M1N',
 'M1N',
 'M1P',
 'M1P',
 'M1P',
 'M1R',
 'M1R',
 'M1S',
 'M1T',
 'M1T',
 'M1T',
 'M1V',
 'M1V',
 'M1V',
 'M1V',
 'M1W',
 'M1W',
 'M1X',
 'M2H',
 'M2J',
 'M2J',
 'M2J',
 'M2K',
 'M2L',
 'M2L',
 'M2M',
 'M2M,M2N,M2R',
 'M2P',
 'M3A',
 'M3B',
 'M3H',
 'M3H',
 'M3H',
 'M3J',
 'M3K',
 'M3M,M3L,M3N',
 'M4A',
 'M4B',
 'M4B',
 'M4C',
 'M4E',
 'M4G',
 'M4H',
 'M4J',
 'M4J',
 'M4K',
 'M4K',
 'M4L',
 'M4L',
 'M4M',
 'M4N',
 'M4P',
 'M4R',
 'M4R',
 'M4S',
 'M4T',
 'M4T',
 'M4V',
 'M4V',
 'M4V',
 'M4V',
 'M4V',
 'M4W',
 'M4X',
 'M4X',
 'M4Y',
 'M5A',
 'M5A',
 'M5B',
 'M5B',
 'M5C',
 'M5E',
 'M5G',
 'M5H',
 'M5H',
 'M5J',
 'M5J',
 'M5J',
 'M5K',
 'M5K',
 'M5L',
 'M5L',
 'M5M',
 'M5M',
 'M5N',
 'M5P',
 'M5P',
 'M5R',
 'M5R',
 'M5R',
 'M5S',
 'M5S',
 'M5T',
 'M5T',
 'M5T',
 'M5V',
 'M5V',
 'M5V',
 'M5V',
 'M5V',
 'M5V',
 'M5V',
 'M5W',
 'M5X',


#### We notice that there are some oddities.
#### 1. The last five of the postal code contains '\xa0' in their string.

We solve the second problem first because that's the easier one.

Now, we have removed '\xa0' from all the strings. We just need to replace this in the Postal Code column in df.

In [129]:
df["Postal Code"] = o
df.tail()

Unnamed: 0,Postal Code,Borough,Neighbourhood
200,M9V,Etobicoke,MountOlive
201,M9V,Etobicoke,AlbionGardens
202,M9V,Etobicoke,Humbergate
203,M9W,Etobicoke,Northwest
204,M9W,Etobicoke,WestHumberClairville


Before anything, it's good to check that we have the df without the 'xa0'.

In [130]:
postcode = list(df['Postal Code'])
postcode 

['M1B',
 'M1B',
 'M1C',
 'M1C',
 'M1C',
 'M1E',
 'M1E',
 'M1E',
 'M1G',
 'M1H',
 'M1J',
 'M1K',
 'M1K',
 'M1K',
 'M1L',
 'M1L',
 'M1L',
 'M1M',
 'M1M',
 'M1N',
 'M1N',
 'M1P',
 'M1P',
 'M1P',
 'M1R',
 'M1R',
 'M1S',
 'M1T',
 'M1T',
 'M1T',
 'M1V',
 'M1V',
 'M1V',
 'M1V',
 'M1W',
 'M1W',
 'M1X',
 'M2H',
 'M2J',
 'M2J',
 'M2J',
 'M2K',
 'M2L',
 'M2L',
 'M2M',
 'M2M,M2N,M2R',
 'M2P',
 'M3A',
 'M3B',
 'M3H',
 'M3H',
 'M3H',
 'M3J',
 'M3K',
 'M3M,M3L,M3N',
 'M4A',
 'M4B',
 'M4B',
 'M4C',
 'M4E',
 'M4G',
 'M4H',
 'M4J',
 'M4J',
 'M4K',
 'M4K',
 'M4L',
 'M4L',
 'M4M',
 'M4N',
 'M4P',
 'M4R',
 'M4R',
 'M4S',
 'M4T',
 'M4T',
 'M4V',
 'M4V',
 'M4V',
 'M4V',
 'M4V',
 'M4W',
 'M4X',
 'M4X',
 'M4Y',
 'M5A',
 'M5A',
 'M5B',
 'M5B',
 'M5C',
 'M5E',
 'M5G',
 'M5H',
 'M5H',
 'M5J',
 'M5J',
 'M5J',
 'M5K',
 'M5K',
 'M5L',
 'M5L',
 'M5M',
 'M5M',
 'M5N',
 'M5P',
 'M5P',
 'M5R',
 'M5R',
 'M5R',
 'M5S',
 'M5S',
 'M5T',
 'M5T',
 'M5T',
 'M5V',
 'M5V',
 'M5V',
 'M5V',
 'M5V',
 'M5V',
 'M5V',
 'M5W',
 'M5X',


As you can see, the last five do not have the 'xa0' string anymore. Thus, now we are good to proceed to the final problem of the Postal Code string.

### We first inspect how many rows have this multiple Postal Code.

In [131]:
o = []
for index, sublist in enumerate(df["Postal Code"]):
    if "," in sublist:
        o += [index]
        
#         df["Postal Code"]==sublist
moo = df.iloc[o,:]
moo
# moo = df.iloc[o,:]        
# moo

Unnamed: 0,Postal Code,Borough,Neighbourhood
45,"M2M,M2N,M2R",NorthYork,Willowdale
54,"M3M,M3L,M3N",NorthYork,Downsview
181,"M9B,M9A",Etobicoke,IslingtonAvenue


We only have three rows as so.

#### We solve this problem by putting them in another dataframe, df2 and making the same borough and neighbourhood for every postal code that appeared in the rows.

In [132]:
df2 = pd.DataFrame(columns = ['Postal Code', 'Borough', 'Neighbourhood'])

for index, sublist in enumerate(df["Postal Code"]):
    if "," in sublist:
        i = sublist.split(",")
        for ko in i:
            df2 = df2.append({"Postal Code":ko,
                    "Borough":df.iloc[index,1],
                    "Neighbourhood":df.iloc[index,2]},
                      ignore_index = True)

#  df["Postal Code"]==sublist
df2
# moo = df.iloc[o,:]        
# moo

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M2M,NorthYork,Willowdale
1,M2N,NorthYork,Willowdale
2,M2R,NorthYork,Willowdale
3,M3M,NorthYork,Downsview
4,M3L,NorthYork,Downsview
5,M3N,NorthYork,Downsview
6,M9B,Etobicoke,IslingtonAvenue
7,M9A,Etobicoke,IslingtonAvenue


In [133]:
df2.sort_values(by=['Postal Code'])
df2

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M2M,NorthYork,Willowdale
1,M2N,NorthYork,Willowdale
2,M2R,NorthYork,Willowdale
3,M3M,NorthYork,Downsview
4,M3L,NorthYork,Downsview
5,M3N,NorthYork,Downsview
6,M9B,Etobicoke,IslingtonAvenue
7,M9A,Etobicoke,IslingtonAvenue


We now want to remove the ones that appeared multiples in df.

In [134]:
for index, sublist in enumerate(df["Postal Code"]):
    if "," in sublist:
        df.drop([index], axis=0,inplace=True)
#         df["Postal Code"]==sublist
df.head

<bound method NDFrame.head of     Postal Code          Borough                             Neighbourhood
0           M1B      Scarborough                                     Rouge
1           M1B      Scarborough                                   Malvern
2           M1C      Scarborough                             HighlandCreek
3           M1C      Scarborough                                 RougeHill
4           M1C      Scarborough                                 PortUnion
5           M1E      Scarborough                               Morningside
6           M1E      Scarborough                                  WestHill
7           M1E      Scarborough                                 Guildwood
8           M1G      Scarborough                                    Woburn
9           M1H      Scarborough                                 Cedarbrae
10          M1J      Scarborough                        ScarboroughVillage
11          M1K      Scarborough                                   Ion

Now, we want to add df2 to df. and sort them by Postal Code.

In [135]:
df = df.append(df2)
df.sort_values(by=['Postal Code'],inplace=True,ignore_index =True)
df

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1B,Scarborough,Rouge
1,M1B,Scarborough,Malvern
2,M1C,Scarborough,HighlandCreek
3,M1C,Scarborough,RougeHill
4,M1C,Scarborough,PortUnion
5,M1E,Scarborough,Morningside
6,M1E,Scarborough,WestHill
7,M1E,Scarborough,Guildwood
8,M1G,Scarborough,Woburn
9,M1H,Scarborough,Cedarbrae


In [136]:
df.shape

(210, 3)

## 4. Combining different neighbourhoods in df with the same postal code.
### saving it in 'gu'.

#### Im just trying out how to do it using M1C postal code

In [137]:
hu = merge.loc[merge['Postal Code'] == 'M1C']
hu
len(hu)
for i in range(0,len(hu)+1):
    print(i)

0
1


Checking if any is null values

In [138]:
hi.isnull().any()

Postal Code    False
Latitude       False
Longitude      False
dtype: bool

In [139]:
wu = hu.loc[:,'Neighbourhood']
print(wu)

1    RougeHill, HighlandCreek, PortUnion
Name: Neighbourhood, dtype: object


In [140]:
full_str = ', '.join([str(elem) for elem in wu])
display(full_str)

# it worked!!

'RougeHill, HighlandCreek, PortUnion'

In [141]:
from collections import Iterable
def flatten(lis):
     for item in lis:
         if isinstance(item, Iterable) and not isinstance(item, str):
             for x in flatten(item):
                 yield x
         else:        
             yield item

  from collections import Iterable


#### Putting postal code under one variable

In [142]:
mylist = df[['Postal Code']]
mylist

Unnamed: 0,Postal Code
0,M1B
1,M1B
2,M1C
3,M1C
4,M1C
5,M1E
6,M1E
7,M1E
8,M1G
9,M1H


#### Doing it for all postal code 
#### creating a list for unique postal code and naming it 'full_str'

In [143]:
import numpy as np
post = list((np.unique(mylist)))
len(post)

102

In [144]:
full_str = []
for i in range(0, len(post)):
    hu = df.loc[df['Postal Code'] == post[i]]
    wu = hu.loc[:,'Neighbourhood']
    full_str.append(', '.join([str(elem) for elem in wu]))

len(full_str)

102

In [145]:
full_str

['Rouge, Malvern',
 'HighlandCreek, RougeHill, PortUnion',
 'Morningside, WestHill, Guildwood',
 'Woburn',
 'Cedarbrae',
 'ScarboroughVillage',
 'Ionview, KennedyPark, EastBirchmountPark',
 'Oakridge, Clairlea, GoldenMile',
 'Cliffcrest, Cliffside',
 'CliffsideWest, BirchCliff',
 'ScarboroughTownCentre, DorsetPark, WexfordHeights',
 'Wexford, Maryvale',
 'Agincourt',
 "TamO'Shanter, ClarksCorners, Sullivan",
 "L'AmoreauxEast, AgincourtNorth, SteelesEast, Milliken",
 "SteelesWest, L'AmoreauxWest",
 'UpperRouge',
 'HillcrestVillage',
 'Oriole, Fairview, HenryFarm',
 'BayviewVillage',
 'SilverHills, YorkMills',
 'Willowdale, Newtonbrook',
 'Willowdale',
 'YorkMillsWest',
 'Willowdale',
 'Parkwoods',
 'DonMills',
 'BathurstManor, WilsonHeights, DownsviewNorth',
 'NorthwoodPark',
 'Downsview',
 'Downsview',
 'Downsview',
 'Downsview',
 'VictoriaVillage',
 'ParkviewHill, WoodbineGardens,',
 'WoodbineHeights',
 'TheBeaches',
 'Leaside',
 'ThorncliffePark',
 'EastToronto, BroadviewNorth',
 'Ri

In [146]:
gu = df.drop_duplicates(subset =["Postal Code"], keep = "first")
print(gu.shape)
len(gu)

(102, 3)


102

Checking gu doesn't have duplicate of postal code.

In [147]:
gu

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1B,Scarborough,Rouge
2,M1C,Scarborough,HighlandCreek
5,M1E,Scarborough,Morningside
8,M1G,Scarborough,Woburn
9,M1H,Scarborough,Cedarbrae
10,M1J,Scarborough,ScarboroughVillage
11,M1K,Scarborough,Ionview
14,M1L,Scarborough,Oakridge
17,M1M,Scarborough,Cliffcrest
19,M1N,Scarborough,CliffsideWest


In [148]:
gu.reset_index(drop=True, inplace=True)

Assigning 'gu' Neighbourhood with full_str, that have all the neighbourhoods with the same postal code.

In [149]:
gu.loc[:,'Neighbourhood']= full_str
gu.iloc[-1]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item_labels[indexer[info_axis]]] = value


Postal Code                                  M9W
Borough                                Etobicoke
Neighbourhood    Northwest, WestHumberClairville
Name: 101, dtype: object

In [150]:
gu.reset_index(drop=True, inplace=True)
gu

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"HighlandCreek, RougeHill, PortUnion"
2,M1E,Scarborough,"Morningside, WestHill, Guildwood"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,ScarboroughVillage
6,M1K,Scarborough,"Ionview, KennedyPark, EastBirchmountPark"
7,M1L,Scarborough,"Oakridge, Clairlea, GoldenMile"
8,M1M,Scarborough,"Cliffcrest, Cliffside"
9,M1N,Scarborough,"CliffsideWest, BirchCliff"


In [151]:
gu.shape

(102, 3)

## 5. Merging 'gu' and 'hi'

In [152]:
hi.shape

(103, 3)

We know that hi is bigger than gu by one rows. But we're gonna merge by right(hi) anyways, so that we will know which Postal Code gu doesn't have.

In [153]:
merge = gu.merge(hi, on="Postal Code", how="right")

In [154]:
merge

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"HighlandCreek, RougeHill, PortUnion",43.784535,-79.160497
2,M1E,Scarborough,"Morningside, WestHill, Guildwood",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,ScarboroughVillage,43.744734,-79.239476
6,M1K,Scarborough,"Ionview, KennedyPark, EastBirchmountPark",43.727929,-79.262029
7,M1L,Scarborough,"Oakridge, Clairlea, GoldenMile",43.711112,-79.284577
8,M1M,Scarborough,"Cliffcrest, Cliffside",43.716316,-79.239476
9,M1N,Scarborough,"CliffsideWest, BirchCliff",43.692657,-79.264848


In [155]:
merge.isnull().values.any()
merge.isnull().sum()

Postal Code      0
Borough          1
Neighbourhood    1
Latitude         0
Longitude        0
dtype: int64

In [156]:
null_data = merge[merge.isnull().any(axis=1)]
null_data

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
27,M3C,,,43.7259,-79.340923


Now, we know that gu doesn't have M3C postcode. That's just for our analysis.

We're just going to remove the null rows because we don't know what neighbourhood and borough is that. Plus, it's only one row.

In [157]:
merge.dropna(inplace=True)
merge.reset_index(drop=True, inplace=True)
merge

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"HighlandCreek, RougeHill, PortUnion",43.784535,-79.160497
2,M1E,Scarborough,"Morningside, WestHill, Guildwood",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,ScarboroughVillage,43.744734,-79.239476
6,M1K,Scarborough,"Ionview, KennedyPark, EastBirchmountPark",43.727929,-79.262029
7,M1L,Scarborough,"Oakridge, Clairlea, GoldenMile",43.711112,-79.284577
8,M1M,Scarborough,"Cliffcrest, Cliffside",43.716316,-79.239476
9,M1N,Scarborough,"CliffsideWest, BirchCliff",43.692657,-79.264848


In [158]:
print('The dataframe has {} boroughs and {} neighbourhoods.'.format(
        len(merge['Borough'].unique()),
        df.shape[0]
    )
)

The dataframe has 11 boroughs and 210 neighbourhoods.


### Naming it toronto.

In [159]:
toronto = merge

## 6. Exploring Neighbourhoods

<h3>FourSquare Credentials</h3>

In [56]:
CLIENT_ID = 'XSSIZCXDKP53HRAXUM3BKFR0DBUGHSXAT4TKR1R4OKH3YHPF' # your Foursquare ID
CLIENT_SECRET = 'RHEUGTCM3J2A2ZXXAMWL4CD34V5NQTK5ZA4L3LVDII2LB1QQ' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version
LIMIT = 100 # A default Foursquare API limit value

print('Your credentials:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)


Your credentials:
CLIENT_ID: XSSIZCXDKP53HRAXUM3BKFR0DBUGHSXAT4TKR1R4OKH3YHPF
CLIENT_SECRET:RHEUGTCM3J2A2ZXXAMWL4CD34V5NQTK5ZA4L3LVDII2LB1QQ


### Finding the nearest venue.
### Trying it on M1B Postal Code.

In [57]:
neighbourhood_latitude = toronto.loc[0, 'Latitude'] # neighbourhood latitude value
neighbourhood_longitude = toronto.loc[0, 'Longitude'] # neighbourhood longitude value

neighbourhood_name = toronto.loc[0, 'Neighbourhood'] # neighbourhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighbourhood_name, 
                                                               neighbourhood_latitude, 
                                                               neighbourhood_longitude))

Latitude and longitude values of Malvern, Rouge are 43.8066863, -79.1943534.


In [58]:
LIMIT = 100 # limit of number of venues returned by Foursquare API
radius = 500 # define radius
 # create URL
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighbourhood_latitude, 
    neighbourhood_longitude, 
    radius, 
    LIMIT)

#### Defining url for M1B postal code example

In [59]:
import requests

In [60]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '610bd6236207a918c5dc0452'},
  'headerLocation': 'Malvern',
  'headerFullLocation': 'Malvern, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 1,
  'suggestedBounds': {'ne': {'lat': 43.811186304500005,
    'lng': -79.1881295807304},
   'sw': {'lat': 43.8021862955, 'lng': -79.20057721926959}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4bb6b9446edc76b0d771311c',
       'name': 'Wendy’s',
       'location': {'crossStreet': 'Morningside & Sheppard',
        'lat': 43.80744841934756,
        'lng': -79.19905558052072,
        'labeledLatLngs': [{'label': 'display',
          'lat': 43.80744841934756,
          'lng': -79.19905558052072}],
        'distance': 387,
        'cc': 'CA',
        'city': 'Toronto',

#### Defining category making function

In [61]:
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

### Importing json normalize package

In [160]:
from pandas.io.json import json_normalize # transform JSON file into a pandas dataframe

In [161]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = pd.json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Wendy’s,Fast Food Restaurant,43.807448,-79.199056


In [64]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

1 venues were returned by Foursquare.


<h3> Finding nearby venues doing it for all </h3>

In [162]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighbourhood', 
                  'Neighbourhood Latitude', 
                  'Neighbourhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [163]:
toronto_venues = getNearbyVenues(names=toronto['Neighbourhood'],
                                   latitudes=toronto['Latitude'], 
                                   longitudes=toronto['Longitude']
                                  )
print(toronto_venues[0:5])

Rouge, Malvern
HighlandCreek, RougeHill, PortUnion
Morningside, WestHill, Guildwood
Woburn
Cedarbrae
ScarboroughVillage
Ionview, KennedyPark, EastBirchmountPark
Oakridge, Clairlea, GoldenMile
Cliffcrest, Cliffside
CliffsideWest, BirchCliff
ScarboroughTownCentre, DorsetPark, WexfordHeights
Wexford, Maryvale
Agincourt
TamO'Shanter, ClarksCorners, Sullivan
L'AmoreauxEast, AgincourtNorth, SteelesEast, Milliken
SteelesWest, L'AmoreauxWest
UpperRouge
HillcrestVillage
Oriole, Fairview, HenryFarm
BayviewVillage
SilverHills, YorkMills
Willowdale, Newtonbrook
Willowdale
YorkMillsWest
Willowdale
Parkwoods
DonMills
BathurstManor, WilsonHeights, DownsviewNorth
NorthwoodPark
Downsview
Downsview
Downsview
Downsview
VictoriaVillage
ParkviewHill, WoodbineGardens,
WoodbineHeights
TheBeaches
Leaside
ThorncliffePark
EastToronto, BroadviewNorth
Riverdale, TheDanforthWest
TheBeachesWest, IndiaBazaar
StudioDistrict
LawrencePark
DavisvilleNorth
LawrencePark, NorthTorontoWest
Davisville
MoorePark, SummerhillEa

In [165]:
print(toronto_venues.shape)
toronto_venues.head()

(2104, 7)


Unnamed: 0,Neighbourhood,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Rouge, Malvern",43.806686,-79.194353,Wendy’s,43.807448,-79.199056,Fast Food Restaurant
1,"HighlandCreek, RougeHill, PortUnion",43.784535,-79.160497,Royal Canadian Legion,43.782533,-79.163085,Bar
2,"HighlandCreek, RougeHill, PortUnion",43.784535,-79.160497,Affordable Toronto Movers,43.787919,-79.162977,Moving Target
3,"Morningside, WestHill, Guildwood",43.763573,-79.188711,RBC Royal Bank,43.76679,-79.191151,Bank
4,"Morningside, WestHill, Guildwood",43.763573,-79.188711,G & G Electronics,43.765309,-79.191537,Electronics Store


### Visualizing postal code in Toronto in a map

In [210]:
# create map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=11)


# add markers to map
for lat, lng, label in zip(toronto['Latitude'], toronto['Longitude'], toronto['Neighbourhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

## 7. Finding chinese restaurant in Toronto. 

In [166]:
k = []
oop = toronto_venues['Venue Category']
for n,i in enumerate(oop):
   if i=='Chinese Restaurant':
    k += [n]  

chinese = toronto_venues.iloc[k,:]
chinese

Unnamed: 0,Neighbourhood,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
49,"ScarboroughTownCentre, DorsetPark, WexfordHeights",43.75741,-79.273304,Kim Kim restaurant,43.753833,-79.276611,Chinese Restaurant
69,"TamO'Shanter, ClarksCorners, Sullivan",43.781638,-79.304302,The Royal Chinese Restaurant 避風塘小炒,43.780505,-79.298844,Chinese Restaurant
82,"SteelesWest, L'AmoreauxWest",43.799525,-79.318389,Mr Congee Chinese Cuisine 龍粥記,43.798879,-79.318335,Chinese Restaurant
155,"Oriole, Fairview, HenryFarm",43.778517,-79.346556,Szechuan Express,43.777966,-79.343316,Chinese Restaurant
164,BayviewVillage,43.786947,-79.385975,Sun Star Chinese Cuisine 翠景小炒,43.787914,-79.381234,Chinese Restaurant
460,"LawrencePark, NorthTorontoWest",43.715383,-79.405678,C'est Bon,43.716785,-79.400406,Chinese Restaurant
557,"Cabbagetown, St.JamesTown",43.667967,-79.367675,China Gourmet,43.66418,-79.368359,Chinese Restaurant
768,"GardenDistrict, Ryerson",43.657162,-79.378937,GB Hand-Pulled Noodles,43.656434,-79.383783,Chinese Restaurant
1119,"UnionStation, TorontoIslands, HarbourfrontEast",43.640816,-79.381752,Pearl Harbourfront,43.638157,-79.380688,Chinese Restaurant
1297,"TorontoDominionCentre, DesignExchange",43.647177,-79.381576,Szechuan Express,43.646973,-79.379549,Chinese Restaurant


In [167]:
chinese.shape

(11, 7)

Now, we will map them using the folium package.

In [168]:
# create map of Toronto using latitude and longitude values
chinese_map = folium.Map(location=[latitude, longitude], zoom_start=11)


# add markers to map
for lat, lng, label in zip(chinese['Neighbourhood Latitude'], chinese['Neighbourhood Longitude'], chinese['Neighbourhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(chinese_map)  
    
chinese_map

In [172]:
chinese['Neighbourhood']

49      ScarboroughTownCentre, DorsetPark, WexfordHeights
69                  TamO'Shanter, ClarksCorners, Sullivan
82                            SteelesWest, L'AmoreauxWest
155                           Oriole, Fairview, HenryFarm
164                                        BayviewVillage
460                        LawrencePark, NorthTorontoWest
557                             Cabbagetown, St.JamesTown
768                               GardenDistrict, Ryerson
1119       UnionStation, TorontoIslands, HarbourfrontEast
1297                TorontoDominionCentre, DesignExchange
2080                                            Westmount
Name: Neighbourhood, dtype: object

In [177]:
o = 0
for i in chinese['Neighbourhood']:
    o += 1
    for u in i:
        o += u.count(',')

print(f'There is {o} neighbourhoods or {len(chinese)} boroughs that have Chinese restaurants')

There is 24 neighbourhoods or 11 boroughs that have Chinese restaurants


In [194]:
hue = list(chinese['Neighbourhood'])

In [208]:
possible_chinese = toronto[~toronto.Neighbourhood.isin(hue)]
possible_chinese.reset_index(inplace=True)
possible_chinese

Unnamed: 0,index,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,1,M1C,Scarborough,"HighlandCreek, RougeHill, PortUnion",43.784535,-79.160497
2,2,M1E,Scarborough,"Morningside, WestHill, Guildwood",43.763573,-79.188711
3,3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,5,M1J,Scarborough,ScarboroughVillage,43.744734,-79.239476
6,6,M1K,Scarborough,"Ionview, KennedyPark, EastBirchmountPark",43.727929,-79.262029
7,7,M1L,Scarborough,"Oakridge, Clairlea, GoldenMile",43.711112,-79.284577
8,8,M1M,Scarborough,"Cliffcrest, Cliffside",43.716316,-79.239476
9,9,M1N,Scarborough,"CliffsideWest, BirchCliff",43.692657,-79.264848


In [217]:
o = 0
for i in possible_chinese['Neighbourhood']:
    o += 1
    for u in i:
        o += u.count(',')

print(f'There is {o} neighbourhoods or {len(possible_chinese)} boroughs that are possible to open a Chinese restaurants.')

There is 190 neighbourhoods or 91 boroughs that are possible to open a Chinese restaurants.


In [216]:
# create map of Toronto using latitude and longitude values
chinese_map = folium.Map(location=[latitude, longitude], zoom_start=11)


# add markers to map
for lat, lng, label in zip(chinese['Neighbourhood Latitude'], chinese['Neighbourhood Longitude'], chinese['Neighbourhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='red',
        fill=True,
        fill_color='red',
        fill_opacity=0.7,
        parse_html=False).add_to(chinese_map)  
    
for lat, lng, label in zip(possible_chinese['Latitude'], possible_chinese['Longitude'], possible_chinese['Neighbourhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='blue',
        fill_opacity=0.7,
        parse_html=False).add_to(chinese_map) 
    
chinese_map