In [2]:
!conda install beautifulsoup4 -y

Collecting package metadata (current_repodata.json): done
Solving environment: done

## Package Plan ##

  environment location: /home/jupyterlab/conda/envs/python

  added / updated specs:
    - beautifulsoup4


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    beautifulsoup4-4.9.0       |           py36_0         167 KB
    ca-certificates-2020.1.1   |                0         125 KB
    certifi-2020.4.5.1         |           py36_0         155 KB
    openssl-1.1.1g             |       h7b6447c_0         2.5 MB
    soupsieve-2.0              |             py_0          33 KB
    ------------------------------------------------------------
                                           Total:         3.0 MB

The following NEW packages will be INSTALLED:

  beautifulsoup4     pkgs/main/linux-64::beautifulsoup4-4.9.0-py36_0
  soupsieve          pkgs/main/noarch::soupsieve-2.0-py_0

The followin

## Importing the necessary libraries

In [3]:
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np

### Checking for the filters

In [4]:
import requests

wikipedia_link='https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
raw_random_wikipedia_page=requests.get(wikipedia_link)
markup=raw_random_wikipedia_page.text
soupy = BeautifulSoup(markup, 'html.parser')
soup=soupy.find_all('tbody')[0]
table=soup.find_all('th')
table                      

[<th>Postal Code
 </th>,
 <th>Borough
 </th>,
 <th>Neighborhood
 </th>]

### Creating blank lists and assigning values to them

In [5]:
poplu = []
bor = []
neigh = []

for data in soup.find_all('tr')[1:]:
    #soup.find_all('td')[0]
    poplu.append(data.find_all('td')[0].text)
    bor.append(data.find_all('td')[1].text)
    neigh.append(data.find_all('td')[2].text)
    

### Assigning the lists into a dataframe

In [6]:
poplu
dfObj = pd.DataFrame({'PostalCode': poplu, 'Borough':bor, 'Neighborhood':neigh})
dfObj.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1A\n,Not assigned\n,\n
1,M2A\n,Not assigned\n,\n
2,M3A\n,North York\n,Parkwoods\n
3,M4A\n,North York\n,Victoria Village\n
4,M5A\n,Downtown Toronto\n,"Regent Park, Harbourfront\n"


### Cleaning the '\n' from the dataframe, assigning np.nan to the Neighborhood column and stripping the columns of whitespaces

In [7]:
dfObj = dfObj.replace('\n',' ', regex=True)
dfObj['Neighborhood'] = dfObj['Neighborhood'].replace(' ', np.nan)
dfObj[dfObj.columns] = dfObj.apply(lambda x: x.str.strip())

In [8]:
dfObj.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1A,Not assigned,
1,M2A,Not assigned,
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


### Removing the 'Not Assigned' Boroughs from the dataframe

In [9]:
dfObj = dfObj[dfObj.Borough != 'Not assigned']

### Checking for missing values

In [10]:
dfObj['PostalCode'].unique

<bound method Series.unique of 2      M3A
3      M4A
4      M5A
5      M6A
6      M7A
      ... 
160    M8X
165    M4Y
168    M7Y
169    M8Y
178    M8Z
Name: PostalCode, Length: 103, dtype: object>

In [11]:
print(dfObj.shape)
print(dfObj.head())

(103, 3)
  PostalCode           Borough                                 Neighborhood
2        M3A        North York                                    Parkwoods
3        M4A        North York                             Victoria Village
4        M5A  Downtown Toronto                    Regent Park, Harbourfront
5        M6A        North York             Lawrence Manor, Lawrence Heights
6        M7A  Downtown Toronto  Queen's Park, Ontario Provincial Government


## Downloading the Geospatial Data

In [16]:
!wget -q -O 'geospatial_data.csv' https://cocl.us/Geospatial_data

In [17]:
df_n = pd.read_csv("geospatial_data.csv")

In [18]:
df_n.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


## Merging both the Dataframes

In [19]:
df_mer = pd.merge(dfObj, df_n, how='inner' , left_on='PostalCode', right_on='Postal Code')

In [20]:
df_mer.drop('Postal Code', axis =1, inplace=True)

In [21]:
df_mer.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494


## Installing and importing Folium

In [28]:

!conda install -c conda-forge geopy --yes 
from geopy.geocoders import Nominatim # module to convert an address into latitude and longitude values

# libraries for displaying images
from IPython.display import Image 
from IPython.core.display import HTML 

# tranforming json file into a pandas dataframe library
from pandas.io.json import json_normalize

!conda install -c conda-forge folium=0.5.0 --yes
import folium # plotting library
from folium import plugins

print('Folium installed')
print('Libraries imported.')

Collecting package metadata (current_repodata.json): done
Solving environment: done

# All requested packages already installed.

Collecting package metadata (current_repodata.json): done
Solving environment: done

# All requested packages already installed.

Folium installed
Libraries imported.


In [25]:
CLIENT_ID = 'QRMSCY4AGE0FA4NPLVWQXP0SEGSTS4GEKKEBSZKB1DVUHQXC' # your Foursquare ID
CLIENT_SECRET = '2IPN11JJ55OHJVBM0L2ZQDZGKJIJ3GTTLV2FAJRKNZWDKQWZ' # your Foursquare Secret
VERSION = '20180604'
LIMIT = 30
print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: QRMSCY4AGE0FA4NPLVWQXP0SEGSTS4GEKKEBSZKB1DVUHQXC
CLIENT_SECRET:2IPN11JJ55OHJVBM0L2ZQDZGKJIJ3GTTLV2FAJRKNZWDKQWZ


## Visualizing the data on Map

In [29]:
# creating map instance
map = folium.Map(location=[37.77, -100.42], zoom_start=4, tiles='Stamen Terrain')

# instantiate a mark cluster object for the incidents in the dataframe
markers_C = plugins.MarkerCluster().add_to(map)

# loop through the dataframe and add each data point to the mark cluster
for lat, lng, label in zip(df_mer.Latitude, df_mer.Longitude, df_mer.PostalCode):
    folium.Marker(
        location=[lat, lng],
        icon=None,
        popup=folium.Popup(label,max_width=500),
    ).add_to(markers_C)

# display map
map