# IBM Data Science Professional Certificate Capstone Project

This Jupyter notebook is used for the IBM Data Science Professional Certificate capstone project on Coursera.

In [1]:
import pandas as pd
# Lift pandas' display limits so that every column and every row of a frame is rendered.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import numpy as np
# Print a greeting message.
print('Hello Capstone Project Course')

Hello Capstone Project Course


## Week-3 / Part-1
This is week 3, part 1 of the capstone project. In this part:
- The Wikipedia page is scraped
- A dataframe is created
- Rows with 'Not assigned' values in the Borough column are dropped
- Neighborhood names that belong to the same postal code are separated with commas in the same row
- The .shape attribute is used to see the number of rows and columns

In [2]:
from io import StringIO

import requests
from bs4 import BeautifulSoup

In [3]:
# scraping the table and turning it into pandas df
WIKI_URL = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

# The timeout keeps the cell from hanging forever; raise_for_status() stops
# here on an HTTP error instead of trying to parse an error page.
result = requests.get(WIKI_URL, timeout=30)
result.raise_for_status()

soup = BeautifulSoup(result.content, 'html.parser')
tables = soup.find_all('table')
if not tables:
    raise ValueError('No <table> element found at {}'.format(WIKI_URL))

# Only the first table on the page is used. read_html already returns
# DataFrames, so no extra pd.DataFrame() wrapper is needed; the HTML is wrapped
# in StringIO because newer pandas versions deprecate passing literal HTML strings.
table = tables[0]
df = pd.read_html(StringIO(str(table)))[0]

# preview the first rows instead of rendering the whole frame
df.head(10)

Unnamed: 0,Postal code,Borough,Neighborhood
0,M1A,Not assigned,
1,M2A,Not assigned,
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Regent Park / Harbourfront
5,M6A,North York,Lawrence Manor / Lawrence Heights
6,M7A,Downtown Toronto,Queen's Park / Ontario Provincial Government
7,M8A,Not assigned,
8,M9A,Etobicoke,Islington Avenue
9,M1B,Scarborough,Malvern / Rouge


In [4]:
# give the three columns the names used in the rest of the notebook
df.columns = ['PostalCode', 'Borough', 'Neighborhood']

# keep only the rows whose borough is something other than 'Not assigned'
has_borough = df['Borough'].ne('Not assigned')
df = df[has_borough]

In [5]:
# function to replace '/' with comma
def replace_with_comma(text):
    """Replace every ' /' separator in ``text`` with a comma.

    Parameters
    ----------
    text : str
        A string such as ``'Malvern / Rouge'``.

    Returns
    -------
    str
        ``text`` with each ``' /'`` replaced by ``','``, e.g.
        ``'Malvern, Rouge'``. Values that are not strings (for example the
        NaN pandas uses for an empty cell) are returned unchanged rather
        than raising ``AttributeError``.
    """
    # Missing cells arrive as float NaN / None and have no .replace method.
    if not isinstance(text, str):
        return text
    return text.replace(' /', ',')

In [6]:
# build the comma-separated neighborhood strings first
neighborhoods = df['Neighborhood'].apply(replace_with_comma)

# assign() returns a new frame rather than writing into the filtered slice,
# which avoids the SettingWithCopyWarning
df = df.assign(Neighborhood=neighborhoods)

In [7]:
# preview the first rows instead of rendering the whole frame
df.head(10)

Unnamed: 0,PostalCode,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
8,M9A,Etobicoke,Islington Avenue
9,M1B,Scarborough,"Malvern, Rouge"
11,M3B,North York,Don Mills
12,M4B,East York,"Parkview Hill, Woodbine Gardens"
13,M5B,Downtown Toronto,"Garden District, Ryerson"


In [8]:
# count how many Neighborhood entries are missing (True) vs. present (False)
df['Neighborhood'].isna().value_counts()

False    103
Name: Neighborhood, dtype: int64

In [9]:
# (number of rows, number of columns) of the dataframe
df.shape

(103, 3)

## Week-3 / Part-2
This is week 3, part 2 of the capstone project. In this part:
- Since I wasn't able to use the geocoder package, I used the Geospatial_Coordinates.csv file to get the coordinates of each postal code
- I read the csv file into a pandas dataframe and renamed its columns so that it could be merged with the dataframe from Part-1
- I merged the dataframes to obtain the 'postal_code_df' dataframe shown below

In [10]:
# read csv file into pandas dataframe (read_csv already returns a DataFrame)
coordinates = pd.read_csv('Geospatial_Coordinates.csv')

# rename column names so the key column matches df's 'PostalCode'
coordinates.columns = ['PostalCode','Latitude','Longitude']

# merge dataframes: the left join keeps every row of df, and validate raises
# pandas.errors.MergeError if a postal code appears more than once in the
# coordinates file, which would otherwise silently duplicate rows
postal_code_df = df.merge(coordinates, how='left', on='PostalCode', validate='many_to_one')
postal_code_df.shape

(103, 5)

In [11]:
# preview the first rows instead of rendering the whole frame
postal_code_df.head(10)

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
5,M9A,Etobicoke,Islington Avenue,43.667856,-79.532242
6,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
7,M3B,North York,Don Mills,43.745906,-79.352188
8,M4B,East York,"Parkview Hill, Woodbine Gardens",43.706397,-79.309937
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937


## Week-3 / Part-3
This is week 3, part 3 of the capstone project. In this part, the Toronto neighborhoods are plotted on a map and explored using the Foursquare API.

In [27]:
# get credentials from .env file
from dotenv import load_dotenv
import os

load_dotenv()
CLIENT_ID = os.getenv('CLIENT_ID')
CLIENT_SECRET = os.getenv('CLIENT_SECRET')
VERSION = os.getenv('VERSION')

# os.getenv returns None for an unset variable, which would otherwise be
# formatted into the request URL as the literal text 'None'. Stop here with a
# message that names the missing variables (never their values).
_missing = [
    name
    for name, value in (
        ('CLIENT_ID', CLIENT_ID),
        ('CLIENT_SECRET', CLIENT_SECRET),
        ('VERSION', VERSION),
    )
    if not value
]
if _missing:
    raise RuntimeError(
        'Missing environment variable(s): {}. '
        'Define them in the environment or in a .env file.'.format(', '.join(_missing))
    )

__Create a map of Toronto with neighborhoods.__

In [13]:
import folium

# Toronto latitude and longitude values
latitude = 43.651070
longitude = -79.347015

# create map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add one circle marker per row; the popup text is "<neighborhood>,<postal code>"
# (the Borough column is not used for the markers, so it is not iterated)
for lat, lng, neighborhood, postalcode in zip(postal_code_df['Latitude'],
                                              postal_code_df['Longitude'],
                                              postal_code_df['Neighborhood'],
                                              postal_code_df['PostalCode']):
    label = '{},{}'.format(neighborhood, postalcode)
    # separate name for the Popup so `label` stays a plain string
    popup = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=popup,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)

map_toronto

__Let's explore the first neighborhood in our dataframe.__

In [14]:
# look up the name of the neighborhood in the row with index label 0
neighborhood_name = postal_code_df.at[0, 'Neighborhood']
neighborhood_name

'Parkwoods'

In [15]:
# look up the latitude and longitude of the same row (index label 0)
neighborhood_lat = postal_code_df.at[0, 'Latitude']
neighborhood_lon = postal_code_df.at[0, 'Longitude']

# report the coordinates next to the neighborhood's name
message = 'Latitude and longitude values of {} are {}, {}.'.format(
    neighborhood_name, neighborhood_lat, neighborhood_lon)
print(message)


Latitude and longitude values of Parkwoods are 43.7532586, -79.3296565.


__Now, let's get the top 100 venues that are in Parkwoods within a radius of 500 meters.__

In [30]:
LIMIT = 100   # value for the 'limit' query parameter
radius = 500  # value for the 'radius' query parameter

# the values below fill the query-string placeholders in order
query_values = (
    CLIENT_ID,
    CLIENT_SECRET,
    VERSION,
    neighborhood_lat,
    neighborhood_lon,
    radius,
    LIMIT,
)
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(*query_values)

In [31]:
# The timeout keeps the cell from hanging forever; raise_for_status() raises
# requests.HTTPError on a 4xx/5xx response instead of handing an error payload
# to the cells that follow.
response = requests.get(url, timeout=30)
response.raise_for_status()
results = response.json()
results

{'meta': {'code': 200, 'requestId': '5e879457aba297001ba50b10'},
  'headerLocation': 'Parkwoods - Donalda',
  'headerFullLocation': 'Parkwoods - Donalda, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 2,
  'suggestedBounds': {'ne': {'lat': 43.757758604500005,
    'lng': -79.32343823984928},
   'sw': {'lat': 43.7487585955, 'lng': -79.33587476015072}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4e8d9dcdd5fbbbb6b3003c7b',
       'name': 'Brookbanks Park',
       'location': {'address': 'Toronto',
        'lat': 43.751976046055574,
        'lng': -79.33214044722958,
        'labeledLatLngs': [{'label': 'display',
          'lat': 43.751976046055574,
          'lng': -79.33214044722958}],
        'distance': 245,
        'cc': 'CA',
        'c