## Part 1
### Import Libraries and scrape data from wikipedia url.

In [1]:
import pandas as pd
import requests
from bs4 import BeautifulSoup

In [2]:
url ='https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
source = requests.get(url).text

In [3]:
soup = BeautifulSoup(source, 'xml')

In [4]:
table=soup.find('table')

In [5]:
columns =['Postalcode', 'Borough', 'Neighbourhood']
df = pd.DataFrame(columns=columns)

In [6]:
for tr_cell in table.find_all('tr'):
    row_data=[]
    for td_cell in tr_cell.find_all('td'):
        row_data.append(td_cell.text.strip())
    if len(row_data)==3:
        df.loc[len(df)] = row_data

In [7]:
df.head()

Unnamed: 0,Postalcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


### Clean the data.

In [8]:
df = df[~df['Borough'].isnull()]
df.drop(df[df.Borough == 'Not assigned'].index, inplace=True)
df.reset_index(drop=True, inplace=True)
df.head()

Unnamed: 0,Postalcode,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


### Rename column "Neighbourhood" to "Neighborhood".

In [9]:
df.rename(columns={'Neighbourhood':'Neighborhood'}, inplace=True)
df.head()

Unnamed: 0,Postalcode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


In [10]:
df.shape

(103, 3)

## Part 3
### Get the latitude and longitude from CSV file.

In [11]:
toro_geo = pd.read_csv('http://cocl.us/Geospatial_data')

In [12]:
toro_geo.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


### Rename column "Postal Code" to "Postalcode".

In [13]:
toro_geo.rename(columns={'Postal Code':'Postalcode'}, inplace=True)
geo_merge = pd.merge(toro_geo, df, on='Postalcode')

### Merged Latitude and Longitude to dataframe. "Postalcode" was removed.

In [14]:
toronto_df = geo_merge[['Borough', 'Neighborhood', 'Latitude', 'Longitude']]
toronto_df.head()

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
2,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,Scarborough,Woburn,43.770992,-79.216917
4,Scarborough,Cedarbrae,43.773136,-79.239476


## Part 2
### Import libaries and use Foursquare API to segement and cluster neighborhoods.

In [15]:
import requests
import numpy as np

import pandas as pd
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import random

!conda install -c conda-forge geopy --yes 
from geopy.geocoders import Nominatim

from IPython.display import Image
from IPython.core.display import HTML

import json
from pandas.io.json import json_normalize

import matplotlib.cm as cm
import matplotlib.colors as colors
import matplotlib.pyplot as plt
%matplotlib inline

from sklearn.cluster import KMeans
from sklearn.datasets.samples_generator import make_blobs

!conda install -c conda-forge folium=0.5.0 --yes
import folium

print('Libraries imported.')

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/Python36

  added / updated specs: 
    - geopy


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    geopy-2.0.0                |     pyh9f0ad1d_0          63 KB  conda-forge
    python_abi-3.6             |          1_cp36m           4 KB  conda-forge
    certifi-2020.6.20          |   py36h9f0ad1d_0         151 KB  conda-forge
    openssl-1.1.1g             |       h516909a_0         2.1 MB  conda-forge
    geographiclib-1.50         |             py_0          34 KB  conda-forge
    ca-certificates-2020.6.20  |       hecda079_0         145 KB  conda-forge
    ------------------------------------------------------------
                                           Total:         2.5 MB

The following NEW packages will be INSTALLED:

    geographiclib:   1.50-py_0          conda-forge
    geopy:           

### Define Foursquare Credentials and Version.

In [16]:
CLIENT_ID = 'DPR2TRJDTRBYCTN4YIRLNJ4F3GRM3IHAWXWCB4MQGXDU2H5E'
CLIENT_SECRET = 'ZVGOPN1OCEFCGB5RDDUQXBGE11AXDT2OHB0VNGFUAZUO3COZ'
VERSION = '20200701'
LIMIT = 30
print('Your credentials:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET: ' + CLIENT_SECRET)

Your credentials:
CLIENT_ID: DPR2TRJDTRBYCTN4YIRLNJ4F3GRM3IHAWXWCB4MQGXDU2H5E
CLIENT_SECRET: ZVGOPN1OCEFCGB5RDDUQXBGE11AXDT2OHB0VNGFUAZUO3COZ


### Use Foursquare to get the latitude and longitude of Toronto.

In [17]:
address = 'Toronto, ON'

geolocator = Nominatim(user_agent="Toronto_agent")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geographical coordiate of Toronto are {}, {}.'.format(latitude, longitude))

The geographical coordiate of Toronto are 43.6534817, -79.3839347.


In [18]:
toronto_df

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
2,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,Scarborough,Woburn,43.770992,-79.216917
4,Scarborough,Cedarbrae,43.773136,-79.239476
5,Scarborough,Scarborough Village,43.744734,-79.239476
6,Scarborough,"Kennedy Park, Ionview, East Birchmount Park",43.727929,-79.262029
7,Scarborough,"Golden Mile, Clairlea, Oakridge",43.711112,-79.284577
8,Scarborough,"Cliffside, Cliffcrest, Scarborough Village West",43.716316,-79.239476
9,Scarborough,"Birch Cliff, Cliffside West",43.692657,-79.264848


### Folium map of Tornoto.

In [19]:
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

for lat, lng, borough, neighborhood in zip(
        toronto_df['Latitude'], toronto_df['Longitude'], toronto_df['Borough'], toronto_df['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

### Define Foursquare Credentials and Version.

In [20]:
CLIENT_ID = 'DPR2TRJDTRBYCTN4YIRLNJ4F3GRM3IHAWXWCB4MQGXDU2H5E'
CLIENT_SECRET = 'ZVGOPN1OCEFCGB5RDDUQXBGE11AXDT2OHB0VNGFUAZUO3COZ'
VERSION = '20200629'
LIMIT =30
print('Your credentials:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET: ' + CLIENT_SECRET)

Your credentials:
CLIENT_ID: DPR2TRJDTRBYCTN4YIRLNJ4F3GRM3IHAWXWCB4MQGXDU2H5E
CLIENT_SECRET: ZVGOPN1OCEFCGB5RDDUQXBGE11AXDT2OHB0VNGFUAZUO3COZ


### Explore the 75th neighborhood in the dataframe, "Christie".

In [21]:
toronto_df.loc[75, 'Neighborhood']

'Christie'

### Get "Christie" latitude and longitude values.

In [22]:
neighborhood_latitude = toronto_df.loc[75, 'Latitude']
neighborhood_longitude = toronto_df.loc[75, 'Longitude']

neighborhood_name = toronto_df.loc[75, 'Neighborhood']

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name,
                                                       neighborhood_latitude,
                                                       neighborhood_longitude))    

Latitude and longitude values of Christie are 43.669542, -79.4225637.


In [23]:
latitude = 43.669542

longitude = -79.4225637

### Create the GET request URL. Then get the top 100 venues that are in "Christie" within a radius of 500 meters.

In [24]:
LIMIT = 100

radius = 500

url = 'https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&ll={},{}&v={}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, latitude, longitude, VERSION, radius, LIMIT)
url

'https://api.foursquare.com/v2/venues/explore?client_id=DPR2TRJDTRBYCTN4YIRLNJ4F3GRM3IHAWXWCB4MQGXDU2H5E&client_secret=ZVGOPN1OCEFCGB5RDDUQXBGE11AXDT2OHB0VNGFUAZUO3COZ&ll=43.669542,-79.4225637&v=20200629&radius=500&limit=100'

### Send the GET request to examine results.

In [25]:
results = requests.get(url).json()

In [26]:
results

{'meta': {'code': 200, 'requestId': '5efc8d236e5c0a46488afe98'},
 'response': {'suggestedFilters': {'header': 'Tap to show:',
   'filters': [{'name': 'Open now', 'key': 'openNow'}]},
  'headerLocation': 'Christie Pits',
  'headerFullLocation': 'Christie Pits, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 18,
  'suggestedBounds': {'ne': {'lat': 43.6740420045, 'lng': -79.41635411972038},
   'sw': {'lat': 43.6650419955, 'lng': -79.42877328027961}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4adcfd7cf964a5203e6321e3',
       'name': 'Fiesta Farms',
       'location': {'address': '200 Christie St',
        'crossStreet': 'at Essex St',
        'lat': 43.66847077052224,
        'lng': -79.42048512748114,
        'labeledLatLngs': [{'label': 'd

### Function that extracts the category of the venues.

In [27]:
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
    
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

### Clean the json and structure into a pandas dataframe.

In [28]:
venues = results['response']['groups'][0]['items']

nearby_venues = json_normalize(venues)

filtered_columns = ['venue.name','venue.categories', 'venue.location.lat', 'venue.location.lng'] 
nearby_venues =nearby_venues.loc[:, filtered_columns]


nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)


nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Fiesta Farms,Grocery Store,43.668471,-79.420485
1,Contra Cafe,Café,43.669107,-79.426105
2,Starbucks,Coffee Shop,43.67153,-79.4214
3,Vinny’s Panini,Italian Restaurant,43.670679,-79.426148
4,Scout and Cash Caffe,Café,43.66736,-79.419938


### Print the number of venues returned in "Christie" by Foursquare.

In [29]:
print('{} venues were returned for "Christie" by Foursquare.'.format(nearby_venues.shape[0]))

18 venues were returned for "Christie" by Foursquare.


In [30]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
        
    url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={}&radius={}&limit={}'.format(
        CLIENT_ID,
        CLIENT_SECRET,
        VERSION,
        lat,
        lng,
        radius,
        LIMIT) 
    
    results = requests.get(url).json()['response']['groups'][0]['items']
    
    venues_list.append([(
        name,
        lat,
        lng,
        v['venue']['name'],
        v['venue']['location']['lat'],
        v['venue']['location']['lng'],
        v['venue']['catagories'][0]['name']) for v in results])
    
    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood',
                            'Neighborhood Latitude',
                            'Neighborhood Longitude',
                            'Venue',
                            'Venue Latitude',
                            'Venue Longitude',
                            'Venue Category']
   
    return(nearby_venues)

### List of nearby_venues near "Christie".

In [31]:
nearby_venues

Unnamed: 0,name,categories,lat,lng
0,Fiesta Farms,Grocery Store,43.668471,-79.420485
1,Contra Cafe,Café,43.669107,-79.426105
2,Starbucks,Coffee Shop,43.67153,-79.4214
3,Vinny’s Panini,Italian Restaurant,43.670679,-79.426148
4,Scout and Cash Caffe,Café,43.66736,-79.419938
5,Universal Grill,Diner,43.67055,-79.426541
6,Actinolite,Restaurant,43.667858,-79.428054
7,Stubbe Chocolates,Candy Store,43.671566,-79.421289
8,Faema Caffe,Café,43.671046,-79.419297
9,Loblaws,Grocery Store,43.671657,-79.421364


### List how many different catagories were returned from "Christie" using the groupby method.

In [32]:
nearby_venues.groupby('categories').count()

Unnamed: 0_level_0,name,lat,lng
categories,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Athletics & Sports,1,1,1
Baby Store,1,1,1
Café,3,3,3
Candy Store,1,1,1
Coffee Shop,2,2,2
Diner,1,1,1
Grocery Store,4,4,4
Italian Restaurant,1,1,1
Nightclub,1,1,1
Park,2,2,2


### Folium map showing the venues around "Christie".

In [33]:
venues_map = folium.Map(location=[latitude, longitude], zoom_start=16)

folium.features.CircleMarker(
    [latitude, longitude],
    radius=10,
    color='red',
    popup='Christie',
    fill = True,
    fill_color = 'red',
    fill_opacity = 0.6
).add_to(venues_map)

for lat, lng, label in zip(nearby_venues.lat, nearby_venues.lng, nearby_venues.name):
    folium.features.CircleMarker(
    [lat, lng],
    radius=5,
    color='blue',
    popup=label,
    fill = True,
    fill_color='blue',
    fill_opacity=0.6
    ).add_to(venues_map)

In [34]:
venues_map

In [35]:
nearby_venues.shape

(18, 4)