# Classification of Montreal Metro stations

## Data

In [1]:
import requests # library to handle requests
import pandas as pd # library for data analsysis
import numpy as np # library to handle data in a vectorized manner
import random # library for random number generation

!pip -q install geopy 
from geopy.geocoders import Nominatim # module to convert an address into latitude and longitude values

# libraries for displaying images
from IPython.display import Image 
from IPython.core.display import HTML 
    
# tranforming json file into a pandas dataframe library
from pandas.io.json import json_normalize

# install and import folium library 
!pip -q install folium
import folium

from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
import seaborn as sns

### List of stations and their geographical coordinates
The list of stations will be gathered from **Foursquare City Guide**. to this end, the fist step will be finding geographical coordinate of Montreal, QC:

In [2]:
address = 'Montreal'

geolocator = Nominatim(user_agent="myGeocoder")
location   = geolocator.geocode(address)
latitude   = location.latitude
longitude  = location.longitude

print('The geographical coordinate of Montreal are {}, {}.'.format(latitude, longitude))

The geographical coordinate of Montreal are 45.4972159, -73.6103642.


In [3]:
CLIENT_ID = '3HBDPILEY41WD5VPJ15VOUYEUNGEXCT13VETJH5EKLD53E2W' # your Foursquare ID
CLIENT_SECRET = 'Q0UPMKP3FBLJVLMFUFDJ00KXD42FXNXWZM1KAITX0JEZXEC0' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

In [4]:
LIMIT = 150 # limit of number of venues returned by Foursquare API

radius = 15000 # define radius

categoryId = '4bf58dd8d48988d1fd931735' #  Foursquare categoryId for metro stations (see: https://developer.foursquare.com/docs/build-with-foursquare/categories/)

# create URL
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&categoryId={}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    latitude, 
    longitude,
    categoryId,
    radius, 
    LIMIT)


# make the GET request

results = requests.get(url).json()

#### Basic Cleanup

In [5]:
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [6]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# Rename headers
nearby_venues.rename(columns={"venue.name": "Name", "venue.location.lat": "Latitude", "venue.location.lng":"Longitude"},inplace = True)

stations_venues_df = nearby_venues[nearby_venues["Name"].str.startswith("STM Station")]
stations_venues_df.reset_index(drop=True, inplace=True)

# Removing 'STM' in front of name of sations for clearity
stations_venues_df['Name'] = stations_venues_df['Name'].str.replace('STM ','')

# The DataFrame will be:
stations_venues_df.head(10)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Unnamed: 0,Name,Latitude,Longitude
0,Station Guy-Concordia,45.494677,-73.580909
1,Station McGill,45.504365,-73.571575
2,Station Saint-Michel,45.559425,-73.599749
3,Station Université-de-Montréal,45.503466,-73.618103
4,Station de la Côte-des-Neiges,45.496377,-73.622564
5,Station Bonaventure,45.498205,-73.566667
6,Station Édouard-Montpetit,45.509822,-73.612816
7,Station de Vendôme,45.473825,-73.604328
8,Station de la Place-Saint-Henri,45.477212,-73.586523
9,Station Mont-Royal,45.52456,-73.581798


### Visulizing the locations of stations

In [7]:
# create map of Montreal using latitude and longitude values
map_montreal_metro = folium.Map(location=[latitude, longitude], zoom_start=12)

# add markers to map
for lat, lng, name in zip(stations_venues_df['Latitude'], stations_venues_df['Longitude'], stations_venues_df['Name']):
    label = '{}'.format(name)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=7,
        popup=label,
        color='#EF161E',
        fill=True,
        fill_color='#FFD702',
        fill_opacity=0.7,
        parse_html=False).add_to(map_montreal_metro)  

    
map_montreal_metro

In [8]:
map_montreal_metro.save('ini_map\index.html')

### Venues and categories
Foursquare API will be called again to explore venue categories surrounding each station. Venues can be categorized as residential, professional, shopping or leisure.

In [9]:
# create URL
categories_url = 'https://api.foursquare.com/v2/venues/categories?client_id={}&client_secret={}&v={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION)


# make the GET request

results = requests.get(categories_url).json()

len(results['response']['categories'])

10

In [10]:
categories_list = []
# Let's print only the top-level categories and their IDs and also add them to categories_list

def print_categories(categories, level=0, max_level=0):    
    if level>max_level: return
    out = ''
    out += '-'*level
    for category in categories:
        print(out + category['name'] + ' (' + category['id'] + ')')
        print_categories(category['categories'], level+1, max_level)
        categories_list.append((category['name'], category['id']))
        
print_categories(results['response']['categories'], 0, 0)

Arts & Entertainment (4d4b7104d754a06370d81259)
College & University (4d4b7105d754a06372d81259)
Event (4d4b7105d754a06373d81259)
Food (4d4b7105d754a06374d81259)
Nightlife Spot (4d4b7105d754a06376d81259)
Outdoors & Recreation (4d4b7105d754a06377d81259)
Professional & Other Places (4d4b7105d754a06375d81259)
Residence (4e67e38e036454776db1fb3a)
Shop & Service (4d4b7105d754a06378d81259)
Travel & Transport (4d4b7105d754a06379d81259)
