<h1>Finding a NW European Business Destination</h1>
<h2>Part 1 - create df, wrangle data and create city clusters</h2>
<h3>Import the numpy, pandas and requests libraries</h3>

In [1]:
# Import libraries
import pandas as pd
import numpy as np
import requests

import json # library to handle JSON files
from pandas import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# Import k-means from clustering stage
from sklearn.cluster import KMeans


pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

<h3>Install software packages on the server and import libraries</h3>

In [2]:
# install folium and geop packages and import folium rendering library
!conda install -c conda-forge folium=0.5.0 --yes
!conda install -c conda-forge geopy --yes
import folium

# install beautiful soup 4 and lxml packages and import the beautiful soup library
!conda install -c conda-forge beautifulsoup4 --yes
!conda install -c conda-forge lxml --yes
from bs4 import BeautifulSoup

Collecting package metadata (current_repodata.json): done
Solving environment: failed with initial frozen solve. Retrying with flexible solve.
Collecting package metadata (repodata.json): done
Solving environment: done

## Package Plan ##

  environment location: /home/jupyterlab/conda/envs/python

  added / updated specs:
    - folium=0.5.0


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    altair-4.1.0               |             py_1         614 KB  conda-forge
    branca-0.4.1               |             py_0          26 KB  conda-forge
    brotlipy-0.7.0             |py36h8c4c3a4_1000         346 KB  conda-forge
    chardet-3.0.4              |py36h9f0ad1d_1006         188 KB  conda-forge
    cryptography-2.9.2         |   py36h45558ae_0         613 KB  conda-forge
    folium-0.5.0               |             py_0          45 KB  conda-forge
    pandas-1.0.3               |   py36h83

In [3]:
CLIENT_ID = 'ZSOHGJWMB2AZHOIVGX1H14EL5ACXAYVRPQBWJ3ZNFMHJWR50' # your Foursquare ID
CLIENT_SECRET = 'SBK4AO5VRB1PL5VDOUNP0BHF3F1J3JGVUNHR1J4P2QIIWPM1' # your Foursquare Secret
VERSION = '20180604'
radius = 3000

In [None]:
# install geocoder package and import the geocoder library
!conda install -c conda-forge geocoder --yes
import geocoder

<h3>Create a function that calls the Foursquare API to generate a list of venues situated within a <b>3000m</b> radius of each city center.</h3>

In [5]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

# function that extracts the category of the venue for each of the target Cities
def getNearbyVenues(city, latitudes, longitudes, radius=radius):
    
    venues_list=[]
    for city, lat, lng in zip(city, latitudes, longitudes):
        print(city)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            city, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['City', 
                  'City Latitude', 
                  'City Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

<h3>Create a cities list and call on the geopy geocoder to populate latitude and longitudes for each city</h3>

In [6]:
from geopy.geocoders import Nominatim

# create a tuple setting out the target cities that we want to compare
#cities = ("Helsinki", "Oslo", "Copenhagen", "Amsterdam", "Stockholm")


# set the latitude and longitude for the business districts in each of the cities that we are comparing
cities = {
    1 : {
        "country" : "Finland",
        "city" : "Helsinki",
        "latitude" : 60.1675,
        "longitude" : 24.9311
    },
    2 : {
        "country" : "Norway",
        "city" : "Oslo",
        "latitude" : 60.472,
        "longitude" : 8.4689
    },
    3 : {
        "country" : "Denmark",
        "city" : "Copenhagen",
        "latitude" : 55.6333,
        "longitude" : 12.5782
    },
    4 : {
        "country" : "Netherlands",
        "city" : "Amsterdam",
        "latitude" : 52.3383,
        "longitude" : 4.8729
    },
    5 : {
        "country" : "Sweden",
        "city" : "Stockholm",
        "latitude" : 59.3293,
        "longitude" : 18.0686
    }
}
for city in cities:
    address = '{}, {}'.format(cities[city]['city'],cities[city]['country'])
    geolocator = Nominatim(user_agent="explorer")
    location = geolocator.geocode(address)
    latitude = location.latitude
    longitude = location.longitude
    city_venues = getNearbyVenues(city=cities[city]['city'],
                                   latitudes=cities[city]['latitude'],
                                   longitudes=cities[city]['longitude']
                                  )
    print('The geographical coordinates of {} are {}, {}.'.format(cities[city]['city'], latitude, longitude))



TypeError: zip argument #2 must support iteration

<h3>Call the function and create a venues list for each city</h3>

In [10]:
city = 'Stockholm'
lat = 59.3293
lon = 18.0686
city_venues = getNearbyVenues(city=city,
                                   latitudes=lat,
                                   longitudes=lon
                                  )

TypeError: zip argument #2 must support iteration

Create a dataframe (neighborhoods) and set the column names as <b>PostCode</b>, <b>Borough</b> and <b>Neighborhood</b>

In [None]:
# Define list of column names to be used in the neighborhoods dataframe
column_names = ['PostCode', 'Borough', 'Neighborhood'] 

# Instantiate the dataframe and set the column name
neighborhoods = pd.DataFrame(columns=column_names)

Import beautiful soup library and assign the target Wikipedia url. Scrape the post code data from the Wikipedia table.

In [None]:
# Set the target url and extract the html text from the wiki url
wiki_url = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M').text

# Create the Beautifulsoup object and assign to variable soup
soup = BeautifulSoup(wiki_url,'lxml')

# Find the post code table (wikitable) and assign only those elements belonging to the table to a variable - pcode_tbl
pcode_tbl = soup.find('table', class_= 'wikitable')

Define city parameters for each of the NW European cities that we are trying to compare

In [5]:
# import folium rendering library
import folium

In [None]:
from geopy.geocoders import Nominatim

for city in cities:
    address = '{}, {}'.format(cities[city]['city'],cities[city]['country'])
    geolocator = Nominatim(user_agent="explorer")
    location = geolocator.geocode(address)
    latitude = location.latitude
    longitude = location.longitude
    print('The geographical coordinates of {} are {}, {}.'.format(cities[city]['city'], latitude, longitude))
    # create map using the latitude and logitude values

In [76]:
for city in cities:
    print('{}, {}'.format(cities[city]['city'],cities[city]['country']))
    

Helsinki, Finland
Oslo, Norway
Copenhagen, Denmark
Amsterdam, Netherlands
Stockholm, Sweden


Create a function that calls the Foursquare API to generate a list of venues situated within a 3000m radius of each city CBD.

In [None]:
https://jupyterlab-33.labs.cognitiveclass.ai/user/edenpaki/lab#Call-the-function-and-create-a-venues-list-for-each-city

<h3>Call the function and create a venues list for each city</h3>

In [None]:
cities = {
    1 : {
        "country" : "Finland",
        "city" : "Helsinki",
        "latitude" : 60.1675,
        "longitude" : 24.9311
    },
    2 : {
        "country" : "Norway",
        "city" : "Oslo",
        "latitude" : 60.472,
        "longitude" : 8.4689
    },
    3 : {
        "country" : "Denmark",
        "city" : "Copenhagen",
        "latitude" : 55.6333,
        "longitude" : 12.5782
    },
    4 : {
        "country" : "Netherlands",
        "city" : "Amsterdam",
        "latitude" : 52.3383,
        "longitude" : 4.8729
    },
    5 : {
        "country" : "Sweden",
        "city" : "Stockholm",
        "latitude" : 59.3293,
        "longitude" : 18.0686
    }
}

In [None]:
map_city = folium.Map(location=[latitude, longitude], zoom_start=14)
map_city

In [None]:
# Import folium rendering library
import folium

# Create map using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# Add markers to map
for lat, lng, borough, neighborhood in zip(toronto['Latitude'], toronto['Longitude'], toronto['Borough'], toronto['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto