# Clustering Destinations
## Applied Data Science course - Capstone project

### Installing necessary libraries

In [26]:
import html
import time

import matplotlib.cm as cm
import matplotlib.colors as colors
import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup

In [27]:
!pip install foursquare
import foursquare

Collecting foursquare
  Downloading https://files.pythonhosted.org/packages/16/c7/d51ecf7e06a75741a61ff752e5e010db8794ec0af01da98f42db7ab64ffe/foursquare-1%212020.1.30-py3-none-any.whl
Installing collected packages: foursquare
Successfully installed foursquare-1!2020.1.30


In [5]:
!pip install geocoder
import geocoder

Collecting geocoder
  Downloading https://files.pythonhosted.org/packages/4f/6b/13166c909ad2f2d76b929a4227c952630ebaf0d729f6317eb09cbceccbab/geocoder-1.38.1-py2.py3-none-any.whl (98kB)
     |████████████████████████████████| 102kB 6.9MB/s eta 0:00:01
Collecting ratelim (from geocoder)
  Downloading https://files.pythonhosted.org/packages/f2/98/7e6d147fd16a10a5f821db6e25f192265d6ecca3d82957a4fdd592cad49c/ratelim-0.1.6-py2.py3-none-any.whl
Installing collected packages: ratelim, geocoder
Successfully installed geocoder-1.38.1 ratelim-0.1.6


In [6]:
# Install folium, pinned to version 0.5.0, from the conda-forge channel;
# --yes answers conda's confirmation prompt automatically so the cell runs unattended.
!conda install -c conda-forge folium=0.5.0 --yes
import folium

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/Python36

  added / updated specs: 
    - folium=0.5.0


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    ca-certificates-2020.4.5.1 |       hecc5488_0         146 KB  conda-forge
    altair-4.1.0               |             py_1         614 KB  conda-forge
    branca-0.4.1               |             py_0          26 KB  conda-forge
    vincent-0.4.4              |             py_1          28 KB  conda-forge
    python_abi-3.6             |          1_cp36m           4 KB  conda-forge
    openssl-1.1.1g             |       h516909a_0         2.1 MB  conda-forge
    certifi-2020.4.5.1         |   py36h9f0ad1d_0         151 KB  conda-forge
    folium-0.5.0               |             py_0          45 KB  conda-forge
    ------------------------------------------------------------
                       

### Parsing city data from Wikipedia

In [15]:
WIKI_URL = 'https://en.wikipedia.org/wiki/List_of_cities_by_international_visitors'

# Fetch the page once. The timeout keeps the cell from hanging indefinitely, and
# raise_for_status() stops us from silently parsing an HTTP error page.
response = requests.get(WIKI_URL, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.text, 'html.parser')

# Collect one record per table row and build the DataFrame in a single step.
# (Growing a DataFrame with .append() inside a loop copies the frame on every
# iteration, and DataFrame.append no longer exists in pandas >= 2.0.)
city_records = []
for row in soup.select_one('table.wikitable tbody').find_all('tr'):
    cols = row.find_all('td')
    # Rows without any <td> cells (e.g. a header row) carry no city data.
    if not cols:
        continue
    # The third and fourth <td> cells are read as city and country respectively.
    city_records.append({
        'City': cols[2].get_text().strip(),
        'Country': cols[3].get_text().strip(),
    })

cities = pd.DataFrame(city_records, columns=['City', 'Country'])



In [20]:
# Sanity check: (rows, columns) of the scraped city table.
cities.shape

(139, 2)

### Gathering location data for the cities

In [8]:
# Maximum number of geocoding attempts per city before giving up.
MAX_RETRIES = 10


def _osm_latlng(query):
    """Geocode ``query`` through geocoder's OpenStreetMap provider and return its ``latlng``."""
    return geocoder.osm(query).latlng


def get_lat_lon(city, country, geocode=None, retry_delay=1.0):
    """Look up the coordinates of a city, retrying while the geocoder returns nothing.

    Parameters
    ----------
    city, country : str
        Combined into the query string ``'<city>, <country>'``.
    geocode : callable, optional
        Function that takes the query string and returns a ``[latitude, longitude]``
        pair, or ``None`` / an empty sequence when the lookup fails. Defaults to
        OpenStreetMap via ``geocoder.osm``.
    retry_delay : float, optional
        Seconds to pause between failed attempts, so that retries do not hit the
        geocoding service back-to-back. Use ``0`` to disable the pause.

    Returns
    -------
    dict
        ``{'Latitude': ..., 'Longitude': ...}``; both values are ``None`` when all
        ``MAX_RETRIES`` attempts failed.
    """
    if geocode is None:
        geocode = _osm_latlng

    query = '{}, {}'.format(city, country)
    location = None
    for attempt in range(MAX_RETRIES):
        location = geocode(query)
        if location:
            break
        # Pause before the next attempt, but not after the final one.
        if retry_delay > 0 and attempt < MAX_RETRIES - 1:
            time.sleep(retry_delay)

    # Treat an empty result like None instead of failing on location[0].
    if not location:
        return {'Latitude': None, 'Longitude': None}

    return {'Latitude': location[0], 'Longitude': location[1]}



In [21]:
# Geocode every city once and build the coordinate table in a single step.
# (Growing a DataFrame with .append() inside a loop copies the frame on every
# iteration, and DataFrame.append no longer exists in pandas >= 2.0.)
location_records = [
    get_lat_lon(city, country)
    for city, country in zip(cities['City'], cities['Country'])
]

# Reuse the index of `cities` so the column-wise concat below lines rows up
# by construction rather than by coincidence of matching default indexes.
lat_lon_df = pd.DataFrame(
    location_records,
    index=cities.index,
    columns=['Latitude', 'Longitude'],
)
cities_with_location = pd.concat([cities, lat_lon_df], axis=1, sort=False)

In [29]:
# Shape after joining the Latitude/Longitude columns onto the city table.
cities_with_location.shape

(139, 4)

In [28]:
# Preview the first rows to eyeball the geocoding results.
# NOTE(review): in the recorded output the "Macau, Macau" row has latitude -5.11 and
# longitude -36.63, far from the neighbouring Hong Kong row — this looks like the
# geocoder matched a different place with the same name; verify before clustering.
cities_with_location.head()

Unnamed: 0,City,Country,Latitude,Longitude
0,Hong Kong,Hong Kong,22.279328,114.162813
1,Bangkok,Thailand,13.754253,100.493087
2,London,United Kingdom,51.507322,-0.127647
3,Macau,Macau,-5.113366,-36.634996
4,Singapore,Singapore,1.357107,103.819499


In [24]:
# Drop rows that contain any missing value (get_lat_lon leaves Latitude/Longitude as
# None when every lookup attempt failed). Rebinding the name instead of using
# inplace=True avoids mutating a frame that other cells may already have displayed.
cities_with_location = cities_with_location.dropna()

In [30]:
# Shape after dropna; compare the row count with the earlier shape to see how many
# rows were removed for missing values.
cities_with_location.shape

(139, 4)