# Travel map: Scraping FCO advice

### Import modules

In [96]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import numpy as np

### Get main page and country pages

In [97]:
soup = BeautifulSoup(requests.get('https://www.gov.uk/foreign-travel-advice').text, 'lxml')

In [99]:
countries = [{'name': a.text, 'url': 'https://www.gov.uk' + a.get('href')} for a in 
            soup.findAll('a', {'class':'countries-list__link'})]

In [100]:
len(countries)

226

In [101]:
countries[0]

{'name': 'Afghanistan',
 'url': 'https://www.gov.uk/foreign-travel-advice/afghanistan'}

In [102]:
for country in countries:
    country['entry_requirements'] = country['url'] + '/entry-requirements'

## Get travel corridor list

In [111]:
soup = BeautifulSoup(requests.get('https://www.gov.uk/guidance/coronavirus-covid-19-travel-corridors').text, 'lxml')

corridors = soup.find('h2', {'id': 'countries-and-territories-with-no-self-isolation-requirement-on-arrival-in-england'}).find_next_sibling('ul')

In [112]:
for_checking = []
for country in corridors.findAll('li'):
    country_raw = country.text.split('(')[0].strip()
    for_checking.append(country_raw)
    print(country_raw)
    if len(country.findAll('a')) > 0:
        url = country.find('a').get('href')
        for c in countries:
            if 'url' not in c.keys():
                continue
            if c['url'] == url:
                c['travel_corridor'] = True
    else:
        d = {}
        d['name'] = country_raw
        d['travel_corridor'] = True
        countries.append(d)

Akrotiri and Dhekelia
Andorra
Anguilla
Antigua and Barbuda
Aruba
Australia
Austria
The Bahamas
Barbados
Belgium
Bermuda
Bonaire, St Eustatius and Saba
British Antarctic Territory
British Indian Ocean Territory
British Virgin Islands
Cayman Islands
the Channel Islands
Croatia
Curaçao
Cyprus
Czech Republic
Denmark
Dominica
Estonia
Falkland Islands
Faroe Islands
Fiji
Finland
France
French Polynesia
Gibraltar
Germany
Greece
Greenland
Grenada
Guadeloupe
Hong Kong
Hungary
Iceland
Ireland
the Isle of Man
Italy
Jamaica
Japan
Latvia
Liechtenstein
Lithuania
Macao
Malta
Mauritius
Monaco
Montserrat
the Netherlands
New Caledonia
New Zealand
Norway
Pitcairn, Henderson, Ducie and Oeno Islands
Poland
Reunion
San Marino
Seychelles
Slovakia
Slovenia
South Korea
South Georgia and the South Sandwich Islands
St Barthélemy
St Helena, Ascension and Tristan da Cunha
St Kitts and Nevis
St Lucia
St Pierre and Miquelon
St Vincent and the Grenadines
Switzerland
Taiwan
Trinidad and Tobago
Turkey
Turks and Caicos I

## Check for quarantine on arrival

In [106]:
for country in countries:
    if 'entry_requirements' not in country.keys():
        continue
    soup = BeautifulSoup(requests.get(country['entry_requirements']).text, 'lxml')
    quaran = soup.find('h3', {'id':'quarantine-requirements'})
    try:
        country['quarantine'] = quaran.find_next_sibling('p').text
    except:
        country ['quarantine'] = np.nan

In [114]:
df = pd.DataFrame(countries)
df.travel_corridor = df.travel_corridor.fillna(False)

In [115]:
df.head()

Unnamed: 0,name,url,entry_requirements,quarantine,travel_corridor
0,Afghanistan,https://www.gov.uk/foreign-travel-advice/afgha...,https://www.gov.uk/foreign-travel-advice/afgha...,There are currently no compulsory quarantine r...,False
1,Albania,https://www.gov.uk/foreign-travel-advice/albania,https://www.gov.uk/foreign-travel-advice/alban...,,False
2,Algeria,https://www.gov.uk/foreign-travel-advice/algeria,https://www.gov.uk/foreign-travel-advice/alger...,Current quarantine requirements are a compulso...,False
3,Andorra,https://www.gov.uk/foreign-travel-advice/andorra,https://www.gov.uk/foreign-travel-advice/andor...,,True
4,Angola,https://www.gov.uk/foreign-travel-advice/angola,https://www.gov.uk/foreign-travel-advice/angol...,All air passengers arriving in Angola will nee...,False


In [116]:
len(for_checking)

78

In [110]:
df.to_csv('FCO_scrape.csv')

In [83]:
len(df.dropna(subset=['quarantine']))

105

In [109]:
df.travel_corridor.sum()

75