# RSS: Department of State, Travel Advisory Board

## I. Acquisition

In [200]:
import pandas as pd
import requests
import feedparser as fp

In [201]:
url = 'https://travel.state.gov/_res/rss/TAsTWs.xml'
dos_ta = fp.parse(url)
type(dos_ta) #'fp.parse' returns a dictionary

feedparser.FeedParserDict

In [202]:
#We use '.keys()' to find out the dict keys.
dos_ta.keys()

dict_keys(['feed', 'entries', 'bozo', 'headers', 'etag', 'updated', 'updated_parsed', 'href', 'status', 'encoding', 'version', 'namespaces'])

In [203]:
dos_ta_df = pd.DataFrame(dos_ta.entries)

### Saving the data into a local file 

In [204]:
dos_ta_df.to_json('./data/dos_ta_raw.json')

## II. Wrangling 

In [205]:
dos_score = pd.read_json('./data/dos_ta_raw.json')
dos_score.head()

Unnamed: 0,dc_identifier,guidislink,id,link,links,published,published_parsed,summary,summary_detail,tags,title,title_detail
0,"BT,advisory",False,http://travel.state.gov/content/travel/en/trav...,http://travel.state.gov/content/travel/en/trav...,"[{'rel': 'alternate', 'type': 'text/html', 'hr...","Fri, 25 Jan 2019","[2019, 1, 25, 0, 0, 0, 4, 25, 0]",<p>Exercise normal precautions in Bhutan.&nbsp...,"{'type': 'text/html', 'language': None, 'base'...",[{'term': 'Level 1: Exercise Normal Precaution...,Bhutan - Level 1: Exercise Normal Precautions,"{'type': 'text/plain', 'language': None, 'base..."
1,"SG,advisory",False,http://travel.state.gov/content/travel/en/trav...,http://travel.state.gov/content/travel/en/trav...,"[{'rel': 'alternate', 'type': 'text/html', 'hr...","Thu, 25 Apr 2019","[2019, 4, 25, 0, 0, 0, 3, 115, 0]",<p>Exercise normal precautions in Senegal. Som...,"{'type': 'text/html', 'language': None, 'base'...",[{'term': 'Level 1: Exercise Normal Precaution...,Senegal - Level 1: Exercise Normal Precautions,"{'type': 'text/plain', 'language': None, 'base..."
10,"RW,advisory",False,http://travel.state.gov/content/travel/en/trav...,http://travel.state.gov/content/travel/en/trav...,"[{'rel': 'alternate', 'type': 'text/html', 'hr...","Wed, 11 Dec 2019","[2019, 12, 11, 0, 0, 0, 2, 345, 0]",<p>Exercise normal precautions in Rwanda. Some...,"{'type': 'text/html', 'language': None, 'base'...",[{'term': 'Level 1: Exercise Normal Precaution...,Rwanda - Level 1: Exercise Normal Precautions,"{'type': 'text/plain', 'language': None, 'base..."
100,"TP,advisory",False,http://travel.state.gov/content/travel/en/trav...,http://travel.state.gov/content/travel/en/trav...,"[{'rel': 'alternate', 'type': 'text/html', 'hr...","Thu, 10 Jan 2019","[2019, 1, 10, 0, 0, 0, 3, 10, 0]",<p>Exercise normal precautions in Sao Tome and...,"{'type': 'text/html', 'language': None, 'base'...",[{'term': 'Level 1: Exercise Normal Precaution...,Sao Tome and Principe - Level 1: Exercise Norm...,"{'type': 'text/plain', 'language': None, 'base..."
101,"AY,advisory",False,http://travel.state.gov/content/travel/en/trav...,http://travel.state.gov/content/travel/en/trav...,"[{'rel': 'alternate', 'type': 'text/html', 'hr...","Thu, 10 Jan 2019","[2019, 1, 10, 0, 0, 0, 3, 10, 0]",<p>Exercise increased caution in Antarctica du...,"{'type': 'text/html', 'language': None, 'base'...",[{'term': 'Level 2: Exercise Increased Caution...,Antarctica - Level 2: Exercise Increased Caution,"{'type': 'text/plain', 'language': None, 'base..."


### Dropping features

In [206]:
column_to_drop = ['dc_identifier','guidislink','id','link','summary','summary_detail','title_detail']
dos_score = dos_score.drop(column_to_drop, axis=1)

In [207]:
dos_score.head()

Unnamed: 0,links,published,published_parsed,tags,title
0,"[{'rel': 'alternate', 'type': 'text/html', 'hr...","Fri, 25 Jan 2019","[2019, 1, 25, 0, 0, 0, 4, 25, 0]",[{'term': 'Level 1: Exercise Normal Precaution...,Bhutan - Level 1: Exercise Normal Precautions
1,"[{'rel': 'alternate', 'type': 'text/html', 'hr...","Thu, 25 Apr 2019","[2019, 4, 25, 0, 0, 0, 3, 115, 0]",[{'term': 'Level 1: Exercise Normal Precaution...,Senegal - Level 1: Exercise Normal Precautions
10,"[{'rel': 'alternate', 'type': 'text/html', 'hr...","Wed, 11 Dec 2019","[2019, 12, 11, 0, 0, 0, 2, 345, 0]",[{'term': 'Level 1: Exercise Normal Precaution...,Rwanda - Level 1: Exercise Normal Precautions
100,"[{'rel': 'alternate', 'type': 'text/html', 'hr...","Thu, 10 Jan 2019","[2019, 1, 10, 0, 0, 0, 3, 10, 0]",[{'term': 'Level 1: Exercise Normal Precaution...,Sao Tome and Principe - Level 1: Exercise Norm...
101,"[{'rel': 'alternate', 'type': 'text/html', 'hr...","Thu, 10 Jan 2019","[2019, 1, 10, 0, 0, 0, 3, 10, 0]",[{'term': 'Level 2: Exercise Increased Caution...,Antarctica - Level 2: Exercise Increased Caution


### Cleaning the data (Source)

In [208]:
dos_score.links[0][0]['href']

'http://travel.state.gov/content/travel/en/traveladvisories/traveladvisories/bhutan-travel-advisory.html'

In [209]:
dos_score['Source'] = [x[0]['href'] for x in dos_score.links]
dos_score.head()

Unnamed: 0,links,published,published_parsed,tags,title,Source
0,"[{'rel': 'alternate', 'type': 'text/html', 'hr...","Fri, 25 Jan 2019","[2019, 1, 25, 0, 0, 0, 4, 25, 0]",[{'term': 'Level 1: Exercise Normal Precaution...,Bhutan - Level 1: Exercise Normal Precautions,http://travel.state.gov/content/travel/en/trav...
1,"[{'rel': 'alternate', 'type': 'text/html', 'hr...","Thu, 25 Apr 2019","[2019, 4, 25, 0, 0, 0, 3, 115, 0]",[{'term': 'Level 1: Exercise Normal Precaution...,Senegal - Level 1: Exercise Normal Precautions,http://travel.state.gov/content/travel/en/trav...
10,"[{'rel': 'alternate', 'type': 'text/html', 'hr...","Wed, 11 Dec 2019","[2019, 12, 11, 0, 0, 0, 2, 345, 0]",[{'term': 'Level 1: Exercise Normal Precaution...,Rwanda - Level 1: Exercise Normal Precautions,http://travel.state.gov/content/travel/en/trav...
100,"[{'rel': 'alternate', 'type': 'text/html', 'hr...","Thu, 10 Jan 2019","[2019, 1, 10, 0, 0, 0, 3, 10, 0]",[{'term': 'Level 1: Exercise Normal Precaution...,Sao Tome and Principe - Level 1: Exercise Norm...,http://travel.state.gov/content/travel/en/trav...
101,"[{'rel': 'alternate', 'type': 'text/html', 'hr...","Thu, 10 Jan 2019","[2019, 1, 10, 0, 0, 0, 3, 10, 0]",[{'term': 'Level 2: Exercise Increased Caution...,Antarctica - Level 2: Exercise Increased Caution,http://travel.state.gov/content/travel/en/trav...


### Cleaning the data (ISO)

In [210]:
dos_score.tags[0][1]['term']

'BT'

In [211]:
dos_score['ISO'] = [x[1]['term'] for x in dos_score.tags]
dos_score.head()

Unnamed: 0,links,published,published_parsed,tags,title,Source,ISO
0,"[{'rel': 'alternate', 'type': 'text/html', 'hr...","Fri, 25 Jan 2019","[2019, 1, 25, 0, 0, 0, 4, 25, 0]",[{'term': 'Level 1: Exercise Normal Precaution...,Bhutan - Level 1: Exercise Normal Precautions,http://travel.state.gov/content/travel/en/trav...,BT
1,"[{'rel': 'alternate', 'type': 'text/html', 'hr...","Thu, 25 Apr 2019","[2019, 4, 25, 0, 0, 0, 3, 115, 0]",[{'term': 'Level 1: Exercise Normal Precaution...,Senegal - Level 1: Exercise Normal Precautions,http://travel.state.gov/content/travel/en/trav...,SG
10,"[{'rel': 'alternate', 'type': 'text/html', 'hr...","Wed, 11 Dec 2019","[2019, 12, 11, 0, 0, 0, 2, 345, 0]",[{'term': 'Level 1: Exercise Normal Precaution...,Rwanda - Level 1: Exercise Normal Precautions,http://travel.state.gov/content/travel/en/trav...,RW
100,"[{'rel': 'alternate', 'type': 'text/html', 'hr...","Thu, 10 Jan 2019","[2019, 1, 10, 0, 0, 0, 3, 10, 0]",[{'term': 'Level 1: Exercise Normal Precaution...,Sao Tome and Principe - Level 1: Exercise Norm...,http://travel.state.gov/content/travel/en/trav...,TP
101,"[{'rel': 'alternate', 'type': 'text/html', 'hr...","Thu, 10 Jan 2019","[2019, 1, 10, 0, 0, 0, 3, 10, 0]",[{'term': 'Level 2: Exercise Increased Caution...,Antarctica - Level 2: Exercise Increased Caution,http://travel.state.gov/content/travel/en/trav...,AY


### Cleaning the data (Name)

In [212]:
import re

In [213]:
name = re.match(r'^.*?(?= -)', dos_score.title[0])
name.group()

'Bhutan'

In [214]:
def only_name(line):
    name = re.search(r'^.*?(?= -)', line)
    if name :
      return(name.group())

In [215]:
dos_score['Name'] = dos_score['title'].apply(only_name)
dos_score.head()

Unnamed: 0,links,published,published_parsed,tags,title,Source,ISO,Name
0,"[{'rel': 'alternate', 'type': 'text/html', 'hr...","Fri, 25 Jan 2019","[2019, 1, 25, 0, 0, 0, 4, 25, 0]",[{'term': 'Level 1: Exercise Normal Precaution...,Bhutan - Level 1: Exercise Normal Precautions,http://travel.state.gov/content/travel/en/trav...,BT,Bhutan
1,"[{'rel': 'alternate', 'type': 'text/html', 'hr...","Thu, 25 Apr 2019","[2019, 4, 25, 0, 0, 0, 3, 115, 0]",[{'term': 'Level 1: Exercise Normal Precaution...,Senegal - Level 1: Exercise Normal Precautions,http://travel.state.gov/content/travel/en/trav...,SG,Senegal
10,"[{'rel': 'alternate', 'type': 'text/html', 'hr...","Wed, 11 Dec 2019","[2019, 12, 11, 0, 0, 0, 2, 345, 0]",[{'term': 'Level 1: Exercise Normal Precaution...,Rwanda - Level 1: Exercise Normal Precautions,http://travel.state.gov/content/travel/en/trav...,RW,Rwanda
100,"[{'rel': 'alternate', 'type': 'text/html', 'hr...","Thu, 10 Jan 2019","[2019, 1, 10, 0, 0, 0, 3, 10, 0]",[{'term': 'Level 1: Exercise Normal Precaution...,Sao Tome and Principe - Level 1: Exercise Norm...,http://travel.state.gov/content/travel/en/trav...,TP,Sao Tome and Principe
101,"[{'rel': 'alternate', 'type': 'text/html', 'hr...","Thu, 10 Jan 2019","[2019, 1, 10, 0, 0, 0, 3, 10, 0]",[{'term': 'Level 2: Exercise Increased Caution...,Antarctica - Level 2: Exercise Increased Caution,http://travel.state.gov/content/travel/en/trav...,AY,Antarctica


### Cleaning the data (Last Update)

In [216]:
dos_score.published_parsed[0][:3]

[2019, 1, 25]

### Cleaning the data (Score)

In [217]:
score = re.search(r'\d(?=:)',dos_score.title[0])
score.group()

'1'

In [218]:
def only_score(line):
    score = re.search(r'\d(?=:)', line)
    if score :
      return(score.group())

In [219]:
dos_score['Score'] = dos_score['title'].apply(only_score)
dos_score.head()

Unnamed: 0,links,published,published_parsed,tags,title,Source,ISO,Name,Score
0,"[{'rel': 'alternate', 'type': 'text/html', 'hr...","Fri, 25 Jan 2019","[2019, 1, 25, 0, 0, 0, 4, 25, 0]",[{'term': 'Level 1: Exercise Normal Precaution...,Bhutan - Level 1: Exercise Normal Precautions,http://travel.state.gov/content/travel/en/trav...,BT,Bhutan,1
1,"[{'rel': 'alternate', 'type': 'text/html', 'hr...","Thu, 25 Apr 2019","[2019, 4, 25, 0, 0, 0, 3, 115, 0]",[{'term': 'Level 1: Exercise Normal Precaution...,Senegal - Level 1: Exercise Normal Precautions,http://travel.state.gov/content/travel/en/trav...,SG,Senegal,1
10,"[{'rel': 'alternate', 'type': 'text/html', 'hr...","Wed, 11 Dec 2019","[2019, 12, 11, 0, 0, 0, 2, 345, 0]",[{'term': 'Level 1: Exercise Normal Precaution...,Rwanda - Level 1: Exercise Normal Precautions,http://travel.state.gov/content/travel/en/trav...,RW,Rwanda,1
100,"[{'rel': 'alternate', 'type': 'text/html', 'hr...","Thu, 10 Jan 2019","[2019, 1, 10, 0, 0, 0, 3, 10, 0]",[{'term': 'Level 1: Exercise Normal Precaution...,Sao Tome and Principe - Level 1: Exercise Norm...,http://travel.state.gov/content/travel/en/trav...,TP,Sao Tome and Principe,1
101,"[{'rel': 'alternate', 'type': 'text/html', 'hr...","Thu, 10 Jan 2019","[2019, 1, 10, 0, 0, 0, 3, 10, 0]",[{'term': 'Level 2: Exercise Increased Caution...,Antarctica - Level 2: Exercise Increased Caution,http://travel.state.gov/content/travel/en/trav...,AY,Antarctica,2


### Cleaning the data (Status)

In [239]:
dos_score.tags[0][0]['term']

'Level 1: Exercise Normal Precautions'

In [238]:
dos_ = [x[0]['term'] for x in dos_score.tags]


['Level 1: Exercise Normal Precautions',
 'Level 1: Exercise Normal Precautions',
 'Level 1: Exercise Normal Precautions',
 'Level 1: Exercise Normal Precautions',
 'Level 2: Exercise Increased Caution',
 'Level 3: Reconsider Travel',
 'Level 2: Exercise Increased Caution',
 'Level 2: Exercise Increased Caution',
 'Level 1: Exercise Normal Precautions',
 'Level 2: Exercise Increased Caution',
 'Caution',
 'Level 1: Exercise Normal Precautions',
 'Level 1: Exercise Normal Precautions',
 'Level 2: Exercise Increased Caution',
 'Level 2: Exercise Increased Caution',
 'Level 1: Exercise Normal Precautions',
 'Level 1: Exercise Normal Precautions',
 'Level 2: Exercise Increased Caution',
 'Level 2: Exercise Increased Caution',
 'Level 1: Exercise Normal Precautions',
 'Level 3: Reconsider Travel',
 'Level 1: Exercise Normal Precautions',
 'Level 2: Exercise Increased Caution',
 'Level 2: Exercise Increased Caution',
 'Level 1: Exercise Normal Precautions',
 'Level 2: Exercise Increased Caut

In [220]:
#Status
status = re.search(r'(?<=: ).*?$',dos_score.tags[0][0]['term'])
status.group()

'Exercise Normal Precautions'

In [222]:
def only_status(line):
    status = re.search(r'(?<=: ).*?$', line)
    if status :
      return(status.group())

TypeError: list indices must be integers or slices, not str