# API: Travel Advisory Data

### I. Acquisition

In [1]:
#1 - I first import the necessary libraries:

from datetime import date
import requests
import json
import pandas as pd
import re

In [2]:
#2 - I then submit a request to my chosen API and parse the JSON reply with the '.json()' method.
# The timeout keeps the notebook from hanging forever on an unresponsive server, and
# 'raise_for_status()' stops here on an HTTP error instead of trying to parse an error page.

ta_url = 'https://www.travel-advisory.info/api'
ta_request = requests.get(ta_url, timeout=30)
ta_request.raise_for_status()
ta_json = ta_request.json()
ta_json

{'api_status': {'request': {'item': 'not specified'},
  'reply': {'cache': 'cached',
   'code': 200,
   'status': 'ok',
   'note': 'The api works, we could fetch countries.',
   'count': 238}},
 'data': {'AD': {'iso_alpha2': 'AD',
   'name': 'Andorra',
   'continent': 'EU',
   'advisory': {'score': 1.3,
    'sources_active': 3,
    'message': '',
    'updated': '2020-03-11 07:23:35',
    'source': 'https://www.travel-advisory.info/andorra'}},
  'AE': {'iso_alpha2': 'AE',
   'name': 'United Arab Emirates',
   'continent': 'AS',
   'advisory': {'score': 2.3,
    'sources_active': 6,
    'message': '',
    'updated': '2020-03-11 07:23:35',
    'source': 'https://www.travel-advisory.info/united-arab-emirates'}},
  'AF': {'iso_alpha2': 'AF',
   'name': 'Afghanistan',
   'continent': 'AS',
   'advisory': {'score': 5,
    'sources_active': 10,
    'message': '',
    'updated': '2020-03-11 07:23:35',
    'source': 'https://www.travel-advisory.info/afghanistan'}},
  'AG': {'iso_alpha2': 'AG',
 

In [3]:
#3 - '.json()' has already parsed the JSON reply into Python objects, so I get nested dictionaries rather than a raw JSON string. No problem, I can work with them.
type(ta_json)

dict

In [4]:
#4 - I locate the dictionary with the relevant data (stored under the 'data' key, one entry per ISO country code) and print it.
ta_json['data']

{'AD': {'iso_alpha2': 'AD',
  'name': 'Andorra',
  'continent': 'EU',
  'advisory': {'score': 1.3,
   'sources_active': 3,
   'message': '',
   'updated': '2020-03-11 07:23:35',
   'source': 'https://www.travel-advisory.info/andorra'}},
 'AE': {'iso_alpha2': 'AE',
  'name': 'United Arab Emirates',
  'continent': 'AS',
  'advisory': {'score': 2.3,
   'sources_active': 6,
   'message': '',
   'updated': '2020-03-11 07:23:35',
   'source': 'https://www.travel-advisory.info/united-arab-emirates'}},
 'AF': {'iso_alpha2': 'AF',
  'name': 'Afghanistan',
  'continent': 'AS',
  'advisory': {'score': 5,
   'sources_active': 10,
   'message': '',
   'updated': '2020-03-11 07:23:35',
   'source': 'https://www.travel-advisory.info/afghanistan'}},
 'AG': {'iso_alpha2': 'AG',
  'name': 'Antigua and Barbuda',
  'continent': 'NA',
  'advisory': {'score': 1,
   'sources_active': 2,
   'message': '',
   'updated': '2020-03-11 07:23:35',
   'source': 'https://www.travel-advisory.info/antigua-and-barbuda'}

### II. Wrangling

In [5]:
#5 - I build a data frame straight from the 'data' dictionary and save it, untouched, as my 'raw' .csv file.
# NOTE(review): this is an absolute path on one machine - consider a path relative to the project folder.

raw_csv_path = '/Users/alejandroarrya/Desktop/Ironhack/DAFTMX/web-project/Data/api_raw.csv'

ta_df = pd.DataFrame(ta_json['data'])
ta_df.to_csv(raw_csv_path)

In [6]:
#6 - I apply '.transpose()' for better readability, so that each row is one country. I also change the
#indices in my data frame to be numbers as opposed to the ISO country codes. This will be important
#later on when I merge data frames.
#The frame is rebuilt from the API data rather than modified in place, so running this cell
#more than once always gives the same result (a second in-place transpose would flip it back).

ta_df = pd.DataFrame(ta_json['data']).transpose().reset_index(drop=True)
ta_df.head()

Unnamed: 0,advisory,continent,iso_alpha2,name
0,"{'score': 1.3, 'sources_active': 3, 'message':...",EU,AD,Andorra
1,"{'score': 2.3, 'sources_active': 6, 'message':...",AS,AE,United Arab Emirates
2,"{'score': 5, 'sources_active': 10, 'message': ...",AS,AF,Afghanistan
3,"{'score': 1, 'sources_active': 2, 'message': '...",,AG,Antigua and Barbuda
4,"{'score': 1, 'sources_active': 2, 'message': '...",,AI,Anguilla


In [7]:
#7 - It seems I have nested dictionaries within my advisory column. I expand them and create a new data frame.
#Building the frame from the dictionaries themselves matches every value to its column by key,
#so a country whose advisory keys come in a different order (or with a key missing) cannot end up
#with values in the wrong column. The new frame gets a fresh 0..n-1 index, one row per country.

advisory_df = pd.DataFrame(ta_df['advisory'].tolist())
advisory_df.head()

Unnamed: 0,score,sources_active,message,updated,source
0,1.3,3,,2020-03-11 07:23:35,https://www.travel-advisory.info/andorra
1,2.3,6,,2020-03-11 07:23:35,https://www.travel-advisory.info/united-arab-e...
2,5.0,10,,2020-03-11 07:23:35,https://www.travel-advisory.info/afghanistan
3,1.0,2,,2020-03-11 07:23:35,https://www.travel-advisory.info/antigua-and-b...
4,1.0,2,,2020-03-11 07:23:35,https://www.travel-advisory.info/anguilla


In [8]:
#8 - I put the expanded advisory columns side by side with the country columns, then discard
#the columns I do not need (including the original nested 'advisory' column).

columns_to_drop = ['advisory', 'message','sources_active','continent']

combined_df = pd.concat([advisory_df, ta_df], axis=1)
main_df = combined_df.drop(columns=columns_to_drop)

main_df.head()

Unnamed: 0,score,updated,source,iso_alpha2,name
0,1.3,2020-03-11 07:23:35,https://www.travel-advisory.info/andorra,AD,Andorra
1,2.3,2020-03-11 07:23:35,https://www.travel-advisory.info/united-arab-e...,AE,United Arab Emirates
2,5.0,2020-03-11 07:23:35,https://www.travel-advisory.info/afghanistan,AF,Afghanistan
3,1.0,2020-03-11 07:23:35,https://www.travel-advisory.info/antigua-and-b...,AG,Antigua and Barbuda
4,1.0,2020-03-11 07:23:35,https://www.travel-advisory.info/anguilla,AI,Anguilla


In [9]:
#9 - A raw numeric 'score' is hard to read, so I want to group the scores into bins.
#Before choosing the bin edges I check the largest and smallest score in the column.

score_column = main_df['score']
print(score_column.max())
print(score_column.min())

5.0
0.0


In [10]:
#10 - I add a 'status' column that turns each numeric score into a labelled bin.
#'include_lowest=True' makes the first bin include its left edge, so a score of exactly 0 is labelled too.
#NOTE(review): 'Exercised Increased Caution' looks like a typo for 'Exercise Increased Caution' - confirm
#before changing it, since the label is written to the exported data.
#NOTE(review): a score of 0.0 lands in 'Exercise Normal Precautions' - confirm that 0 is a real score.

safety_labels = ['Exercise Normal Precautions','Exercised Increased Caution','Reconsider Travel','Do Not Travel']
score_edges = [0, 2.5, 3.5, 4.5, 5]

main_df['status'] = pd.cut(main_df['score'], bins=score_edges, labels=safety_labels, include_lowest=True)
main_df.head()

Unnamed: 0,score,updated,source,iso_alpha2,name,status
0,1.3,2020-03-11 07:23:35,https://www.travel-advisory.info/andorra,AD,Andorra,Exercise Normal Precautions
1,2.3,2020-03-11 07:23:35,https://www.travel-advisory.info/united-arab-e...,AE,United Arab Emirates,Exercise Normal Precautions
2,5.0,2020-03-11 07:23:35,https://www.travel-advisory.info/afghanistan,AF,Afghanistan,Do Not Travel
3,1.0,2020-03-11 07:23:35,https://www.travel-advisory.info/antigua-and-b...,AG,Antigua and Barbuda,Exercise Normal Precautions
4,1.0,2020-03-11 07:23:35,https://www.travel-advisory.info/anguilla,AI,Anguilla,Exercise Normal Precautions


In [11]:
def date_transform(line):
    """Extract the calendar date from the start of a timestamp string.

    The first ten characters of ``line`` (e.g. ``'2020-03-11'`` from
    ``'2020-03-11 07:23:35'``) are split on ``'-'`` and the three parts are
    converted to integers to build a ``datetime.date``.

    A value that is already a ``date`` (or ``datetime``) is returned as a plain
    ``date``, so applying this function to an already-converted column is a
    no-op instead of an error.

    Parameters
    ----------
    line : str or datetime.date
        Timestamp text starting with a year-month-day date, or a date object.

    Returns
    -------
    datetime.date
        The date found in the first ten characters of ``line``.

    Raises
    ------
    ValueError
        If ``line`` does not start with ten characters on a single line, if a
        part is not an integer, or if the parts do not form a valid date.
    TypeError
        If ``line`` is neither a string nor a date, or if the first ten
        characters do not split into exactly three parts.
    """
    # Already converted (e.g. the cell was run twice): normalise to a plain date.
    if isinstance(line, date):
        return date(line.year, line.month, line.day)

    date_match = re.match(r'.{10}', line)
    if date_match is None:
        # Without this check the failure would be an AttributeError on None.
        raise ValueError(
            'expected a timestamp starting with a 10-character date, got {!r}'.format(line)
        )

    date_string = date_match.group(0)
    return date(*map(int, date_string.split('-')))

In [12]:
# Replace each 'updated' timestamp string with just its calendar date.
main_df['updated'] = main_df['updated'].apply(date_transform)

In [13]:
#11 - I put the columns in the order I want them to appear in the final table.

column_order = ['name', 'iso_alpha2', 'score', 'status', 'updated', 'source']

main_df = main_df[column_order]

In [14]:
#12 - I will rename the columns.
#'rename' relabels the columns in one step and keeps their order, so there are no temporary
#duplicate columns to drop afterwards and running this cell again does no harm.

new_column_names = {
    'name': 'Name',
    'iso_alpha2': 'ISO',
    'score': 'Score',
    'status': 'Status',
    'updated': 'Last Update',
    'source': 'Source',
}

main_df = main_df.rename(columns=new_column_names)

In [16]:
#13 - I order the rows alphabetically by country name and renumber them from 0.

sorted_df = main_df.sort_values(by='Name')
main_df = sorted_df.reset_index(drop=True)
main_df.head()

Unnamed: 0,Name,ISO,Score,Status,Last Update,Source
0,Afghanistan,AF,5.0,Do Not Travel,2020-03-11,https://www.travel-advisory.info/afghanistan
1,Albania,AL,1.5,Exercise Normal Precautions,2020-03-11,https://www.travel-advisory.info/albania
2,Algeria,DZ,2.8,Exercised Increased Caution,2020-03-11,https://www.travel-advisory.info/algeria
3,American Samoa,AS,0.0,Exercise Normal Precautions,2020-03-11,https://www.travel-advisory.info/american-samoa
4,Andorra,AD,1.3,Exercise Normal Precautions,2020-03-11,https://www.travel-advisory.info/andorra


### III. Reporting

In [17]:
#14 - I save my 'clean' data frame as a .csv file.
# NOTE(review): this is an absolute path on one machine - consider a path relative to the project folder.

clean_csv_path = '/Users/alejandroarrya/Desktop/Ironhack/DAPTMX/final_project/TASAM_final_project/data/ta_api.csv'
main_df.to_csv(clean_csv_path)