In [1]:
#load libraries
import pandas as pd
import os as os
import numpy as np
import re
import ipywidgets as widgets
from ipywidgets import interactive
pd.set_option('display.max_columns', 300)
pd.set_option('display.max_rows', 300)

## Crawled database from https://www.henleypassportindex.com

In [2]:
# scrapping script here: https://github.com/jeremielamboley/world-passport-power-rank-enriched/blob/master/henley_passport_index_data_collection.ipynb
df_visa_free = pd.read_csv('https://raw.githubusercontent.com/jeremielamboley/henley/master/henley_visa_free_11_01_2019.csv', encoding = "ISO-8859-1")

In [3]:
df_visa_free.rename(columns={'passport': 'passportfrom_alpha_2code'}, inplace=True)
df_visa_free.rename(columns={'to': 'countryto_alpha_2code'}, inplace=True)

In [4]:
df_visa_free.shape

(45173, 3)

In [5]:
df_visa_free['passportfrom_alpha_2code'].nunique()

198

In [6]:
df_visa_free['countryto_alpha_2code'].nunique()

226

In [7]:
df_visa_free.head(5)

Unnamed: 0,passportfrom_alpha_2code,countryto_alpha_2code,visafree
0,AM,AF,0
1,AM,AL,1
2,AM,DZ,0
3,AM,AS,0
4,AM,AD,0


## List of world countries with ISO codes
### Source Wikipedia https://en.wikipedia.org/wiki/List_of_ISO_3166_country_codes
### Project by Radcliff https://gist.github.com/radcliff/f09c0f88344a7fcef373

In [8]:
df_iso = pd.read_csv('https://gist.githubusercontent.com/radcliff/f09c0f88344a7fcef373/raw/2753c482ad091c54b1822288ad2e4811c021d8ec/wikipedia-iso-country-codes.csv', encoding = "utf8")

In [9]:
df_iso.rename(columns={'English short name lower case': 'Country name'}, inplace=True)
df_iso.rename(columns={'Alpha-2 code': 'country_alpha_2code'}, inplace=True)
df_iso.rename(columns={'Alpha-3 code': 'country_alpha_3code'}, inplace=True)
df_iso.rename(columns={'Numeric code': 'country_numeric_code'}, inplace=True)
df_iso = df_iso.drop(['ISO 3166-2'], axis=1)

In [10]:
df_iso.shape

(246, 4)

In [11]:
df_iso.sample(5)

Unnamed: 0,Country name,country_alpha_2code,country_alpha_3code,country_numeric_code
208,Suriname,SR,SUR,740
16,Bahamas,BS,BHS,44
193,Saudi Arabia,SA,SAU,682
20,Belarus,BY,BLR,112
9,Antigua and Barbuda,AG,ATG,28


## GDP per countries in USD, 2016, from the United Nations Statistics Division
### Source: http://data.un.org/

In [12]:
df_GDP_UN = pd.read_csv('http://data.un.org/_Docs/SYB/CSV/SYB61_T13_GDP%20and%20GDP%20Per%20Capita.csv', 
                        encoding = "ISO-8859-1", 
                        delimiter = ',',
                        header=1)

In [13]:
# filter the data to GDP in 2016
df_GDP_UN = df_GDP_UN[df_GDP_UN['Series'] == 'GDP in current prices (millions of US dollars)']
df_GDP_UN = df_GDP_UN[df_GDP_UN.Year == 2016]

In [14]:
# Organize the data
df_GDP_UN['Value'] = df_GDP_UN['Value'].str.replace(',', '')
df_GDP_UN['Value'] = df_GDP_UN['Value'].astype(int)
# Renaming columns
df_GDP_UN.rename(columns={'Region/Country/Area': 'country_numeric_code'}, inplace=True)
df_GDP_UN.rename(columns={'Value': 'UN_GDP_currentPrices2016'}, inplace=True)
# Convert GDP from millions to billions
df_GDP_UN['UN_GDP_currentPrices2016_billions'] = df_GDP_UN['UN_GDP_currentPrices2016'] / 1000

In [15]:
df_GDP_UN.shape

(242, 8)

In [16]:
df_GDP_UN.sample(5)

Unnamed: 0,country_numeric_code,Unnamed: 1,Year,Series,UN_GDP_currentPrices2016,Footnotes,Source,UN_GDP_currentPrices2016_billions
3117,328,Guyana,2016,GDP in current prices (millions of US dollars),3437,,"United Nations Statistics Division, New York, ...",3.437
814,61,Polynesia,2016,GDP in current prices (millions of US dollars),6969,,"United Nations Statistics Division, New York, ...",6.969
702,9,Oceania,2016,GDP in current prices (millions of US dollars),1535831,,"United Nations Statistics Division, New York, ...",1535.831
3641,414,Kuwait,2016,GDP in current prices (millions of US dollars),110346,,"United Nations Statistics Division, New York, ...",110.346
3801,430,Liberia,2016,GDP in current prices (millions of US dollars),2757,,"United Nations Statistics Division, New York, ...",2.757


In [17]:
df_GDP_UN = df_GDP_UN[['country_numeric_code','UN_GDP_currentPrices2016_billions']]

### World total GDP for 2016 to get a % of coverage per passport

In [18]:
# country code = 1 for the world aggregate
df_GDP_UN['UN_World_GDP_percent'] = df_GDP_UN['UN_GDP_currentPrices2016_billions'] / df_GDP_UN[df_GDP_UN['country_numeric_code'] == 1].iloc[0,1]

In [19]:
df_GDP_UN.sample(5)

Unnamed: 0,country_numeric_code,UN_GDP_currentPrices2016_billions,UN_World_GDP_percent
5449,690,1.434,1.8956e-05
1526,72,15.566,0.0002057665
4217,583,0.33,4.36226e-06
4077,470,10.999,0.0001453954
4325,500,0.062,8.19576e-07


# Population and surface, 2015, from the United Nations Statistics Division

### Source: http://data.un.org/

In [20]:
df_population_surface_UN = pd.read_csv('http://data.un.org/_Docs/SYB/CSV/SYB61_T02_Population,%20Surface%20Area%20and%20Density.csv', 
                        encoding = "ISO-8859-1", 
                        delimiter = ',',
                        header=1)

In [21]:
# filter the data to GDP in 2015
df_population_surface_UN = df_population_surface_UN[(df_population_surface_UN['Series'] == 'Population mid-year estimates (millions)') | (df_population_surface_UN['Series'] == 'Surface area (thousand km2)')]
df_population_surface_UN = df_population_surface_UN[df_population_surface_UN['Year'] == 2015]

In [22]:
# Organize the data
df_population_surface_UN['Value'] = df_population_surface_UN['Value'].str.replace(',', '')
df_population_surface_UN['Value'] = df_population_surface_UN['Value'].astype(float)

In [23]:
df_population_surface_UN = df_population_surface_UN[['Region/Country/Area','Series','Value']]
df_population_surface_UN.rename(columns={'Region/Country/Area': 'country_numeric_code'}, inplace=True)

In [24]:
# pivot and un-pivot
df_population_surface_UN = df_population_surface_UN.pivot(index='country_numeric_code', columns='Series', values='Value')
df_population_surface_UN = pd.DataFrame(df_population_surface_UN.to_records())

In [25]:
# get the world total for population and surface
df_population_UN_world = df_population_surface_UN[(df_population_surface_UN.country_numeric_code == 1)].iloc[0,1]
df_surface_UN_world = df_population_surface_UN[(df_population_surface_UN.country_numeric_code == 1)].iloc[0,2]

In [26]:
# World Population mid-year estimates 7383 millions, 2015
# Source: United Nations Statistics Division, New York, World Population Prospects: The 2017 Revision, last accessed June 2017.
df_population_surface_UN['UN_population_Value_2015_percent'] = (df_population_surface_UN['Population mid-year estimates (millions)'] / df_population_UN_world)

In [27]:
df_population_surface_UN['Surface_area_convered_percent'] = (df_population_surface_UN['Surface area (thousand km2)'] / df_surface_UN_world)

In [28]:
df_population_surface_UN.sample(5)

Unnamed: 0,country_numeric_code,Population mid-year estimates (millions),Surface area (thousand km2),UN_population_Value_2015_percent,Surface_area_convered_percent
249,840,319.93,9834.0,0.043333,0.072223
215,703,5.44,49.0,0.000737,0.00036
100,268,3.95,70.0,0.000535,0.000514
108,304,0.06,2166.0,8e-06,0.015908
195,630,3.67,9.0,0.000497,6.6e-05


# Unesco World Heritage list 

### https://whc.unesco.org/en/syndication

In [29]:
# Fetch the data from the Unesco website
unesco = pd.read_excel('https://whc.unesco.org/en/list/xls/whc-sites-2018.xls', encoding="Windows-1252",header=0)
unesco = unesco.reindex(['name_en', 'iso_code'], axis="columns")

In [30]:
# check the shape
unesco.shape

(1092, 2)

In [31]:
# Some values are comma separated and need to be reformated
unesco.tail(5)

Unnamed: 0,name_en,iso_code
1087,Prehistoric Rock Art Sites in the Côa Valley a...,"pt,es"
1088,Uvs Nuur Basin,"ru,mn"
1089,Landscapes of Dauria,"ru,mn"
1090,Heritage of Mercury. Almadén and Idrija,"si,es"
1091,Mosi-oa-Tunya / Victoria Falls,"zm,zw"


In [32]:
# Split the values
unesco = pd.concat([unesco['name_en'], unesco['iso_code'].str.split(',', expand=True)], axis=1)

# Now we need to the countries in the first column
unesco2 = []
for i in range(0,11):
    temp = unesco.loc[:,['name_en',i]]
    temp.columns = ['name_en','countryto_alpha_2code']
    unesco2.append(temp)
unesco2 = pd.concat(unesco2, ignore_index=True)

# Remove empty values
unesco2 = unesco2.replace(to_replace='None', value=np.nan).dropna()
# preview
unesco2

Unnamed: 0,name_en,countryto_alpha_2code
0,Cultural Landscape and Archaeological Remains ...,af
1,Minaret and Archaeological Remains of Jam,af
2,Historic Centres of Berat and Gjirokastra,al
3,Butrint,al
4,Al Qal'a of Beni Hammad,dz
5,Tassili n'Ajjer,dz
6,M'Zab Valley,dz
7,Djémila,dz
8,Tipasa,dz
9,Timgad,dz


In [33]:
# create a pivot table
unesco_pivot = pd.pivot_table(unesco2,index=['countryto_alpha_2code'], aggfunc='count')
unesco_pivot = pd.DataFrame(unesco_pivot.to_records())
unesco_pivot = unesco_pivot.rename(columns = {'name_en' :'unesco_total_sites'})
unesco_pivot = unesco_pivot.sort_values('unesco_total_sites',ascending=False)

In [34]:
# Count the total of world unesco sites
unesco_total_sites = unesco_pivot['unesco_total_sites'].sum()
# create the % of sites located in each country
unesco_pivot['unesco_total_sites_percent'] = (unesco_pivot['unesco_total_sites'] / unesco_total_sites)

In [35]:
# change to uppercase
unesco_pivot['countryto_alpha_2code'] = unesco_pivot['countryto_alpha_2code'].str.upper()

In [36]:
unesco_pivot.sample(5)

Unnamed: 0,countryto_alpha_2code,unesco_total_sites,unesco_total_sites_percent
86,LB,5,0.004288
149,TJ,2,0.001715
54,GA,1,0.000858
41,DM,1,0.000858
61,GT,3,0.002573


## Intersect df_visa_free and df_iso on 'country to'

In [37]:
# copy df_visa_free, rename the columns from centadata and hauntedhouse
df_visa_free2 = df_visa_free.copy()

In [38]:
df_visa_free2.rename(columns={'countryto_alpha_2code': 'country_alpha_2code'}, inplace=True)

In [39]:
# Intersect on 'countryto_alpha_2code'
df = pd.merge(df_visa_free2, df_iso, on=['country_alpha_2code'], how='left')

In [40]:
df.rename(columns={'country_alpha_2code': 'countryto_alpha_2code'}, inplace=True)
df.rename(columns={'Country name': 'countryto_Country name'}, inplace=True)
df.rename(columns={'country_alpha_3code': 'countryto_country_alpha_3code'}, inplace=True)
df.rename(columns={'country_numeric_code': 'countryto_country_numeric_code'}, inplace=True)

In [41]:
df['passportfrom_alpha_2code'].nunique()

198

In [42]:
df['countryto_alpha_2code'].nunique()

226

In [43]:
df.sample(10)

Unnamed: 0,passportfrom_alpha_2code,countryto_alpha_2code,visafree,countryto_Country name,countryto_country_alpha_3code,countryto_country_numeric_code
31247,MK,TV,1,Tuvalu,TUV,798.0
34410,GR,MZ,1,Mozambique,MOZ,508.0
41683,CF,NP,1,Nepal,NPL,524.0
2058,BW,FW,0,,,
30248,KG,QA,0,Qatar,QAT,634.0
30599,GR,EE,1,Estonia,EST,233.0
24642,IE,CK,1,Cook Islands,COK,184.0
7186,SE,KM,1,Comoros,COM,174.0
7056,KN,AR,1,Argentina,ARG,32.0
22349,LR,TD,0,Chad,TCD,148.0


## Intersect df_visa_free and df_iso on 'passport'

In [44]:
df_iso.rename(columns={'country_alpha_2code': 'passportfrom_alpha_2code'}, inplace=True)

In [45]:
# Intersect on 'passportfrom_alpha_2code'
df = pd.merge(df, df_iso, on=['passportfrom_alpha_2code'], how='left')

In [46]:
df_iso.rename(columns={'passportfrom_alpha_2code': 'country_alpha_2code'}, inplace=True)

In [47]:
df.rename(columns={'Country name': 'passportfrom_Country name'}, inplace=True)

In [48]:
df = df.drop(['country_alpha_3code'], axis=1)
df = df.drop(['country_numeric_code'], axis=1)

## Intersect df and df_GDP_UN

In [49]:
df_GDP_UN2 = df_GDP_UN.copy()
df_GDP_UN2.rename(columns={'country_numeric_code': 'countryto_country_numeric_code'}, inplace=True)

In [50]:
df2 = pd.merge(df, df_GDP_UN2, on=['countryto_country_numeric_code'], how='left')

In [51]:
df2.rename(columns={'UN_GDP_currentPrices2016_billions': 'countryto_UN_GDP_currentPrices2016'}, inplace=True)

In [52]:
df2.sample(5)

Unnamed: 0,passportfrom_alpha_2code,countryto_alpha_2code,visafree,countryto_Country name,countryto_country_alpha_3code,countryto_country_numeric_code,passportfrom_Country name,countryto_UN_GDP_currentPrices2016,UN_World_GDP_percent
27616,IE,XK,1,,,,Ireland,,
3027,VA,GU,0,Guam,GUM,316.0,Holy See (Vatican City State),,
39801,CR,MH,1,Marshall Islands,MHL,584.0,Costa Rica,0.183,2e-06
25793,LS,HK,1,Hong Kong,HKG,344.0,Lesotho,320.912,0.004242
14604,SC,IE,1,Ireland,IRL,372.0,Seychelles,304.819,0.004029


## Intersect df2 and df_population_UN

In [53]:
df_population_UN2 = df_population_surface_UN.copy()
df_population_UN2.rename(columns={'country_numeric_code': 'countryto_country_numeric_code'}, inplace=True)

In [54]:
df3 = pd.merge(df2, df_population_UN2, on=['countryto_country_numeric_code'], how='left')

In [55]:
#df3.rename(columns={'UN_population_Value_2015_millions': 'countryto_UN_population_Value_2015_millions'}, inplace=True)
df3.rename(columns={'Population mid-year estimates (millions)': 'countryto_UN_population_Value_2015_millions'}, inplace=True)
df3.rename(columns={'Surface area (thousand km2)': 'countryto_Surface area (thousand km2)'}, inplace=True)

In [56]:
df3.sample(5)

Unnamed: 0,passportfrom_alpha_2code,countryto_alpha_2code,visafree,countryto_Country name,countryto_country_alpha_3code,countryto_country_numeric_code,passportfrom_Country name,countryto_UN_GDP_currentPrices2016,UN_World_GDP_percent,countryto_UN_population_Value_2015_millions,countryto_Surface area (thousand km2),UN_population_Value_2015_percent,Surface_area_convered_percent
25087,LA,FO,0,Faroe Islands,FRO,234.0,Lao People's Democratic Republic,,,0.05,1.0,7e-06,7e-06
33364,DO,AT,0,Austria,AUT,40.0,Dominican Republic,390.8,0.005166,8.68,84.0,0.001176,0.000617
7729,SE,GQ,0,Equatorial Guinea,GNQ,226.0,Sweden,10.678,0.000141,1.18,28.0,0.00016,0.000206
37508,IS,ZM,1,Zambia,ZMB,894.0,Iceland,21.063,0.000278,16.1,753.0,0.002181,0.00553
39280,GD,VE,1,"Venezuela, Bolivarian Republic of",VEN,862.0,Grenada,291.376,0.003852,31.16,912.0,0.004221,0.006698


## Intersect df3 and unesco_pivot

In [57]:
unesco_pivot2 = unesco_pivot.copy()

In [58]:
df4 = pd.merge(df3, unesco_pivot2, on=['countryto_alpha_2code'], how='left')

In [59]:
df4.sample(5)

Unnamed: 0,passportfrom_alpha_2code,countryto_alpha_2code,visafree,countryto_Country name,countryto_country_alpha_3code,countryto_country_numeric_code,passportfrom_Country name,countryto_UN_GDP_currentPrices2016,UN_World_GDP_percent,countryto_UN_population_Value_2015_millions,countryto_Surface area (thousand km2),UN_population_Value_2015_percent,Surface_area_convered_percent,unesco_total_sites,unesco_total_sites_percent
20975,PE,SC,1,Seychelles,SYC,690.0,Peru,1.434,1.9e-05,0.09,0.0,1.2e-05,0.0,2.0,0.001715
40210,AT,PF,1,French Polynesia,PYF,258.0,Austria,5.418,7.2e-05,0.28,4.0,3.8e-05,2.9e-05,,
40385,CM,LT,0,Lithuania,LTU,440.0,Cameroon,42.773,0.000565,2.93,65.0,0.000397,0.000477,4.0,0.003431
12590,QA,CU,0,Cuba,CUB,192.0,Qatar,89.689,0.001186,11.46,110.0,0.001552,0.000808,9.0,0.007719
35006,FJ,DJ,1,Djibouti,DJI,262.0,Fiji,1.892,2.5e-05,0.93,23.0,0.000126,0.000169,,


## Reordering the columns

In [60]:
df4 = df4.reset_index(drop=True)
cols = df4.columns.tolist()
cols

['passportfrom_alpha_2code',
 'countryto_alpha_2code',
 'visafree',
 'countryto_Country name',
 'countryto_country_alpha_3code',
 'countryto_country_numeric_code',
 'passportfrom_Country name',
 'countryto_UN_GDP_currentPrices2016',
 'UN_World_GDP_percent',
 'countryto_UN_population_Value_2015_millions',
 'countryto_Surface area (thousand km2)',
 'UN_population_Value_2015_percent',
 'Surface_area_convered_percent',
 'unesco_total_sites',
 'unesco_total_sites_percent']

In [61]:
df4 = df4[['passportfrom_alpha_2code','passportfrom_Country name','countryto_alpha_2code','countryto_Country name','countryto_country_alpha_3code', 'countryto_country_numeric_code','countryto_UN_GDP_currentPrices2016','UN_World_GDP_percent', 'countryto_UN_population_Value_2015_millions','UN_population_Value_2015_percent','countryto_Surface area (thousand km2)','Surface_area_convered_percent','unesco_total_sites','unesco_total_sites_percent','visafree']]

In [62]:
df4['passportfrom_alpha_2code'].nunique()

198

In [63]:
df4.head(5)

Unnamed: 0,passportfrom_alpha_2code,passportfrom_Country name,countryto_alpha_2code,countryto_Country name,countryto_country_alpha_3code,countryto_country_numeric_code,countryto_UN_GDP_currentPrices2016,UN_World_GDP_percent,countryto_UN_population_Value_2015_millions,UN_population_Value_2015_percent,countryto_Surface area (thousand km2),Surface_area_convered_percent,unesco_total_sites,unesco_total_sites_percent,visafree
0,AM,Armenia,AF,Afghanistan,AFG,4.0,20.235,0.000267,33.74,0.00457,653.0,0.004796,2.0,0.001715,0
1,AM,Armenia,AL,Albania,ALB,8.0,11.864,0.000157,2.92,0.000396,29.0,0.000213,3.0,0.002573,1
2,AM,Armenia,DZ,Algeria,DZA,12.0,159.049,0.002102,39.87,0.0054,2382.0,0.017494,7.0,0.006003,0
3,AM,Armenia,AS,American Samoa,ASM,16.0,,,0.06,8e-06,0.0,0.0,,,0
4,AM,Armenia,AD,Andorra,AND,20.0,2.858,3.8e-05,0.08,1.1e-05,0.0,0.0,1.0,0.000858,0


## Pandas pivot_table

In [64]:
df_pivot = pd.pivot_table(df4[df4.visafree == 1],index='passportfrom_Country name',values=['countryto_UN_GDP_currentPrices2016','UN_World_GDP_percent','countryto_UN_population_Value_2015_millions','UN_population_Value_2015_percent','countryto_Surface area (thousand km2)','Surface_area_convered_percent','unesco_total_sites','unesco_total_sites_percent','visafree'], aggfunc=np.sum, margins=False,dropna=True)
df_pivot.sort_values('visafree', ascending=False)
df_pivot.head(5)

Unnamed: 0_level_0,Surface_area_convered_percent,UN_World_GDP_percent,UN_population_Value_2015_percent,countryto_Surface area (thousand km2),countryto_UN_GDP_currentPrices2016,countryto_UN_population_Value_2015_millions,unesco_total_sites,unesco_total_sites_percent,visafree
passportfrom_Country name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Afghanistan,0.038432,0.005828,0.048784,5233.0,440.912,360.17,39.0,0.033448,30
Albania,0.235712,0.304555,0.269138,32095.0,23039.214,1987.05,637.0,0.546312,115
Algeria,0.10854,0.038378,0.126856,14779.0,2903.233,936.58,157.0,0.134648,50
Andorra,0.541803,0.758468,0.424182,73773.0,57377.264,3131.74,915.0,0.784734,169
Angola,0.113872,0.04281,0.149718,15505.0,3238.54,1105.37,140.0,0.120069,49


## Turn the pivot table in a Data frame

In [65]:
df_pivot_flattened = pd.DataFrame(df_pivot.to_records())
df_pivot_flattened.sample(5)

Unnamed: 0,passportfrom_Country name,Surface_area_convered_percent,UN_World_GDP_percent,UN_population_Value_2015_percent,countryto_Surface area (thousand km2),countryto_UN_GDP_currentPrices2016,countryto_UN_population_Value_2015_millions,unesco_total_sites,unesco_total_sites_percent,visafree
32,Central African Republic,0.085736,0.03016,0.114044,11674.0,2281.565,841.99,125.0,0.107204,49
0,Afghanistan,0.038432,0.005828,0.048784,5233.0,440.912,360.17,39.0,0.033448,30
102,Luxembourg,0.583562,0.761553,0.440671,79459.0,57610.593,3253.48,937.0,0.803602,186
29,Cameroon,0.102261,0.028978,0.12457,13924.0,2192.162,919.7,107.0,0.091767,48
63,Germany,0.59488,0.719216,0.442758,81000.0,54407.864,3268.89,907.0,0.777873,188


In [66]:
df_pivot_flattened['Surface_Rank'] = df_pivot_flattened['countryto_Surface area (thousand km2)'].rank(method='min', ascending=False).astype(int)
df_pivot_flattened['GDP_Rank'] = df_pivot_flattened['countryto_UN_GDP_currentPrices2016'].rank(method='min', ascending=False).astype(int)
df_pivot_flattened['Population_Rank'] = df_pivot_flattened['countryto_UN_population_Value_2015_millions'].rank(method='min', ascending=False).astype(int)
df_pivot_flattened['visafree_countries_Rank'] = df_pivot_flattened['visafree'].rank(method='min', ascending=False).astype(int)

## Calculate the overall ranking

In [67]:
df_pivot_flattened['Overall_ranking_calculation'] = df_pivot_flattened[['visafree_countries_Rank','GDP_Rank','Surface_Rank','Population_Rank']].sum(axis=1)

In [68]:
df_pivot_flattened['New_Overall_ranking'] = df_pivot_flattened['Overall_ranking_calculation'].rank(method='min', ascending=True).astype(int)

## Create a new column Gain/loss in ranking

In [69]:
df_pivot_flattened['gain-loss'] = df_pivot_flattened['visafree_countries_Rank'] - df_pivot_flattened['New_Overall_ranking']

In [70]:
df_pivot_flattened.sample(5)

Unnamed: 0,passportfrom_Country name,Surface_area_convered_percent,UN_World_GDP_percent,UN_population_Value_2015_percent,countryto_Surface area (thousand km2),countryto_UN_GDP_currentPrices2016,countryto_UN_population_Value_2015_millions,unesco_total_sites,unesco_total_sites_percent,visafree,Surface_Rank,GDP_Rank,Population_Rank,visafree_countries_Rank,Overall_ranking_calculation,New_Overall_ranking,gain-loss
173,Tajikistan,0.248557,0.070175,0.17626,33844.0,5308.642,1301.33,199.0,0.170669,58,107,132,136,143,518,130,13
49,Dominican Republic,0.13033,0.147811,0.181231,17746.0,11181.771,1338.03,221.0,0.189537,65,139,105,131,130,505,124,6
150,Samoa,0.390116,0.351231,0.304291,53119.0,26570.222,2246.58,701.0,0.601201,128,75,82,87,78,322,83,-5
129,Niger,0.107636,0.036713,0.14046,14656.0,2777.265,1037.02,136.0,0.116638,54,164,173,167,156,660,167,-11
68,Guinea,0.118102,0.048297,0.16422,16081.0,3653.606,1212.44,149.0,0.127787,56,151,154,151,149,605,156,-7


## Reorder the columns

In [71]:
df_pivot_flattened = df_pivot_flattened[['passportfrom_Country name',
                                         'New_Overall_ranking',
                                         'gain-loss',
                                         'visafree',
                                         'visafree_countries_Rank',
                                         'countryto_UN_GDP_currentPrices2016',
                                         'GDP_Rank',
                                         'UN_World_GDP_percent',
                                         'countryto_UN_population_Value_2015_millions',
                                         'Population_Rank',
                                         'UN_population_Value_2015_percent',
                                         'countryto_Surface area (thousand km2)',
                                         'Surface_Rank',
                                         'Surface_area_convered_percent',
                                         'Overall_ranking_calculation']]

In [72]:
# Re-order and format the values
df_pivot_flattened.sort_values('New_Overall_ranking',ascending=True, inplace=True)
df_pivot_flattened = df_pivot_flattened.reset_index(drop=True)
df_pivot_flattened.style.format({
    'UN_World_GDP_percent': '{:.2%}'.format,
    'UN_population_Value_2015_percent': '{:.2%}'.format,
    'Surface_area_convered_percent': '{:.2%}'.format
    })

Unnamed: 0,passportfrom_Country name,New_Overall_ranking,gain-loss,visafree,visafree_countries_Rank,countryto_UN_GDP_currentPrices2016,GDP_Rank,UN_World_GDP_percent,countryto_UN_population_Value_2015_millions,Population_Rank,UN_population_Value_2015_percent,countryto_Surface area (thousand km2),Surface_Rank,Surface_area_convered_percent,Overall_ranking_calculation
0,Singapore,1,1,189,2,68781.9,1,90.92%,4746.51,3,64.29%,90971,2,66.81%,8
1,Japan,2,-1,190,1,64790.1,4,85.65%,5923.63,1,80.23%,86657,4,63.64%,10
2,"Korea, Republic of",3,-1,189,2,60116.5,6,79.47%,4826.81,2,65.38%,100305,1,73.67%,11
3,Denmark,4,2,187,6,57567.6,9,76.10%,3341.93,17,45.27%,79750,9,58.57%,41
4,Finland,5,1,187,6,57636.0,7,76.19%,3342.14,16,45.27%,79455,13,58.35%,42
5,Sweden,6,0,187,6,57360.0,15,75.82%,3337.86,18,45.21%,79354,14,58.28%,53
6,Luxembourg,7,3,186,10,57610.6,8,76.16%,3253.48,26,44.07%,79459,12,58.36%,56
7,San Marino,8,38,168,46,68520.1,2,90.58%,4481.91,4,60.71%,82795,7,60.81%,59
8,Brunei Darussalam,9,39,165,48,65240.0,3,86.24%,4237.51,5,57.40%,84063,6,61.74%,62
9,Italy,10,-4,187,6,56015.6,28,74.05%,3288.12,22,44.54%,79491,11,58.38%,67


In [73]:
# Export to CSV
df_pivot_flattened.to_csv('ranking.csv')

## Biggest winners and losers

In [74]:
df_rankgainloss = df_pivot_flattened.sort_values('gain-loss', ascending=False, inplace=False)
df_rankgainloss.style.format({
    'UN_World_GDP_percent': '{:.2%}'.format,
    'UN_population_Value_2015_percent': '{:.2%}'.format,
    'Surface_area_convered_percent': '{:.2%}'.format
    })

Unnamed: 0,passportfrom_Country name,New_Overall_ranking,gain-loss,visafree,visafree_countries_Rank,countryto_UN_GDP_currentPrices2016,GDP_Rank,UN_World_GDP_percent,countryto_UN_population_Value_2015_millions,Population_Rank,UN_population_Value_2015_percent,countryto_Surface area (thousand km2),Surface_Rank,Surface_area_convered_percent,Overall_ranking_calculation
138,Nepal,139,49,40,188,4964.39,133,6.56%,2448.87,73,33.17%,14385,166,10.56%,560
59,Ecuador,60,39,93,99,23156.8,90,30.61%,3379.38,15,45.77%,63280,45,46.47%,249
8,Brunei Darussalam,9,39,165,48,65240.0,3,86.24%,4237.51,5,57.40%,84063,6,61.74%,62
7,San Marino,8,38,168,46,68520.1,2,90.58%,4481.91,4,60.71%,82795,7,60.81%,59
76,Belarus,76,36,77,112,20372.7,98,26.93%,3191.17,31,43.22%,61245,51,44.98%,292
57,Bosnia and Herzegovina,58,33,118,91,35374.1,60,46.76%,3485.16,13,47.21%,59135,61,43.43%,225
129,Lao People's Democratic Republic,130,33,52,163,5365.78,131,7.09%,1485.53,116,20.12%,33337,108,24.48%,518
25,"Taiwan, Province of China",26,32,149,58,62452.6,5,82.56%,3868.61,8,52.40%,64458,44,47.34%,115
45,Serbia,46,30,129,76,41387.1,47,54.71%,3583.12,12,48.53%,67113,40,49.29%,175
71,Fiji,72,29,89,101,22117.1,94,29.24%,3272.0,23,44.32%,57109,67,41.94%,285


# A little program to access the results easily

In [75]:
# create a new list
countrylist = list(set(df_pivot_flattened['passportfrom_Country name'].drop_duplicates().values.tolist()))
countrylist.sort()

In [76]:
# Select the passport holder's country
countryselected = widgets.Dropdown(
    options=countrylist,
    value='Hong Kong',
    description="Country:",
    disabled=False,
)

def on_change(change):
    if change['type'] == 'change' and change['name'] == 'value':
        print ("Country selected: %s" % change['new'])

countryselected.observe(on_change)
display(countryselected)

Dropdown(description='Country:', index=74, options=('Afghanistan', 'Albania', 'Algeria', 'Andorra', 'Angola', …

In [77]:
# Selected country in the drop down menu
countryselectedvisafree = df_pivot_flattened[df_pivot_flattened['passportfrom_Country name'] == countryselected.value]['visafree']
countryselectedvisafree = countryselectedvisafree.to_frame()
countryselectedvisafree = countryselectedvisafree.iloc[0]['visafree']
# the overall country rank
countryrank = df_pivot_flattened[df_pivot_flattened['passportfrom_Country name'] == countryselected.value]['New_Overall_ranking']
countryrank = countryrank.to_frame()
countryrank = countryrank.iloc[0]['New_Overall_ranking']
# the gain-loss compare the to Henley Passport Index 
gainloss = df_pivot_flattened[df_pivot_flattened['passportfrom_Country name'] == countryselected.value]['gain-loss']
gainloss = gainloss.to_frame()
gainloss = gainloss.iloc[0]['gain-loss']
# the rank regarding the number of countries accessible visa-free
visafreecountriesRank = df_pivot_flattened[df_pivot_flattened['passportfrom_Country name'] == countryselected.value]['visafree_countries_Rank']
visafreecountriesRank = visafreecountriesRank.to_frame()
visafreecountriesRank = visafreecountriesRank.iloc[0]['visafree_countries_Rank']
# the rank regarding to the population accessible visa-free
countryselectedpopulation = df_pivot_flattened[df_pivot_flattened['passportfrom_Country name'] == countryselected.value]['UN_population_Value_2015_percent']
countryselectedpopulation = countryselectedpopulation.to_frame()
countryselectedpopulation = countryselectedpopulation.iloc[0]['UN_population_Value_2015_percent']
# the rank regarding to the globe surface accessible visa-free
countryselectedsurface = df_pivot_flattened[df_pivot_flattened['passportfrom_Country name'] == countryselected.value]['Surface_area_convered_percent']
countryselectedsurface = countryselectedsurface.to_frame()
countryselectedsurface = countryselectedsurface.iloc[0]['Surface_area_convered_percent']
countryselectedsurface = countryselectedsurface.item()
# the rank regarding to the world GDP accessible visa-free
countryselectedGDP = df_pivot_flattened[df_pivot_flattened['passportfrom_Country name'] == countryselected.value]['UN_World_GDP_percent']
countryselectedGDP = countryselectedGDP.to_frame()
countryselectedGDP = countryselectedGDP.iloc[0]['UN_World_GDP_percent']

In [78]:
# Run this query to get the statistics
def country_access(countryselected,countryselectedvisafree,countryrank,visafreecountriesRank,gainloss,countryselectedpopulation,countryselectedsurface,countryselectedGDP):
    print("With " + countryselected.value + "'s passport you have access to:")
    print(str(countryselectedvisafree) + " countries visa-free, ranking " + str(countryrank)  +  " over " + str(df_pivot_flattened['passportfrom_Country name'].nunique()) + " countries' passports according to our enriched ranking.")
    print("Ranked " + str(visafreecountriesRank) + " according to Henley Passport Index. " + str(np.where(gainloss < 0, "Rank loss: "+str(gainloss), "Rank gain: +" + str(gainloss))))
    print("\nIt gives you visa-free access to:")
    print("\t" + "{:.0%}".format(countryselectedpopulation) + " of the world's population")
    print("\t" + "{:.0%}".format(countryselectedsurface) + " of the world's surface")
    print("\t" + "{:.0%}".format(countryselectedGDP) + " of the world's GPD")
    
country_access(countryselected,countryselectedvisafree,countryrank,visafreecountriesRank,gainloss,countryselectedpopulation,countryselectedsurface,countryselectedGDP)

With Hong Kong's passport you have access to:
169 countries visa-free, ranking 37 over 197 countries' passports according to our enriched ranking.
Ranked 42 according to Henley Passport Index. Rank gain: +5

It gives you visa-free access to:
	40% of the world's population
	65% of the world's surface
	52% of the world's GPD
