In [1]:
#load libraries
import pandas as pd
import os as os
import numpy as np
import re
import ipywidgets as widgets
from ipywidgets import interactive
pd.set_option('display.max_columns', 100)
pd.set_option('display.max_rows', 100)

## Crawled database from https://www.henleypassportindex.com

In [2]:
# scrapping script here: https://github.com/jeremielamboley/world-passport-power-rank-enriched/blob/master/henley_passport_index_data_collection.ipynb
df_visa_free = pd.read_csv('https://raw.githubusercontent.com/jeremielamboley/henley/master/henley_visa_free_11_01_2019.csv', encoding = "ISO-8859-1")

In [3]:
df_visa_free.rename(columns={'passport': 'passportfrom_alpha_2code'}, inplace=True)
df_visa_free.rename(columns={'to': 'countryto_alpha_2code'}, inplace=True)

In [4]:
df_visa_free.shape

(45173, 3)

In [5]:
df_visa_free['passportfrom_alpha_2code'].nunique()

198

In [6]:
df_visa_free['countryto_alpha_2code'].nunique()

226

In [7]:
# Let's consider the passport holders countries as a country which gives visa-free access
df_visa_free.loc[df_visa_free['passportfrom_alpha_2code'] == df_visa_free['countryto_alpha_2code'], 'visafree'] = 1

In [8]:
# visafree matrix preview
df_visa_free.sample(5)

Unnamed: 0,passportfrom_alpha_2code,countryto_alpha_2code,visafree
25997,MA,ST,1
22901,IL,BB,1
9112,SK,AO,0
44856,BE,TZ,1
40578,DM,NC,0


In [9]:
# Nationals have visa free access to their own countries
df_visa_free[df_visa_free.passportfrom_alpha_2code == df_visa_free.countryto_alpha_2code].head()

Unnamed: 0,passportfrom_alpha_2code,countryto_alpha_2code,visafree
9,AM,AM,1
234,AG,AG,1
267,AO,AO,1
316,AR,AR,1
351,DZ,DZ,1


## List of world countries with ISO codes
### Source Wikipedia https://en.wikipedia.org/wiki/List_of_ISO_3166_country_codes
### Project by Radcliff https://gist.github.com/radcliff/f09c0f88344a7fcef373

In [10]:
df_iso = pd.read_csv('https://gist.githubusercontent.com/radcliff/f09c0f88344a7fcef373/raw/2753c482ad091c54b1822288ad2e4811c021d8ec/wikipedia-iso-country-codes.csv', encoding = "utf8")

In [11]:
df_iso.rename(columns={'English short name lower case': 'Country name'}, inplace=True)
df_iso.rename(columns={'Alpha-2 code': 'country_alpha_2code'}, inplace=True)
df_iso.rename(columns={'Alpha-3 code': 'country_alpha_3code'}, inplace=True)
df_iso.rename(columns={'Numeric code': 'country_numeric_code'}, inplace=True)
df_iso = df_iso.drop(['ISO 3166-2'], axis=1)

In [12]:
df_iso.shape

(246, 4)

In [13]:
df_iso.sample(5)

Unnamed: 0,Country name,country_alpha_2code,country_alpha_3code,country_numeric_code
239,"Virgin Islands, British",VG,VGB,92
87,Guam,GU,GUM,316
185,Saint Kitts and Nevis,KN,KNA,659
184,"Saint Helena, Ascension and Tristan da Cunha",SH,SHN,654
215,Tajikistan,TJ,TJK,762


## GDP per countries in USD, 2016, from the United Nations Statistics Division
### Source: http://data.un.org/

In [14]:
df_GDP_UN = pd.read_csv('http://data.un.org/_Docs/SYB/CSV/SYB61_T13_GDP%20and%20GDP%20Per%20Capita.csv', 
                        encoding = "ISO-8859-1", 
                        delimiter = ',',
                        header=1)

In [15]:
# filter the data to GDP in 2016
df_GDP_UN = df_GDP_UN[df_GDP_UN['Series'] == 'GDP in current prices (millions of US dollars)']
df_GDP_UN = df_GDP_UN[df_GDP_UN.Year == 2016]

In [16]:
# Organize the data
df_GDP_UN['Value'] = df_GDP_UN['Value'].str.replace(',', '')
df_GDP_UN['Value'] = df_GDP_UN['Value'].astype(int)
# Renaming columns
df_GDP_UN.rename(columns={'Region/Country/Area': 'country_numeric_code'}, inplace=True)
df_GDP_UN.rename(columns={'Value': 'UN_GDP_currentPrices2016'}, inplace=True)
# Convert GDP from millions to billions
df_GDP_UN['UN_GDP_currentPrices2016_billions'] = df_GDP_UN['UN_GDP_currentPrices2016'] / 1000

In [17]:
df_GDP_UN.shape

(242, 8)

In [18]:
df_GDP_UN.sample(5)

Unnamed: 0,country_numeric_code,Unnamed: 1,Year,Series,UN_GDP_currentPrices2016,Footnotes,Source,UN_GDP_currentPrices2016_billions
5041,634,Qatar,2016,GDP in current prices (millions of US dollars),152452,,"United Nations Statistics Division, New York, ...",152.452
5600,90,Solomon Islands,2016,GDP in current prices (millions of US dollars),1134,,"United Nations Statistics Division, New York, ...",1.134
1834,136,Cayman Islands,2016,GDP in current prices (millions of US dollars),3844,,"United Nations Statistics Division, New York, ...",3.844
1198,44,Bahamas,2016,GDP in current prices (millions of US dollars),11262,,"United Nations Statistics Division, New York, ...",11.262
646,39,Southern Europe,2016,GDP in current prices (millions of US dollars),3693991,,"United Nations Statistics Division, New York, ...",3693.991


In [19]:
df_GDP_UN = df_GDP_UN[['country_numeric_code','UN_GDP_currentPrices2016_billions']]

### World total GDP for 2016 to get a % of coverage per passport

In [20]:
# country code = 1 for the world aggregate
df_GDP_UN['UN_World_GDP_percent'] = df_GDP_UN['UN_GDP_currentPrices2016_billions'] / df_GDP_UN[df_GDP_UN['country_numeric_code'] == 1].iloc[0,1]

In [21]:
df_GDP_UN.sample(5)

Unnamed: 0,country_numeric_code,UN_GDP_currentPrices2016_billions,UN_World_GDP_percent
6,1,75648.868,1.0
1501,70,16.91,0.000224
398,142,27538.651,0.364033
2676,231,70.315,0.000929
5477,694,3.675,4.9e-05


# Population and surface, 2015, from the United Nations Statistics Division

### Source: http://data.un.org/

In [22]:
df_population_surface_UN = pd.read_csv('http://data.un.org/_Docs/SYB/CSV/SYB61_T02_Population,%20Surface%20Area%20and%20Density.csv', 
                        encoding = "ISO-8859-1", 
                        delimiter = ',',
                        header=1)

In [23]:
# filter the data to GDP in 2015
df_population_surface_UN = df_population_surface_UN[(df_population_surface_UN['Series'] == 'Population mid-year estimates (millions)') | (df_population_surface_UN['Series'] == 'Surface area (thousand km2)')]
df_population_surface_UN = df_population_surface_UN[df_population_surface_UN['Year'] == 2015]

In [24]:
# Organize the data
df_population_surface_UN['Value'] = df_population_surface_UN['Value'].str.replace(',', '')
df_population_surface_UN['Value'] = df_population_surface_UN['Value'].astype(float)

In [25]:
df_population_surface_UN = df_population_surface_UN[['Region/Country/Area','Series','Value']]
df_population_surface_UN.rename(columns={'Region/Country/Area': 'country_numeric_code'}, inplace=True)

In [26]:
# pivot and un-pivot
df_population_surface_UN = df_population_surface_UN.pivot(index='country_numeric_code', columns='Series', values='Value')
df_population_surface_UN = pd.DataFrame(df_population_surface_UN.to_records())

In [27]:
# get the world total for population and surface
df_population_UN_world = df_population_surface_UN[(df_population_surface_UN.country_numeric_code == 1)].iloc[0,1]
df_surface_UN_world = df_population_surface_UN[(df_population_surface_UN.country_numeric_code == 1)].iloc[0,2]

In [28]:
# World Population mid-year estimates 7383 millions, 2015
# Source: United Nations Statistics Division, New York, World Population Prospects: The 2017 Revision, last accessed June 2017.
df_population_surface_UN['UN_population_Value_2015_percent'] = (df_population_surface_UN['Population mid-year estimates (millions)'] / df_population_UN_world)

In [29]:
df_population_surface_UN['Surface_area_convered_percent'] = (df_population_surface_UN['Surface area (thousand km2)'] / df_surface_UN_world)

In [30]:
df_population_surface_UN.sample(5)

Unnamed: 0,country_numeric_code,Population mid-year estimates (millions),Surface area (thousand km2),UN_population_Value_2015_percent,Surface_area_convered_percent
90,233,1.32,45.0,0.000179,0.00033
78,196,1.16,9.0,0.000157,6.6e-05
12,17,153.74,6613.0,0.020823,0.048567
110,312,0.45,2.0,6.1e-05,1.5e-05
55,124,35.95,9985.0,0.004869,0.073332


# Unesco World Heritage list 

### https://whc.unesco.org/en/syndication

In [31]:
# Fetch the data from the Unesco website
unesco = pd.read_excel('https://whc.unesco.org/en/list/xls/whc-sites-2018.xls', encoding="Windows-1252",header=0)
unesco = unesco.reindex(['name_en', 'iso_code'], axis="columns")

In [32]:
# check the shape
unesco.shape

(1092, 2)

In [33]:
# Some values are comma separated and need to be reformated
unesco.tail(5)

Unnamed: 0,name_en,iso_code
1087,Prehistoric Rock Art Sites in the Côa Valley a...,"pt,es"
1088,Uvs Nuur Basin,"ru,mn"
1089,Landscapes of Dauria,"ru,mn"
1090,Heritage of Mercury. Almadén and Idrija,"si,es"
1091,Mosi-oa-Tunya / Victoria Falls,"zm,zw"


In [34]:
# Split the values
unesco = pd.concat([unesco['name_en'], unesco['iso_code'].str.split(',', expand=True)], axis=1)

unesco2 = []
for i in range(0,11):
    temp = unesco.loc[:,['name_en',i]]
    temp.columns = ['name_en','countryto_alpha_2code']
    unesco2.append(temp)
unesco2 = pd.concat(unesco2, ignore_index=True)

# Remove empty values
unesco2 = unesco2.replace(to_replace='None', value=np.nan).dropna().reset_index(drop=True)
# preview
unesco2.sample(5)

Unnamed: 0,name_en,countryto_alpha_2code
61,Flemish Béguinages,be
245,Salonga National Park,cd
816,Saloum Delta,sn
966,L'viv – the Ensemble of the Historic Centre,ua
44,Historic Centre of the City of Salzburg,at


In [35]:
# create a pivot table
unesco_pivot = pd.pivot_table(unesco2,index=['countryto_alpha_2code'], aggfunc='count')
unesco_pivot = pd.DataFrame(unesco_pivot.to_records())
unesco_pivot = unesco_pivot.rename(columns = {'name_en' :'unesco_total_sites'})
unesco_pivot = unesco_pivot.sort_values('unesco_total_sites',ascending=False)

In [36]:
# Count the total of world unesco sites
unesco_total_sites = unesco_pivot['unesco_total_sites'].sum()
# create the % of sites located in each country
unesco_pivot['unesco_total_sites_percent'] = (unesco_pivot['unesco_total_sites'] / unesco_total_sites)

In [37]:
# change to uppercase
unesco_pivot['countryto_alpha_2code'] = unesco_pivot['countryto_alpha_2code'].str.upper()

In [38]:
unesco_pivot.sample(5)

Unnamed: 0,countryto_alpha_2code,unesco_total_sites,unesco_total_sites_percent
73,IT,54,0.046312
33,CO,9,0.007719
42,DO,1,0.000858
134,SB,1,0.000858
75,JO,5,0.004288


## Intersect df_visa_free and df_iso on 'country to'

In [39]:
# copy df_visa_free, rename the columns from centadata and hauntedhouse
df_visa_free2 = df_visa_free.copy()

In [40]:
df_visa_free2.rename(columns={'countryto_alpha_2code': 'country_alpha_2code'}, inplace=True)

In [41]:
# Intersect on 'countryto_alpha_2code'
df = pd.merge(df_visa_free2, df_iso, on=['country_alpha_2code'], how='left')

In [42]:
df.rename(columns={'country_alpha_2code': 'countryto_alpha_2code'}, inplace=True)
df.rename(columns={'Country name': 'countryto_Country_name'}, inplace=True)
df.rename(columns={'country_alpha_3code': 'countryto_country_alpha_3code'}, inplace=True)
df.rename(columns={'country_numeric_code': 'countryto_country_numeric_code'}, inplace=True)

In [43]:
df['passportfrom_alpha_2code'].nunique()

198

In [44]:
df['countryto_alpha_2code'].nunique()

226

In [45]:
df.sample(10)

Unnamed: 0,passportfrom_alpha_2code,countryto_alpha_2code,visafree,countryto_Country_name,countryto_country_alpha_3code,countryto_country_numeric_code
41048,CM,FM,1,"Micronesia, Federated States of",FSM,583.0
37685,SV,JM,1,Jamaica,JAM,388.0
28112,MK,NP,1,Nepal,NPL,524.0
2080,BG,JM,1,Jamaica,JAM,388.0
38557,CL,ID,1,Indonesia,IDN,360.0
5234,BA,TT,1,Trinidad and Tobago,TTO,780.0
42668,BS,MR,1,Mauritania,MRT,478.0
27294,MO,MC,1,Monaco,MCO,492.0
18666,SO,TW,0,"Taiwan, Province of China",TWN,158.0
32762,HK,LI,1,Liechtenstein,LIE,438.0


## Intersect df_visa_free and df_iso on 'passport'

In [46]:
df_iso.rename(columns={'country_alpha_2code': 'passportfrom_alpha_2code'}, inplace=True)

In [47]:
# Intersect on 'passportfrom_alpha_2code'
df = pd.merge(df, df_iso, on=['passportfrom_alpha_2code'], how='left')

In [48]:
df_iso.rename(columns={'passportfrom_alpha_2code': 'country_alpha_2code'}, inplace=True)

In [49]:
df.rename(columns={'Country name': 'passportfrom_Country name'}, inplace=True)

In [50]:
df = df.drop(['country_alpha_3code'], axis=1)
df = df.drop(['country_numeric_code'], axis=1)

## Intersect df and df_GDP_UN

In [51]:
df_GDP_UN2 = df_GDP_UN.copy()
df_GDP_UN2.rename(columns={'country_numeric_code': 'countryto_country_numeric_code'}, inplace=True)

In [52]:
df2 = pd.merge(df, df_GDP_UN2, on=['countryto_country_numeric_code'], how='left')

In [53]:
df2.rename(columns={'UN_GDP_currentPrices2016_billions': 'countryto_UN_GDP_currentPrices2016'}, inplace=True)

In [54]:
df2.sample(5)

Unnamed: 0,passportfrom_alpha_2code,countryto_alpha_2code,visafree,countryto_Country_name,countryto_country_alpha_3code,countryto_country_numeric_code,passportfrom_Country name,countryto_UN_GDP_currentPrices2016,UN_World_GDP_percent
11352,TO,NG,0,Nigeria,NGA,566.0,Tonga,404.649,0.005349
41826,CR,RW,1,Rwanda,RWA,646.0,Costa Rica,8.474,0.000112
44381,BO,SE,0,Sweden,SWE,752.0,"Bolivia, Plurinational State of",514.476,0.006801
20021,MA,DJ,1,Djibouti,DJI,262.0,Morocco,1.892,2.5e-05
8897,TM,MX,0,Mexico,MEX,484.0,Turkmenistan,1076.914,0.014236


## Intersect df2 and df_population_UN

In [55]:
df_population_UN2 = df_population_surface_UN.copy()
df_population_UN2.rename(columns={'country_numeric_code': 'countryto_country_numeric_code'}, inplace=True)

In [56]:
df3 = pd.merge(df2, df_population_UN2, on=['countryto_country_numeric_code'], how='left')

In [57]:
#df3.rename(columns={'UN_population_Value_2015_millions': 'countryto_UN_population_Value_2015_millions'}, inplace=True)
df3.rename(columns={'Population mid-year estimates (millions)': 'countryto_UN_population_Value_2015_millions'}, inplace=True)
df3.rename(columns={'Surface area (thousand km2)': 'countryto_Surface area (thousand km2)'}, inplace=True)

In [58]:
df3.sample(5)

Unnamed: 0,passportfrom_alpha_2code,countryto_alpha_2code,visafree,countryto_Country_name,countryto_country_alpha_3code,countryto_country_numeric_code,passportfrom_Country name,countryto_UN_GDP_currentPrices2016,UN_World_GDP_percent,countryto_UN_population_Value_2015_millions,countryto_Surface area (thousand km2),UN_population_Value_2015_percent,Surface_area_convered_percent
13220,PK,AL,0,Albania,ALB,8.0,Pakistan,11.864,0.000157,2.92,29.0,0.000396,0.000213
44619,BJ,TM,0,Turkmenistan,TKM,795.0,Benin,36.18,0.000478,5.57,488.0,0.000754,0.003584
25306,LS,GE,0,Georgia,GEO,268.0,Lesotho,14.333,0.000189,3.95,70.0,0.000535,0.000514
25696,MD,SN,0,Senegal,SEN,686.0,"Moldova, Republic of",14.605,0.000193,14.98,197.0,0.002029,0.001447
44790,BB,TJ,0,Tajikistan,TJK,762.0,Barbados,6.952,9.2e-05,8.55,143.0,0.001158,0.00105


## Intersect df3 and unesco_pivot

In [59]:
unesco_pivot2 = unesco_pivot.copy()

In [60]:
df4 = pd.merge(df3, unesco_pivot2, on=['countryto_alpha_2code'], how='left')

In [61]:
df4.sample(5)

Unnamed: 0,passportfrom_alpha_2code,countryto_alpha_2code,visafree,countryto_Country_name,countryto_country_alpha_3code,countryto_country_numeric_code,passportfrom_Country name,countryto_UN_GDP_currentPrices2016,UN_World_GDP_percent,countryto_UN_population_Value_2015_millions,countryto_Surface area (thousand km2),UN_population_Value_2015_percent,Surface_area_convered_percent,unesco_total_sites,unesco_total_sites_percent
20328,ME,CR,1,Costa Rica,CRI,188.0,Montenegro,57.436,0.000759,4.81,51.0,0.000651,0.000375,4.0,0.003431
5762,BN,AE,1,United Arab Emirates,ARE,784.0,Brunei Darussalam,348.744,0.00461,9.15,84.0,0.001239,0.000617,1.0,0.000858
17617,MZ,AD,0,Andorra,AND,20.0,Mozambique,2.858,3.8e-05,0.08,0.0,1.1e-05,0.0,1.0,0.000858
17034,RU,,1,Namibia,NAM,516.0,Russian Federation,10.947,0.000145,2.43,824.0,0.000329,0.006052,,
38864,BH,CD,0,"Congo, the Democratic Republic of the",COD,180.0,Bahrain,40.337,0.000533,76.2,2345.0,0.010321,0.017222,5.0,0.004288


## Reordering the columns

In [62]:
df4 = df4.reset_index(drop=True)
cols = df4.columns.tolist()
cols

['passportfrom_alpha_2code',
 'countryto_alpha_2code',
 'visafree',
 'countryto_Country_name',
 'countryto_country_alpha_3code',
 'countryto_country_numeric_code',
 'passportfrom_Country name',
 'countryto_UN_GDP_currentPrices2016',
 'UN_World_GDP_percent',
 'countryto_UN_population_Value_2015_millions',
 'countryto_Surface area (thousand km2)',
 'UN_population_Value_2015_percent',
 'Surface_area_convered_percent',
 'unesco_total_sites',
 'unesco_total_sites_percent']

In [63]:
df4 = df4[['passportfrom_alpha_2code','passportfrom_Country name','countryto_alpha_2code','countryto_Country_name','countryto_country_alpha_3code', 'countryto_country_numeric_code','countryto_UN_GDP_currentPrices2016','UN_World_GDP_percent', 'countryto_UN_population_Value_2015_millions','UN_population_Value_2015_percent','countryto_Surface area (thousand km2)','Surface_area_convered_percent','unesco_total_sites','unesco_total_sites_percent','visafree']]

In [64]:
df4['passportfrom_alpha_2code'].nunique()

198

In [65]:
df4.head(5)

Unnamed: 0,passportfrom_alpha_2code,passportfrom_Country name,countryto_alpha_2code,countryto_Country_name,countryto_country_alpha_3code,countryto_country_numeric_code,countryto_UN_GDP_currentPrices2016,UN_World_GDP_percent,countryto_UN_population_Value_2015_millions,UN_population_Value_2015_percent,countryto_Surface area (thousand km2),Surface_area_convered_percent,unesco_total_sites,unesco_total_sites_percent,visafree
0,AM,Armenia,AF,Afghanistan,AFG,4.0,20.235,0.000267,33.74,0.00457,653.0,0.004796,2.0,0.001715,0
1,AM,Armenia,AL,Albania,ALB,8.0,11.864,0.000157,2.92,0.000396,29.0,0.000213,3.0,0.002573,1
2,AM,Armenia,DZ,Algeria,DZA,12.0,159.049,0.002102,39.87,0.0054,2382.0,0.017494,7.0,0.006003,0
3,AM,Armenia,AS,American Samoa,ASM,16.0,,,0.06,8e-06,0.0,0.0,,,0
4,AM,Armenia,AD,Andorra,AND,20.0,2.858,3.8e-05,0.08,1.1e-05,0.0,0.0,1.0,0.000858,0


## Pandas pivot_table

In [66]:
df_pivot = pd.pivot_table(df4[df4.visafree == 1],index='passportfrom_Country name',values=['countryto_UN_GDP_currentPrices2016','UN_World_GDP_percent','countryto_UN_population_Value_2015_millions','UN_population_Value_2015_percent','countryto_Surface area (thousand km2)','Surface_area_convered_percent','unesco_total_sites','unesco_total_sites_percent','visafree'], aggfunc=np.sum, margins=False,dropna=True)
df_pivot.sort_values('visafree', ascending=False)
df_pivot.head(5)

Unnamed: 0_level_0,Surface_area_convered_percent,UN_World_GDP_percent,UN_population_Value_2015_percent,countryto_Surface area (thousand km2),countryto_UN_GDP_currentPrices2016,countryto_UN_population_Value_2015_millions,unesco_total_sites,unesco_total_sites_percent,visafree
passportfrom_Country name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Afghanistan,0.043228,0.006096,0.053354,5886.0,461.147,393.91,41.0,0.035163,31
Albania,0.235925,0.304711,0.269534,32124.0,23051.078,1989.97,640.0,0.548885,116
Algeria,0.126034,0.04048,0.132256,17161.0,3062.282,976.45,164.0,0.140652,51
Andorra,0.541803,0.758506,0.424193,73773.0,57380.122,3131.82,916.0,0.785592,170
Angola,0.12303,0.044224,0.153492,16752.0,3345.458,1133.23,141.0,0.120926,50


## Turn the pivot table in a Data frame

In [67]:
df_pivot_flattened = pd.DataFrame(df_pivot.to_records())
df_pivot_flattened.sample(5)

Unnamed: 0,passportfrom_Country name,Surface_area_convered_percent,UN_World_GDP_percent,UN_population_Value_2015_percent,countryto_Surface area (thousand km2),countryto_UN_GDP_currentPrices2016,countryto_UN_population_Value_2015_millions,unesco_total_sites,unesco_total_sites_percent,visafree
149,Saint Vincent and the Grenadines,0.288348,0.358265,0.300035,39262.0,27102.313,2215.16,731.0,0.62693,145
166,Sudan,0.069704,0.019848,0.089168,9491.0,1501.443,658.33,100.0,0.085763,40
188,United States,0.512889,0.718008,0.388461,69836.0,54316.455,2868.01,880.0,0.754717,186
3,Andorra,0.541803,0.758506,0.424193,73773.0,57380.122,3131.82,916.0,0.785592,170
105,Madagascar,0.100021,0.048124,0.154079,13619.0,3640.514,1137.57,140.0,0.120069,54


In [68]:
df_pivot_flattened['Surface_Rank'] = df_pivot_flattened['countryto_Surface area (thousand km2)'].rank(method='min', ascending=False).astype(int)
df_pivot_flattened['GDP_Rank'] = df_pivot_flattened['countryto_UN_GDP_currentPrices2016'].rank(method='min', ascending=False).astype(int)
df_pivot_flattened['Population_Rank'] = df_pivot_flattened['countryto_UN_population_Value_2015_millions'].rank(method='min', ascending=False).astype(int)
df_pivot_flattened['visafree_countries_Rank'] = df_pivot_flattened['visafree'].rank(method='min', ascending=False).astype(int)
df_pivot_flattened['unesco_total_sites_Rank'] = df_pivot_flattened['unesco_total_sites'].rank(method='min', ascending=False).astype(int)

## Calculate the overall ranking

In [69]:
df_pivot_flattened['Overall_ranking_calculation'] = df_pivot_flattened[['visafree_countries_Rank','GDP_Rank','Surface_Rank','Population_Rank','unesco_total_sites_Rank']].sum(axis=1)

In [70]:
df_pivot_flattened['New_Overall_ranking'] = df_pivot_flattened['Overall_ranking_calculation'].rank(method='min', ascending=True).astype(int)

## Create a new column Gain/loss in ranking

In [71]:
df_pivot_flattened['gain-loss'] = df_pivot_flattened['visafree_countries_Rank'] - df_pivot_flattened['New_Overall_ranking']

In [72]:
df_pivot_flattened.sample(5)

Unnamed: 0,passportfrom_Country name,Surface_area_convered_percent,UN_World_GDP_percent,UN_population_Value_2015_percent,countryto_Surface area (thousand km2),countryto_UN_GDP_currentPrices2016,countryto_UN_population_Value_2015_millions,unesco_total_sites,unesco_total_sites_percent,visafree,Surface_Rank,GDP_Rank,Population_Rank,visafree_countries_Rank,unesco_total_sites_Rank,Overall_ranking_calculation,New_Overall_ranking,gain-loss
34,Chile,0.635016,0.760999,0.449879,86465.0,57568.736,3321.46,932.0,0.799314,176,5,17,23,36,14,95,15,21
80,Iraq,0.049588,0.012063,0.04493,6752.0,912.556,331.72,60.0,0.051458,31,195,193,197,196,194,975,196,0
6,Argentina,0.511721,0.491998,0.406577,69677.0,37219.055,3001.76,905.0,0.776158,171,37,55,50,41,33,216,44,-3
49,Dominican Republic,0.13069,0.148758,0.182657,17795.0,11253.355,1348.56,222.0,0.190395,66,144,107,133,130,122,636,126,4
59,France,0.589313,0.765937,0.457618,80242.0,57942.279,3378.6,951.0,0.815609,189,9,7,16,4,5,41,4,0


## Reorder the columns

In [73]:
df_pivot_flattened = df_pivot_flattened[['passportfrom_Country name',
                                         'New_Overall_ranking',
                                         'gain-loss',
                                         'visafree',
                                         'visafree_countries_Rank',
                                         'countryto_UN_GDP_currentPrices2016',
                                         'GDP_Rank',
                                         'UN_World_GDP_percent',
                                         'countryto_UN_population_Value_2015_millions',
                                         'Population_Rank',
                                         'UN_population_Value_2015_percent',
                                         'countryto_Surface area (thousand km2)',
                                         'Surface_Rank',
                                         'Surface_area_convered_percent',
                                         'unesco_total_sites',
                                         'unesco_total_sites_percent',
                                         'unesco_total_sites_Rank',
                                         'Overall_ranking_calculation']]

## New Ranking 

In [74]:
# Re-order and format the values
df_pivot_flattened.sort_values('New_Overall_ranking',ascending=True, inplace=True)
df_pivot_flattened = df_pivot_flattened.reset_index(drop=True)
df_pivot_flattened.style.format({
    'UN_World_GDP_percent': '{:.2%}'.format,
    'UN_population_Value_2015_percent': '{:.2%}'.format,
    'Surface_area_convered_percent': '{:.2%}'.format,
    'unesco_total_sites_percent': '{:.2%}'.format
    })

Unnamed: 0,passportfrom_Country name,New_Overall_ranking,gain-loss,visafree,visafree_countries_Rank,countryto_UN_GDP_currentPrices2016,GDP_Rank,UN_World_GDP_percent,countryto_UN_population_Value_2015_millions,Population_Rank,UN_population_Value_2015_percent,countryto_Surface area (thousand km2),Surface_Rank,Surface_area_convered_percent,unesco_total_sites,unesco_total_sites_percent,unesco_total_sites_Rank,Overall_ranking_calculation
0,Japan,1,0,191,1,69726.3,1,92.17%,5923.63,1,80.23%,86657,4,63.64%,1024,87.82%,1,8
1,Singapore,2,0,190,2,69078.8,2,91.32%,4752.05,3,64.36%,90972,2,66.81%,1009,86.54%,3,12
2,"Korea, Republic of",3,-1,190,2,61527.8,6,81.33%,4877.4,2,66.06%,100405,1,73.74%,1017,87.22%,2,13
3,France,4,0,189,4,57942.3,7,76.59%,3378.6,16,45.76%,80242,9,58.93%,951,81.56%,5,41
4,Germany,5,-1,189,4,57885.7,8,76.52%,3350.6,17,45.38%,81357,8,59.75%,951,81.56%,5,42
5,Denmark,6,0,188,6,57874.5,9,76.50%,3347.62,18,45.34%,79793,10,58.60%,946,81.13%,7,50
6,Finland,6,0,188,6,57874.5,9,76.50%,3347.62,18,45.34%,79793,10,58.60%,946,81.13%,7,50
7,Sweden,6,0,188,6,57874.5,9,76.50%,3347.62,18,45.34%,79793,10,58.60%,946,81.13%,7,50
8,Italy,6,0,188,6,57874.5,9,76.50%,3347.62,18,45.34%,79793,10,58.60%,946,81.13%,7,50
9,San Marino,10,36,169,46,68521.7,3,90.58%,4481.94,4,60.71%,82795,7,60.81%,959,82.25%,4,64


In [75]:
# Export to CSV
df_pivot_flattened.to_csv('ranking.csv')

## Biggest winners and losers

In [76]:
df_rankgainloss = df_pivot_flattened[['passportfrom_Country name','gain-loss']].sort_values('gain-loss', ascending=False, inplace=False)
df_rankgainloss.style.format({
    'UN_World_GDP_percent': '{:.2%}'.format,
    'UN_population_Value_2015_percent': '{:.2%}'.format,
    'Surface_area_convered_percent': '{:.2%}'.format,
    'unesco_total_sites_percent': '{:.2%}'.format
    })

Unnamed: 0,passportfrom_Country name,gain-loss
146,Nepal,41
9,San Marino,36
77,Belarus,34
58,Bosnia and Herzegovina,32
16,Brunei Darussalam,31
132,Lao People's Democratic Republic,30
69,Ecuador,29
49,Serbia,27
111,Mongolia,26
75,Fiji,25


# A little program to access the results easily

In [77]:
# create a new list
countrylist = list(set(df_pivot_flattened['passportfrom_Country name'].drop_duplicates().values.tolist()))
countrylist.sort()

In [78]:
# Select the passport holder's country with drop drop menu
countryselected = widgets.Dropdown(
    options=countrylist,
    value='Hong Kong',
    description="Country:",
    disabled=False)

def on_change(change):
    if change['type'] == 'change' and change['name'] == 'value':
        print ("Country selected: %s" % change['new'])

countryselected.observe(on_change)
display(countryselected)

Dropdown(description='Country:', index=74, options=('Afghanistan', 'Albania', 'Algeria', 'Andorra', 'Angola', …

In [79]:
# the number of destinations accessible visa-free
countryselectedvisafree = df_pivot_flattened[df_pivot_flattened['passportfrom_Country name'] == countryselected.value]['visafree']
countryselectedvisafree = countryselectedvisafree.to_frame()
countryselectedvisafree = countryselectedvisafree.iloc[0]['visafree']
# the overall country rank
countryrank = df_pivot_flattened[df_pivot_flattened['passportfrom_Country name'] == countryselected.value]['New_Overall_ranking']
countryrank = countryrank.to_frame()
countryrank = countryrank.iloc[0]['New_Overall_ranking']
# the gain-loss compare the to Henley Passport Index 
gainloss = df_pivot_flattened[df_pivot_flattened['passportfrom_Country name'] == countryselected.value]['gain-loss']
gainloss = gainloss.to_frame()
gainloss = gainloss.iloc[0]['gain-loss']
# the rank regarding the number of countries accessible visa-free
visafreecountriesRank = df_pivot_flattened[df_pivot_flattened['passportfrom_Country name'] == countryselected.value]['visafree_countries_Rank']
visafreecountriesRank = visafreecountriesRank.to_frame()
visafreecountriesRank = visafreecountriesRank.iloc[0]['visafree_countries_Rank']
# the rank regarding to the population accessible visa-free
countryselectedpopulation = df_pivot_flattened[df_pivot_flattened['passportfrom_Country name'] == countryselected.value]['UN_population_Value_2015_percent']
countryselectedpopulation = countryselectedpopulation.to_frame()
countryselectedpopulation = countryselectedpopulation.iloc[0]['UN_population_Value_2015_percent']
# the rank regarding to the globe surface accessible visa-free
countryselectedsurface = df_pivot_flattened[df_pivot_flattened['passportfrom_Country name'] == countryselected.value]['Surface_area_convered_percent']
countryselectedsurface = countryselectedsurface.to_frame()
countryselectedsurface = countryselectedsurface.iloc[0]['Surface_area_convered_percent']
countryselectedsurface = countryselectedsurface.item()
# the rank regarding to the world GDP accessible visa-free
countryselectedGDP = df_pivot_flattened[df_pivot_flattened['passportfrom_Country name'] == countryselected.value]['UN_World_GDP_percent']
countryselectedGDP = countryselectedGDP.to_frame()
countryselectedGDP = countryselectedGDP.iloc[0]['UN_World_GDP_percent']
# the rank regarding to the number of unesco sites accessible visa-free
countryselectedUnesco = df_pivot_flattened[df_pivot_flattened['passportfrom_Country name'] == countryselected.value]['unesco_total_sites_percent']
countryselectedUnesco = countryselectedUnesco.to_frame()
countryselectedUnesco = countryselectedUnesco.iloc[0]['unesco_total_sites_percent']

# Run this query to get the statistics
def country_access(countryselected,countryselectedvisafree,countryrank,visafreecountriesRank,gainloss,countryselectedpopulation,countryselectedsurface,countryselectedGDP,countryselectedUnesco):
    print(countryselected.value + "'s passport gives you access to:")
    print("\n" + str(countryselectedvisafree) + " destinations visa-free (over " + str(df_visa_free['countryto_alpha_2code'].nunique()) + " possible), covering:")
    print("\t" + "{:.0%}".format(countryselectedpopulation) + " of the world's population")
    print("\t" + "{:.0%}".format(countryselectedsurface) + " of the world's surface")
    print("\t" + "{:.0%}".format(countryselectedGDP) + " of the world's GPD")
    print("\t" + "{:.0%}".format(countryselectedUnesco) + " of the world's Unesco World heritage sites")
    print("\nIt ranks " + str(countryrank)  +  " over " + str(df_pivot_flattened['passportfrom_Country name'].nunique()) + " countries' passports according to these 5 criteria.")
    print("It's a " + str(np.where(gainloss < 0, "rank loss of "+str(gainloss), "rank gain of +" + str(gainloss))) + " compare to the traditional Henley Passport Index (ranked " + str(visafreecountriesRank) + ").")
country_access(countryselected,countryselectedvisafree,countryrank,visafreecountriesRank,gainloss,countryselectedpopulation,countryselectedsurface,countryselectedGDP,countryselectedUnesco)

Hong Kong's passport gives you access to:

170 destinations visa-free (over 226 possible), covering:
	40% of the world's population
	65% of the world's surface
	53% of the world's GPD
	79% of the world's Unesco World heritage sites

It ranks 34 over 197 countries' passports according to these 5 criteria.
It's a rank gain of +8 compare to the traditional Henley Passport Index (ranked 42).
