In [1]:
#load libraries
import pandas as pd
import os as os
import numpy as np
import re
pd.set_option('display.max_columns', 300)
pd.set_option('display.max_rows', 300)

## Crawled database from https://www.henleypassportindex.com

In [2]:
df_visa_free = pd.read_csv('henley_visa_free_11_01_2019.csv', encoding = "ISO-8859-1")

In [3]:
df_visa_free.rename(columns={'passport': 'passportfrom_alpha_2code'}, inplace=True)
df_visa_free.rename(columns={'to': 'countryto_alpha_2code'}, inplace=True)

In [4]:
df_visa_free.shape

(45173, 3)

In [5]:
df_visa_free['passportfrom_alpha_2code'].nunique()

198

In [6]:
df_visa_free['countryto_alpha_2code'].nunique()

226

In [7]:
df_visa_free.head(5)

Unnamed: 0,passportfrom_alpha_2code,countryto_alpha_2code,visafree
0,AM,AF,0
1,AM,AL,1
2,AM,DZ,0
3,AM,AS,0
4,AM,AD,0


In [8]:
# check the number of visa-free countries for Japan
sum(df_visa_free[df_visa_free['passportfrom_alpha_2code']=='JP']['visafree'])

190

## List of world countries with ISO codes
### Source Wikipedia https://en.wikipedia.org/wiki/List_of_ISO_3166_country_codes
### Project by Radcliff https://gist.github.com/radcliff/f09c0f88344a7fcef373

In [9]:
df_iso = pd.read_csv('https://gist.githubusercontent.com/radcliff/f09c0f88344a7fcef373/raw/2753c482ad091c54b1822288ad2e4811c021d8ec/wikipedia-iso-country-codes.csv', encoding = "ISO-8859-1")

In [10]:
df_iso.rename(columns={'English short name lower case': 'Country name'}, inplace=True)
df_iso.rename(columns={'Alpha-2 code': 'country_alpha_2code'}, inplace=True)
df_iso.rename(columns={'Alpha-3 code': 'country_alpha_3code'}, inplace=True)
df_iso.rename(columns={'Numeric code': 'country_numeric_code'}, inplace=True)
df_iso = df_iso.drop(['ISO 3166-2'], axis=1)

In [11]:
df_iso.shape

(246, 4)

In [12]:
df_iso.sample(5)

Unnamed: 0,Country name,country_alpha_2code,country_alpha_3code,country_numeric_code
72,Finland,FI,FIN,246
191,San Marino,SM,SMR,674
183,Saint BarthÃ©lemy,BL,BLM,652
186,Saint Lucia,LC,LCA,662
16,Bahamas,BS,BHS,44


## GDP per countries in USD, 2016, from the United Nations
### Source: http://data.un.org/

In [13]:
df_GDP_UN = pd.read_csv('http://data.un.org/_Docs/SYB/CSV/SYB61_T13_GDP%20and%20GDP%20Per%20Capita.csv', 
                        encoding = "ISO-8859-1", 
                        delimiter = ',',
                        header=1)

## Filter the dataframe

In [14]:
# filter the data to GDP in 2016
df_GDP_UN = df_GDP_UN[df_GDP_UN['Series'] == 'GDP in current prices (millions of US dollars)']
df_GDP_UN = df_GDP_UN[df_GDP_UN.Year == 2016]

In [15]:
# Organize the data
df_GDP_UN['Value'] = df_GDP_UN['Value'].str.replace(',', '')
df_GDP_UN['Value'] = df_GDP_UN['Value'].astype(int)
# Renaming columns
df_GDP_UN.rename(columns={'Region/Country/Area': 'country_numeric_code'}, inplace=True)
df_GDP_UN.rename(columns={'Value': 'UN_GDP_currentPrices2016'}, inplace=True)
# Convert GDP from millions to billions
df_GDP_UN['UN_GDP_currentPrices2016_billions'] = df_GDP_UN['UN_GDP_currentPrices2016'] / 1000

In [16]:
df_GDP_UN.shape

(242, 8)

In [17]:
df_GDP_UN

Unnamed: 0,country_numeric_code,Unnamed: 1,Year,Series,UN_GDP_currentPrices2016,Footnotes,Source,UN_GDP_currentPrices2016_billions
6,1,"Total, all countries or areas",2016,GDP in current prices (millions of US dollars),75648868,,"United Nations Statistics Division, New York, ...",75648.868
34,2,Africa,2016,GDP in current prices (millions of US dollars),2143440,,"United Nations Statistics Division, New York, ...",2143.44
62,15,Northern Africa,2016,GDP in current prices (millions of US dollars),700350,,"United Nations Statistics Division, New York, ...",700.35
90,202,Sub-Saharan Africa,2016,GDP in current prices (millions of US dollars),1443090,,"United Nations Statistics Division, New York, ...",1443.09
118,14,Eastern Africa,2016,GDP in current prices (millions of US dollars),320971,,"United Nations Statistics Division, New York, ...",320.971
146,17,Middle Africa,2016,GDP in current prices (millions of US dollars),225212,,"United Nations Statistics Division, New York, ...",225.212
174,18,Southern Africa,2016,GDP in current prices (millions of US dollars),328201,,"United Nations Statistics Division, New York, ...",328.201
202,11,Western Africa,2016,GDP in current prices (millions of US dollars),568706,,"United Nations Statistics Division, New York, ...",568.706
230,19,Americas,2016,GDP in current prices (millions of US dollars),25404270,,"United Nations Statistics Division, New York, ...",25404.27
258,21,Northern America,2016,GDP in current prices (millions of US dollars),20162646,,"United Nations Statistics Division, New York, ...",20162.646


In [18]:
df_GDP_UN = df_GDP_UN[['country_numeric_code','UN_GDP_currentPrices2016_billions']]

## Get the World total GDP for 2016

In [19]:
# country code = 1 for the world aggregate
df_GDP_UN['UN_World_GDP_percent'] = df_GDP_UN['UN_GDP_currentPrices2016_billions'] / df_GDP_UN[df_GDP_UN['country_numeric_code'] == 1].iloc[0,1]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [20]:
df_GDP_UN

Unnamed: 0,country_numeric_code,UN_GDP_currentPrices2016_billions,UN_World_GDP_percent
6,1,75648.868,1.0
34,2,2143.44,0.02833407
62,15,700.35,0.009257905
90,202,1443.09,0.01907616
118,14,320.971,0.004242906
146,17,225.212,0.00297707
174,18,328.201,0.004338479
202,11,568.706,0.007517707
230,19,25404.27,0.3358182
258,21,20162.646,0.2665294


## Population per countries in millions, 2015, from the United Nations
### Source: http://data.un.org/_Docs/SYB/CSV/SYB61_T02_Population,%20Surface%20Area%20and%20Density.csv

In [21]:
df_population_UN = pd.read_csv('UN_population_2015_millions.csv', encoding = "ISO-8859-1")

In [22]:
df_population_UN.rename(columns={'countryCode': 'country_numeric_code'}, inplace=True)
df_population_UN.rename(columns={'countryName': 'UN_population_countryName'}, inplace=True)

In [23]:
df_population_UN['UN_population_Value_2015_millions'] = df_population_UN['UN_population_Value_2015_millions'].str.replace(',', '')
df_population_UN['UN_population_Value_2015_millions'] = df_population_UN['UN_population_Value_2015_millions'].astype(float)

In [24]:
# World Population mid-year estimates 7383 millions, 2015
# Source: United Nations Population Division, New York, World Population Prospects: The 2017 Revision, last accessed June 2017.
df_population_UN['UN_population_Value_2015_percent'] = (df_population_UN['UN_population_Value_2015_millions'] / 7383)

In [25]:
df_population_UN.shape

(228, 4)

In [26]:
df_population_UN.sample(5)

Unnamed: 0,country_numeric_code,UN_population_countryName,UN_population_Value_2015_millions,UN_population_Value_2015_percent
1,8,Albania,2.92,0.000396
31,854,Burkina Faso,18.11,0.002453
102,376,Israel,8.06,0.001092
20,84,Belize,0.36,4.9e-05
121,454,Malawi,17.57,0.00238


## Surface per countries in thousand of km2, from the United Nations
### http://data.un.org/_Docs/SYB/CSV/SYB61_T02_Population,%20Surface%20Area%20and%20Density.csv

In [27]:
df_surface_UN = pd.read_csv('UN_surface_areas.csv', encoding = "ISO-8859-1")

In [28]:
df_surface_UN.rename(columns={'countryCode': 'country_numeric_code'}, inplace=True)
df_surface_UN.rename(columns={'countryName': 'UN_surface_countryName'}, inplace=True)

In [29]:
df_surface_UN['Surface area (thousand km2)'] = df_surface_UN['Surface area (thousand km2)'].str.replace(',', '')
df_surface_UN['Surface area (thousand km2)'] = df_surface_UN['Surface area (thousand km2)'].astype(float)

In [30]:
# World Surface area 136,162 thousand km2
# Source: United Nations Statistics Division, New York, "Demographic Yearbook 2015" and the demographic statistics database, last accessed June 2017.
df_surface_UN['Surface_area_convered_percent'] = (df_surface_UN['Surface area (thousand km2)'] / 136162)

In [31]:
df_surface_UN.shape

(229, 5)

In [32]:
df_surface_UN.sample(5)

Unnamed: 0,country_numeric_code,UN_surface_countryName,Surface area (thousand km2),Year_Surface area,Surface_area_convered_percent
157,598,Papua New Guinea,463.0,2015,0.0034
70,238,Falkland Islands (Malvinas),12.0,2015,8.8e-05
135,496,Mongolia,1564.0,2015,0.011486
107,400,Jordan,89.0,2015,0.000654
214,784,United Arab Emirates,84.0,2015,0.000617


## Intersect df_visa_free and df_iso on 'country to'

In [33]:
# copy df_visa_free, rename the columns from centadata and hauntedhouse
df_visa_free2 = df_visa_free.copy()

In [34]:
df_visa_free2.rename(columns={'countryto_alpha_2code': 'country_alpha_2code'}, inplace=True)

In [35]:
# Intersect on 'countryto_alpha_2code'
df = pd.merge(df_visa_free2, df_iso, on=['country_alpha_2code'], how='left')

In [36]:
df.rename(columns={'country_alpha_2code': 'countryto_alpha_2code'}, inplace=True)
df.rename(columns={'Country name': 'countryto_Country name'}, inplace=True)
df.rename(columns={'country_alpha_3code': 'countryto_country_alpha_3code'}, inplace=True)
df.rename(columns={'country_numeric_code': 'countryto_country_numeric_code'}, inplace=True)

In [37]:
df['passportfrom_alpha_2code'].nunique()

198

In [38]:
df['countryto_alpha_2code'].nunique()

226

In [39]:
df.sample(10)

Unnamed: 0,passportfrom_alpha_2code,countryto_alpha_2code,visafree,countryto_Country name,countryto_country_alpha_3code,countryto_country_numeric_code
7056,KN,AR,1,Argentina,ARG,32.0
16144,KN,TN,1,Tunisia,TUN,788.0
18854,NZ,CI,0,CÃ´te d'Ivoire,CIV,384.0
3894,PS,MZ,1,Mozambique,MOZ,508.0
15793,TW,VE,0,"Venezuela, Bolivarian Republic of",VEN,862.0
15861,RO,IQ,0,Iraq,IRQ,368.0
27496,MR,QA,0,Qatar,QAT,634.0
11064,LC,JO,1,Jordan,JOR,400.0
3995,BG,SB,1,Solomon Islands,SLB,90.0
1481,BA,SV,0,El Salvador,SLV,222.0


## Intersect df_visa_free and df_iso on 'passport'

In [40]:
df_iso.rename(columns={'country_alpha_2code': 'passportfrom_alpha_2code'}, inplace=True)

In [41]:
# Intersect on 'passportfrom_alpha_2code'
df = pd.merge(df, df_iso, on=['passportfrom_alpha_2code'], how='left')

In [42]:
df_iso.rename(columns={'passportfrom_alpha_2code': 'country_alpha_2code'}, inplace=True)

In [43]:
df.rename(columns={'Country name': 'passportfrom_Country name'}, inplace=True)

In [44]:
df = df.drop(['country_alpha_3code'], axis=1)
df = df.drop(['country_numeric_code'], axis=1)

## Intersect df and df_GDP_UN

In [45]:
df_GDP_UN2 = df_GDP_UN.copy()
df_GDP_UN2.rename(columns={'country_numeric_code': 'countryto_country_numeric_code'}, inplace=True)

In [46]:
df2 = pd.merge(df, df_GDP_UN2, on=['countryto_country_numeric_code'], how='left')

In [47]:
df2.rename(columns={'UN_GDP_currentPrices2016_billions': 'countryto_UN_GDP_currentPrices2016'}, inplace=True)

In [48]:
df2.sample(10)

Unnamed: 0,passportfrom_alpha_2code,countryto_alpha_2code,visafree,countryto_Country name,countryto_country_alpha_3code,countryto_country_numeric_code,passportfrom_Country name,countryto_UN_GDP_currentPrices2016,UN_World_GDP_percent
6469,SY,KY,0,Cayman Islands,CYM,136.0,Syrian Arab Republic,3.844,5.1e-05
6858,PS,YE,0,Yemen,YEM,887.0,"Palestinian Territory, Occupied",25.374,0.000335
4828,TG,AS,0,American Samoa,ASM,16.0,Togo,,
30480,LU,ZA,1,South Africa,ZAF,710.0,Luxembourg,295.44,0.003905
17278,PG,MY,1,Malaysia,MYS,458.0,Papua New Guinea,296.531,0.00392
44525,CV,TR,0,Turkey,TUR,792.0,Cape Verde,863.712,0.011417
22293,NL,LU,1,Luxembourg,LUX,442.0,Netherlands,58.631,0.000775
30134,HT,EG,1,Egypt,EGY,818.0,Haiti,270.144,0.003571
38871,BS,CG,0,Congo,COG,178.0,Bahamas,7.778,0.000103
32081,GY,IT,0,Italy,ITA,380.0,Guyana,1858.913,0.024573


## Intersect df2 and df_population_UN

In [49]:
df_population_UN2 = df_population_UN.copy()
df_population_UN2.rename(columns={'country_numeric_code': 'countryto_country_numeric_code'}, inplace=True)

In [50]:
df3 = pd.merge(df2, df_population_UN2, on=['countryto_country_numeric_code'], how='left')

In [51]:
df3 = df3.drop(['UN_population_countryName'], axis=1)

In [52]:
df3.rename(columns={'UN_population_Value_2015_millions': 'countryto_UN_population_Value_2015_millions'}, inplace=True)

In [53]:
df3.sample(10)

Unnamed: 0,passportfrom_alpha_2code,countryto_alpha_2code,visafree,countryto_Country name,countryto_country_alpha_3code,countryto_country_numeric_code,passportfrom_Country name,countryto_UN_GDP_currentPrices2016,UN_World_GDP_percent,countryto_UN_population_Value_2015_millions,UN_population_Value_2015_percent
31675,GT,GW,1,Guinea-Bissau,GNB,624.0,Guatemala,1.123,1.5e-05,1.77,0.00024
35533,FI,DO,1,Dominican Republic,DOM,214.0,Finland,71.584,0.000946,10.53,0.001426
28379,MT,SM,1,San Marino,SMR,674.0,Malta,1.591,2.1e-05,0.03,4e-06
24512,NR,RS,0,Serbia,SRB,688.0,Nauru,38.3,0.000506,8.85,0.001199
38594,KH,GI,0,Gibraltar,GIB,292.0,Cambodia,,,0.03,4e-06
4506,GB,CO,1,Colombia,COL,170.0,United Kingdom,282.463,0.003734,48.23,0.006533
21865,MY,DM,1,Dominica,DMA,212.0,Malaysia,0.581,8e-06,0.07,9e-06
29953,MG,SO,1,Somalia,SOM,706.0,Madagascar,1.318,1.7e-05,13.91,0.001884
22040,MV,HR,0,Croatia,HRV,191.0,Maldives,51.231,0.000677,4.24,0.000574
19102,,CY,0,Cyprus,CYP,196.0,Namibia,20.046,0.000265,1.16,0.000157


## Intersect df3 and df_surface_UN

In [54]:
df_surface_UN2 = df_surface_UN.copy()
df_surface_UN2.rename(columns={'country_numeric_code': 'countryto_country_numeric_code'}, inplace=True)

In [55]:
df4 = pd.merge(df3, df_surface_UN2, on=['countryto_country_numeric_code'], how='left')

In [56]:
df4 = df4.drop(['UN_surface_countryName'], axis=1)

In [57]:
df4.rename(columns={'Surface area (thousand km2)': 'countryto_Surface area (thousand km2)'}, inplace=True)
df4.rename(columns={'Year_Surface area': 'countryto_Year_Surface area'}, inplace=True)

In [58]:
df4.sample(5)

Unnamed: 0,passportfrom_alpha_2code,countryto_alpha_2code,visafree,countryto_Country name,countryto_country_alpha_3code,countryto_country_numeric_code,passportfrom_Country name,countryto_UN_GDP_currentPrices2016,UN_World_GDP_percent,countryto_UN_population_Value_2015_millions,UN_population_Value_2015_percent,countryto_Surface area (thousand km2),countryto_Year_Surface area,Surface_area_convered_percent
37042,IR,TN,0,Tunisia,TUN,788.0,"Iran, Islamic Republic of",41.704,0.000551,11.27,0.001526,164.0,2015.0,0.001204
12019,TT,RS,0,Serbia,SRB,688.0,Trinidad and Tobago,38.3,0.000506,8.85,0.001199,88.0,2015.0,0.000646
19891,SA,TR,0,Turkey,TUR,792.0,Saudi Arabia,863.712,0.011417,78.27,0.010601,784.0,2015.0,0.005758
44435,CF,TM,0,Turkmenistan,TKM,795.0,Central African Republic,36.18,0.000478,5.57,0.000754,488.0,2015.0,0.003584
7216,TZ,CI,0,CÃ´te d'Ivoire,CIV,384.0,"Tanzania, United Republic of",36.768,0.000486,23.11,0.00313,322.0,2015.0,0.002365


## Reordering the columns

In [59]:
df4 = df4.reset_index(drop=True)
cols = df4.columns.tolist()
cols

['passportfrom_alpha_2code',
 'countryto_alpha_2code',
 'visafree',
 'countryto_Country name',
 'countryto_country_alpha_3code',
 'countryto_country_numeric_code',
 'passportfrom_Country name',
 'countryto_UN_GDP_currentPrices2016',
 'UN_World_GDP_percent',
 'countryto_UN_population_Value_2015_millions',
 'UN_population_Value_2015_percent',
 'countryto_Surface area (thousand km2)',
 'countryto_Year_Surface area',
 'Surface_area_convered_percent']

In [60]:
df4 = df4[['passportfrom_alpha_2code','passportfrom_Country name','countryto_alpha_2code','countryto_Country name','countryto_country_alpha_3code', 'countryto_country_numeric_code','countryto_UN_GDP_currentPrices2016','UN_World_GDP_percent', 'countryto_UN_population_Value_2015_millions','UN_population_Value_2015_percent','countryto_Surface area (thousand km2)','Surface_area_convered_percent','visafree']]

In [61]:
df4['passportfrom_alpha_2code'].nunique()

198

In [62]:
df4.head()

Unnamed: 0,passportfrom_alpha_2code,passportfrom_Country name,countryto_alpha_2code,countryto_Country name,countryto_country_alpha_3code,countryto_country_numeric_code,countryto_UN_GDP_currentPrices2016,UN_World_GDP_percent,countryto_UN_population_Value_2015_millions,UN_population_Value_2015_percent,countryto_Surface area (thousand km2),Surface_area_convered_percent,visafree
0,AM,Armenia,AF,Afghanistan,AFG,4.0,20.235,0.000267,33.74,0.00457,653.0,0.004796,0
1,AM,Armenia,AL,Albania,ALB,8.0,11.864,0.000157,2.92,0.000396,29.0,0.000213,1
2,AM,Armenia,DZ,Algeria,DZA,12.0,159.049,0.002102,39.87,0.0054,2382.0,0.017494,0
3,AM,Armenia,AS,American Samoa,ASM,16.0,,,0.06,8e-06,0.0,0.0,0
4,AM,Armenia,AD,Andorra,AND,20.0,2.858,3.8e-05,0.08,1.1e-05,0.0,0.0,0


## check the countries that have visa-free access to Russia

In [63]:
#df4[(df4['countryto_alpha_2code'] == 'RU') & (df4['visafree'] == 1)] 

## Check data types

In [64]:
df4.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 45173 entries, 0 to 45172
Data columns (total 13 columns):
passportfrom_alpha_2code                       44946 non-null object
passportfrom_Country name                      44719 non-null object
countryto_alpha_2code                          44974 non-null object
countryto_Country name                         44178 non-null object
countryto_country_alpha_3code                  44178 non-null object
countryto_country_numeric_code                 44178 non-null float64
countryto_UN_GDP_currentPrices2016             40994 non-null float64
UN_World_GDP_percent                           40994 non-null float64
countryto_UN_population_Value_2015_millions    43183 non-null float64
UN_population_Value_2015_percent               43183 non-null float64
countryto_Surface area (thousand km2)          43382 non-null float64
Surface_area_convered_percent                  43382 non-null float64
visafree                                       45173 non

## Pandas pivot_table

In [65]:
df_pivot = pd.pivot_table(df4[df4.visafree == 1],index='passportfrom_Country name',values=['countryto_UN_GDP_currentPrices2016','UN_World_GDP_percent','countryto_UN_population_Value_2015_millions','UN_population_Value_2015_percent','countryto_Surface area (thousand km2)','Surface_area_convered_percent','visafree'], aggfunc=np.sum, margins=False,dropna=True)
df_pivot.sort_values('visafree', ascending=False)

Unnamed: 0_level_0,Surface_area_convered_percent,UN_World_GDP_percent,UN_population_Value_2015_percent,countryto_Surface area (thousand km2),countryto_UN_GDP_currentPrices2016,countryto_UN_population_Value_2015_millions,visafree
passportfrom_Country name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Japan,0.636426,0.856458,0.802334,86657.0,64790.088,5923.63,190
"Korea, Republic of",0.739435,0.794678,0.653774,100683.0,60116.506,4826.81,189
Singapore,0.670885,0.909226,0.642897,91349.0,68781.884,4746.51,189
Germany,0.597656,0.719216,0.442759,81378.0,54407.864,3268.89,188
France,0.588035,0.733346,0.448888,80068.0,55476.825,3314.14,188
Denmark,0.588475,0.760984,0.452652,80128.0,57567.6,3341.93,187
Finland,0.586309,0.761888,0.45268,79833.0,57635.997,3342.14,187
Sweden,0.585567,0.75824,0.452101,79732.0,57360.024,3337.86,187
Italy,0.586573,0.740468,0.445364,79869.0,56015.587,3288.12,187
Spain,0.579317,0.737269,0.436537,78881.0,55773.533,3222.95,186


## Turn the pivot table in a Data frame

In [66]:
df_pivot_flattened = pd.DataFrame(df_pivot.to_records())
df_pivot_flattened

Unnamed: 0,passportfrom_Country name,Surface_area_convered_percent,UN_World_GDP_percent,UN_population_Value_2015_percent,countryto_Surface area (thousand km2),countryto_UN_GDP_currentPrices2016,countryto_UN_population_Value_2015_millions,visafree
0,Afghanistan,0.038432,0.005828,0.048784,5233.0,440.912,360.17,30
1,Albania,0.235712,0.304555,0.269139,32095.0,23039.214,1987.05,115
2,Algeria,0.10854,0.038378,0.126856,14779.0,2903.233,936.58,50
3,Andorra,0.544579,0.758468,0.424183,74151.0,57377.264,3131.74,169
4,Angola,0.113872,0.04281,0.149718,15505.0,3238.54,1105.37,49
5,Antigua and Barbuda,0.280937,0.354537,0.314421,38253.0,26820.347,2321.37,150
6,Argentina,0.494081,0.484782,0.400696,67275.0,36673.189,2958.34,170
7,Armenia,0.332016,0.092259,0.194054,45208.0,6979.278,1432.7,61
8,Australia,0.45592,0.708646,0.392877,62079.0,53608.232,2900.61,181
9,Austria,0.579986,0.745744,0.428972,78972.0,56414.712,3167.1,185


In [67]:
df_pivot_flattened['Surface_Rank'] = df_pivot_flattened['countryto_Surface area (thousand km2)'].rank(method='min', ascending=False).astype(int)
df_pivot_flattened['GDP_Rank'] = df_pivot_flattened['countryto_UN_GDP_currentPrices2016'].rank(method='min', ascending=False).astype(int)
df_pivot_flattened['Population_Rank'] = df_pivot_flattened['countryto_UN_population_Value_2015_millions'].rank(method='min', ascending=False).astype(int)
df_pivot_flattened['visafree_countries_Rank'] = df_pivot_flattened['visafree'].rank(method='min', ascending=False).astype(int)

In [68]:
df_pivot_flattened.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 197 entries, 0 to 196
Data columns (total 12 columns):
passportfrom_Country name                      197 non-null object
Surface_area_convered_percent                  197 non-null float64
UN_World_GDP_percent                           197 non-null float64
UN_population_Value_2015_percent               197 non-null float64
countryto_Surface area (thousand km2)          197 non-null float64
countryto_UN_GDP_currentPrices2016             197 non-null float64
countryto_UN_population_Value_2015_millions    197 non-null float64
visafree                                       197 non-null int64
Surface_Rank                                   197 non-null int32
GDP_Rank                                       197 non-null int32
Population_Rank                                197 non-null int32
visafree_countries_Rank                        197 non-null int32
dtypes: float64(6), int32(4), int64(1), object(1)
memory usage: 15.5+ KB


## Calculate the overall ranking

In [69]:
df_pivot_flattened['Overall_ranking_calculation'] = df_pivot_flattened[['visafree_countries_Rank','GDP_Rank','Surface_Rank','Population_Rank']].sum(axis=1)

In [70]:
df_pivot_flattened['New_Overall_ranking'] = df_pivot_flattened['Overall_ranking_calculation'].rank(method='min', ascending=True).astype(int)

## Create a new column Gain/loss in ranking

In [71]:
df_pivot_flattened['gain-loss'] = df_pivot_flattened['visafree_countries_Rank'] - df_pivot_flattened['New_Overall_ranking']

In [72]:
df_pivot_flattened

Unnamed: 0,passportfrom_Country name,Surface_area_convered_percent,UN_World_GDP_percent,UN_population_Value_2015_percent,countryto_Surface area (thousand km2),countryto_UN_GDP_currentPrices2016,countryto_UN_population_Value_2015_millions,visafree,Surface_Rank,GDP_Rank,Population_Rank,visafree_countries_Rank,Overall_ranking_calculation,New_Overall_ranking,gain-loss
0,Afghanistan,0.038432,0.005828,0.048784,5233.0,440.912,360.17,30,197,197,195,196,785,197,-1
1,Albania,0.235712,0.304555,0.269139,32095.0,23039.214,1987.05,115,109,91,96,93,389,97,-4
2,Algeria,0.10854,0.038378,0.126856,14779.0,2903.233,936.58,50,163,171,172,168,674,172,-4
3,Andorra,0.544579,0.758468,0.424183,74151.0,57377.264,3131.74,169,30,14,40,42,126,31,11
4,Angola,0.113872,0.04281,0.149718,15505.0,3238.54,1105.37,49,157,166,162,170,655,166,4
5,Antigua and Barbuda,0.280937,0.354537,0.314421,38253.0,26820.347,2321.37,150,95,81,82,57,315,80,-23
6,Argentina,0.494081,0.484782,0.400696,67275.0,36673.189,2958.34,170,41,54,51,41,187,48,-7
7,Armenia,0.332016,0.092259,0.194054,45208.0,6979.278,1432.7,61,88,122,121,138,469,116,22
8,Australia,0.45592,0.708646,0.392877,62079.0,53608.232,2900.61,181,48,38,55,25,166,42,-17
9,Austria,0.579986,0.745744,0.428972,78972.0,56414.712,3167.1,185,18,23,33,12,86,17,-5


## Reorder the columns

In [73]:
df_pivot_flattened = df_pivot_flattened[['passportfrom_Country name',
                                         'New_Overall_ranking',
                                         'gain-loss',
                                         'visafree',
                                         'visafree_countries_Rank',
                                         'countryto_UN_GDP_currentPrices2016',
                                         'GDP_Rank',
                                         'UN_World_GDP_percent',
                                         'countryto_UN_population_Value_2015_millions',
                                         'Population_Rank',
                                         'UN_population_Value_2015_percent',
                                         'countryto_Surface area (thousand km2)',
                                         'Surface_Rank',
                                         'Surface_area_convered_percent',
                                         'Overall_ranking_calculation']]

In [74]:
df_pivot_flattened.sort_values('New_Overall_ranking',ascending=True, inplace=True)
df_pivot_flattened = df_pivot_flattened.reset_index(drop=True)
df_pivot_flattened.style.format({
    'UN_World_GDP_percent': '{:.2%}'.format,
    'UN_population_Value_2015_percent': '{:.2%}'.format,
    'Surface_area_convered_percent': '{:.2%}'.format
    })

Unnamed: 0,passportfrom_Country name,New_Overall_ranking,gain-loss,visafree,visafree_countries_Rank,countryto_UN_GDP_currentPrices2016,GDP_Rank,UN_World_GDP_percent,countryto_UN_population_Value_2015_millions,Population_Rank,UN_population_Value_2015_percent,countryto_Surface area (thousand km2),Surface_Rank,Surface_area_convered_percent,Overall_ranking_calculation
0,Singapore,1,1,189,2,68781.9,1,90.92%,4746.51,3,64.29%,91349,2,67.09%,8
1,Japan,2,-1,190,1,64790.1,4,85.65%,5923.63,1,80.23%,86657,4,63.64%,10
2,"Korea, Republic of",3,-1,189,2,60116.5,6,79.47%,4826.81,2,65.38%,100683,1,73.94%,11
3,Denmark,4,2,187,6,57567.6,9,76.10%,3341.93,17,45.27%,80128,9,58.85%,41
4,Finland,5,1,187,6,57636.0,7,76.19%,3342.14,16,45.27%,79833,13,58.63%,42
5,Sweden,6,0,187,6,57360.0,15,75.82%,3337.86,18,45.21%,79732,14,58.56%,53
6,Luxembourg,7,3,186,10,57610.6,8,76.16%,3253.48,26,44.07%,79837,12,58.63%,56
7,San Marino,8,38,168,46,68520.1,2,90.58%,4481.91,4,60.71%,83173,7,61.08%,59
8,Brunei Darussalam,9,39,165,48,65240.0,3,86.24%,4237.51,5,57.40%,84441,6,62.02%,62
9,Italy,10,-4,187,6,56015.6,28,74.05%,3288.12,22,44.54%,79869,11,58.66%,67


## Biggest winners and losers

In [75]:
df_rankgainloss = df_pivot_flattened.sort_values('gain-loss', ascending=False, inplace=False)
df_rankgainloss.style.format({
    'UN_World_GDP_percent': '{:.2%}'.format,
    'UN_population_Value_2015_percent': '{:.2%}'.format,
    'Surface_area_convered_percent': '{:.2%}'.format
    })

Unnamed: 0,passportfrom_Country name,New_Overall_ranking,gain-loss,visafree,visafree_countries_Rank,countryto_UN_GDP_currentPrices2016,GDP_Rank,UN_World_GDP_percent,countryto_UN_population_Value_2015_millions,Population_Rank,UN_population_Value_2015_percent,countryto_Surface area (thousand km2),Surface_Rank,Surface_area_convered_percent,Overall_ranking_calculation
138,Nepal,139,49,40,188,4964.39,133,6.56%,2448.87,73,33.17%,14385,166,10.56%,560
8,Brunei Darussalam,9,39,165,48,65240.0,3,86.24%,4237.51,5,57.40%,84441,6,62.02%,62
59,Ecuador,60,39,93,99,23156.8,90,30.61%,3379.38,15,45.77%,63280,45,46.47%,249
7,San Marino,8,38,168,46,68520.1,2,90.58%,4481.91,4,60.71%,83173,7,61.08%,59
76,Belarus,76,36,77,112,20372.7,98,26.93%,3191.17,31,43.22%,61245,51,44.98%,292
129,Lao People's Democratic Republic,130,33,52,163,5365.78,131,7.09%,1485.53,116,20.12%,33337,108,24.48%,518
57,Bosnia and Herzegovina,58,33,118,91,35374.1,60,46.76%,3485.16,13,47.21%,59135,61,43.43%,225
25,"Taiwan, Province of China",26,32,149,58,62452.6,5,82.56%,3868.61,8,52.40%,64836,44,47.62%,115
45,Serbia,46,30,129,76,41387.1,47,54.71%,3583.12,12,48.53%,67491,40,49.57%,175
71,Fiji,72,29,89,101,22117.1,94,29.24%,3272.0,23,44.32%,57109,67,41.94%,285
