# Generate Passport Index datasets
* Data by Passport Index 2024: https://www.passportindex.org/
* In both tidy and matrix formats
* Using ISO-2, ISO-3, and full country names

In [33]:
import requests
import pandas as pd
import json
import time

# Load tqdm!
from tqdm import tqdm
tqdm.pandas()

In [29]:
# Country reference table (Country, ISO2, ISO3), read entirely as strings.
# Missing cells are replaced with the literal 'NA' -- presumably to restore
# Namibia's ISO-2 code, which pandas parses as a missing value by default
# (TODO confirm against the gist contents).
country_codes_url = 'https://gist.githubusercontent.com/ilyankou/b2580c632bdea4af2309dcaa69860013/raw/420fb417bcd17d833156efdf64ce8a1c3ceb2691/country-codes'
country_data = pd.read_csv(country_codes_url, dtype=str).fillna('NA')

In [31]:
# Preview the loaded country reference table (pandas truncates the middle rows)
country_data

Unnamed: 0,Country,ISO2,ISO3
0,Afghanistan,AF,AFG
1,Albania,AL,ALB
2,Algeria,DZ,DZA
3,Andorra,AD,AND
4,Angola,AO,AGO
...,...,...,...
194,Venezuela,VE,VEN
195,Vietnam,VN,VNM
196,Yemen,YE,YEM
197,Zambia,ZM,ZMB


## Get data from PassportIndex

In [105]:
def get_data(country_code, year='2024', timeout=30):
    """Fetch the visa requirements for one passport from PassportIndex.

    Parameters
    ----------
    country_code : str
        ISO-2 code of the passport; it is lower-cased before being sent.
    year : str, default '2024'
        Value sent in the ``year`` form field.
    timeout : float, default 30
        Seconds to wait for the server before ``requests`` gives up, so a
        stalled connection cannot hang the whole scrape.

    Returns
    -------
    The decoded JSON body of the response. In this notebook's runs it is a
    list of dicts with keys such as ``code``, ``dur`` and ``text``.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    requests.Timeout
        If no response arrives within ``timeout`` seconds.
    """

    time.sleep(0.2) # Do not overload the server

    response = requests.post(
        'https://www.passportindex.org/incl/compare2.php',
        headers={
            'Host': 'www.passportindex.org',
            'User-Agent': 'PostmanRuntime/7.37.0',
            'Accept': '*/*',
            'Accept-Language': 'en-US,en;q=0.5',
            'Accept-Encoding': 'gzip, deflate, br',
            'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
            'X-Requested-With': 'XMLHttpRequest',
            'Origin': 'https://www.passportindex.org'
        },
        data={
            'compare': '3',
            'cc': country_code.lower(),
            'year': year,
            'csrf_token': 'aaa'
        },
        timeout=timeout
    )

    # Fail loudly on an HTTP error instead of trying to parse an error page as JSON
    response.raise_for_status()

    return response.json()

In [107]:
# Sanity check: the BY passport's entry for destination RU should read visa-free
[entry for entry in get_data('BY') if entry.get('code') == "RU"]

[{'code': 'RU',
  'dur': '90',
  'text': 'visa-free',
  'text_col': 2,
  'text_url': '',
  'link': 0}]

In [108]:
# One request per passport (ISO-2 code), with a tqdm progress bar;
# each raw response is stored in the new 'travel' column
country_data['travel'] = country_data['ISO2'].progress_apply(get_data)

100%|█████████████████████████████████████████| 199/199 [02:33<00:00,  1.30it/s]


In [39]:
# Every passport must have a response stored in 'travel'
missing_rows = country_data['travel'].isna().sum()
assert missing_rows == 0, f"Didn't fetch {missing_rows} rows!"

In [72]:
# Distinct requirement labels returned for the BY passport
{dest.get('text') for dest in country_data.loc[country_data['ISO2'] == 'BY', 'travel'].iloc[0]}

{'COVID-19 ban',
 'eVisa',
 'pre-enrollment',
 'pre-visa on arrival',
 'tourist registration',
 'visa on arrival',
 'visa on arrival (EASE)',
 'visa on arrival / eVisa',
 'visa required',
 'visa-free'}

In [83]:
# Raw list of destination records fetched for the BY passport
country_data.loc[country_data['ISO2'] == 'BY', 'travel'].iloc[0]

[{'code': 'AF',
  'dur': '',
  'text': 'visa required',
  'text_col': 0,
  'text_url': ''},
 {'code': 'AL',
  'dur': '',
  'text': 'visa required',
  'text_col': 0,
  'text_url': ''},
 {'code': 'DZ',
  'dur': '',
  'text': 'visa required',
  'text_col': 0,
  'text_url': ''},
 {'code': 'AD',
  'dur': '',
  'text': 'visa required',
  'text_col': 0,
  'text_url': ''},
 {'code': 'AO',
  'dur': '',
  'text': 'pre-visa on arrival',
  'text_col': 0,
  'text_url': ''},
 {'code': 'AG',
  'dur': '180',
  'text': 'visa-free',
  'text_col': 2,
  'text_url': ''},
 {'code': 'AR',
  'dur': '',
  'text': 'visa required',
  'text_col': 0,
  'text_url': ''},
 {'code': 'AM',
  'dur': '',
  'text': 'visa required',
  'text_col': 0,
  'text_url': ''},
 {'code': 'AU', 'dur': '', 'text': 'eVisa', 'text_col': 0, 'text_url': ''},
 {'code': 'AT',
  'dur': '',
  'text': 'visa required',
  'text_col': 0,
  'text_url': ''},
 {'code': 'AZ',
  'dur': '',
  'text': 'COVID-19 ban',
  'text_col': 0,
  'text_url': ''},


## Clean up the data

In [77]:
def _normalize_requirement(text, dur):
    """Map a raw PassportIndex label to one of the dataset's categories.

    `text` is the raw label (matched case-insensitively) and `dur` is the
    record's stay duration. Returns the category string, or '' when the
    label matches none of the known patterns. The order of the checks
    matters: 'visa on arrival' is tested by substring before the eVisa
    patterns, so 'visa on arrival / eVisa' and 'pre-visa on arrival' both
    become 'visa on arrival'.
    """
    text = text.lower()

    # ** Visa required, incl Cuba's tourist card **
    if text in ('visa required', 'tourist card'):
        return 'visa required'

    # ** Visa on arrival **
    if 'visa on arrival' in text:
        return 'visa on arrival'

    # ** Covid-19 ban **
    if text == 'covid-19 ban':
        return 'covid ban'

    # ** Visa-free, incl. Seychelles' tourist registration **
    # The allowed duration is reported when known, otherwise plain 'visa free'
    if 'visa-free' in text or 'tourist registration' in text or 'visa waiver' in text:
        return dur if dur != '' else 'visa free'

    # ** eVisas, incl eVisitors (Australia), eTourist cards (Suriname),
    # eTA (US), and pre-enrollment (Ivory Coast), or EVW (UK) **
    if 'evis' in text or 'etourist' in text or text in ('eta', 'pre-enrollment', 'evw'):
        return 'e-visa'

    # ** No admission, including Trump ban **
    if text in ('trump ban', 'not admitted'):
        return 'no admission'

    return ''


# obj[passport ISO-2][destination ISO-2] -> normalized requirement
obj = {}

# Walk passports and their fetched records together instead of re-querying
# the frame for every passport (assumes ISO2 values are unique).
for passport, destinations in zip(country_data['ISO2'], country_data['travel']):

    # Add passport to the object
    requirements = obj.setdefault(passport, {})

    # Add destinations for the given passport
    for dest in destinations:
        res = _normalize_requirement(dest['text'], dest['dur'])

        # Unrecognized labels are kept verbatim so they show up in the value counts.
        # NOTE(review): fix_iso2 is not defined in the visible notebook -- it must
        # already exist in the kernel; confirm where it comes from.
        requirements[ fix_iso2(dest['code']) ] = res if res != '' else dest['text']

In [78]:
# Preview the passport x destination matrix.
# NOTE(review): `matrix` is only created in the Save section further down, so on a
# fresh kernel this cell has to run after that one.
matrix

Unnamed: 0,AL,DZ,AD,AO,AG,AR,AM,AU,AT,AZ,...,UY,UZ,VU,VA,VE,VN,YE,ZM,ZW,AF
AF,visa required,visa required,visa required,visa on arrival,e-visa,visa required,visa required,e-visa,visa required,covid ban,...,visa required,e-visa,visa required,visa required,visa required,visa required,visa required,e-visa,e-visa,-1
AL,-1,visa required,90,visa on arrival,180,visa required,visa required,e-visa,90,covid ban,...,visa required,e-visa,visa required,90,visa required,visa required,visa required,visa on arrival,visa on arrival,visa required
DZ,visa required,-1,visa required,visa on arrival,e-visa,visa required,visa required,e-visa,visa required,covid ban,...,visa required,e-visa,visa required,visa required,visa required,visa required,visa on arrival,e-visa,visa on arrival,visa required
AD,90,visa required,-1,visa on arrival,180,visa required,visa required,visa required,90,covid ban,...,visa required,30,30,90,90,visa required,visa required,visa on arrival,visa on arrival,visa required
AO,visa required,visa required,visa required,-1,e-visa,visa required,visa required,e-visa,visa required,covid ban,...,visa required,e-visa,visa required,visa required,visa required,visa required,visa required,30,90,visa required
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
VE,90,visa required,visa required,visa on arrival,180,visa required,visa on arrival,e-visa,visa required,covid ban,...,visa required,e-visa,visa required,visa required,-1,visa required,visa required,visa on arrival,visa on arrival,visa required
VN,visa required,visa required,visa required,visa on arrival,e-visa,visa required,visa required,e-visa,visa required,covid ban,...,visa required,e-visa,visa required,visa required,visa required,-1,visa required,visa on arrival,e-visa,visa required
YE,visa required,visa required,visa required,visa on arrival,e-visa,visa required,visa required,e-visa,visa required,covid ban,...,visa required,e-visa,visa required,visa required,visa required,visa required,-1,e-visa,e-visa,visa required
ZM,visa required,visa required,visa required,30,180,visa required,visa required,e-visa,visa required,covid ban,...,visa required,e-visa,30,visa required,visa required,visa required,visa required,-1,90,visa required


## Save

In [79]:
def _save_matrix_and_tidy(frame, suffix):
    """Write `frame` as a matrix CSV and as a stacked (tidy) CSV.

    `suffix` is inserted before '.csv' in both file names
    ('-iso2', '-iso3', or '' for full country names).
    """
    frame.to_csv(f'passport-index-matrix{suffix}.csv', index_label='Passport')
    frame.stack().to_csv(
        f'passport-index-tidy{suffix}.csv',
        index_label=['Passport', 'Destination'],
        header=['Requirement'])


# Rows are passports, columns are destinations; cells with no scraped value become -1
matrix = pd.DataFrame(obj).T.fillna(-1)

# Code -> label lookups built straight from the country table, so this cell does
# not depend on a `codes` frame that is never defined in the notebook.
# NOTE(review): assumes `codes` was `country_data` indexed by ISO2 -- confirm.
iso2to3 = dict(zip(country_data['ISO2'], country_data['ISO3']))
iso2name = dict(zip(country_data['ISO2'], country_data['Country']))

# ISO-2: Matrix + Tidy
_save_matrix_and_tidy(matrix, '-iso2')

# ISO-3: Matrix + Tidy
_save_matrix_and_tidy(matrix.rename(columns=iso2to3, index=iso2to3), '-iso3')

# Country names: Matrix + Tidy
_save_matrix_and_tidy(matrix.rename(columns=iso2name, index=iso2name), '')

In [80]:
# Long-format (passport, destination) series keyed by ISO-3 codes,
# followed by a count of every distinct requirement value
tidy = (
    matrix
    .rename(index=iso2to3, columns=iso2to3)
    .stack()
)
tidy.value_counts()

visa required      23522
visa on arrival     5520
e-visa              3558
90                  3216
visa free           1794
30                   962
180                  302
-1                   199
covid ban            198
120                   69
60                    69
42                    60
15                    54
14                    35
28                    20
no admission           9
360                    7
21                     4
7                      2
31                     1
Name: count, dtype: int64

In [81]:
# All passport/destination pairs classified as 'no admission'
tidy.loc[tidy.eq('no admission')]

AZE  ARM    no admission
ISR  BGD    no admission
XKX  ARM    no admission
LBY  USA    no admission
SOM  USA    no admission
SYR  USA    no admission
TWN  GEO    no admission
YEM  IRN    no admission
     USA    no admission
dtype: object

In [82]:
# Spot check: requirement for the BLR passport travelling to RUS.
# NOTE(review): the saved output below ('visa required') disagrees with the
# BY -> RU sanity check near the top ('visa-free'); the execution counts suggest
# this output predates the latest fetch -- re-run to confirm.
tidy.loc['BLR', 'RUS']

'visa required'

### Difference with previous run

In [55]:
# Tidy ISO-3 export saved by the previous run, used as the comparison baseline
tidy_old = pd.read_csv(
    filepath_or_buffer='legacy/2024-05-08/passport-index-tidy-iso3.csv'
)

In [56]:
# Pair every current (passport, destination) value with the previous run's value
# and keep only the pairs whose requirement changed, ignoring the diagonal.
(
    tidy
    .to_frame()
    .reset_index()
    .merge(
        tidy_old,
        how='inner',
        left_on=['level_0', 'level_1'],
        right_on=['Passport', 'Destination'],
    )
    .assign(is_different=lambda merged: merged[0] != merged['Requirement'])
    .loc[lambda merged: merged['is_different'] & (merged['Passport'] != merged['Destination'])]
)

Unnamed: 0,level_0,level_1,0,Passport,Destination,Requirement,is_different
0,AFG,ALB,visa required,AFG,ALB,e-visa,True
3,AFG,AGO,visa on arrival,AFG,AGO,visa required,True
9,AFG,AZE,covid ban,AFG,AZE,visa required,True
10,AFG,BHS,visa required,AFG,BHS,e-visa,True
11,AFG,BHR,visa required,AFG,BHR,e-visa,True
...,...,...,...,...,...,...,...
39570,ZWE,SUR,e-visa,ZWE,SUR,visa required,True
39578,ZWE,TLS,visa required,ZWE,TLS,visa on arrival,True
39588,ZWE,ARE,e-visa,ZWE,ARE,visa required,True
39592,ZWE,UZB,e-visa,ZWE,UZB,visa required,True
