# Generate Passport Index datasets
* Data by Passport Index 2025: https://www.passportindex.org/
* In both tidy and matrix formats
* Using ISO-2, ISO-3, and full country names

In [1]:
import pandas as pd
import json
import time

# Load tqdm!
from tqdm import tqdm
tqdm.pandas()

### In your browser,

1. Navigate to https://www.passportindex.org/comparebyPassport.php?p1=sa&y1=2024
1. Open Dev tools > Network
1. Force-reload the page (e.g. Cmd+Shift+R)
1. Find a POST request to *compare2.php*, right-click, and select 'Copy Value' -> 'Copy as cURL'
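1. Paste the copied command into the `curl_command` string in the cell below, replacing the previous value. The command contains your session cookies and CSRF token, so avoid sharing it.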

In [8]:
# Raw cURL command copied from the browser's dev tools (see the steps above).
# get_data() reuses it as a template: it adds `-s`, trims the Accept-Encoding
# header and swaps the `cc=sa` form field for each passport's country code.
# NOTE(review): the Cookie header (PHPSESSID, cf_clearance) and csrf_token below
# are session-specific values -- consider redacting them before committing or sharing.
curl_command = """
curl 'https://www.passportindex.org/incl/compare2.php' --compressed -X POST -H 'User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:133.0) Gecko/20100101 Firefox/133.0' -H 'Accept: */*' -H 'Accept-Language: en-GB,en;q=0.5' -H 'Accept-Encoding: gzip, deflate, br, zstd' -H 'Content-Type: application/x-www-form-urlencoded; charset=UTF-8' -H 'X-Requested-With: XMLHttpRequest' -H 'Origin: https://www.passportindex.org' -H 'DNT: 1' -H 'Sec-GPC: 1' -H 'Alt-Used: www.passportindex.org' -H 'Connection: keep-alive' -H 'Referer: https://www.passportindex.org/comparebyPassport.php?p1=sa&y1=2024' -H 'Cookie: _ga_7Z48X951ET=GS1.1.1736685598.11.1.1736685665.0.0.0; _ga=GA1.1.982642551.1730071472; PHPSESSID=0833ebcdbd2b18a09ae244ed8b0328e8; cf_clearance=I0SN.n0ZiCMxR4aM2IwJAq1ESx9sMKHQaa7YhTDMG_4-1736685598-1.2.1.1-mxcnq4_cJGgYgjItlC_CexouejcHcCD00RJHolLNS0dZAi3P4jdmNWDb2b5csULp.uHCvDvgnHrt_EMJWjCI8LRuRwTr7KavcAYacdl4_3_mLV0YeLpcRlxa1hUSfFLQW3srRytg_P6L4bdDgQyGqIUOIyTjxsH4F9EwqShjaeJlv3rDFruOiXb4mzrD6EdwjO9dkkLaZU0mn8DLqa_qAs30Gke3bm8HFo.LYwDpiz1T.Q6rE5H1Z5OHyEedlO4Q7PXBFRsMYTR0u3FTHPErXtA1uBHeu.oZJA36J0Lp5QA' -H 'Sec-Fetch-Dest: empty' -H 'Sec-Fetch-Mode: cors' -H 'Sec-Fetch-Site: same-origin' -H 'TE: trailers' --data-raw 'compare=3&year=2024&cc=sa&csrf_token=4ab79b7e02105aacd734f86704658c07919dbfce85da8661e8cece8345009e64'
"""

In [9]:
# Country reference table; it supplies the ISO2, ISO3 and Country columns used below.
# Every column is read as a string, and missing cells become the literal 'NA'
# (presumably to restore Namibia's ISO-2 code, which pandas would otherwise
# parse as a missing value -- TODO confirm).
country_codes_url = 'https://gist.githubusercontent.com/ilyankou/b2580c632bdea4af2309dcaa69860013/raw/420fb417bcd17d833156efdf64ce8a1c3ceb2691/country-codes'

country_data = pd.read_csv(country_codes_url, dtype=str).fillna('NA')

In [10]:
def get_data(country_code):

    time.sleep(6.1)

    curl_command_ = (curl_command
                     .replace('curl ', 'curl -s ')
                     .replace(', br, zstd', '')
                     .replace('cc=sa', f'cc={str.lower(country_code)}')
                    )

    res = !{curl_command_}
    return json.loads(res[0])

In [11]:
# One throttled request per passport (ISO-2 code), with a tqdm progress bar;
# the parsed response is stored alongside the country row.
country_data['travel'] = country_data['ISO2'].progress_apply(get_data)

100%|█████████████████████████████████████████| 199/199 [21:31<00:00,  6.49s/it]


## Get data from PassportIndex

In [12]:
# Every passport must have a fetched payload before the clean-up step runs.
n_missing = country_data['travel'].isna().sum()
assert n_missing == 0, f"Didn't fetch {n_missing} rows!"

## Clean up the data

In [53]:
def _classify_requirement(dest):
    """Map one raw destination record to a normalised requirement label.

    ``dest`` is a mapping with a ``'text'`` entry (the raw requirement label)
    and, for visa-free style entries, a ``'dur'`` entry (the stay duration).
    Matching is done on the lower-cased text; the first rule that matches wins.
    If no rule matches, the raw ``dest['text']`` is returned unchanged.
    """
    text = dest['text'].lower()

    # ** Visa required, incl Cuba's tourist card or China's Exit Entry Permit for Macau/HK **
    if text in ('visa required', 'tourist card', 'exit-entry permit'):
        return 'visa required'

    # ** Visa on arrival **
    if 'visa on arrival' in text:
        return 'visa on arrival'

    # ** Covid-19 ban **
    if text == 'covid-19 ban':
        return 'covid ban'

    # ** Visa-free, incl. Seychelles' tourist registration **
    # The duration is used as the label when present, otherwise plain 'visa free'.
    if 'visa-free' in text or 'tourist registration' in text or 'visa waiver' in text:
        return dest['dur'] if dest['dur'] != '' else 'visa free'

    # ** eTA such as ESTA (US), eVisitor (Australia), EVW (UK), eTourist cards (Suriname)
    # pre-enrollment (Ivory Coast)
    if text in ('eta', 'pre-enrollment', 'evw', 'evisitors') or 'etourist' in text:
        return 'eta'

    # ** e-visas (not Esta/ETA style)
    if 'evisa' in text:
        return 'e-visa'

    # ** No admission, including Trump ban **
    if text in ('trump ban', 'not admitted'):
        return 'no admission'

    # Arrival cards / e-tickets are treated as visa-free entry
    if 'arrival card' in text or 'e-ticket' in text:
        return 'visa free'

    # Unknown label: keep the original text so it shows up in the value counts
    return dest['text']


# Nested mapping: passport ISO-2 code -> {destination code -> requirement}
obj = {}

# Walk the rows once instead of re-querying the frame for every passport.
for passport, destinations in zip(country_data['ISO2'], country_data['travel']):
    requirements = obj.setdefault(passport, {})
    for dest in destinations:
        requirements[dest['code']] = _classify_requirement(dest)

In [54]:
# Quick peek: row index and ISO-3 code of the first five countries
for row_idx, iso3 in country_data['ISO3'].head(5).items():
    print(row_idx, iso3)

0 AFG
1 ALB
2 DZA
3 AND
4 AGO


## Save

In [55]:
# Passport x destination matrix keyed by ISO-2 codes; pairs without a value get -1
matrix = pd.DataFrame(obj).T.fillna(-1)

# Lookup tables from ISO-2 to the other two labelling schemes
iso2to3 = dict(zip(country_data['ISO2'], country_data['ISO3']))
iso2name = dict(zip(country_data['ISO2'], country_data['Country']))

# (relabelling, matrix file, tidy file) for ISO-2, ISO-3 and full country names
outputs = (
    (None, 'passport-index-matrix-iso2.csv', 'passport-index-tidy-iso2.csv'),
    (iso2to3, 'passport-index-matrix-iso3.csv', 'passport-index-tidy-iso3.csv'),
    (iso2name, 'passport-index-matrix.csv', 'passport-index-tidy.csv'),
)

for relabel, matrix_path, tidy_path in outputs:
    if relabel is None:
        labelled = matrix
    else:
        labelled = matrix.rename(columns=relabel, index=relabel)

    # Wide format: one row per passport, one column per destination
    labelled.to_csv(matrix_path, index_label='Passport')

    # Long format: one row per (passport, destination) pair
    labelled.stack().to_csv(
        tidy_path,
        index_label=['Passport', 'Destination'],
        header=['Requirement'])

In [56]:
# Long-format series indexed by (passport ISO-3, destination ISO-3),
# followed by a frequency table of every requirement value
iso3_matrix = matrix.rename(index=iso2to3, columns=iso2to3)
tidy = iso3_matrix.stack()
tidy.value_counts()

visa required      12921
90                  7651
e-visa              6704
visa on arrival     6053
visa free           1883
30                  1698
eta                  819
180                  685
120                  233
-1                   199
60                   192
14                   107
21                   106
360                   97
15                    94
42                    60
no admission          34
28                    23
240                   15
45                    15
10                     6
7                      4
150                    1
31                     1
Name: count, dtype: int64

In [57]:
#tidy[tidy == "Exit-entry Permit"]

In [58]:
# Passport/destination pairs classified as 'no admission' (typically war zones etc.)
no_admission_mask = tidy.eq('no admission')
tidy[no_admission_mask]

ARM  AZE    no admission
BGD  IRQ    no admission
     LBY    no admission
HTI  SUR    no admission
IRN  LBY    no admission
ISR  DZA    no admission
     BGD    no admission
     BRN    no admission
     IRN    no admission
     LBN    no admission
     LBY    no admission
     MYS    no admission
     PAK    no admission
     SAU    no admission
     SYR    no admission
     YEM    no admission
XKX  ARM    no admission
     KHM    no admission
     CUB    no admission
     HKG    no admission
     SYC    no admission
PRK  JPN    no admission
     LKA    no admission
PAK  LBY    no admission
PSE  MDG    no admission
     SYR    no admission
PHL  KWT    no admission
SOM  AUS    no admission
     CAN    no admission
SDN  LBY    no admission
SYR  LBY    no admission
TWN  GEO    no admission
TJK  KGZ    no admission
YEM  LBY    no admission
dtype: object

In [59]:
# Spot-check one known pair: the BLR -> RUS entry is expected to be '90'
belarus_to_russia = tidy.loc[('BLR', 'RUS')]
assert belarus_to_russia == '90', "Check data!"

In [68]:
# Australia as a destination: make sure e-visa, eTA and eVisitor are reclassed properly
expected_for_australia = (('RUS', 'e-visa'), ('GBR', 'eta'), ('USA', 'eta'))
for passport_iso3, expected_requirement in expected_for_australia:
    assert tidy.loc[(passport_iso3, 'AUS')] == expected_requirement

### Difference with previous run
* Typically, the difference between two consecutive months is in the low hundreds.
* If the difference is higher, double-check the data!

In [69]:
# SELECT PREVIOUS VERSION TO COMPARE!
previous_tidy = pd.read_csv('legacy/2025-01-02/passport-index-tidy-iso3.csv')

# Current run as a flat table with named columns
current_tidy = tidy.reset_index().rename(
    columns={'level_0': 'Passport', 'level_1': 'Destination', 0: 'Requirement'}
)

# Left join keeps every current pair; pairs absent from the previous run get NaN
comparison = current_tidy.merge(
    previous_tidy,
    how='left',
    left_on=['Passport', 'Destination'],
    right_on=['Passport', 'Destination'],
    suffixes=('_new', '_old')
)

# Keep pairs whose requirement changed, ignoring a passport paired with itself
has_changed = comparison['Requirement_old'].ne(comparison['Requirement_new'])
is_foreign = comparison['Passport'] != comparison['Destination']
x = comparison[has_changed & is_foreign]

x

Unnamed: 0,Passport,Destination,Requirement_new,Requirement_old
40,AFG,CIV,eta,e-visa
87,AFG,KEN,eta,e-visa
239,ALB,CIV,eta,e-visa
286,ALB,KEN,eta,e-visa
361,ALB,KOR,eta,e-visa
...,...,...,...,...
39290,ZMB,KEN,eta,e-visa
39335,ZMB,PAK,eta,e-visa
39339,ZMB,PNG,eta,e-visa
39442,ZWE,CIV,eta,e-visa


In [70]:
# Changed requirements for holders of a UK (GBR) passport
x[x['Passport'].eq('GBR')]

Unnamed: 0,Passport,Destination,Requirement_new,Requirement_old
37419,GBR,AUS,eta,e-visa
37441,GBR,CAN,eta,e-visa
37452,GBR,CIV,eta,e-visa
37499,GBR,KEN,eta,e-visa
37536,GBR,NZL,eta,e-visa
37544,GBR,PAK,eta,e-visa
37548,GBR,PNG,eta,e-visa
37600,GBR,USA,eta,e-visa


In [72]:
# Changed requirements for travel to Australia (AUS)
x[x['Destination'].eq('AUS')]

Unnamed: 0,Passport,Destination,Requirement_new,Requirement_old
604,AND,AUS,eta,e-visa
1798,AUT,AUS,eta,e-visa
3191,BEL,AUS,eta,e-visa
4783,BRN,AUS,eta,e-visa
4982,BGR,AUS,eta,e-visa
5977,CAN,AUS,eta,e-visa
8365,HRV,AUS,eta,e-visa
8763,CYP,AUS,eta,e-visa
8962,CZE,AUS,eta,e-visa
9161,DNK,AUS,eta,e-visa
