In [1]:
from collections import defaultdict

import pandas as pd
import numpy as np
import pycountry

from src.datasources.safety import get_safety_crime_index

PASSPORT_INDEX_CSV = "https://raw.githubusercontent.com/ilyankou/passport-index-dataset/master/passport-index-matrix-iso2.csv"

def load_safety_crime():
    """Build a DataFrame from whatever ``get_safety_crime_index()`` returns.

    Returns:
        pandas.DataFrame constructed directly from the data source's result.
        NOTE(review): ``make_destination_df`` joins this frame on an
        ``'alpha_2'`` column, so the source is expected to provide one.
    """
    safety_crime_records = get_safety_crime_index()
    safety_crime_frame = pd.DataFrame(safety_crime_records)
    return safety_crime_frame

def load_passport_index():
    """Read the passport-index matrix located at ``PASSPORT_INDEX_CSV``.

    Returns:
        pandas.DataFrame parsed from the CSV.

    pandas' default NA markers are disabled because they include the string
    ``"NA"``, which is also the ISO 3166-1 alpha-2 code for Namibia; with the
    defaults that passport code would be parsed as NaN and could never match a
    participant's country code. Only empty cells are still treated as missing.
    """
    return pd.read_csv(
        PASSPORT_INDEX_CSV,
        keep_default_na=False,
        na_values=[""],
    )

def get_country_name(alpha_2):
    """Return the pycountry name for ``alpha_2``, or ``''`` if the lookup is falsy.

    Args:
        alpha_2: Two-letter country code passed to ``pycountry.countries.get``.

    Returns:
        The ``name`` attribute of the matched country, or an empty string when
        the lookup yields nothing.
    """
    country = pycountry.countries.get(alpha_2=alpha_2)
    return country.name if country else ''

def make_destination_df(df_passport_index, df_safety_crime, participants_ccs):
    good_values = [
        '7-360',
        'visa free',
        'visa on arrival',
        'e-visa',
        '-1'
    ]
    bad_values = [
        'visa required',
        'covid ban',
        'no admission',
        'Hayya Entry Permit'
    ]
    freetravel_dst_map = defaultdict(set)
    for _, row in df_passport_index.iterrows():
        src_cc = row['Passport']
        for key in row.keys():
            if key == 'Passport':
                continue
            if row[key] in bad_values:
                continue
            freetravel_dst_map[key].add(src_cc)

    ok_list = []
    for dst_cc, ok_cc_set in freetravel_dst_map.items():
        ok_count = 0
        for ok_cc in ok_cc_set:
            ok_count += participants_ccs.count(ok_cc)
        ok_list.append(
            {'visa_free_count': ok_count, 'alpha_2': dst_cc}
        )

    df_scores = pd.DataFrame(ok_list)
    df_scores['country_name'] = df_scores['alpha_2'].apply(get_country_name)
    return df_scores.merge(df_safety_crime, how='left', on='alpha_2')

In [11]:
# Fetch both inputs once; the two loads are independent of each other.
df_passport_index = load_passport_index()  # read from PASSPORT_INDEX_CSV
df_safety_crime = load_safety_crime()      # built from get_safety_crime_index()

In [15]:
# One two-letter passport country code per participant: list a code once for
# every participant who holds that passport (two Italian participants -> "IT" twice).
participants_ccs = ["IT", "IT", "IR", "US", "CN", "RU"]

In [13]:
# Count, per destination, how many of the listed participants are not blocked
# from entering, and attach the safety/crime columns.
df_scores = make_destination_df(
    df_passport_index=df_passport_index,
    df_safety_crime=df_safety_crime,
    participants_ccs=participants_ccs,
)

In [14]:
# Ten destinations that the largest number of participants can enter.
df_scores.sort_values('visa_free_count', ascending=False).head(10)

Unnamed: 0,visa_free_count,alpha_2,country_name,crime_index,safety_index,raw_name,name
0,5,AL,Albania,45.67,54.33,Albania,Albania
54,5,RW,Rwanda,27.62,72.38,Rwanda,Rwanda
62,5,KR,"Korea, Republic of",25.49,74.51,South Korea,"Korea, Republic of"
61,5,SO,Somalia,68.6,31.4,Somalia,Somalia
60,5,SG,Singapore,29.25,70.75,Singapore,Singapore
59,5,SL,Sierra Leone,,,,
58,5,SC,Seychelles,,,,
57,5,ST,Sao Tome and Principe,,,,
56,5,WS,Samoa,,,,
55,5,KN,Saint Kitts and Nevis,,,,
