### Disclosure: A tutorial from [jump.dev](https://jump.dev/JuMP.jl/stable/tutorials/getting_started/getting_started_with_data_and_plotting/#Example:-the-passport-problem)

## Data Cleaning

In [1]:
# Folder holding the CSV inputs: the `data` directory that sits next to this file's directory.
const DATA_DIR = joinpath(dirname(@__DIR__), "data")

"C:\\Users\\isossa\\Documents\\Mathematical Programming\\Passport Index Dataset\\passport-index-dataset\\data"

In [2]:
import DataFrames
import CSV

In [3]:
# Load the passport/destination matrix into a DataFrame; cells spelled "NA" are read
# as `missing`.
passport_df = CSV.read(
    joinpath(DATA_DIR, "passport-index-matrix.csv"),
    DataFrames.DataFrame;
    missingstring = "NA",
)

Unnamed: 0_level_0,Passport,Afghanistan,Albania,Algeria,Andorra,Angola
Unnamed: 0_level_1,String,String,String,String,String,String
1,Afghanistan,-1,e-visa,visa required,visa required,visa on arrival
2,Albania,visa required,-1,visa required,90,visa on arrival
3,Algeria,visa required,e-visa,-1,visa required,visa on arrival
4,Andorra,visa required,90,visa required,-1,visa on arrival
5,Angola,visa required,e-visa,visa required,visa required,-1
6,Antigua and Barbuda,visa required,90,visa required,90,visa on arrival
7,Argentina,visa required,90,visa required,90,visa on arrival
8,Armenia,visa required,90,visa required,visa required,visa on arrival
9,Australia,visa required,90,visa required,90,visa on arrival
10,Austria,visa required,90,visa required,visa free,visa on arrival


In [4]:
# Dimensions of the table as (rows, columns).
DataFrames.size(passport_df)

(199, 200)

In [5]:
# Per-column summary: min, max, number of unique values and number of missing values.
DataFrames.describe(passport_df)

Unnamed: 0_level_0,variable,mean,min,median,max,nunique,nmissing
Unnamed: 0_level_1,Symbol,Union…,Any,Union…,Any,Union…,Nothing
1,Passport,,Afghanistan,,Zimbabwe,199,
2,Afghanistan,,-1,,visa required,2,
3,Albania,,-1,,visa required,6,
4,Algeria,,-1,,visa required,3,
5,Andorra,,-1,,visa required,4,
6,Angola,,-1,,visa on arrival,4,
7,Antigua and Barbuda,,-1,,visa free,6,
8,Argentina,,-1,,visa required,4,
9,Armenia,,-1,,visa required,7,
10,Australia,,-1,,visa free,3,


In [6]:
# Column names: `Passport` first, then one column per destination country.
DataFrames.names(passport_df)

200-element Vector{String}:
 "Passport"
 "Afghanistan"
 "Albania"
 "Algeria"
 "Andorra"
 "Angola"
 "Antigua and Barbuda"
 "Argentina"
 "Armenia"
 "Australia"
 "Austria"
 "Azerbaijan"
 "Bahamas"
 ⋮
 "United Arab Emirates"
 "United Kingdom"
 "United States"
 "Uruguay"
 "Uzbekistan"
 "Vanuatu"
 "Vatican"
 "Venezuela"
 "Vietnam"
 "Yemen"
 "Zambia"
 "Zimbabwe"

In [7]:
# NOTE(review): broadcasting over a DataFrame applies `eltype` to every cell, so each
# String cell reports `Char`. If per-column element types were intended, use
# `eltype.(eachcol(passport_df))` instead.
eltype.(passport_df)

Unnamed: 0_level_0,Passport,Afghanistan,Albania,Algeria,Andorra,Angola,Antigua and Barbuda
Unnamed: 0_level_1,DataType,DataType,DataType,DataType,DataType,DataType,DataType
1,Char,Char,Char,Char,Char,Char,Char
2,Char,Char,Char,Char,Char,Char,Char
3,Char,Char,Char,Char,Char,Char,Char
4,Char,Char,Char,Char,Char,Char,Char
5,Char,Char,Char,Char,Char,Char,Char
6,Char,Char,Char,Char,Char,Char,Char
7,Char,Char,Char,Char,Char,Char,Char
8,Char,Char,Char,Char,Char,Char,Char
9,Char,Char,Char,Char,Char,Char,Char
10,Char,Char,Char,Char,Char,Char,Char


In [8]:
# Turn a raw cell into its cleaned form.
#   - text holding an integer (e.g. "90" or "-1") -> that integer as an Int64
#   - any other text (e.g. "visa required")       -> the text itself, unchanged
#   - anything that is not text                   -> nothing
# `tryparse` replaces the former try/catch, and the check is on `AbstractString`
# rather than `String` so that non-numeric text stored as another string type
# (a `SubString`, CSV's inline strings) is kept instead of being turned into `nothing`.
function converter(x)
    x isa AbstractString || return nothing
    parsed = tryparse(Int64, x)
    return parsed === nothing ? x : parsed
end

converter (generic function with 1 method)

In [9]:
# Collapse a cleaned cell to a 0/1 indicator.
# Any `Int` (such as the 90 or -1 entries in the table) and the texts "visa free" and
# "visa on arrival" map to 1; every other value maps to 0.
function modifier(x)
    x isa Int && return 1
    return x in ("visa free", "visa on arrival") ? 1 : 0
end

modifier (generic function with 1 method)

In [10]:
# Run `converter` over every country column. Columns are looked up by the names
# listed in `Passport`, so each passport country must also exist as a column.
for country in passport_df.Passport
    raw_column = passport_df[!, country]
    passport_df[!, country] = broadcast(converter, raw_column)
end

In [11]:
# Display the table after `converter` has been applied to the country columns.
passport_df

Unnamed: 0_level_0,Passport,Afghanistan,Albania,Algeria,Andorra,Angola
Unnamed: 0_level_1,String,Any,Any,Any,Any,Any
1,Afghanistan,-1,e-visa,visa required,visa required,visa on arrival
2,Albania,visa required,-1,visa required,90,visa on arrival
3,Algeria,visa required,e-visa,-1,visa required,visa on arrival
4,Andorra,visa required,90,visa required,-1,visa on arrival
5,Angola,visa required,e-visa,visa required,visa required,-1
6,Antigua and Barbuda,visa required,90,visa required,90,visa on arrival
7,Argentina,visa required,90,visa required,90,visa on arrival
8,Armenia,visa required,90,visa required,visa required,visa on arrival
9,Australia,visa required,90,visa required,90,visa on arrival
10,Austria,visa required,90,visa required,visa free,visa on arrival


In [12]:
# Run `modifier` over every country column so that each cell becomes a 0/1 indicator.
for country in passport_df.Passport
    cleaned_column = passport_df[!, country]
    passport_df[!, country] = broadcast(modifier, cleaned_column)
end

In [13]:
# Preview the first 20 rows of the table after `modifier` has been applied.
DataFrames.head(passport_df, 20)

Unnamed: 0_level_0,Passport,Afghanistan,Albania,Algeria,Andorra,Angola,Antigua and Barbuda
Unnamed: 0_level_1,String,Int64,Int64,Int64,Int64,Int64,Int64
1,Afghanistan,1,0,0,0,1,0
2,Albania,0,1,0,1,1,1
3,Algeria,0,0,1,0,1,0
4,Andorra,0,1,0,1,1,1
5,Angola,0,0,0,0,1,0
6,Antigua and Barbuda,0,1,0,1,1,1
7,Argentina,0,1,0,1,1,1
8,Armenia,0,1,0,0,1,1
9,Australia,0,1,0,1,1,1
10,Austria,0,1,0,1,1,1


The values of the cells above represent:
- 1 = no visa required for travel
- 0 = a visa (including an e-visa) is required for travel, or travel is not possible

## Modeling

Let **c** represent a country. Then **x<sub>c</sub>** is a binary variable that equals 1 if we select **c** and 0 otherwise.

In [14]:
using JuMP
import HiGHS

In [15]:
# Set of all countries, taken from the `Passport` column (one entry per row of the table)
C = passport_df.Passport;

In [16]:
# Create a model and initialize the decision variables
model = Model(HiGHS.Optimizer)
# NOTE(review): only the first 155 of the 199 rows are modelled, and the name `range`
# is also the name of `Base.range` — confirm that both are intended.
range = 1:155
# One binary variable per candidate passport country.
@variable(model, x[C[range]], Bin)
# Minimise the number of selected passports.
@objective(model, Min, sum(x))
# For every destination column d, at least one selected passport must have a 1 in that column.
@constraint(model, [d in C[range]], passport_df[range, d]' * x >= 1)
model

A JuMP Model
Minimization problem with:
Variables: 155
Objective function type: AffExpr
`AffExpr`-in-`MathOptInterface.GreaterThan{Float64}`: 155 constraints
`VariableRef`-in-`MathOptInterface.ZeroOne`: 155 constraints
Model mode: AUTOMATIC
CachingOptimizer state: EMPTY_OPTIMIZER
Solver name: HiGHS
Names registered in the model: x

In [None]:
# Now optimize: hand the model to the HiGHS optimizer attached when it was created
optimize!(model)

In [None]:
# Summarise the outcome of the solve before reading any values from the model.
solution_summary(model)

In [None]:
# Solution: the objective is sum(x), i.e. the number of passports selected
println("Minimum number of passports needed: ", objective_value(model))

In [None]:
println("Optimal passports:")
# List every modelled country whose variable is set; comparing against 0.5 rather than 1
# means small numerical deviations in the reported value do not matter.
for c in C[range]
    value(x[c]) > 0.5 && println(" * ", c)
end