In [1]:
using JuMP, Gurobi, CSV, Random, StatsBase, LinearAlgebra, Distributions, DataFrames, Plots

In [30]:
# Read data
train = CSV.read("../data/3. Housing/housing_train.csv")
test = CSV.read("../data/3. Housing/housing_test.csv")

#From DataFrames to Matrices
#input data
X = convert(Matrix, train[:,1:13])
X_test = convert(Matrix, test[:,1:13])

#demand
Y = train[:, 14]
Y_test = test[:,14]

n, p = size(X)

(354, 13)

## Regularized Least Squares

In [31]:
function regularized_least_squares(X, Y, λ)
    n,p = size(X)
    model_rls = Model(solver=GurobiSolver(OutputFlag = 0))
    @variable(model_rls, w[1:p])
    @objective(
        model_rls,
        Min,
        sum((Y[i] - dot(w, X[i,:]))^2 for i=1:n) + λ*w'w
    )
    solve(model_rls)
    w_opt_rls = getvalue(w)
    opt_obj_rls = getobjectivevalue(model_rls)
    return w_opt_rls, opt_obj_rls
end

regularized_least_squares (generic function with 1 method)

## Stable Regression

In [32]:
function stable_reg_l2(X, Y, λ, k) 
    # k: number of points in training, K: number of classes, λ: Regularization coeff
    n,p = size(X)
    model_stable_reg = Model(solver=GurobiSolver(OutputFlag = 0))
    @variable(model_stable_reg, w[1:p])
    @variable(model_stable_reg, θ)
    @variable(model_stable_reg, u[1:n] >= 0)
    
    @objective(
        model_stable_reg, 
        Min, 
        λ*w'w + (k*θ + sum(u[i] for i=1:n))
    )
    
    @constraint(
        model_stable_reg, 
        [i=1:n], 
        u[i] + θ >= -(Y[i]-w'X[i,:])
    )
    @constraint(
        model_stable_reg, 
        [i=1:n], 
        u[i] + θ >= (Y[i]-w'X[i,:])
    )
    solve(model_stable_reg)
    w_opt = getvalue(w)
    u_opt = getvalue(u)
    θ_opt = getvalue(θ)
    opt_obj = getobjectivevalue(model_stable_reg)
    
    return w_opt, u_opt, θ_opt, opt_obj
end

stable_reg_l2 (generic function with 1 method)

## EVaR Regression Regularized
$$
\min_{s, w} \lambda ||w||^2 + s \log \left(\frac{1}{n\alpha} \sum_{i=1}^n \exp \left(\frac{(Y_i - w^\top X_i)^2}{s}\right) \right)
$$

### $\ell_2$ EVaR

In [33]:
function objective_ℓ2(X,Y,s,w,α, λ)
    """
    α: fraction of data that constitutes the training set ∈ [0,1]
    """
    n = size(X)[1]
    return λ*dot(w, w) + s*log(sum(exp(1/s*(dot(w,X[i,:])-Y[i])^2) for i=1:n)/(n*α))
end

function ∇sobjective_ℓ2(X,Y,s,w,α,λ) # Not affected bY λ but still putting it for function homogeneity issue
    n = size(X)[1]
    return log(sum(exp(1/s*(dot(w,X[i,:])-Y[i])^2) for i=1:n)/(n*α)) - sum(((dot(w,X[i,:])-Y[i])^2)*exp(1/s*(dot(w,X[i,:])-Y[i])^2) for i=1:n)/sum(exp(1/s*(dot(w,X[i,:])-Y[i])^2) for i=1:n)/s
end

function ∇wobjective_ℓ2(X,Y,s,w,α,λ)
    n = size(X)[1]
    return 2*λ*w + 2*sum(((dot(w,X[i,:])-Y[i]))*exp(1/s*(dot(w,X[i,:])-Y[i])^2)*X[i,:] for i=1:n)/sum(exp(1/s*(dot(w,X[i,:])-Y[i])^2) for i=1:n)
end

∇wobjective_ℓ2 (generic function with 1 method)

In [34]:
function gradient_descent_EVaR_ℓ2(X, Y, s_0, w_0, α, λ, c_s, c_w, ε)
    """
    c_s: constant learning rate associated to the ∇ w.r.t s
    c_w: constant learning rate associated to the ∇ w.r.t w
    ε: 
    """
    #println(objective_ℓ2(X,Y,s_0,w_0,α,λ))
    list_f = [objective_ℓ2(X,Y,s_0,w_0,α,λ)]
    s=s_0; w=w_0
    n_grad = dot(∇wobjective_ℓ2(X,Y,s,w,α,λ), ∇wobjective_ℓ2(X,Y,s,w,α,λ)) + dot(∇sobjective_ℓ2(X,Y,s,w,α,λ),∇sobjective_ℓ2(X,Y,s,w,α,λ))
    k = 1
    while n_grad > ε
        s = s - c_s*∇sobjective_ℓ2(X,Y,s,w,α,λ)
        s = max(0.000000001,s)
        w = w - c_w*∇wobjective_ℓ2(X,Y,s,w,α,λ)
        n_grad = dot(∇wobjective_ℓ2(X,Y,s,w,α,λ), ∇wobjective_ℓ2(X,Y,s,w,α,λ)) + dot(∇sobjective_ℓ2(X,Y,s,w,α,λ),∇sobjective_ℓ2(X,Y,s,w,α,λ))
        push!(list_f, objective_ℓ2(X,Y,s,w,α,λ))
        if k == 30000
            break
        end
        k += 1
    end
    list_f, s, w
end

gradient_descent_EVaR_ℓ2 (generic function with 1 method)

In [35]:
# To be implemented
function nesterov_gradient_descent_EVaR_ℓ2()
    
end

nesterov_gradient_descent_EVaR_ℓ2 (generic function with 1 method)

### $\ell_1$ EVaR

In [36]:
function objective_ℓ1(X,Y,s,w,α, λ)
    """
    α: fraction of data that constitutes the training set ∈ [0,1]
    """
    n = size(X)[1]
    return λ*dot(w, w) + s*log(sum(exp(1/s*abs(dot(w,X[i,:])-Y[i])) for i=1:n)/(n*α))
end

function ∇sobjective_ℓ1(X,Y,s,w,α,λ) # Not affected bY λ but still putting it for function homogeneity issue
    n = size(X)[1]
    return log(sum(exp(1/s*abs(dot(w,X[i,:])-Y[i])) for i=1:n)/(n*α)) - sum(abs(dot(w,X[i,:])-Y[i])*exp(1/s*(abs(dot(w,X[i,:])-Y[i]))) for i=1:n)/sum(exp(1/s*abs(dot(w,X[i,:])-Y[i])) for i=1:n)/s
end

function ∇wobjective_ℓ1(X,Y,s,w,α,λ)
    n = size(X)[1]
    return 2*λ*w + sum(sign.((dot(w,X[i,:])-Y[i]))*exp(1/s*(abs(dot(w,X[i,:])-Y[i])))*X[i,:] for i=1:n)/sum(exp(1/s*(abs(dot(w,X[i,:])-Y[i]))) for i=1:n)
end

∇wobjective_ℓ1 (generic function with 1 method)

In [37]:
function gradient_descent_EVaR_ℓ1(X, Y, s_0, w_0, α, λ, c_s, c_w, ε)
    """
    c_s: constant learning rate associated to the ∇ w.r.t s
    c_w: constant learning rate associated to the ∇ w.r.t w
    ε: 
    """
    #println(objective_ℓ1(X,Y,s_0,w_0,α,λ))
    list_f = [objective_ℓ1(X,Y,s_0,w_0,α,λ)]
    s=s_0; w=w_0
    n_grad = dot(∇wobjective_ℓ1(X,Y,s,w,α,λ), ∇wobjective_ℓ1(X,Y,s,w,α,λ)) + dot(∇sobjective_ℓ1(X,Y,s,w,α,λ),∇sobjective_ℓ1(X,Y,s,w,α,λ))
    k=1
    while n_grad > ε
        s = s - c_s*∇sobjective_ℓ1(X,Y,s,w,α,λ)
        s = max(0.000000001,s)
        w = w - c_w*∇wobjective_ℓ1(X,Y,s,w,α,λ)
        n_grad = dot(∇wobjective_ℓ1(X,Y,s,w,α,λ), ∇wobjective_ℓ1(X,Y,s,w,α,λ)) + dot(∇sobjective_ℓ1(X,Y,s,w,α,λ),∇sobjective_ℓ1(X,Y,s,w,α,λ))
        push!(list_f, objective_ℓ1(X,Y,s,w,α,λ))
        if k == 30000
            break
        end
        k += 1
    end
    list_f, s, w
end

gradient_descent_EVaR_ℓ1 (generic function with 1 method)

In [38]:
# To be implemented
function nesterov_gradient_descent_EVaR_ℓ1()
    
end

nesterov_gradient_descent_EVaR_ℓ1 (generic function with 1 method)

## Validation & Hyperparameter tuning Functions

In [39]:
function fit_least_squares(X_i, Y_i, w_opt)
    return (Y_i - dot(w_opt, X_i))^2
end

function fit_least_absolute_values(X_i, Y_i, w_opt)
    return abs(Y_i - dot(w_opt, X_i))
end

function calculate_MSE(X, Y, w_opt)
    return mean((X*w_opt-Y).^2)
end

function calculate_MAE(X, Y, w_opt)
    return mean(broadcast(abs, X*w_opt-Y))
end

calculate_MAE (generic function with 1 method)

In [40]:
function get_training_and_validation_indices(X, Y, w_opt, train_is_worse, k)
    """
    train_is_worse: boolean, true if the training set contains the worst errors, false if it's the validation
    k: number of observations in the training set
    """
    n,p = size(X)
    least_squares_df = DataFrame()
    least_squares_df.Obs_Index = 1:n
    least_squares_df.LS_Value = [fit_least_squares(X[i,:], Y[i], w_opt) for i=1:n]
    least_squares_sorted = sort!(least_squares_df, :LS_Value, rev=true)
    if train_is_worse == true
        train_indices = least_squares_sorted[1:k, :Obs_Index]
        val_indices = least_squares_sorted[k+1:n, :Obs_Index]
    else
        val_indices = least_squares_sorted[1:k, :Obs_Index]
        train_indices = least_squares_sorted[k+1:n, :Obs_Index]
    end
    
    return train_indices, val_indices
end

get_training_and_validation_indices (generic function with 1 method)

## Performing the Regressions (Housing Dataset) - 70/30 Train/Val splits

### Finding best RLS - Random Train/Val Split

In [44]:
(X_train_RLS, Y_train_RLS), (X_val_RLS, Y_val_RLS) = IAI.split_data(
    :classification, convert(DataFrame, X), Y, train_proportion=0.50, seed=7
)
(X_train_RLS, X_val_RLS) = (convert(Matrix, X_train_RLS), convert(Matrix, X_val_RLS))
println("Obtained 70/30 train/val random split for RLS")
best_λ_RLS = 0
best_MSE_RLS = 1000000
for λ_RLS in [0, 0.01, 0.1, 0.5, 1, 5, 10, 20, 30, 40, 50, 60, 65, 70, 100, 500, 1000]
    w_opt_RLS, obj_opt_RLS = regularized_least_squares(X_train_RLS, Y_train_RLS, λ_RLS)
    MSE_RLS = calculate_MSE(X_val_RLS, Y_val_RLS, w_opt_RLS)
    println("λ=", λ_RLS, "\t MSE=", MSE_RLS)
    if MSE_RLS < best_MSE_RLS
        best_MSE_RLS = MSE_RLS
        best_λ_RLS = λ_RLS
    end
end
println("Best λ_RLS = ", best_λ_RLS)
w_opt_RLS, obj_opt_RLS = regularized_least_squares(
        vcat(X_train_RLS, X_val_RLS), 
        vcat(Y_train_RLS, Y_val_RLS), 
        best_λ_RLS
)

Obtained 70/30 train/val random split for RLS
Academic license - for non-commercial use only
λ=0.0	 MSE=0.3366626272045661
Academic license - for non-commercial use only
λ=0.01	 MSE=0.3366491298677666
Academic license - for non-commercial use only
λ=0.1	 MSE=0.3365297101205204
Academic license - for non-commercial use only
λ=0.5	 MSE=0.33604093190937495
Academic license - for non-commercial use only
λ=1.0	 MSE=0.33551534750013484
Academic license - for non-commercial use only
λ=5.0	 MSE=0.33351238545740275
Academic license - for non-commercial use only
λ=10.0	 MSE=0.33409683439503274
Academic license - for non-commercial use only
λ=20.0	 MSE=0.3402322857248903
Academic license - for non-commercial use only
λ=30.0	 MSE=0.3496109941058661
Academic license - for non-commercial use only
λ=40.0	 MSE=0.3604313883641149
Academic license - for non-commercial use only
λ=50.0	 MSE=0.3718550570800111
Academic license - for non-commercial use only
λ=60.0	 MSE=0.3834474851624372
Academic license - 

([-0.0874847635742329, 0.11584669107976685, -0.04320872279280865, 0.11899830175790778, -0.19000516655162503, 0.26686549239629803, 0.006943883753696587, -0.30021961115213497, 0.2578951074213603, -0.1619402439619094, -0.20852863268689673, 0.0763891540178019, -0.43795154700687033], 91.30975972428882)

### Finding best Stable Regression - Optimal Train/Val Split

In [45]:
k = floor(Int, 0.5*size(X, 1))
best_λ_SR = 0
best_MSE_SR = 1000000
for λ_SR in [0, 0.01, 0.1, 0.2, 1, 10, 20, 50, 100]
    w_opt_SR, u_opt_SR, θ_opt_SR, opt_obj_SR = stable_reg_l2(X, Y, λ_SR, k)
    train_indices_SR, val_indices_SR = get_training_and_validation_indices(
        X, Y, w_opt_SR, true, k
    )
    X_train_SR, Y_train_SR = X[train_indices_SR, :], Y[train_indices_SR]
    X_val_SR, Y_val_SR = X[val_indices_SR, :], Y[val_indices_SR]
    MSE_SR = calculate_MSE(X_val_SR, Y_val_SR, w_opt_SR)
    println("λ=", λ_SR, "\t MSE=", MSE_SR)
    if MSE_SR < best_MSE_SR
        best_λ_SR = λ_SR
        best_MSE_SR = MSE_SR
    end
end
println("Best λ_SR = ", best_λ_SR)

# Fitting Stable regression with best λ
w_opt_SR, u_opt_SR, θ_opt_SR, opt_obj_SR = stable_reg_l2(X, Y, best_λ_SR, k)
train_indices_SR, val_indices_SR = get_training_and_validation_indices(
    X, Y, w_opt_SR, true, k
)
X_train_SR, Y_train_SR = X[train_indices_SR, :], Y[train_indices_SR]
X_val_SR, Y_val_SR = X[val_indices_SR, :], Y[val_indices_SR]
println("Fitted Stable Regression with best possible λ_SR=", best_λ_SR)

Academic license - for non-commercial use only
λ=0.0	 MSE=0.018811690494316226
Academic license - for non-commercial use only
λ=0.01	 MSE=0.018811690492093015
Academic license - for non-commercial use only
λ=0.1	 MSE=0.018811690487779583
Academic license - for non-commercial use only
λ=0.2	 MSE=0.018811690488389415
Academic license - for non-commercial use only
λ=1.0	 MSE=0.019116407403981545
Academic license - for non-commercial use only
λ=10.0	 MSE=0.019330328131172348
Academic license - for non-commercial use only
λ=20.0	 MSE=0.019245244215488788
Academic license - for non-commercial use only
λ=50.0	 MSE=0.020372933968512214
Academic license - for non-commercial use only
λ=100.0	 MSE=0.024665537846142786
Best λ_SR = 0.1
Academic license - for non-commercial use only
Fitted Stable Regression with best possible λ_SR=0.1


In [46]:
println("########## The training scores are: ##########")
println("The MSE for the Regularized Least Squares is : ", 
    calculate_MSE(X_train_RLS, Y_train_RLS, w_opt_RLS))
println("The MSE for the Stable Regression is : ", 
    calculate_MSE(X_train_SR, Y_train_SR, w_opt_SR))

println("########## The validation scores are: ##########")
println("The MSE for the Regularized Least Squares is : ", 
    calculate_MSE(X_val_RLS, Y_val_RLS, w_opt_RLS))
println("The MSE for the Stable Regression is : ", 
    calculate_MSE(X_val_SR, Y_val_SR, w_opt_SR))

println("########## The test scores are: ##########")
println("The MSE for the Regularized Least Squares is : ", 
    calculate_MSE(X_test, Y_test, w_opt_RLS))
println("The MSE for the Stable Regression is : ", 
    calculate_MSE(X_test, Y_test, w_opt_SR))

########## The training scores are: ##########
The MSE for the Regularized Least Squares is : 0.25741454132357167
The MSE for the Stable Regression is : 0.5226158756772503
########## The validation scores are: ##########
The MSE for the Regularized Least Squares is : 0.2451690579836505
The MSE for the Stable Regression is : 0.018811690487779583
########## The test scores are: ##########
The MSE for the Regularized Least Squares is : 0.3370607729413801
The MSE for the Stable Regression is : 0.31936287842130967


### Finding best EVaRegression $\ell_2$ with random warm start

In [15]:
n, p = size(X);   k = floor(Int, 0.7*n);   α = k/n;   ε=0.001
# Initial values, we use RLS weights as warm start
w_0 = ones(p)./100
s_0 = 0.9


best_λ_EVaR = 0
best_c_s_EVaR = 0
best_c_w_EVaR = 0
best_MSE_EVaR = 1000000
for λ_EVaR in [0, 0.01, 0.1, 0.2, 1, 10, 20, 50]
    for c_s_EVaR in [0.0001, 0.001, 0.01, 0.1]
        for c_w_EVaR in [0.0001, 0.001, 0.01, 0.1]
            list_f_opt, s_opt, w_opt_EVaR = gradient_descent_EVaR_ℓ2(X,Y,s_0,w_0,α,λ_EVaR,c_s_EVaR,c_w_EVaR,ε)
            train_indices_EVaR, val_indices_EVaR = get_training_and_validation_indices(
                X, Y, w_opt_EVaR, true, k
            )
            X_train_EVaR, Y_train_EVaR = X[train_indices_EVaR, :], Y[train_indices_EVaR]
            X_val_EVaR, Y_val_EVaR = X[val_indices_EVaR, :], Y[val_indices_EVaR]
            MSE_EVaR = calculate_MSE(X_val_EVaR, Y_val_EVaR, w_opt_EVaR)
            if MSE_EVaR < best_MSE_EVaR
                best_λ_EVaR = λ_EVaR
                best_c_s_EVaR = c_s_EVaR
                best_c_w_EVaR = c_w_EVaR
                best_MSE_EVaR = MSE_EVaR
                println("λ=", λ_EVaR, "\t c_s_EVaR=", c_s_EVaR, "\t c_w_EVaR=", c_w_EVaR, "\t MSE=", MSE_EVaR)
            end
        end
    end
end
println("Best λ_EVaR = ", best_λ_EVaR, "\t c_s_EVaR=", best_c_s_EVaR, "\t c_w_EVaR=", best_c_w_EVaR)

# Fitting Stable regression with best λ
list_f_opt, s_opt, w_opt_EVaR = gradient_descent_EVaR_ℓ2(
    X, Y, s_0, w_0, α, best_λ_EVaR, best_c_s_EVaR, best_c_w_EVaR, ε
)
train_indices_EVaR, val_indices_EVaR = get_training_and_validation_indices(
    X, Y, w_opt_EVaR, true, k
)
X_train_EVaR, Y_train_EVaR = X[train_indices_EVaR, :], Y[train_indices_EVaR]
X_val_EVaR, Y_val_EVaR = X[val_indices_EVaR, :], Y[val_indices_EVaR]
println("Fitted EVaRegression ℓ_2 with best possible λ_EVaR=", best_λ_EVaR, 
    "  c_s_EVaR=", best_c_s_EVaR, "  c_w_EVaR=", best_c_w_EVaR)

λ=0.0	 c_s_EVaR=0.0001	 c_w_EVaR=0.0001	 MSE=0.011353493879241089
λ=0.01	 c_s_EVaR=0.0001	 c_w_EVaR=0.0001	 MSE=0.011215496590247703
λ=0.1	 c_s_EVaR=0.0001	 c_w_EVaR=0.0001	 MSE=0.010552636038916046
λ=0.2	 c_s_EVaR=0.0001	 c_w_EVaR=0.0001	 MSE=0.010327368309117289
λ=0.2	 c_s_EVaR=0.01	 c_w_EVaR=0.01	 MSE=0.010319655031776632
λ=1.0	 c_s_EVaR=0.0001	 c_w_EVaR=0.0001	 MSE=0.008290100137572016
λ=1.0	 c_s_EVaR=0.001	 c_w_EVaR=0.0001	 MSE=0.00821687797294475
Best λ_EVaR = 1.0	 c_s_EVaR=0.001	 c_w_EVaR=0.0001
Fitted EVaRegression ℓ_2 with best possible λ_EVaR=1.0  c_s_EVaR=0.001  c_w_EVaR=0.0001


### Finding best EVaRegression  $ℓ_2$  with RLS weights warm start

In [16]:
n, p = size(X);   k = floor(Int, 0.7*n);   α = k/n;   ε=0.001
# Initial values, we use RLS weights as warm start
w_0 = w_opt_RLS
s_0 = 0.9


best_λ_EVaR_with_warmstart = 0
best_c_s_EVaR_with_warmstart = 0
best_c_w_EVaR_with_warmstart = 0
best_MSE_EVaR_with_warmstart = 1000000
for λ_EVaR_with_warmstart in [0, 0.01, 0.1, 0.2, 1, 10, 20, 50]
    for c_s_EVaR_with_warmstart in [0.0001, 0.001, 0.01, 0.1]
        for c_w_EVaR_with_warmstart in [0.0001, 0.001, 0.01, 0.1]
            list_f_opt, s_opt, w_opt_EVaR_with_warmstart = gradient_descent_EVaR_ℓ2(
                X,Y,s_0,w_0,α,λ_EVaR_with_warmstart,c_s_EVaR_with_warmstart,c_w_EVaR_with_warmstart,ε
            )
            train_indices_EVaR_with_warmstart, val_indices_EVaR_with_warmstart = get_training_and_validation_indices(
                X, Y, w_opt_EVaR_with_warmstart, true, k
            )
            X_train_EVaR_with_warmstart, Y_train_EVaR_with_warmstart = X[train_indices_EVaR_with_warmstart, :], Y[train_indices_EVaR_with_warmstart]
            X_val_EVaR_with_warmstart, Y_val_EVaR_with_warmstart = X[val_indices_EVaR_with_warmstart, :], Y[val_indices_EVaR_with_warmstart]
            MSE_EVaR_with_warmstart = calculate_MSE(X_val_EVaR_with_warmstart, Y_val_EVaR_with_warmstart, w_opt_EVaR_with_warmstart)
            if MSE_EVaR_with_warmstart < best_MSE_EVaR_with_warmstart
                best_λ_EVaR_with_warmstart = λ_EVaR_with_warmstart
                best_c_s_EVaR_with_warmstart = c_s_EVaR_with_warmstart
                best_c_w_EVaR_with_warmstart = c_w_EVaR_with_warmstart
                best_MSE_EVaR_with_warmstart = MSE_EVaR_with_warmstart
                println("λ=", λ_EVaR_with_warmstart, "\t c_s_EVaR_with_warmstart=", 
                    c_s_EVaR_with_warmstart, "\t c_w_EVaR_with_warmstart=", 
                    c_w_EVaR_with_warmstart, "\t MSE=", MSE_EVaR_with_warmstart)
            end
        end
    end
end
println("Best λ_EVaR_with_warmstart = ", best_λ_EVaR_with_warmstart, 
    "\t c_s_EVaR_with_warmstart=", best_c_s_EVaR_with_warmstart, 
    "\t c_w_EVaR_with_warmstart=", best_c_w_EVaR_with_warmstart)

# Fitting Stable regression with best λ
list_f_opt, s_opt, w_opt_EVaR_with_warmstart = gradient_descent_EVaR_ℓ2(
    X, Y, s_0, w_0, α, best_λ_EVaR_with_warmstart, 
    best_c_s_EVaR_with_warmstart, best_c_w_EVaR_with_warmstart, ε
)
train_indices_EVaR_with_warmstart, val_indices_EVaR_with_warmstart = get_training_and_validation_indices(
    X, Y, w_opt_EVaR_with_warmstart, true, k
)
X_train_EVaR_with_warmstart, Y_train_EVaR_with_warmstart = X[train_indices_EVaR_with_warmstart, :], Y[train_indices_EVaR_with_warmstart]
X_val_EVaR_with_warmstart, Y_val_EVaR_with_warmstart = X[val_indices_EVaR_with_warmstart, :], Y[val_indices_EVaR_with_warmstart]
println("Fitted EVaRegression ℓ_2 with best possible λ_EVaR_with_warmstart=", best_λ_EVaR_with_warmstart, 
    "  c_s_EVaR_with_warmstart=", best_c_s_EVaR_with_warmstart, 
    "  c_w_EVaR_with_warmstart=", best_c_w_EVaR_with_warmstart)

λ=0.0	 c_s_EVaR_with_warmstart=0.0001	 c_w_EVaR_with_warmstart=0.0001	 MSE=0.01197949356474254
λ=0.0	 c_s_EVaR_with_warmstart=0.001	 c_w_EVaR_with_warmstart=0.001	 MSE=0.011979471106725878
λ=0.0	 c_s_EVaR_with_warmstart=0.01	 c_w_EVaR_with_warmstart=0.01	 MSE=0.011978110786313476
λ=0.01	 c_s_EVaR_with_warmstart=0.0001	 c_w_EVaR_with_warmstart=0.0001	 MSE=0.011813477546025143
λ=0.01	 c_s_EVaR_with_warmstart=0.001	 c_w_EVaR_with_warmstart=0.001	 MSE=0.011813233762080884
λ=0.01	 c_s_EVaR_with_warmstart=0.01	 c_w_EVaR_with_warmstart=0.01	 MSE=0.011809258716127143
λ=0.1	 c_s_EVaR_with_warmstart=0.0001	 c_w_EVaR_with_warmstart=0.0001	 MSE=0.010714589149813844
λ=0.1	 c_s_EVaR_with_warmstart=0.001	 c_w_EVaR_with_warmstart=0.0001	 MSE=0.010680302288356704
λ=0.1	 c_s_EVaR_with_warmstart=0.01	 c_w_EVaR_with_warmstart=0.001	 MSE=0.01068018639445509
λ=0.1	 c_s_EVaR_with_warmstart=0.1	 c_w_EVaR_with_warmstart=0.01	 MSE=0.010676869821079621
λ=0.2	 c_s_EVaR_with_warmstart=0.0001	 c_w_EVaR_with_warmsta

### Finding best EVaRegression $\ell_1$ with random warm start

In [17]:
n, p = size(X);   k = floor(Int, 0.7*n);   α = k/n;   ε=0.001
# Initial values, we use RLS weights as warm start
w_0 = ones(p)./100
s_0 = 0.9


best_λ_EVaR_ℓ1 = 0
best_c_s_EVaR_ℓ1 = 0
best_c_w_EVaR_ℓ1 = 0
best_MSE_EVaR_ℓ1 = 1000000
for λ_EVaR_ℓ1 in [0, 0.01, 0.1, 0.2, 1, 10, 20, 50]
    for c_s_EVaR_ℓ1 in [0.0001, 0.001, 0.01, 0.1]
        for c_w_EVaR_ℓ1 in [0.0001, 0.001, 0.01, 0.1]
            list_f_opt, s_opt, w_opt_EVaR_ℓ1 = gradient_descent_EVaR_ℓ1(X,Y,s_0,w_0,α,λ_EVaR_ℓ1,c_s_EVaR_ℓ1,c_w_EVaR_ℓ1,ε)
            train_indices_EVaR_ℓ1, val_indices_EVaR_ℓ1 = get_training_and_validation_indices(
                X, Y, w_opt_EVaR_ℓ1, true, k
            )
            X_train_EVaR_ℓ1, Y_train_EVaR_ℓ1 = X[train_indices_EVaR_ℓ1, :], Y[train_indices_EVaR_ℓ1]
            X_val_EVaR_ℓ1, Y_val_EVaR_ℓ1 = X[val_indices_EVaR_ℓ1, :], Y[val_indices_EVaR_ℓ1]
            MSE_EVaR_ℓ1 = calculate_MSE(X_val_EVaR_ℓ1, Y_val_EVaR_ℓ1, w_opt_EVaR_ℓ1)
            if MSE_EVaR_ℓ1 < best_MSE_EVaR_ℓ1
                best_λ_EVaR_ℓ1 = λ_EVaR_ℓ1
                best_c_s_EVaR_ℓ1 = c_s_EVaR_ℓ1
                best_c_w_EVaR_ℓ1 = c_w_EVaR_ℓ1
                best_MSE_EVaR_ℓ1 = MSE_EVaR_ℓ1
                println("λ=", λ_EVaR_ℓ1, "\t c_s_EVaR_ℓ1=", c_s_EVaR_ℓ1, "\t c_w_EVaR_ℓ1=", c_w_EVaR_ℓ1, "\t MSE=", MSE_EVaR_ℓ1)
            else
                println(λ_EVaR_ℓ1, "    ",c_s_EVaR_ℓ1, "    ", c_w_EVaR_ℓ1)
            end
        end
    end
end
println("Best λ_EVaR_ℓ1 = ", best_λ_EVaR_ℓ1, "\t c_s_EVaR_ℓ1=", best_c_s_EVaR_ℓ1, "\t c_w_EVaR_ℓ1=", best_c_w_EVaR_ℓ1)

λ=0.0	 c_s_EVaR_ℓ1=0.0001	 c_w_EVaR_ℓ1=0.0001	 MSE=0.005524026983371302
0.0    0.0001    0.001
0.0    0.0001    0.01
0.0    0.0001    0.1
0.0    0.001    0.0001
0.0    0.001    0.001
0.0    0.001    0.01
0.0    0.001    0.1
0.0    0.01    0.0001
0.0    0.01    0.001
0.0    0.01    0.01
0.0    0.01    0.1
0.0    0.1    0.0001
0.0    0.1    0.001
0.0    0.1    0.01
0.0    0.1    0.1
0.01    0.0001    0.0001
0.01    0.0001    0.001
0.01    0.0001    0.01
0.01    0.0001    0.1
0.01    0.001    0.0001
0.01    0.001    0.001
0.01    0.001    0.01
0.01    0.001    0.1
0.01    0.01    0.0001
0.01    0.01    0.001
0.01    0.01    0.01
0.01    0.01    0.1
0.01    0.1    0.0001
0.01    0.1    0.001
0.01    0.1    0.01
0.01    0.1    0.1
0.1    0.0001    0.0001
0.1    0.0001    0.001
0.1    0.0001    0.01
0.1    0.0001    0.1
0.1    0.001    0.0001
0.1    0.001    0.001
0.1    0.001    0.01
0.1    0.001    0.1
0.1    0.01    0.0001
0.1    0.01    0.001
0.1    0.01    0.01
0.1    0.01    0.1
0.1   

In [18]:
# Fitting Stable regression with best λ
list_f_opt, s_opt, w_opt_EVaR_ℓ1 = gradient_descent_EVaR_ℓ1(
    X, Y, s_0, w_0, α, best_λ_EVaR_ℓ1, best_c_s_EVaR_ℓ1, best_c_w_EVaR_ℓ1, ε
)
train_indices_EVaR_ℓ1, val_indices_EVaR_ℓ1 = get_training_and_validation_indices(
    X, Y, w_opt_EVaR_ℓ1, true, k
)
X_train_EVaR_ℓ1, Y_train_EVaR_ℓ1 = X[train_indices_EVaR_ℓ1, :], Y[train_indices_EVaR_ℓ1]
X_val_EVaR_ℓ1, Y_val_EVaR_ℓ1 = X[val_indices_EVaR_ℓ1, :], Y[val_indices_EVaR_ℓ1]
println("Fitted EVaR_ℓ1egression ℓ_1 with best possible λ_EVaR_ℓ1=", best_λ_EVaR_ℓ1, 
    "  c_s_EVaR_ℓ1=", best_c_s_EVaR_ℓ1, "  c_w_EVaR_ℓ1=", best_c_w_EVaR_ℓ1)

Fitted EVaR_ℓ1egression ℓ_1 with best possible λ_EVaR_ℓ1=0.0  c_s_EVaR_ℓ1=0.0001  c_w_EVaR_ℓ1=0.0001


### Finding best EVaRegression  $ℓ_1$  with RLS weights warm start

In [19]:
n, p = size(X);   k = floor(Int, 0.7*n);   α = k/n;   ε=0.001
# Initial values, we use RLS weights as warm start
w_0 = w_opt_RLS
s_0 = 0.9


best_λ_EVaR_ℓ1_with_warmstart = 0
best_c_s_EVaR_ℓ1_with_warmstart = 0
best_c_w_EVaR_ℓ1_with_warmstart = 0
best_MSE_EVaR_ℓ1_with_warmstart = 1000000
for λ_EVaR_ℓ1_with_warmstart in [0, 0.01, 0.1, 0.2, 1, 10, 20, 50]
    for c_s_EVaR_ℓ1_with_warmstart in [0.0001, 0.001, 0.01, 0.1]
        for c_w_EVaR_ℓ1_with_warmstart in [0.0001, 0.001, 0.01, 0.1]
            list_f_opt, s_opt, w_opt_EVaR_ℓ1_with_warmstart = gradient_descent_EVaR_ℓ1(
                X,Y,s_0,w_0,α,λ_EVaR_ℓ1_with_warmstart,c_s_EVaR_ℓ1_with_warmstart,c_w_EVaR_ℓ1_with_warmstart,ε
            )
            train_indices_EVaR_ℓ1_with_warmstart, val_indices_EVaR_ℓ1_with_warmstart = get_training_and_validation_indices(
                X, Y, w_opt_EVaR_ℓ1_with_warmstart, true, k
            )
            X_train_EVaR_ℓ1_with_warmstart, Y_train_EVaR_ℓ1_with_warmstart = X[train_indices_EVaR_ℓ1_with_warmstart, :], Y[train_indices_EVaR_ℓ1_with_warmstart]
            X_val_EVaR_ℓ1_with_warmstart, Y_val_EVaR_ℓ1_with_warmstart = X[val_indices_EVaR_ℓ1_with_warmstart, :], Y[val_indices_EVaR_ℓ1_with_warmstart]
            MSE_EVaR_ℓ1_with_warmstart = calculate_MSE(X_val_EVaR_ℓ1_with_warmstart, Y_val_EVaR_ℓ1_with_warmstart, w_opt_EVaR_ℓ1_with_warmstart)
            if MSE_EVaR_ℓ1_with_warmstart < best_MSE_EVaR_ℓ1_with_warmstart
                best_λ_EVaR_ℓ1_with_warmstart = λ_EVaR_ℓ1_with_warmstart
                best_c_s_EVaR_ℓ1_with_warmstart = c_s_EVaR_ℓ1_with_warmstart
                best_c_w_EVaR_ℓ1_with_warmstart = c_w_EVaR_ℓ1_with_warmstart
                best_MSE_EVaR_ℓ1_with_warmstart = MSE_EVaR_ℓ1_with_warmstart
                println("λ=", λ_EVaR_ℓ1_with_warmstart, "\t c_s_EVaR_ℓ1_with_warmstart=", 
                    c_s_EVaR_ℓ1_with_warmstart, "\t c_w_EVaR_ℓ1_with_warmstart=", 
                    c_w_EVaR_ℓ1_with_warmstart, "\t MSE=", MSE_EVaR_ℓ1_with_warmstart)
            else
                println(λ_EVaR_ℓ1_with_warmstart, "    ", 
                    c_s_EVaR_ℓ1_with_warmstart, "    ", 
                    c_w_EVaR_ℓ1_with_warmstart)
            end
        end
    end
end
println("Best λ_EVaR_ℓ1_with_warmstart = ", best_λ_EVaR_ℓ1_with_warmstart, 
    "\t c_s_EVaR_ℓ1_with_warmstart=", best_c_s_EVaR_ℓ1_with_warmstart, 
    "\t c_w_EVaR_ℓ1_with_warmstart=", best_c_w_EVaR_ℓ1_with_warmstart)

λ=0.0	 c_s_EVaR_ℓ1_with_warmstart=0.0001	 c_w_EVaR_ℓ1_with_warmstart=0.0001	 MSE=0.005756717047838035
λ=0.0	 c_s_EVaR_ℓ1_with_warmstart=0.0001	 c_w_EVaR_ℓ1_with_warmstart=0.001	 MSE=0.005580877545561708
λ=0.0	 c_s_EVaR_ℓ1_with_warmstart=0.0001	 c_w_EVaR_ℓ1_with_warmstart=0.01	 MSE=0.005553410801608024
0.0    0.0001    0.1
0.0    0.001    0.0001
0.0    0.001    0.001
0.0    0.001    0.01
0.0    0.001    0.1
0.0    0.01    0.0001
0.0    0.01    0.001
0.0    0.01    0.01
0.0    0.01    0.1
0.0    0.1    0.0001
0.0    0.1    0.001
0.0    0.1    0.01
0.0    0.1    0.1
0.01    0.0001    0.0001
0.01    0.0001    0.001
0.01    0.0001    0.01
0.01    0.0001    0.1
0.01    0.001    0.0001
0.01    0.001    0.001
0.01    0.001    0.01
0.01    0.001    0.1
0.01    0.01    0.0001
0.01    0.01    0.001
0.01    0.01    0.01
0.01    0.01    0.1
0.01    0.1    0.0001
0.01    0.1    0.001
0.01    0.1    0.01
0.01    0.1    0.1
0.1    0.0001    0.0001
0.1    0.0001    0.001
0.1    0.0001    0.01
0.1    0.

In [20]:
# Fitting Stable regression with best λ
list_f_opt, s_opt, w_opt_EVaR_ℓ1_with_warmstart = gradient_descent_EVaR_ℓ1(
    X, Y, s_0, w_0, α, best_λ_EVaR_ℓ1_with_warmstart, 
    best_c_s_EVaR_ℓ1_with_warmstart, best_c_w_EVaR_ℓ1_with_warmstart, ε
)
train_indices_EVaR_ℓ1_with_warmstart, val_indices_EVaR_ℓ1_with_warmstart = get_training_and_validation_indices(
    X, Y, w_opt_EVaR_ℓ1_with_warmstart, true, k
)
X_train_EVaR_ℓ1_with_warmstart, Y_train_EVaR_ℓ1_with_warmstart = X[train_indices_EVaR_ℓ1_with_warmstart, :], Y[train_indices_EVaR_ℓ1_with_warmstart]
X_val_EVaR_ℓ1_with_warmstart, Y_val_EVaR_ℓ1_with_warmstart = X[val_indices_EVaR_ℓ1_with_warmstart, :], Y[val_indices_EVaR_ℓ1_with_warmstart]
println("Fitted EVaR_ℓ1egression ℓ_1 with best possible λ_EVaR_ℓ1_with_warmstart=", best_λ_EVaR_ℓ1_with_warmstart, 
    "  c_s_EVaR_ℓ1_with_warmstart=", best_c_s_EVaR_ℓ1_with_warmstart, 
    "  c_w_EVaR_ℓ1_with_warmstart=", best_c_w_EVaR_ℓ1_with_warmstart)

Fitted EVaR_ℓ1egression ℓ_1 with best possible λ_EVaR_ℓ1_with_warmstart=0.0  c_s_EVaR_ℓ1_with_warmstart=0.0001  c_w_EVaR_ℓ1_with_warmstart=0.01


### Performance Showcase train/validation/test

#### MSE

In [21]:
println("########## The training scores are: ##########")
println("The MSE for the Regularized Least Squares is : ", 
    calculate_MSE(X_train_RLS, Y_train_RLS, w_opt_RLS))
println("The MSE for the Stable Regression is : ", 
    calculate_MSE(X_train_SR, Y_train_SR, w_opt_SR))
println("The MSE for the EVaR Regression is for ε = 0.001 : ", 
    calculate_MSE(X_train_EVaR, Y_train_EVaR, w_opt_EVaR))
println("The MSE for the EVaR Regression with RLS warm start is for ε = 0.001 : ", 
    calculate_MSE(X_train_EVaR_with_warmstart, Y_train_EVaR_with_warmstart, w_opt_EVaR_with_warmstart))
println("The MSE for the EVaR_ℓ1 Regression is for ε = 0.001 : ", 
    calculate_MSE(X_train_EVaR_ℓ1, Y_train_EVaR_ℓ1, w_opt_EVaR_ℓ1))
println("The MSE for the EVaR_ℓ1 Regression with RLS warm start is for ε = 0.001 : ", 
    calculate_MSE(X_train_EVaR_ℓ1_with_warmstart, Y_train_EVaR_ℓ1_with_warmstart, w_opt_EVaR_ℓ1_with_warmstart))

########## The training scores are: ##########
The MSE for the Regularized Least Squares is : 0.24939957072262187
The MSE for the Stable Regression is : 0.3891561632813787
The MSE for the EVaR Regression is for ε = 0.001 : 0.44587952890294147
The MSE for the EVaR Regression with RLS warm start is for ε = 0.001 : 0.4433744426143271
The MSE for the EVaR_ℓ1 Regression is for ε = 0.001 : 0.36965720683890785
The MSE for the EVaR_ℓ1 Regression with RLS warm start is for ε = 0.001 : 0.3706927110481816


In [22]:
println("########## The validation scores are: ##########")
println("The MSE for the Regularized Least Squares is : ", 
    calculate_MSE(X_val_RLS, Y_val_RLS, w_opt_RLS))
println("The MSE for the Stable Regression is : ", 
    calculate_MSE(X_val_SR, Y_val_SR, w_opt_SR))
println("The MSE for the EVaR Regression is for ε = 0.001 : ", 
    calculate_MSE(X_val_EVaR, Y_val_EVaR, w_opt_EVaR))
println("The MSE for the EVaR Regression with RLS warm start is for ε = 0.001 : ", 
    calculate_MSE(X_val_EVaR_with_warmstart, Y_val_EVaR_with_warmstart, w_opt_EVaR_with_warmstart))
println("The MSE for the EVaR_ℓ1 Regression is for ε = 0.001 : ", 
    calculate_MSE(X_val_EVaR_ℓ1, Y_val_EVaR_ℓ1, w_opt_EVaR_ℓ1))
println("The MSE for the EVaR_ℓ1 Regression with RLS warm start is for ε = 0.001 : ", 
    calculate_MSE(X_val_EVaR_ℓ1_with_warmstart, Y_val_EVaR_ℓ1_with_warmstart, w_opt_EVaR_ℓ1_with_warmstart))

########## The validation scores are: ##########
The MSE for the Regularized Least Squares is : 0.24958703550063438
The MSE for the Stable Regression is : 0.0067152173017977455
The MSE for the EVaR Regression is for ε = 0.001 : 0.00821687797294475
The MSE for the EVaR Regression with RLS warm start is for ε = 0.001 : 0.008280372766621486
The MSE for the EVaR_ℓ1 Regression is for ε = 0.001 : 0.005524026983371302
The MSE for the EVaR_ℓ1 Regression with RLS warm start is for ε = 0.001 : 0.005553410801608024


In [23]:
println("########## The test scores are: ##########")
println("The MSE for the Regularized Least Squares is : ", 
    calculate_MSE(X_test, Y_test, w_opt_RLS))
println("The MSE for the Stable Regression is : ", 
    calculate_MSE(X_test, Y_test, w_opt_SR))
println("The MSE for the EVaR Regression is for ε = 0.001 : ", 
    calculate_MSE(X_test, Y_test, w_opt_EVaR))
println("The MSE for the EVaR Regression with RLS warm start is for ε = 0.001 : ", 
    calculate_MSE(X_test, Y_test, w_opt_EVaR_with_warmstart))
println("The MSE for the EVaR_ℓ1 Regression is for ε = 0.001 : ", 
    calculate_MSE(X_test, Y_test, w_opt_EVaR_ℓ1))
println("The MSE for the EVaR_ℓ1 Regression with RLS warm start is for ε = 0.001 : ", 
    calculate_MSE(X_test, Y_test, w_opt_EVaR_ℓ1_with_warmstart))

########## The test scores are: ##########
The MSE for the Regularized Least Squares is : 0.336139105961579
The MSE for the Stable Regression is : 0.32602997802296224
The MSE for the EVaR Regression is for ε = 0.001 : 0.4473116642845203
The MSE for the EVaR Regression with RLS warm start is for ε = 0.001 : 0.44569005317815713
The MSE for the EVaR_ℓ1 Regression is for ε = 0.001 : 0.3701251291858531
The MSE for the EVaR_ℓ1 Regression with RLS warm start is for ε = 0.001 : 0.368397352039304


#### MAE

In [24]:
println("########## The training scores are: ##########")
println("The MAE for the Regularized Least Squares is : ", 
    calculate_MAE(X_train_RLS, Y_train_RLS, w_opt_RLS))
println("The MAE for the Stable Regression is : ", 
    calculate_MAE(X_train_SR, Y_train_SR, w_opt_SR))
println("The MAE for the EVaR Regression is for ε = 0.001 : ", 
    calculate_MAE(X_train_EVaR, Y_train_EVaR, w_opt_EVaR))
println("The MAE for the EVaR Regression with RLS warm start is for ε = 0.001 : ", 
    calculate_MAE(X_train_EVaR_with_warmstart, Y_train_EVaR_with_warmstart, w_opt_EVaR_with_warmstart))
println("The MAE for the EVaR_ℓ1 Regression is for ε = 0.001 : ", 
    calculate_MAE(X_train_EVaR_ℓ1, Y_train_EVaR_ℓ1, w_opt_EVaR_ℓ1))
println("The MAE for the EVaR_ℓ1 Regression with RLS warm start is for ε = 0.001 : ", 
    calculate_MAE(X_train_EVaR_ℓ1_with_warmstart, Y_train_EVaR_ℓ1_with_warmstart, w_opt_EVaR_ℓ1_with_warmstart))

########## The training scores are: ##########
The MAE for the Regularized Least Squares is : 0.3516200726221749
The MAE for the Stable Regression is : 0.45405267687334855
The MAE for the EVaR Regression is for ε = 0.001 : 0.5448673046523884
The MAE for the EVaR Regression with RLS warm start is for ε = 0.001 : 0.5439425559456769
The MAE for the EVaR_ℓ1 Regression is for ε = 0.001 : 0.49416648266580687
The MAE for the EVaR_ℓ1 Regression with RLS warm start is for ε = 0.001 : 0.5045190900273672


In [25]:
println("########## The validation scores are: ##########")
println("The MAE for the Regularized Least Squares is : ", 
    calculate_MAE(X_val_RLS, Y_val_RLS, w_opt_RLS))
println("The MAE for the Stable Regression is : ", 
    calculate_MAE(X_val_SR, Y_val_SR, w_opt_SR))
println("The MAE for the EVaR Regression is for ε = 0.001 : ", 
    calculate_MAE(X_val_EVaR, Y_val_EVaR, w_opt_EVaR))
println("The MAE for the EVaR Regression with RLS warm start is for ε = 0.001 : ", 
    calculate_MAE(X_val_EVaR_with_warmstart, Y_val_EVaR_with_warmstart, w_opt_EVaR_with_warmstart))
println("The MAE for the EVaR_ℓ1 Regression is for ε = 0.001 : ", 
    calculate_MAE(X_val_EVaR_ℓ1, Y_val_EVaR_ℓ1, w_opt_EVaR_ℓ1))
println("The MAE for the EVaR_ℓ1 Regression with RLS warm start is for ε = 0.001 : ", 
    calculate_MAE(X_val_EVaR_ℓ1_with_warmstart, Y_val_EVaR_ℓ1_with_warmstart, w_opt_EVaR_ℓ1_with_warmstart))

########## The validation scores are: ##########
The MAE for the Regularized Least Squares is : 0.34530884741499285
The MAE for the Stable Regression is : 0.07218983785787313
The MAE for the EVaR Regression is for ε = 0.001 : 0.07968070659186796
The MAE for the EVaR Regression with RLS warm start is for ε = 0.001 : 0.08003049406523387
The MAE for the EVaR_ℓ1 Regression is for ε = 0.001 : 0.0643100128769752
The MAE for the EVaR_ℓ1 Regression with RLS warm start is for ε = 0.001 : 0.06154147408153959


In [26]:
println("########## The test scores are (MAE): ##########")
println("The MAE for the Regularized Least Squares is : ", 
    calculate_MAE(X_test, Y_test, w_opt_RLS))
println("The MAE for the Stable Regression is : ", 
    calculate_MAE(X_test, Y_test, w_opt_SR))
println("The MAE for the EVaR Regression is for ε = 0.001 : ", 
    calculate_MAE(X_test, Y_test, w_opt_EVaR))
println("The MAE for the EVaR Regression with RLS warm start is for ε = 0.001 : ", 
    calculate_MAE(X_test, Y_test, w_opt_EVaR_with_warmstart))
println("The MAE for the EVaR_ℓ1 Regression is for ε = 0.001 : ", 
    calculate_MAE(X_test, Y_test, w_opt_EVaR_ℓ1))
println("The MAE for the EVaR_ℓ1 Regression with RLS warm start is for ε = 0.001 : ", 
    calculate_MAE(X_test, Y_test, w_opt_EVaR_ℓ1_with_warmstart))

########## The test scores are (MAE): ##########
The MAE for the Regularized Least Squares is : 0.4159548259149143
The MAE for the Stable Regression is : 0.398252535412881
The MAE for the EVaR Regression is for ε = 0.001 : 0.4767091032858155
The MAE for the EVaR Regression with RLS warm start is for ε = 0.001 : 0.4757064328478016
The MAE for the EVaR_ℓ1 Regression is for ε = 0.001 : 0.43315211313231244
The MAE for the EVaR_ℓ1 Regression with RLS warm start is for ε = 0.001 : 0.4431392613400392


In [183]:
println("########## The training scores are: ##########")
println("The MSE for the Regularized Least Squares is : ", 
    calculate_MSE(X_train_RLS, Y_train_RLS, w_opt_RLS))
println("The MSE for the Stable Regression is : ", 
    calculate_MSE(X_train_SR, Y_train_SR, w_opt_SR))
println("The MSE for the EVaR Regression is for ε = 0.1 : ", 
    calculate_MSE(X_train_EVaR, Y_train_EVaR, w_opt_EVaR))
println("The MSE for the EVaR Regression with RLS warm start is for ε = 0.1 : ", 
    calculate_MSE(X_train_EVaR_with_warmstart, Y_train_EVaR_with_warmstart, w_opt_EVaR_with_warmstart))

The training scores are: 
The MSE for the Regularized Least Squares is : 0.28237743438596236
The MSE for the Stable Regression is : 0.38160426678251924
The MSE for the EVaR Regression is for ε = 0.1 : 0.47225361837674856
The MSE for the EVaR Regression with RLS warm start is for ε = 0.1 : 0.46733409736144577


In [185]:
println("########## The validation scores are: ##########")
println("The MSE for the Regularized Least Squares is : ", 
    calculate_MSE(X_val_RLS, Y_val_RLS, w_opt_RLS))
println("The MSE for the Stable Regression is : ", 
    calculate_MSE(X_val_SR, Y_val_SR, w_opt_SR))
println("The MSE for the EVaR Regression is for ε = 0.1 : ", 
    calculate_MSE(X_val_EVaR, Y_val_EVaR, w_opt_EVaR))
println("The MSE for the EVaR Regression with RLS warm start is for ε = 0.1 : ", 
    calculate_MSE(X_val_EVaR_with_warmstart, Y_val_EVaR_with_warmstart, w_opt_EVaR_with_warmstart))

The validation scores are:
The MSE for the Regularized Least Squares is : 0.22807812578839193
The MSE for the Stable Regression is : 0.0068670814996991516
The MSE for the EVaR Regression is for ε = 0.1 : 0.009770436299351223
The MSE for the EVaR Regression with RLS warm start is for ε = 0.1 : 0.009786778984419287


In [186]:
println("########## The test scores are: ##########")
println("The MSE for the Regularized Least Squares is : ", 
    calculate_MSE(X_test, Y_test, w_opt_RLS))
println("The MSE for the Stable Regression is : ", 
    calculate_MSE(X_test, Y_test, w_opt_SR))
println("The MSE for the EVaR Regression is for ε = 0.1 : ", 
    calculate_MSE(X_test, Y_test, w_opt_EVaR))
println("The MSE for the EVaR Regression with RLS warm start is for ε = 0.1 : ", 
    calculate_MSE(X_test, Y_test, w_opt_EVaR_with_warmstart))

The test scores are:
The MSE for the Regularized Least Squares is : 0.40677205711878867
The MSE for the Stable Regression is : 0.3769294123159734
The MSE for the EVaR Regression is for ε = 0.1 : 0.6126187879218311
The MSE for the EVaR Regression with RLS warm start is for ε = 0.1 : 0.6016956516704023


## Performing the Regressions

In [83]:
#Stable Reg Params and regression
k = 100; λ=.1
w_opt, u_opt, θ_opt, opt_obj = stable_reg_l2_reg(X, Y, λ, k)

#EVaR Params and regression
α = 1 - k/n; c_s=0.001; c_w=0.0001; ε=0.1; w_0=ones(p)./100; s_0 = .9
list_f, s, w_EVaR = gradient_descent_EVaR_ℓ2(X,Y,s_0,w_0,α,c_s,c_w,ε)

c_s=0.0001; c_w=0.0001; w_0=w_EVaR; s_0 = s; ε = 0.01
list_f2, s2, w_EVaR2 = gradient_descent_EVaR_ℓ2(X,Y,s_0,w_0,α,c_s,c_w,ε)

Academic license - for non-commercial use only
55669.86224993145
202954.57523898082
465807.7944702865
571777.629694811
649372.3805206266
657711.9404003171
657724.9043249263
654487.4375204031
645880.5971714322
641509.4710914962
632510.6203716445
628185.3419095788
619651.4299285613
615487.7301913105
607460.3065848325
603457.3143498494
595898.6664486566
592044.7124320026
584915.7360894731
581199.3953057412
574465.8930382177
570876.731423585
564508.5028002661
561036.8191874583
555006.3864612373
551642.9345553475
545924.5636888716
542660.3421086599
537229.3672970065
534055.4713944199
528887.8646410621
525795.3849264857
520867.50880704256
517847.46124352125
513135.94838628784
510179.2235601897
505660.9344289479
502758.2546696834
498410.27440626704
495552.14999618987
491351.7942481315
488528.4716846908
484453.27959323645
481654.6762317032
477682.37545838073
474897.99561683345
471006.4290981776
468225.25672799477
464392.2636420062
461602.62584298797
457805.87003345083
454995.2638672927
451212.

0.5594157796479656
0.5573492228695406
0.5552931619563722
0.5532475364790868
0.5512122864050993
0.5491873520955671
0.547172674302416
0.5451681941653258
0.5431738532088413
0.5411895933394076
0.5392153568424732
0.5372510863796582
0.5352967249858696
0.5333522160664944
0.53141750339462
0.5294925311082607
0.5275772437075883
0.5256715860522359
0.5237755033585919
0.5218889411971385
0.5200118454897723
0.5181441625071955
0.516285838866337
0.5144368215277241
0.5125970577929518
0.5107664953021344
0.5089450820314052
0.5071327662904103
0.5053294967198249
0.5035352222889444
0.501749892293223
0.49997345635186524
0.4982058644054444
0.4964470667135589
0.4946970138524425
0.4929556567126775
0.49122294649685433
0.48949883471730654
0.48778327319386194
0.48607621405153684
0.4843776097183594
0.4826874129231576
0.4810055766933252
0.4793320543526885
0.4776667995193484
0.4760097661035332
0.47436090830548083
0.4727201806133559
0.4710875378011276
0.46946293492657115
0.46784632732914344
0.466237670628016
0.46463692

0.17736058037579702
0.17706699321673847
0.1767745409345319
0.17648321831609992
0.17619302017427238
0.17590394134766102
0.17561597670051882
0.17532912112258886
0.17504336952899135
0.17475871686007316
0.17447515808127778
0.1741926881830091
0.17391130218050022
0.17363099511368701
0.17335176204706867
0.17307359806957545
0.1727964982944535
0.1725204578591145
0.1722454719250266
0.17197153567757667
0.17169864432593887
0.17142679310297046
0.17115597726505427
0.17088619209200395
0.1706174328869182
0.1703496949760706
0.17008297370877992
0.16981726445729434
0.16955256261666185
0.169288863604623
0.16902616286147681
0.16876445584996255
0.16850373805516317
0.16824400498435263
0.16798525216691326
0.16772747515419126
0.16747066951939693
0.16721483085748873
0.166959954785051
0.16670603694019134
0.16645307298241124
0.16620105859251283
0.16594998947247586
0.16569986134534512
0.16545066995512256
0.16520241106666367
0.1649550804655558
0.1647086739580211
0.16446318737079724
0.164218616551045
0.1639749573662

0.11192598908918243
0.1118519176142622
0.11177803013127623
0.1117043259360646
0.11163080432753304
0.11155746460763544
0.11148430608136498
0.1114113280567325
0.11133852984476227
0.11126591075946902
0.11119347011785068
0.11112120723986722
0.11104912144843454
0.1109772120694074
0.1109054784315644
0.11083391986659659
0.11076253570909286
0.11069132529652799
0.11062028796924624
0.11054942307045103
0.11047872994619057
0.11040820794534484
0.1103378564196104
0.11026767472349279
0.11019766221428814
0.11012781825206927
0.11005814219968202
0.10998863342272043
0.10991929128952202
0.10985011517115331
0.10978110444139456
0.10971225847673204
0.10964357665634092
0.1095750583620747
0.1095067029784537
0.10943850989265255
0.10937047849448413
0.10930260817639523
0.10923489833344684
0.10916734836330441
0.10909995766622844
0.1090327256450599
0.10896565170520744
0.10889873525463867
0.10883197570386786
0.10876537246593954
0.10869892495642304
0.10863263259339839
0.10856649479744258
0.10850051099162286
0.1084346

([1.4786387208196003, 2.0320563576637283, 4.72523869171243, 12.31866173903713, 18.97919911802692, 17.660759956511438, 22.669259879669397, 17.921098216933053, 22.46384112254556, 17.60610490168276  …  0.5721430992455032, 0.5721323657290442, 0.5721216378986319, 0.5721109157440384, 0.5721001992550658, 0.5720894884215548, 0.5720787832333766, 0.5720680836804327, 0.5720573897526589, 0.5720467014400251], 1.3983071749485456, [0.049992270636727065, 0.04876532269536386, 0.0909640088380426, 0.09338610109724525, 0.06097162552894454, 0.06965974608673746, 0.07435580638727404, 0.006849242226453433, -0.004000789149839729, 0.001451913508163908  …  0.0052837076300908635, 0.008339922858016264, 0.03678101547109689, 0.0011553402787611038, 0.04882193061764839, -0.0006343845805752229, 0.011566862806944892, 0.006476098579456614, 0.012492317936925462, 0.009297300554850297])

In [104]:
println(" The training scores are : ")
println(" The RMSE for the EVaR Regression is for ε = 0.1 : ", sqrt(mean((X*w_EVaR-Y).^2)))
println(" The RMSE for the EVaR Regression is for ε = 0.01 : ", sqrt(mean((X*w_EVaR2-Y).^2)))
println(" The RMSE for the Stable Regression is : ", sqrt(mean((X*w_opt-Y).^2)))
println("\n The test scores are : ")
println(" The RMSE for the EVaR Regression is for ε = 0.1 : ", sqrt(mean((X_test*w_EVaR-Y_test).^2)))
println(" The RMSE for the EVaR Regression is for ε = 0.01 : ", sqrt(mean((X_test*w_EVaR2-Y_test).^2)))
println(" The RMSE for the Stable Regression is : ", sqrt(mean((X_test*w_opt-Y_test).^2)))

 The training scores are : 
 The RMSE for the EVaR Regression is for ϵ = 0.1 : 0.5341719564132272
 The RMSE for the EVaR Regression is for ϵ = 0.01 : 0.5062091960729718
 The RMSE for the Stable Regression is : 0.5181819613399516

 The test scores are : 
 The RMSE for the EVaR Regression is for ϵ = 0.1 : 0.540600241274734
 The RMSE for the EVaR Regression is for ϵ = 0.01 : 0.549493304579194
 The RMSE for the Stable Regression is : 0.5694343039816557


### Some observations

We can see below that the optimal vectors are quite different, we sometimes obtain regression coefficients with ≠ signs! However the MSE etc... Are quite close.

In [108]:
w_opt

42-element Array{Float64,1}:
  0.10207108061902605   
 -0.021066134651967227  
  0.015463953398501989  
 -0.008722611234819355  
  0.11526387636966555   
 -0.03960455868062739   
  0.20063829414568468   
  0.003577525539958179  
 -0.009101022984684905  
 -0.08939386826088667   
 -0.14103048284917014   
 -0.21210166390817953   
 -0.05831920108269209   
  ⋮                     
 -0.11110545171028952   
 -0.14015344067599445   
  0.2369004488529454    
  0.3764612544687055    
  0.33908034088253514   
  0.36874225544267947   
  0.3183121867082784    
  0.32907957452564957   
  0.45709321047677387   
  0.32989866464434553   
  0.41200196634720054   
 -1.4888701985398634e-10

In [107]:
w_EVaR2

42-element Array{Float64,1}:
  0.1702023147968051   
  0.017206104988115957 
  0.05522081449134421  
  0.09137378849723386  
  0.09667402740296399  
 -0.004231651263063173 
  0.1397252252215143   
  0.006227402978673826 
 -0.007961425194564878 
 -0.014369307409649656 
  0.06419308150310499  
  0.030636275833428157 
 -0.01030161594355138  
  ⋮                    
 -0.03459013400772386  
 -0.022672574338347958 
  0.012134617096319356 
 -0.005041273290073722 
  0.11936902466349952  
  0.051221627965438485 
  0.07499036327851458  
 -0.05410996064198774  
  0.03918873336538251  
 -0.010292303550626221 
  0.053872239261088556 
  0.0019082706771125773

## Craft

In [10]:
K = 10000 #iter_max
function cutting_planes(X,Y,α,ε)
    list_s=[.9]; list_w=[ones(p)./100]
    list_f = [objective_ℓ2(X,Y,list_s[1],list_w[1],α)]
    
    model = Model(solver=GurobiSolver())

        # Optimization variables
        @variable(model, s>=0.000001)
        @variable(model, t>=0)
        @variable(model, w[1:p])

        # objective_ℓ2
        @objective_ℓ2(model, Min, t)

        #we use the ε formulation because otherwise we do not obtain results
        @constraint(model, t >= objective_ℓ2(X,Y,list_s[1],list_w[1],α)+ ∇sobjective_ℓ2(X,Y,list_s[1],list_w[1],α)*(s-list_s[1]) + dot(∇wobjective_ℓ2(X,Y,list_s[1],list_w[1],α),w-list_w[1]))
        @constraint(model, [j=1:p], w[j] >= -1)
        @constraint(model, [j=1:p], 1 >= w[j])
        solve(model)
        push!(list_s, getvalue(s))
        push!(list_w, getvalue(w))
        t_opt = getvalue(t)
        println(getvalue(s))
        for k in 2:K
            push!(list_f, objective_ℓ2(X,Y,list_s[k],list_w[k],α))
            if t_opt + ε < list_f[k]
                println(list_f)
                @constraint(model, t >=objective_ℓ2(X,Y,list_s[k],list_w[k],α) + ∇sobjective_ℓ2(X,Y,list_s[k],list_w[k],α)*(s-list_s[k]) + dot(∇wobjective_ℓ2(X,Y,list_s[k],list_w[k],α),w-list_w[k]))
                solve(model)
                push!(list_s, getvalue(s))
                push!(list_w, getvalue(w))
                t_opt = getvalue(t)
            else break
            end
        end
            
    
end



cutting_planes (generic function with 1 method)

In [11]:
cutting_planes(X,Y,.95,0.0001)

Academic license - for non-commercial use only
Optimize a model with 87 rows, 45 columns and 131 nonzeros
Coefficient statistics:
  Matrix range     [2e-03, 1e+02]
  Objective range  [1e+00, 1e+00]
  Bounds range     [1e-06, 1e-06]
  RHS range        [1e+00, 6e+00]
Presolve removed 87 rows and 45 columns
Presolve time: 0.00s
Presolve: All rows and columns removed
Iteration    Objective       Primal Inf.    Dual Inf.      Time
       0    0.0000000e+00   0.000000e+00   0.000000e+00      0s

Solved in 0 iterations and 0.00 seconds
Optimal objective  0.000000000e+00
85.5352368782963
[1.3250372588602282, 18061.681987688222]
Optimize a model with 88 rows, 45 columns and 165 nonzeros
Coefficient statistics:
  Matrix range     [2e-13, 2e+04]
  Objective range  [1e+00, 1e+00]
  Bounds range     [1e-06, 1e-06]
  RHS range        [1e+00, 2e+04]
         Consider reformulating model or setting NumericFocus parameter
         to avoid numerical issues.
Iteration    Objective       Primal Inf.    D

[1.3250372588602282, 18061.681987688222, 5251.37367731691, 2221.7996186785185, 1427.4566693411584, 552.7091906537727, 395.69072075713365, 426.66173619905277, 156.8388916901739, 91.38850161850175, 54.38881472530736, 439.28218765419535, 123.37633185770054, 56.89619496160457]
Optimize a model with 100 rows, 45 columns and 680 nonzeros
Coefficient statistics:
  Matrix range     [1e-13, 2e+04]
  Objective range  [1e+00, 1e+00]
  Bounds range     [1e-06, 1e-06]
  RHS range        [1e+00, 2e+04]
         Consider reformulating model or setting NumericFocus parameter
         to avoid numerical issues.
Iteration    Objective       Primal Inf.    Dual Inf.      Time
       0    0.0000000e+00   7.112024e+00   0.000000e+00      0s
       1    0.0000000e+00   0.000000e+00   0.000000e+00      0s

Solved in 1 iterations and 0.00 seconds
Optimal objective  0.000000000e+00
[1.3250372588602282, 18061.681987688222, 5251.37367731691, 2221.7996186785185, 1427.4566693411584, 552.7091906537727, 395.69072075

[1.3250372588602282, 18061.681987688222, 5251.37367731691, 2221.7996186785185, 1427.4566693411584, 552.7091906537727, 395.69072075713365, 426.66173619905277, 156.8388916901739, 91.38850161850175, 54.38881472530736, 439.28218765419535, 123.37633185770054, 56.89619496160457, 40.354591497270064, 51.30020421975063, 27.470682201246007, 68.4981957861147, 29.137225584128096, 56.429660458023584, 20.371163746267527, 14.98391882658607, 92.9974325479721, 48.92366053817601, 50.0319725961191, 35.013179711249606, 55.66533727366833, 32.10459979485586, 26.14341295266092, 20.860264617668584, 35.80490922251698]
Optimize a model with 117 rows, 45 columns and 1445 nonzeros
Coefficient statistics:
  Matrix range     [1e-13, 2e+04]
  Objective range  [1e+00, 1e+00]
  Bounds range     [1e-06, 1e-06]
  RHS range        [1e+00, 2e+04]
         Consider reformulating model or setting NumericFocus parameter
         to avoid numerical issues.
Iteration    Objective       Primal Inf.    Dual Inf.      Time
      

[1.3250372588602282, 18061.681987688222, 5251.37367731691, 2221.7996186785185, 1427.4566693411584, 552.7091906537727, 395.69072075713365, 426.66173619905277, 156.8388916901739, 91.38850161850175, 54.38881472530736, 439.28218765419535, 123.37633185770054, 56.89619496160457, 40.354591497270064, 51.30020421975063, 27.470682201246007, 68.4981957861147, 29.137225584128096, 56.429660458023584, 20.371163746267527, 14.98391882658607, 92.9974325479721, 48.92366053817601, 50.0319725961191, 35.013179711249606, 55.66533727366833, 32.10459979485586, 26.14341295266092, 20.860264617668584, 35.80490922251698, 13.41941632526409, 43.499107658567105, 23.480262032716507, 22.25801761989144, 46.139841424302816, 24.28501513323152, 16.12307279159856, 11.05819257001516, 12.518065983363751, 8.796216091672004, 7.950001276326284]
Optimize a model with 128 rows, 45 columns and 1940 nonzeros
Coefficient statistics:
  Matrix range     [1e-13, 2e+04]
  Objective range  [1e+00, 1e+00]
  Bounds range     [1e-06, 1e-06]

Optimize a model with 134 rows, 45 columns and 2210 nonzeros
Coefficient statistics:
  Matrix range     [1e-13, 2e+04]
  Objective range  [1e+00, 1e+00]
  Bounds range     [1e-06, 1e-06]
  RHS range        [1e+00, 2e+04]
         Consider reformulating model or setting NumericFocus parameter
         to avoid numerical issues.
Iteration    Objective       Primal Inf.    Dual Inf.      Time
       0    0.0000000e+00   1.505066e+00   0.000000e+00      0s
      12    0.0000000e+00   0.000000e+00   0.000000e+00      0s

Solved in 12 iterations and 0.00 seconds
Optimal objective  0.000000000e+00
[1.3250372588602282, 18061.681987688222, 5251.37367731691, 2221.7996186785185, 1427.4566693411584, 552.7091906537727, 395.69072075713365, 426.66173619905277, 156.8388916901739, 91.38850161850175, 54.38881472530736, 439.28218765419535, 123.37633185770054, 56.89619496160457, 40.354591497270064, 51.30020421975063, 27.470682201246007, 68.4981957861147, 29.137225584128096, 56.429660458023584, 20.37116374

[1.3250372588602282, 18061.681987688222, 5251.37367731691, 2221.7996186785185, 1427.4566693411584, 552.7091906537727, 395.69072075713365, 426.66173619905277, 156.8388916901739, 91.38850161850175, 54.38881472530736, 439.28218765419535, 123.37633185770054, 56.89619496160457, 40.354591497270064, 51.30020421975063, 27.470682201246007, 68.4981957861147, 29.137225584128096, 56.429660458023584, 20.371163746267527, 14.98391882658607, 92.9974325479721, 48.92366053817601, 50.0319725961191, 35.013179711249606, 55.66533727366833, 32.10459979485586, 26.14341295266092, 20.860264617668584, 35.80490922251698, 13.41941632526409, 43.499107658567105, 23.480262032716507, 22.25801761989144, 46.139841424302816, 24.28501513323152, 16.12307279159856, 11.05819257001516, 12.518065983363751, 8.796216091672004, 7.950001276326284, 7.187076992301392, 10.394955532298345, 11.140295807174782, 9.927657108921931, 7.91151932388772, 6.020263506487884, 8.842414527998924, 10.483858164412354, 6.102830349608161, 30.1028468715

ErrorException: Invalid coefficient NaN on variable s

In [None]:
lis