# Robust Ridge and LASSO Regressions

In [1]:
# Import relevant libraries
using JuMP, Gurobi, CSV

In [2]:
# L1-norm robust linear regression.
#
# Solves  min_β  ‖Y - Xβ‖₂ + ρ‖β‖₁  as a second-order cone program:
# `t` bounds the residual 2-norm and `a[i]` bounds |β[i]|, so the objective
# is t + ρ * sum(a).
#
# Arguments:
#   X - feature matrix; P = size(X, 2) coefficients are fitted (no intercept
#       column is added here)
#   Y - response vector, conformable with X * β
#   ρ - penalty parameter, must be non-negative
# Returns the fitted coefficient vector β (length P).
# Throws an ArgumentError for negative ρ and an error if the solver does not
# report an optimal solution.
reg_l1 = function(X, Y, ρ) # ρ is the penalty parameter
    # With ρ < 0 the objective rewards arbitrarily large a[i] (unbounded problem).
    ρ >= 0 || throw(ArgumentError("ρ must be non-negative, got $ρ"))
    P = size(X, 2)
    model = Model(solver=GurobiSolver(OutputFlag=0))
    @variable(model, t)
    @variable(model, a[1:P])
    @variable(model, β[1:P])
    @objective(model, Min, t + ρ * sum(a[i] for i = 1:P))
    @constraint(model, norm(Y - X * β, 2) <= t)
    # a[i] >= |β[i]|, stated per index. The former `β[i = 1:P] .<= a[i = 1:P]`
    # depended on assignment-inside-indexing syntax, deprecated in Julia 0.7.
    @constraint(model, [i = 1:P], β[i] <= a[i])
    @constraint(model, [i = 1:P], -β[i] <= a[i])
    status = solve(model)
    # Fail loudly instead of handing NaN coefficients back to the caller.
    status == :Optimal ||
        error("reg_l1: solver returned status $status instead of :Optimal")
    return getvalue(β)
end;

In [3]:
# L2-norm robust linear regression.
#
# Solves  min_β  ‖Y - Xβ‖₂ + ρ‖β‖₂  as a second-order cone program:
# `t` bounds the residual 2-norm and `s` bounds the (unsquared) 2-norm of β,
# so the objective is t + ρ * s.
#
# Arguments:
#   X - feature matrix; P = size(X, 2) coefficients are fitted (no intercept
#       column is added here)
#   Y - response vector, conformable with X * β
#   ρ - penalty parameter, must be non-negative; ρ = 0 removes the penalty
# Returns the fitted coefficient vector β (length P).
# Throws an ArgumentError for negative ρ and an error if the solver does not
# report an optimal solution.
reg_l2 = function(X, Y, ρ) # where ρ is the penalty parameter
    # With ρ < 0 the objective rewards arbitrarily large s (unbounded problem).
    ρ >= 0 || throw(ArgumentError("ρ must be non-negative, got $ρ"))
    P = size(X, 2)
    model = Model(solver=GurobiSolver(OutputFlag=0))
    @variable(model, t)
    @variable(model, s)
    @variable(model, β[1:P])
    @objective(model, Min, t + s * ρ)
    @constraint(model, norm(β) <= s)
    @constraint(model, norm(Y - X * β) <= t)
    status = solve(model)
    # Fail loudly instead of handing NaN coefficients back to the caller.
    status == :Optimal ||
        error("reg_l2: solver returned status $status instead of :Optimal")
    return getvalue(β)
end;

## Housing Dataset

In [4]:
# Read the housing data (the CSV has no header row) and turn the resulting
# table into a plain array in one step
df = convert(Array, CSV.read("housing.csv", header=false))
print("The dataset has dimension ", size(df))

The dataset has dimension (506, 14)

In [5]:
# Split into training, validation and test sets (roughly 50%/25%/25%).
# Rows keep their original order: everything before `val_start` is training
# data, `val_start` up to `test_start - 1` is validation, the rest is test.
n = size(df, 1)
val_start = round(Int, 0.5 * n)    # first row of the validation set
test_start = round(Int, 0.75 * n)  # first row of the test set
# The last column is the response; every other column is a feature
X = df[:, 1:end-1]
Y = df[:, end]
X_train = X[1:(val_start - 1), :]
X_val = X[val_start:(test_start - 1), :]
X_test = X[test_start:end, :]
Y_train = Y[1:(val_start - 1)]
Y_val = Y[val_start:(test_start - 1)]
Y_test = Y[test_start:end]
println(size(Y_train), " ", size(Y_val), " ", size(Y_test))
println(size(X_train), " ", size(X_val), " ", size(X_test))

(252,) (127,) (127,)
(252, 13) (127, 13) (127, 13)


In [6]:
# Tuning ρ for L1 Regression: fit on the training set for every candidate
# penalty and report the 2-norm of the residuals on the validation set
for ρ in [0.001, 0.01, 0.1, 1, 2]
    beta_hat_l1 = reg_l1(X_train, Y_train, ρ)
    val_residual = Y_val - X_val * beta_hat_l1
    val_error = norm(val_residual, 2)
    println("When ρ is ", ρ,", val error is ", val_error)
    println()
end

Academic license - for non-commercial use only
When ρ is 0.001, val error is 94.41338707927206

Academic license - for non-commercial use only
When ρ is 0.01, val error is 93.3820765291332

Academic license - for non-commercial use only
When ρ is 0.1, val error is 86.77821787208819

Academic license - for non-commercial use only
When ρ is 1.0, val error is 89.75307324034216

Academic license - for non-commercial use only
When ρ is 2.0, val error is 91.33528392931979



In [7]:
# ρ = 0.1 gave the lowest validation error, so refit with it on the training
# and validation rows stacked together, then score on the test set
beta_hat_l1 = reg_l1(vcat(X_train, X_val), vcat(Y_train, Y_val), 0.1)
error1 = norm(Y_test - X_test * beta_hat_l1, 2)
print("L1 Regression's score: ", error1)

Academic license - for non-commercial use only
L1 Regression's score: 89.46433726768113

In [8]:
# Tuning ρ for L2 Regression: fit on the training set for every candidate
# penalty and report the 2-norm of the residuals on the validation set
for ρ in [0.001, 0.01, 0.1, 1, 2]
    beta_hat_l2 = reg_l2(X_train, Y_train, ρ)
    val_residual = Y_val - X_val * beta_hat_l2
    val_error = norm(val_residual, 2)
    println("When ρ is ", ρ,", val error is ", val_error)
    println()
end

Academic license - for non-commercial use only
When ρ is 0.001, val error is 94.44685109513979

Academic license - for non-commercial use only
When ρ is 0.01, val error is 93.72958529755765

Academic license - for non-commercial use only
When ρ is 0.1, val error is 88.7744280677687

Academic license - for non-commercial use only
When ρ is 1.0, val error is 83.28692584141854

Academic license - for non-commercial use only
When ρ is 2.0, val error is 80.87886453711148



In [9]:
# ρ = 2.0 gave the lowest validation error, so refit with it on the training
# and validation rows stacked together, then score on the test set
beta_hat_l2 = reg_l2(vcat(X_train, X_val), vcat(Y_train, Y_val), 2.0)
error2 = norm(Y_test - X_test * beta_hat_l2, 2)
print("L2 Regression's score: ", error2)

Academic license - for non-commercial use only
L2 Regression's score: 83.59529329250292

In [10]:
# Comparison with Linear Regression: ρ = 0 switches the penalty term off,
# leaving only the residual norm in the objective
beta_hat = reg_l2(vcat(X_train, X_val), vcat(Y_train, Y_val), 0)
print("Normal Regression's score: ", norm(Y_test - X_test * beta_hat, 2))

Academic license - for non-commercial use only
Normal Regression's score: 91.31811641049084

In [11]:
# Comparison with Baseline Model: always predict the mean response.
# Despite its name, Y_train_mean averages the training AND validation responses.
Y_train_mean = mean([Y_train;Y_val])
# Broadcast the scalar over the test vector with `.-`; plain `vector - scalar`
# is deprecated in Julia 0.7 and a MethodError from Julia 1.0 onwards.
print("Baseline Model's score: ", norm(Y_test .- Y_train_mean))

Baseline Model's score: 129.2265350398843

### SUMMARY

In [12]:
# Test-set scores (2-norm of the residuals) of all four models side by side
println("L1 Regression's score: ", error1)
println("L2 Regression's score: ", error2)
println("Normal Regression's score: ", norm(Y_test - X_test * beta_hat, 2))
# `.-` broadcasts the scalar mean; plain `vector - scalar` is deprecated in
# Julia 0.7 and a MethodError from Julia 1.0 onwards.
println("Baseline Model's score: ", norm(Y_test .- Y_train_mean))

L1 Regression's score: 89.46433726768113
L2 Regression's score: 83.59529329250292
Normal Regression's score: 91.31811641049084
Baseline Model's score: 129.2265350398843


## Communities and Crime Dataset

In [13]:
# Read the communities-and-crime data (the CSV has no header row) and turn
# the resulting table into a plain array in one step
df = convert(Array, CSV.read("communities-and-crime.csv", header=false))
print("The dataset has dimension ", size(df))

The dataset has dimension (122, 123)

In [14]:
# Split into training, validation and test sets (roughly 50%/25%/25%).
# Rows keep their original order: everything before `val_start` is training
# data, `val_start` up to `test_start - 1` is validation, the rest is test.
n = size(df, 1)
val_start = round(Int, 0.5 * n)    # first row of the validation set
test_start = round(Int, 0.75 * n)  # first row of the test set
# The last column is the response; every other column is a feature
X = df[:, 1:end-1]
Y = df[:, end]
X_train = X[1:(val_start - 1), :]
X_val = X[val_start:(test_start - 1), :]
X_test = X[test_start:end, :]
Y_train = Y[1:(val_start - 1)]
Y_val = Y[val_start:(test_start - 1)]
Y_test = Y[test_start:end]
println(size(Y_train), " ", size(Y_val), " ", size(Y_test))
println(size(X_train), " ", size(X_val), " ", size(X_test))

(60,) (31,) (31,)
(60, 122) (31, 122) (31, 122)


In [15]:
# Tuning ρ for L1 Regression: fit on the training set for every candidate
# penalty and report the 2-norm of the residuals on the validation set
for ρ in [0.001, 0.01, 0.1, 1, 2]
    beta_hat_l1 = reg_l1(X_train, Y_train, ρ)
    val_residual = Y_val - X_val * beta_hat_l1
    val_error = norm(val_residual, 2)
    println("When ρ is ", ρ,", val error is ", val_error)
    println()
end

Academic license - for non-commercial use only
When ρ is 0.001, val error is 1.743908092948006

Academic license - for non-commercial use only
When ρ is 0.01, val error is 1.7426339975897596

Academic license - for non-commercial use only
When ρ is 0.1, val error is 0.88641339286839

Academic license - for non-commercial use only
When ρ is 1.0, val error is 0.6699333542201944

Academic license - for non-commercial use only
When ρ is 2.0, val error is 0.878701524516093



In [16]:
# ρ = 1.0 gave the lowest validation error, so refit with it on the training
# and validation rows stacked together, then score on the test set
beta_hat_l1 = reg_l1(vcat(X_train, X_val), vcat(Y_train, Y_val), 1.0)
error1 = norm(Y_test - X_test * beta_hat_l1, 2)
print("L1 Regression's score: ", error1)

Academic license - for non-commercial use only
L1 Regression's score: 0.7834136806858949

In [23]:
# Tuning ρ for L2 Regression: fit on the training set for every candidate
# penalty and report the 2-norm of the residuals on the validation set
for ρ in [0.001, 0.01, 0.1, 1, 2]
    beta_hat_l2 = reg_l2(X_train, Y_train, ρ)
    val_residual = Y_val - X_val * beta_hat_l2
    val_error = norm(val_residual, 2)
    println("When ρ is ", ρ,", val error is ", val_error)
    println()
end

Academic license - for non-commercial use only
When ρ is 0.001, val error is 1.712282147013243

Academic license - for non-commercial use only
When ρ is 0.01, val error is 1.7122821470096032

Academic license - for non-commercial use only
When ρ is 0.1, val error is 1.7122821123814254

Academic license - for non-commercial use only
When ρ is 1.0, val error is 0.8418771745970673

Academic license - for non-commercial use only
When ρ is 2.0, val error is 0.7060072335570459



In [18]:
# ρ = 2.0 gave the lowest validation error, so refit with it on the training
# and validation rows stacked together, then score on the test set
beta_hat_l2 = reg_l2(vcat(X_train, X_val), vcat(Y_train, Y_val), 2.0)
error2 = norm(Y_test - X_test * beta_hat_l2, 2)
print("L2 Regression's score: ", error2)

Academic license - for non-commercial use only
L2 Regression's score: 0.7508218451811334

In [19]:
# Comparison with Linear Regression: ρ = 0 switches the penalty term off,
# leaving only the residual norm in the objective
beta_hat = reg_l2(vcat(X_train, X_val), vcat(Y_train, Y_val), 0)
print("Normal Regression's score: ", norm(Y_test - X_test * beta_hat, 2))

Academic license - for non-commercial use only
Normal Regression's score: 2.526386031751493

In [20]:
# Comparison with Baseline Model: always predict the mean response.
# Despite its name, Y_train_mean averages the training AND validation responses.
Y_train_mean = mean([Y_train;Y_val])
# Broadcast the scalar over the test vector with `.-`; plain `vector - scalar`
# is deprecated in Julia 0.7 and a MethodError from Julia 1.0 onwards.
print("Baseline Model's score: ", norm(Y_test .- Y_train_mean))

Baseline Model's score: 1.4589372704759014

### SUMMARY


In [21]:
# Test-set scores (2-norm of the residuals) of all four models side by side
println("L1 Regression's score: ", error1)
println("L2 Regression's score: ", error2)
println("Normal Regression's score: ", norm(Y_test - X_test * beta_hat, 2))
# `.-` broadcasts the scalar mean; plain `vector - scalar` is deprecated in
# Julia 0.7 and a MethodError from Julia 1.0 onwards.
println("Baseline Model's score: ", norm(Y_test .- Y_train_mean))

L1 Regression's score: 0.7834136806858949
L2 Regression's score: 0.7508218451811334
Normal Regression's score: 2.526386031751493
Baseline Model's score: 1.4589372704759014
