In [1]:
using JuMP, Gurobi
using DataFrames, CSV
using LinearAlgebra
using Random

Helper Function to Compute MSE

In [1]:
"""
    compute_mse(X, y, beta)

Return the mean squared error of the linear predictor `X * beta[1:p] .+ beta[0]`
against `y`, where `p` is the number of columns of `X`.

`beta` must support indexing at `0` (intercept) and at the range `1:p` (slopes).
The squared residuals are summed and divided by the number of rows of `X`.
"""
function compute_mse(X, y, beta)
    nobs, nfeat = size(X)
    intercept = beta[0]
    slopes = beta[1:nfeat]
    residuals = y .- X * slopes .- intercept
    return sum(residuals .^ 2) / nobs
end

compute_mse (generic function with 1 method)

Read in Data

In [3]:
# Load the feature tables and response vectors used by every later cell.
# `header=0` tells CSV.jl the files have no header row (column names are auto-generated).
# The response files are read as tables and reduced to their first column with `[:,1]`;
# the feature tables stay as DataFrames and are converted with `Matrix(...)` where needed.
# The trailing `;` suppresses notebook display of each result.
Xtrain = CSV.read("stableX_train_and_valid.csv", DataFrame, header=0);
ytrain = CSV.read("stabley_train_and_valid.csv", DataFrame, header=0)[:,1];
Xtest = CSV.read("stableX_test.csv", DataFrame, header=0);
ytest = CSV.read("stabley_test.csv", DataFrame, header=0)[:,1];

## PART A

In [4]:
"""
    oneThreeA(X, y, rho; solver_output=0)

Fit a linear model by solving a linear program with Gurobi and return the coefficients.

The LP minimises

    sum(z[k] for k in 1:n) + rho * sum(a[j] for j in 0:p)

where `z[k]` bounds the absolute residual of row `k` and `a[j]` bounds `|beta[j]|`.
Note that the penalty runs over `j = 0:p`, so the intercept `beta[0]` is penalised too.

# Arguments
- `X`: `n × p` feature table; rows are read with `X[k, :]`.
- `y`: responses, indexed `1:n`.
- `rho`: weight of the absolute-value penalty on the coefficients.

# Keywords
- `solver_output`: value passed to Gurobi's `OutputFlag` attribute (default `0`).

# Returns
The solved values of `beta`, as a container indexed `0:p` (`beta[0]` is the intercept).

# Throws
An `ErrorException` if the solver finishes without a primal solution.
"""
function oneThreeA(X,y,rho;solver_output=0)

    n,p = size(X)

    # Build model
    model = Model(Gurobi.Optimizer)
    set_optimizer_attribute(model, "OutputFlag", solver_output)

    # Insert variables
    @variable(model,beta[i=0:p])
    # The lower bounds on `a` and `z` are implied by the constraint pairs below
    # (adding each pair gives 0 <= 2a[j] and 0 <= 2z[k]); they are kept so the LP
    # handed to the solver is unchanged.
    @variable(model,a[j=0:p]>=0)
    @variable(model,z[k=1:n]>=0)

    # Insert constraints
    # a[j] >= |beta[j]|
    @constraint(model,[j=0:p], beta[j]<=a[j])
    @constraint(model,[j=0:p], -beta[j]<=a[j])
    # z[k] >= |y[k] - beta[0] - beta[1:p]' X[k,:]|
    @constraint(model,[k=1:n], y[k] - beta[0] - dot(beta[1:p],X[k,:])<=z[k])
    @constraint(model,[k=1:n], - y[k] + beta[0] + dot(beta[1:p],X[k,:])<=z[k])

    # Objective: total absolute residual plus rho times total absolute coefficient
    @objective(model,Min, sum(z[i] for i=1:n) + rho*sum(a[j] for j=0:p))

    # Optimize
    optimize!(model)

    # Fail with a readable message instead of an opaque error from `value`
    # when the solver has no primal solution to report.
    has_values(model) || error(
        "oneThreeA: solver returned no solution (termination status: " *
        string(termination_status(model)) * ")",
    )

    # Return estimated betas
    return value.(beta)

end

oneThreeA (generic function with 1 method)

## PART B

In [5]:
"""
    regression_valid_cv(X, y, rho_vals, rseed; method=oneThreeA, split_at=0.7,
                        solver_output=0, seed=1)

Choose a regularisation weight from `rho_vals` with a single random train/validation
split, then refit on all of `(X, y)` with the chosen weight.

# Arguments
- `X`, `y`: full data; rows of `X` and entries of `y` are selected by index vectors.
- `rho_vals`: non-empty collection of candidate weights.
- `rseed`: seed passed to `Random.seed!` before the rows are permuted. This reseeds the
  global RNG.

# Keywords
- `method`: fitting function called as `method(X, y, rho; solver_output=...)`; its result
  is passed to `compute_mse` as the coefficient container.
- `split_at`: fraction of rows used for training; `floor(split_at * n)` rows are used.
- `solver_output`: forwarded to every call of `method`, including the final refit.
- `seed`: accepted for backward compatibility but not used; `rseed` drives the RNG.

# Returns
`(beta_best, rho_best, errors)`: the refit coefficients, the selected weight, and the
validation MSE for each entry of `rho_vals`.

# Throws
`ArgumentError` if `rho_vals` is empty or the split leaves the training or validation
set empty.
"""
function regression_valid_cv(X, y, rho_vals, rseed; method=oneThreeA, split_at=0.7, solver_output=0, seed=1)
    isempty(rho_vals) &&
        throw(ArgumentError("rho_vals must contain at least one candidate value"))

    n = size(X, 1)
    split = floor(Int, split_at*n)
    # An empty validation set would give NaN errors and an arbitrary "best" rho;
    # an empty training set would fit on no data.
    0 < split < n || throw(ArgumentError(
        "split_at=$(split_at) leaves an empty training or validation set for n=$(n) rows",
    ))

    # To create train and validation data, define the row indices of each part.
    Random.seed!(rseed)
    permuted_indices = randperm(n)
    train_indices = permuted_indices[1:split]
    valid_indices = permuted_indices[split+1:end]
    X_train, y_train = X[train_indices,:], y[train_indices]
    # Convert the validation features once rather than on every loop pass.
    X_valid, y_valid = Matrix(X[valid_indices,:]), y[valid_indices]

    # Validation MSE for each candidate rho
    errors = zeros(length(rho_vals))
    for (i,rho) in enumerate(rho_vals)
        # Fit on the training part with this rho
        beta = method(X_train,y_train,rho,solver_output=solver_output)
        # Score the fitted coefficients on the held-out part
        errors[i] = compute_mse(X_valid, y_valid, beta)
    end

    # Refit on all data with the best-performing rho, honouring solver_output here too
    i_best = argmin(errors)
    beta_best = method(X,y,rho_vals[i_best],solver_output=solver_output)
    return beta_best, rho_vals[i_best], errors
end

regression_valid_cv (generic function with 1 method)

In [6]:
# Candidate penalty weights for the Part A model
rho_vals = [0.01, 0.03, 0.08, 0.1, 0.3, 0.8, 1, 3]

# One validation split with RNG seed 1, followed by a refit on the full training data
beta_3b_cv, rho_3b_cv, errors_3b = regression_valid_cv(
    Xtrain, ytrain, rho_vals, 1; method=oneThreeA,
)

# Report the held-out test error of the refit model and the weight that was selected
println("MSE: $(compute_mse(Matrix(Xtest), ytest, beta_3b_cv))")
println("Cross-validated rho: $(rho_3b_cv)")

Academic license - for non-commercial use only - expires 2022-08-19
Academic license - for non-commercial use only - expires 2022-08-19
Academic license - for non-commercial use only - expires 2022-08-19
Academic license - for non-commercial use only - expires 2022-08-19
Academic license - for non-commercial use only - expires 2022-08-19
Academic license - for non-commercial use only - expires 2022-08-19
Academic license - for non-commercial use only - expires 2022-08-19
Academic license - for non-commercial use only - expires 2022-08-19
Academic license - for non-commercial use only - expires 2022-08-19
MSE: 5.024122859285738
Cross-validated rho: 3.0


In [7]:
rho_vals = [0.01, 0.03, 0.08, 0.1, 0.3, 0.8, 1, 3]
iterations = 5

# Repeat the validation procedure with seeds 1..iterations and keep each test MSE,
# to see how much the result depends on the random split.
mses = zeros(iterations)
for i in 1:iterations
    println("Iteration ", i, ": ")
    beta_3b_cv, rho_3b_cv, errors_3b = regression_valid_cv(
        Xtrain, ytrain, rho_vals, i; method=oneThreeA,
    )
    mses[i] = compute_mse(Matrix(Xtest), ytest, beta_3b_cv)
    println("MSE: ", mses[i])
    println("Cross-validated rho: ", rho_3b_cv)
    # Two blank lines between iterations
    println("")
    println("")
end

Iteration 1: 
Academic license - for non-commercial use only - expires 2022-08-19
Academic license - for non-commercial use only - expires 2022-08-19
Academic license - for non-commercial use only - expires 2022-08-19
Academic license - for non-commercial use only - expires 2022-08-19
Academic license - for non-commercial use only - expires 2022-08-19
Academic license - for non-commercial use only - expires 2022-08-19
Academic license - for non-commercial use only - expires 2022-08-19
Academic license - for non-commercial use only - expires 2022-08-19
Academic license - for non-commercial use only - expires 2022-08-19
MSE: 5.024122859285738
Cross-validated rho: 3.0


Iteration 2: 
Academic license - for non-commercial use only - expires 2022-08-19
Academic license - for non-commercial use only - expires 2022-08-19
Academic license - for non-commercial use only - expires 2022-08-19
Academic license - for non-commercial use only - expires 2022-08-19
Academic license - for non-commercial 

In [8]:
# Positions of the smallest and largest test MSE across the repeated splits
i_best, i_worst = argmin(mses), argmax(mses)
println("Best MSE: $(mses[i_best])")
println("Worst MSE: $(mses[i_worst])")

Best MSE: 4.887885853459811
Worst MSE: 5.024122859285738


## PART D

In [26]:
"""
    oneThreeD(X, y, rho; split_at=0.7, solver_output=0)

Solve the Part D linear program with Gurobi and return `(u, beta)`.

With `k = floor(split_at * n)`, the LP minimises

    k * theta + sum(u[i] for i in 1:n) + rho * sum(a[j] for j in 0:p)

subject to `theta + u[i] >= |y[i] - beta[0] - beta[1:p]' X[i,:]|`, `u >= 0` and
`a[j] >= |beta[j]|`. The penalty runs over `j = 0:p`, so the intercept is penalised too.
NOTE(review): this looks like the LP form of "sum of the k largest absolute residuals"
plus a penalty — confirm against the problem statement.

# Arguments
- `X`: `n × p` feature table; rows are read with `X[i, :]`.
- `y`: responses, indexed `1:n`.
- `rho`: weight of the absolute-value penalty on the coefficients.

# Keywords
- `split_at`: fraction used to compute `k` (default `0.7`).
- `solver_output`: value passed to Gurobi's `OutputFlag` attribute (default `0`).

# Returns
A tuple `(u, beta)`: the solved slack values `u[1:n]` and the coefficients `beta`
indexed `0:p` (`beta[0]` is the intercept).

# Throws
An `ErrorException` if the solver finishes without a primal solution.
"""
function oneThreeD(X,y,rho;split_at=0.7,solver_output=0)

    n,p = size(X)

    # Build model
    model = Model(Gurobi.Optimizer)
    set_optimizer_attribute(model, "OutputFlag", solver_output)

    # Insert variables
    @variable(model,beta[i=0:p])
    @variable(model,theta)
    @variable(model,u[k=1:n]>=0)
    @variable(model,a[j=0:p]>=0)

    # Insert constraints
    # theta + u[i] >= |residual of row i|
    @constraint(model,[i=1:n], theta + u[i] >= y[i] - beta[0] - dot(beta[1:p],X[i,:]))
    @constraint(model,[i=1:n], theta + u[i] >= -y[i] + beta[0] + dot(beta[1:p],X[i,:]))
    # a[j] >= |beta[j]|
    @constraint(model,[j=0:p], beta[j]<=a[j])
    @constraint(model,[j=0:p], -beta[j]<=a[j])

    # Multiplier of theta in the objective
    k = floor(Int, split_at*n)

    # Objective
    @objective(model,Min, k*theta + sum(u[i] for i=1:n) + rho*sum(a[i] for i=0:p))

    # Optimize
    optimize!(model)

    # Fail with a readable message instead of an opaque error from `value`
    # when the solver has no primal solution to report.
    has_values(model) || error(
        "oneThreeD: solver returned no solution (termination status: " *
        string(termination_status(model)) * ")",
    )

    # Return the slack values together with the estimated betas
    return (value.(u),value.(beta))

end

oneThreeD (generic function with 1 method)

In [27]:
# Trial run of the Part D model with rho = 0.01; the notebook displays the returned
# (u, beta) tuple as the cell output.
oneThreeD(Xtrain,ytrain,0.01)

Academic license - for non-commercial use only - expires 2022-08-19


([0.0, 3.3525737277298937, 0.24727056194576935, 0.0, 1.6112077929667317, 0.0, 2.9720330715336027, 0.0, 0.0, 0.0938708160189794  …  1.9931326130373566, 0.0, 0.33942941837271334, 0.0, 0.0, 0.39361816864914845, 0.05791865697885834, 0.0, 3.0547207892411907, 0.8239339025494821], 1-dimensional DenseAxisArray{Float64,1,...} with index sets:
    Dimension 1, 0:7
And data, a 8-element Vector{Float64}:
   2.4980882084886944
  -0.8393572875627823
  11.295205378642434
  17.19432390986091
   9.048004653278822
 -18.417638050389154
  -9.117122908694878
   5.481237986890524)

In [30]:
rho_vals = [0.01,0.03,0.08,0.1,0.3,0.8,1,3]
errors = zeros(length(rho_vals))

# Slack values come from an LP solver and may be tiny positive numbers rather than
# exact zeros, so "u is zero" is tested against a tolerance (Gurobi's default
# feasibility tolerance is 1e-6). `u` is bounded below by 0 in the model.
zero_tol = 1e-6

for (i,rho) in enumerate(rho_vals)
    (u,beta) = oneThreeD(Xtrain,ytrain,rho,solver_output=0)

    # Rows whose slack is (numerically) zero are used as the validation set.
    validation_indices = u .<= zero_tol
    if !any(validation_indices)
        # No validation rows: the MSE would be 0/0 = NaN, which argmin would select.
        errors[i] = Inf
        continue
    end

    Xvalid = Xtrain[validation_indices,:]
    yvalid = ytrain[validation_indices]
    errors[i] = compute_mse(Matrix(Xvalid), yvalid, beta)
end

# Pick the rho with the smallest validation error
i_best = argmin(errors)
println("Best Rho Value: ", rho_vals[i_best])
println("Best MSE Value: ", errors[i_best])

Academic license - for non-commercial use only - expires 2022-08-19
Academic license - for non-commercial use only - expires 2022-08-19
Academic license - for non-commercial use only - expires 2022-08-19
Academic license - for non-commercial use only - expires 2022-08-19
Academic license - for non-commercial use only - expires 2022-08-19
Academic license - for non-commercial use only - expires 2022-08-19
Academic license - for non-commercial use only - expires 2022-08-19
Academic license - for non-commercial use only - expires 2022-08-19
Best Rho Value: 0.01
Best MSE Value: 0.1340072636642836


In [32]:
# Refit the Part A model on the full training data with rho = 0.01.
# NOTE(review): 0.01 matches the "Best Rho Value" printed by the Part D selection cell,
# but it is hard-coded here — confirm it is kept in sync if that cell's result changes.
beta_final = oneThreeA(Xtrain,ytrain,0.01)

Academic license - for non-commercial use only - expires 2022-08-19


1-dimensional DenseAxisArray{Float64,1,...} with index sets:
    Dimension 1, 0:7
And data, a 8-element Vector{Float64}:
   2.4636518957528164
  -0.45787550200138105
  10.054386044591087
  18.88974340764618
   9.054858218924496
 -18.06448759078263
 -10.02468984890087
   5.778924433654609

In [20]:
# Test-set MSE of the refit coefficients; the value is shown as the cell output.
compute_mse(Matrix(Xtest), ytest, beta_final)

4.890474182705654