In [2]:
using JuMP 
using Gurobi 
using CSV 
using LinearAlgebra
using DataFrames
using Random

In [3]:
gurobi_env = Gurobi.Env()

Academic license - for non-commercial use only


Gurobi.Env(Ptr{Nothing} @0x00007fa4a01c0e00)

### Data Preparation

In [170]:
df = CSV.read("framingham.csv")
df = df[setdiff(names(df), [:education])]

y = df.TenYearCHD
X = Matrix(df[setdiff(names(df), [:TenYearCHD])])


│   caller = top-level scope at In[170]:2
└ @ Core In[170]:2
│   caller = top-level scope at In[170]:5
└ @ Core In[170]:5


3658×14 Array{Float64,2}:
 1.0  39.0  0.0   0.0  0.0  0.0  0.0  …  106.0   70.0  26.97  80.0   77.0
 0.0  46.0  0.0   0.0  0.0  0.0  0.0     121.0   81.0  28.73  95.0   76.0
 1.0  48.0  1.0  20.0  0.0  0.0  0.0     127.5   80.0  25.34  75.0   70.0
 0.0  61.0  1.0  30.0  0.0  0.0  1.0     150.0   95.0  28.58  65.0  103.0
 0.0  46.0  1.0  23.0  0.0  0.0  0.0     130.0   84.0  23.1   85.0   85.0
 0.0  43.0  0.0   0.0  0.0  0.0  1.0  …  180.0  110.0  30.3   77.0   99.0
 0.0  63.0  0.0   0.0  0.0  0.0  0.0     138.0   71.0  33.11  60.0   85.0
 0.0  45.0  1.0  20.0  0.0  0.0  0.0     100.0   71.0  21.68  79.0   78.0
 1.0  52.0  0.0   0.0  0.0  0.0  1.0     141.5   89.0  26.36  76.0   79.0
 1.0  43.0  1.0  30.0  0.0  0.0  1.0     162.0  107.0  23.61  93.0   88.0
 0.0  50.0  0.0   0.0  0.0  0.0  0.0  …  133.0   76.0  22.91  75.0   76.0
 0.0  43.0  0.0   0.0  0.0  0.0  0.0     131.0   88.0  27.64  72.0   61.0
 1.0  46.0  1.0  15.0  0.0  0.0  1.0     142.0   94.0  26.31  98.0   64.0
 ⋮          

In [172]:
y[y.<1].=-1

3101-element view(::Array{Int64,1}, [1, 2, 3, 5, 6, 8, 9, 10, 11, 12  …  3645, 3647, 3648, 3650, 3651, 3652, 3655, 3656, 3657, 3658]) with eltype Int64:
 -1
 -1
 -1
 -1
 -1
 -1
 -1
 -1
 -1
 -1
 -1
 -1
 -1
  ⋮
 -1
 -1
 -1
 -1
 -1
 -1
 -1
 -1
 -1
 -1
 -1
 -1

In [174]:
function trainvalid_test_split(X, y, split_at=0.7)
    n,p = size(X)
    split = convert(Int,floor(split_at*n))
    permuted_indices = randperm(n)
    trainvalid_indices, test_indices = permuted_indices[1:split], permuted_indices[split+1:end]
    X_trainvalid, y_trainvalid = X[trainvalid_indices,:], y[trainvalid_indices]
    X_test, y_test = X[test_indices,:], y[test_indices]
    return X_trainvalid, X_test, y_trainvalid, y_test
end

trainvalid_test_split (generic function with 2 methods)

### Robust Logistic Regression

In [178]:
### Utils Functions ###
function compute_∇f(w_k, y, X, λ)
    n, p = size(X)
    temp = zeros(p)
    for i in 1:n
        t = min(exp(-y[i]*(transpose(w_k)*Array(X[i,:]))+λ*transpose(w_k)*w_k),1000000)
#         t = exp(-y[i]*(transpose(w_k)*Array(X[i,:]))+λ*transpose(w_k)*w_k)
        Δ = (1/(1+t))*t*(-y[i]*Array(X[i,:]) .+ 2*λ*w_k)
        temp = temp + Δ
    end
    ∇f_k = temp
    return ∇f_k
end

function rlr(y, X, ε, λ)
    errors = []
    n, p = size(X)
    w_0 = [0 for i in 1:p]
    f_0 = sum(log(1+exp(-y[i]*dot(X[i,:], w_0)+λ*transpose(w_0)*w_0)) for i=1:n)
    ∇f_0 = compute_∇f(w_0, y, X, λ)

    # Outer minimization problem
    outer_min_model = Model(solver=GurobiSolver(OutputFlag=0, gurobi_env))
    @variable(outer_min_model, t >= 0)
    @variable(outer_min_model, w[1:p])
    #@constraint(outer_min_model, [j=1:p], -1 <= w[j] <= 1)
    @constraint(outer_min_model, t >= f_0 + (dot(∇f_0, w)-dot(∇f_0, w_0)))
    @constraint(outer_min_model, [j=1:p], 10 >= w[j])
    @constraint(outer_min_model, [j=1:p], w[j] >= -10)
    @objective(outer_min_model, Min, t)
    k = 1 # Number of constraints in the final problem
    solve(outer_min_model)

    # New steps k
    t_k = getvalue(t)
    w_k = getvalue(w)  
    f_k = sum(min(log(1+exp(-y[i]*dot(X[i,:], w_k)+λ*transpose(w_k)*w_k)),100) for i=1:n)
    ∇f_k = compute_∇f(w_k, y, X, λ)
    
    while abs(f_k - t_k) >= ε # error

        push!(errors, f_k - t_k)
        @constraint(outer_min_model,t >= f_k +(dot(∇f_k, w)-dot(∇f_k, w_k)))
        k += 1
        solve(outer_min_model)
        # Updating all the values
        t_k = getvalue(t)
        w_k = getvalue(w)
        f_k = sum(min(log(1+exp(-y[i]*dot(X[i,:], w_k)+λ*transpose(w_k)*w_k)),10000) for i=1:n)

        ∇f_k = compute_∇f(w_k, y, X, λ)
         if k%100 == 0
             println("Number of constraints: ", k, "\t Error = ", abs(t_k - f_k))
#             println("f",f_k)
#             println("t",t_k)
#             println("∇f_k",∇f_k)
         end
        if k > 20000
            break
        end
    end
    push!(errors, f_k - t_k)
    return t_k, f_k, w_k, errors
end


rlr (generic function with 1 method)

### Stable Robust Logistic Regression

In [179]:
function classification_metrics(preds, actual)
    accuracy = sum(preds .== actual)/size(preds)[1]
    tpr = dot(
        (preds.==1),actual.==1
        )/(
        dot((preds.==1),actual.==1
            ) + dot(
            (preds.==-1),actual.==1)
    )
    fpr = dot(
        (preds.==1),actual.==-1
        )/ (
        dot((preds.==1),actual.==-1
            ) + dot(
            (preds.==-1),actual.==-1)
    )
    return accuracy, tpr, fpr
end

function compute_derivative(w, z, y, X, alpha)
    
    n, p = size(X)
    
    derivative = sum(
        (
            z[i]*(2*alpha.*w-y[i].*X[i,:])*exp(-y[i]*dot(w,X[i,:])+alpha.*dot(w,w))
            )/(
            exp(-y[i]*dot(w,X[i,:])+alpha.*dot(w,w))+1
            ) for i in 1:n
        )
    
    return derivative
    
end

function inner(w, y, X, k, alpha)
    
    n, p = size(X)
    
    model_inner = Model(solver=GurobiSolver(OutputFlag=0,gurobi_env))
    
    @variable(model_inner, z[1:n] >= 0)
    
    @constraint(model_inner, [i=1:n], 1 >= z[i])
    @constraint(model_inner, sum(z) <= k)
    
    @objective(model_inner,
        Max,
        sum(z[i]*log(1+exp(-y[i]*dot(X[i,:], w)+ alpha*dot(w,w))) for i=1:n)
    )
    
    solve(model_inner)
    
    optimal_z = getvalue(z)
    optimal_f = getobjectivevalue(model_inner)
    
    return optimal_z, optimal_f
    
end

function srlr(y, X, epsilon, k, alpha)
    deltas = []
    n, p = size(X)
    initialization_w = [0 for i in 1:p]
    initialization_z, initial_f = inner(initialization_w, y, X, k, alpha)
    initial_derivative_f = compute_derivative(initialization_w, initialization_z, y, X, alpha)
    
    model_outer = Model(solver=GurobiSolver(OutputFlag=0, gurobi_env))
    
    @variable(model_outer, t >= 0)
    @variable(model_outer, w[1:p])
    
    @constraint(
        model_outer, t >= initial_f + dot(initial_derivative_f, w)-dot(initial_derivative_f, initialization_w)
    )
    @constraint(model_outer, [j=1:p], 10 >= w[j])
    @constraint(model_outer, [j=1:p], w[j] >= -10)
    
    @objective(model_outer, Min, t)
    
    number_const = 1
    solve(model_outer)

    t_new = getvalue(t)
    w_new = getvalue(w)
    z_new, f_new = inner(w_new, y, X, k, alpha)

    derivative_f_new = compute_derivative(w_new, z_new, y, X, alpha)
    while abs(f_new - t_new) >= epsilon
        
        push!(deltas, f_new - t_new)
        
        @constraint(model_outer,t >= f_new + dot(derivative_f_new, w)-dot(derivative_f_new, w_new))
        
        number_const += 1
        solve(model_outer)
        t_new = getvalue(t)
        w_new = getvalue(w)
        z_new, f_new = inner(w_new, y, X, k, alpha)

        derivative_f_new = compute_derivative(w_new, z_new, y, X, alpha)
        
        if number_const%100 == 0
            println("Number of constraints: ", number_const, "\t Step delta = ", abs(t_new - f_new))
        end
        
        if number_const > 100000
            break
            
        end
    end
    push!(deltas, f_new - t_new)
    return t_new, f_new, w_new, z_new, deltas
end


srlr (generic function with 1 method)

#### Validation of SRLR

In [152]:
function classification_metrics(preds, actual)
    accuracy = sum(preds .== actual)/size(preds)[1]
    tpr = dot(
        (preds.==1),actual.==1
        )/(
        dot((preds.==1),actual.==1
            ) + dot(
            (preds.==-1),actual.==1)
    )
    fpr = dot(
        (preds.==1),actual.==-1
        )/ (
        dot((preds.==1),actual.==-1
            ) + dot(
            (preds.==-1),actual.==-1)
    )
    return accuracy, tpr, fpr
end


classification_metrics (generic function with 1 method)

In [153]:
function validation_srlr(X_trainvalid, y_trainvalid, alpha_values, epsilon = 0.001, persent_traindata = 0.8)
    for alpha in alpha_values
        (t_opt, f_opt, w_opt, z_opt, deltas) = srlr(y_trainvalid, X_trainvalid, epsilon, floor(Int, persent_traindata*size(X)[1]), alpha)
        
        train_index = z_opt.>0
        validation_index = z_opt.==0
        
        X_train = X_trainvalid[train_index,:]
        X_val = X_trainvalid[validation_index,:]
        y_train = y_trainvalid[train_index,:]
        y_val = y_trainvalid[validation_index,:]
        
        
    end
end

validation_srlr (generic function with 3 methods)

In [154]:
(t_opt, f_opt, w_opt, z_opt, deltas) = srlr(y, X, 0.0001, floor(Int, 0.7*size(X)[1]), 0.1)

Number of constraints: 100	 Step delta = 450.611077280118
Number of constraints: 200	 Step delta = 34.42976491763079
Number of constraints: 300	 Step delta = 12.731731705738866
Number of constraints: 400	 Step delta = 1.793293794982901
Number of constraints: 500	 Step delta = 0.8262476568622787
Number of constraints: 600	 Step delta = 0.42397979336601566
Number of constraints: 700	 Step delta = 0.4087229006754569
Number of constraints: 800	 Step delta = 0.49288165758230207
Number of constraints: 900	 Step delta = 0.07979186324337206
Number of constraints: 1000	 Step delta = 0.009430091224885473
Number of constraints: 1100	 Step delta = 0.009365729097908115
Number of constraints: 1200	 Step delta = 0.004592697619273167
Number of constraints: 1300	 Step delta = 0.0011569238658921677
Number of constraints: 1400	 Step delta = 0.0005311643324148463
Number of constraints: 1500	 Step delta = 0.00016919335303100524


(1774.4567317380743, 1774.4568251702344, [5.84426e-5, -0.000106432, -0.000123686, 0.000253578, 0.000202987, 0.000224945, -9.48432e-5, 0.000152416, 6.83622e-5, -2.63029e-5, 0.000127889, 0.000280977, -0.000119299, -4.65849e-5], [1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0  …  0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0], Any[4148.27, 4109.31, 3757.92, 3630.09, 3670.98, 4019.53, 3475.53, 10564.8, 4785.48, 3443.78  …  0.000779462, 0.000295679, 0.000198557, 0.000113739, 0.000691227, 0.000264811, 0.000112764, 0.000178878, 0.000145767, 9.34322e-5])

In [155]:
train_index = z_opt.>0
validation_index = z_opt.==0

X_train = X_trainvalid[train_index,:]
X_val = X_trainvalid[validation_index,:]
y_train = y_trainvalid[train_index,:]
y_val = y_trainvalid[validation_index,:]

BoundsError: BoundsError: attempt to access 2926×14 Array{Float64,2} at index [Base.LogicalIndex(Bool[true, true, true, false, true, true, false, true, true, true, true, true, true, true, false, true, false, true, true, true, true, true, true, false, true, false, true, true, true, false, true, true, true, true, true, true, true, true, false, true, true, true, false, false, true, true, true, true, true, true, true, false, true, true, true, true, false, true, true, true, true, true, true, true, false, false, true, true, true, true, false, true, true, true, true, true, true, true, true, true, true, true, true, false, true, true, true, true, true, true, true, true, true, true, true, false, false, true, true, true, true, false, false, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, false, false, false, true, true, false, true, false, true, true, false, true, true, true, false, false, true, true, true, true, true, true, true, true, true, true, true, true, true, false, true, false, true, true, true, true, true, false, true, true, true, true, true, true, true, true, true, false, true, true, false, false, true, false, true, false, true, true, true, true, true, true, true, false, true, true, false, true, true, true, true, true, true, true, true, true, true, true, true, true, false, false, true, true, false, true, true, true, false, true, true, true, false, true, true, false, true, true, true, true, true, true, true, true, true, true, true, true, true, false, true, true, true, true, true, false, true, true, true, false, true, true, true, true, true, false, true, false, true, true, false, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, false, true, true, true, true, true, true, true, true, true, true, false, true, true, false, true, true, true, true, true, true, false, true, true, false, true, true, false, true, true, true, true, true, true, true, true, true, true, true, true, false, false, true, true, false, true, true, true, false, true, false, true, false, true, true, true, false, false, true, true, true, true, true, true, true, true, true, true, true, true, true, false, true, true, false, true, true, false, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, false, true, true, true, true, true, true, true, true, false, false, true, true, false, true, true, true, true, true, true, false, true, true, true, true, false, true, true, true, true, true, true, true, true, true, false, true, true, true, true, true, true, false, true, false, false, false, true, true, true, true, true, true, true, true, false, true, true, true, true, true, true, false, true, false, true, true, true, true, true, true, true, false, false, true, true, true, true, false, true, true, true, true, true, true, false, true, true, true, true, true, false, true, true, true, false, false, true, true, true, true, true, true, true, true, true, true, false, false, true, true, true, true, true, true, true, true, false, false, false, true, false, true, true, false, true, true, true, true, true, true, true, true, false, false, true, true, false, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, false, true, true, true, true, true, false, true, true, true, true, true, true, true, true, true, true, true, true, true, true, false, true, true, true, true, true, true, true, true, true, true, false, true, true, true, true, true, true, true, true, true, true, false, true, true, true, true, true, true, false, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, false, true, true, false, true, true, true, true, true, true, false, true, true, true, true, true, true, false, false, true, true, false, true, true, true, false, true, true, false, true, true, false, true, false, true, true, true, true, false, true, false, true, true, false, true, true, true, false, true, true, true, true, false, true, true, true, true, true, true, false, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, false, false, true, true, false, true, false, true, true, true, false, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, false, true, true, false, false, true, true, true, true, true, true, false, false, true, true, true, false, true, true, false, true, true, false, true, true, true, true, false, true, true, false, false, true, true, true, true, true, true, true, true, true, true, true, true, true, true, false, true, true, true, true, true, true, true, true, true, true, false, true, true, true, false, true, false, true, true, true, true, true, true, true, true, false, true, true, false, true, true, true, true, true, true, true, true, true, false, true, true, false, true, true, true, false, true, true, false, true, false, true, false, true, true, true, true, true, false, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, false, true, true, true, true, true, true, true, true, true, true, false, true, true, true, true, false, true, true, true, false, true, true, false, true, true, true, true, true, true, true, true, true, false, true, true, false, true, true, false, true, true, true, true, false, true, true, true, true, false, true, false, true, false, true, false, true, true, false, true, true, true, true, true, true, true, true, false, false, true, true, true, true, false, true, true, true, true, true, true, true, true, true, false, true, true, true, true, true, true, true, false, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, false, true, true, false, true, false, true, true, true, false, true, false, true, true, true, true, true, true, false, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, false, true, true, true, true, true, true, true, true, false, true, true, true, true, true, true, true, true, true, true, true, true, true, false, true, true, true, true, true, true, true, false, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, false, true, true, true, true, false, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, false, false, true, true, true, false, true, true, true, false, true, true, true, true, true, true, false, true, false, false, true, true, true, true, true, false, true, true, true, true, true, false, false, true, true, true, true, true, true, true, true, false, false, false, true, true, true, true, true, true, true, true, true, true, true, true, false, true, true, true, true, true, true, false, true, true, true, true, true, true, true, true, true, true, true, true, false, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, false, true, true, false, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, false, true, false, true, true, true, true, true, false, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, false, true, false, true, true, true, false, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, false, true, true, true, true, true, true, true, true, true, true, true, true, false, true, false, true, true, true, true, true, false, true, true, false, true, true, true, true, false, false, true, false, true, true, true, true, true, true, true, true, false, true, true, true, true, true, false, true, true, true, false, true, true, true, true, true, true, true, true, true, true, true, true, false, true, true, true, true, true, true, true, true, true, true, true, true, true, true, false, true, true, true, true, true, true, true, true, false, true, true, false, true, true, true, true, true, true, true, true, true, false, true, true, false, true, true, true, true, true, true, true, true, true, true, false, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, false, false, true, true, true, false, true, true, true, true, true, false, true, true, true, true, true, true, true, true, true, true, false, true, true, true, true, false, true, true, true, true, true, true, true, true, true, true, false, true, true, false, true, true, true, true, true, true, false, true, true, true, true, true, false, true, true, true, false, true, true, false, true, true, true, true, true, true, true, false, true, true, true, true, true, true, true, false, true, true, true, true, false, true, true, true, false, true, true, true, false, true, true, true, true, true, true, true, true, true, false, true, true, true, false, true, true, true, true, true, false, true, true, true, true, true, true, true, false, true, true, true, false, true, true, true, true, true, true, true, true, true, true, true, true, true, false, true, true, false, true, true, true, true, true, true, false, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, false, false, true, true, true, true, true, false, true, true, true, true, true, true, true, true, false, false, true, true, true, true, true, false, true, true, true, true, true, true, false, true, true, true, true, true, false, true, true, true, true, false, true, true, true, true, true, true, false, true, true, true, true, true, true, false, true, true, true, true, true, true, true, true, true, true, true, true, true, false, true, true, true, true, false, true, true, true, true, true, false, true, true, true, true, true, true, false, true, true, true, true, true, true, false, true, true, true, true, false, true, false, true, true, true, true, true, true, false, true, true, true, true, true, true, true, true, true, true, true, true, true, false, true, true, true, false, true, true, true, true, true, true, true, false, true, true, true, true, true, true, true, true, false, false, true, true, true, true, true, true, true, true, false, true, true, true, true, false, false, false, true, false, true, true, true, true, true, true, true, true, false, true, true, true, true, true, false, true, true, true, false, false, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, false, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, false, true, true, false, true, true, true, true, true, false, true, true, true, true, true, true, false, true, true, false, true, true, true, true, true, true, true, true, false, true, true, true, true, true, true, true, false, true, true, true, true, true, true, false, true, true, false, true, true, true, true, true, true, false, true, true, true, false, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, false, false, false, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, false, true, true, true, true, true, true, true, true, true, true, true, false, true, true, true, true, true, true, true, true, true, true, true, true, false, true, true, false, true, true, true, true, false, true, false, false, true, true, false, true, false, true, true, true, true, false, true, true, false, true, true, true, true, true, true, true, true, false, true, true, true, true, true, true, false, true, true, false, true, true, false, false, true, true, true, true, false, true, true, true, true, true, true, false, true, true, true, true, true, true, true, true, true, true, true, true, false, true, false, true, true, true, true, true, true, true, false, true, true, true, true, true, true, false, true, true, true, true, true, true, true, true, true, true, false, true, true, true, false, true, false, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, false, false, true, true, true, true, true, false, true, false, true, true, true, false, true, true, false, true, true, true, true, true, true, true, true, true, true, true, true, true, false, true, true, true, false, true, false, false, true, true, true, true, true, false, true, true, true, true, true, true, true, true, true, false, true, true, true, true, true, true, true, false, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, false, true, true, true, true, true, true, true, true, true, true, true, true, false, true, true, true, true, true, true, true, true, false, true, true, true, true, true, true, true, true, true, false, true, true, true, true, true, true, true, true, false, true, true, true, true, false, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, false, true, true, true, true, true, true, false, false, true, true, false, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, false, true, true, true, true, true, true, true, true, true, true, false, true, false, true, true, true, true, true, true, true, true, true, true, false, true, true, true, true, true, false, true, true, true, true, true, true, true, true, true, true, true, true, false, true, true, true, true, true, true, true, false, true, false, true, true, true, true, false, true, true, true, true, false, true, false, true, false, true, true, true, true, true, true, true, true, false, false, true, false, false, true, true, true, true, false, true, true, true, true, true, true, true, true, true, true, true, true, true, true, false, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, false, false, true, false, true, true, true, false, true, false, true, false, false, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, false, true, true, true, true, true, true, false, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, false, true, true, true, false, true, true, true, true, false, true, false, true, true, true, true, true, true, true, true, false, true, true, true, true, false, true, false, true, true, true, false, true, true, true, true, true, true, false, true, true, true, true, true, false, false, true, false, false, true, true, false, true, true, true, true, false, true, true, true, true, true, true, true, true, true, true, false, true, true, false, false, true, false, true, true, false, true, true, true, true, true, true, true, true, true, true, false, true, true, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, true, true, true, true, true, true, false, true, false, true, true, true, true, true, true, true, true, true, true, true, true, true, false, true, true, true, true, true, false, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, false, true, true, true, true, true, true, false, true, true, true, true, true, true, true, false, true, true, false, true, true, true, true, true, false, true, true, true, true, false, false, true, false, true, true, true, true, false, false, true, true, true, false, false, true, true, true, true, false, false, true, true, true, true, true, false, true, true, true, true, true, true, false, true, true, false, true, false, true, true, true, false, true, true, true, false, false, true, false, true, true, true, true, true, false, true, true, true, true, false, true, true, true, true, true, true, false, true, true, true, true, false, false, true, true, true, true, true, true, false, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, false, true, true, true, true, true, true, true, true, true, true, false, true, true, true, true, true, true, true, false, true, true, true, true, true, true, true, false, true, true, true, true, true, true, true, true, true, true, true, false, true, true, false, true, true, true, true, false, true, true, true, true, false, true, true, true, true, true, false, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, false, true, false, true, true, true, true, true, true, true, true, true, false, true, true, true, false, true, true, true, false, true, true, true, true, true, true, true, true, true, true, true, true, false, false, true, true, true, true, true, true, true, true, false, true, true, true, true, true, true, false, false, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, false, true, false, true, true, false, true, true, true, true, true, true, true, true, false, true, false, true, true, true, false, true, true, true, false, true, false, true, false, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, false, true, true, true, true, true, true, true, true, true, false, true, true, true, true, true, false, true, true, true, true, true, true, false, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, false, false, true, true, false, false, false, true, true, true, true, false, true, true, true, true, true, true, true, false, true, true, true, true, false, false, true, true, true, true, true, false, false, true, false, true, true, false, true, true, true, false, false, true, true, true, true]), Base.Slice(Base.OneTo(14))]

In [59]:
rlr(y, X, 0.00001, 0.1)

Number of constraints: 100	 Error = 16464.238273702544
Number of constraints: 200	 Error = 32287.794876331292
Number of constraints: 300	 Error = 4364.517851371021
Number of constraints: 400	 Error = 932.8089195938776
Number of constraints: 500	 Error = 374.84910259835215
Number of constraints: 600	 Error = 17.937704633541216
Number of constraints: 700	 Error = 1.9139566112585271
Number of constraints: 800	 Error = 0.06895640810398618
Number of constraints: 900	 Error = 0.002712767700813856
Number of constraints: 1000	 Error = 7.566360454802634e-5


(2149.643689369181, 2149.6436984113043, [6.72665e-5, 0.00592276, 4.55819e-5, 0.00128986, 6.63366e-5, 1.09402e-5, 7.74492e-6, -2.84327e-5, 0.0230511, 0.0146731, 0.00899947, 0.00288296, 0.00857109, 0.00889568], Any[310100.0, 403130.0, 3.91501e5, 3.59522e5, 3.41737e5, 316657.0, 3.45577e5, 2.84601e5, 276239.0, 2.36074e5  …  1.44288e-5, 1.25158e-5, 1.42291e-5, 1.45662e-5, 1.21705e-5, 1.28536e-5, 1.50384e-5, 1.6027e-5, 1.05034e-5, 9.04212e-6])

In [15]:
z_opt

3658-element Array{Float64,1}:
 1.0
 0.0
 0.0
 0.0
 1.0
 1.0
 0.0
 1.0
 1.0
 1.0
 1.0
 1.0
 1.0
 ⋮  
 1.0
 1.0
 0.0
 1.0
 1.0
 1.0
 0.0
 0.0
 1.0
 1.0
 1.0
 1.0

### Result creation

In [180]:
X_trainvalid, X_test, y_trainvalid, y_test = trainvalid_test_split(X, y, 0.8)

([1.0 35.0 … 75.0 88.0; 0.0 63.0 … 68.0 95.0; … ; 1.0 53.0 … 112.0 113.0; 1.0 53.0 … 76.0 108.0], [1.0 41.0 … 75.0 64.0; 0.0 59.0 … 58.0 88.0; … ; 1.0 43.0 … 64.0 90.0; 1.0 67.0 … 65.0 78.0], [-1, 1, -1, -1, 1, -1, -1, -1, -1, -1  …  -1, 1, -1, -1, -1, -1, 1, -1, -1, 1], [-1, -1, -1, -1, -1, -1, 1, -1, -1, -1  …  -1, -1, -1, -1, -1, -1, -1, -1, -1, 1])

In [182]:
(t_opt, f_opt, w_opt, z_opt, deltas) = srlr(y_trainvalid, X_trainvalid, 0.0001, floor(Int, 0.8*size(X_trainvalid)[1]), 0.01)

Number of constraints: 100	 Step delta = 0.14441777389970412


(1148.6599618827693, 1148.6600589144552, [0.1, 0.0121299, -0.1, 0.0103748, 0.1, 0.1, 0.1, 0.1, -0.00208591, 0.0186503, -0.0258066, -0.0376939, -0.0173598, 0.00233043], [0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0  …  1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0], Any[33138.0, 2796.93, 2062.35, 2797.09, 1754.2, 1122.56, 1025.03, 1098.77, 895.766, 755.509  …  0.000307509, 0.000224657, 0.000167106, 0.00033757, 0.000143759, 0.000255214, 0.000152124, 0.000132474, 0.000155929, 9.70317e-5])

In [183]:
train_index = z_opt.>0
validation_index = z_opt.==0

X_train = X_trainvalid[train_index,:]
X_val = X_trainvalid[validation_index,:]
y_train = y_trainvalid[train_index,:]
y_val = y_trainvalid[validation_index,:]

586×1 Array{Int64,2}:
 -1
 -1
 -1
 -1
 -1
 -1
 -1
 -1
 -1
 -1
 -1
 -1
 -1
  ⋮
 -1
 -1
 -1
 -1
 -1
 -1
 -1
 -1
 -1
 -1
 -1
 -1

In [184]:
sum(y_val)

-586

In [185]:
sum(y_trainvalid)

-2024

In [161]:
sum(y_train)

0

In [188]:
function compute_∇f(w_k, z_k, y, X, λ)
    n, p = size(X)
    ∇f_k = sum(-z_k[i]/(1+exp(y[i]*dot(w_k,X[i,:])))*y[i].*X[i,:] for i in 1:n) + 2*λ.*w_k
    return ∇f_k
end

function solve_inner_max_problem(w_k, y, X, K, λ)
    n, p = size(X)
    model_inner_max = Model(solver=GurobiSolver(OutputFlag=0,gurobi_env))
    @variable(model_inner_max, z[1:n] >= 0)
    @constraint(model_inner_max, [i=1:n], 1 >= z[i])
    @constraint(model_inner_max, sum(z) <= K)
    @objective(
        model_inner_max,
        Max,
        sum(z[i]*log(1+exp(-y[i]*dot(X[i,:], w_k))) for i=1:n)
    )
    solve(model_inner_max)
    optimal_z_k = getvalue(z)
    optimal_f_k = getobjectivevalue(model_inner_max) + λ*dot(w_k,w_k)
    return optimal_z_k, optimal_f_k
end

function scores(preds, gt)
    acc = sum(preds .== gt)/size(preds)[1]
    TPR = dot((preds.==1),gt.==1)/(dot((preds.==1),gt.==1) + dot((preds.==-1),gt.==1))
    FPR = dot((preds.==1),gt.==-1)/ (dot((preds.==1),gt.==-1) + dot((preds.==-1),gt.==-1))
    return acc, TPR, FPR
end

### Cutting Planes Implementation ###
function stable_LR_cutting_planes(y, X, ε, K,λ)
    errors = []
    n, p = size(X)
    # Initialization values and step 0
    w_0 = [0 for i in 1:p]
    #w_0 = [rand(Uniform(-0.5, 0.5)) for i in 1:p]
    z_0, f_0 = solve_inner_max_problem(w_0, y, X, K, λ)
    ∇f_0 = compute_∇f(w_0, z_0, y, X, λ)

    # Outer minimization problem
    outer_min_model = Model(solver=GurobiSolver(OutputFlag=0, gurobi_env))
    @variable(outer_min_model, t >= 0)
    @variable(outer_min_model, w[1:p])
    #@constraint(outer_min_model, [j=1:p], -1 <= w[j] <= 1)
    @constraint(outer_min_model, t >= f_0 + dot(∇f_0, w)-dot(∇f_0, w_0))
    @constraint(outer_min_model, [j=1:p], 0.1 >= w[j])
    @constraint(outer_min_model, [j=1:p], w[j] >= -0.1)
    @objective(outer_min_model, Min, t)
    k = 1 # Number of constraints in the final problem
    solve(outer_min_model)

    # New steps k
    t_k = getvalue(t)
    w_k = getvalue(w)
    z_k, f_k = solve_inner_max_problem(w_k, y, X, K, λ)

    ∇f_k = compute_∇f(w_k, z_k, y, X, λ)
    while abs(f_k - t_k) >= ε # error
        push!(errors, f_k - t_k)
        @constraint(outer_min_model,t >= f_k + dot(∇f_k, w)-dot(∇f_k, w_k))
        k += 1
        solve(outer_min_model)
        # Updating all the values
        t_k = getvalue(t)
        print(t_k)
        print("\n")
        w_k = getvalue(w)
        z_k, f_k = solve_inner_max_problem(w_k, y, X, K, λ)

        ∇f_k = compute_∇f(w_k, z_k, y, X, λ)
        if k%100 == 0
            println("Number of constraints: ", k, "\t Error = ", abs(t_k - f_k))
        end
        if k > 20000
            break
        end
    end
    push!(errors, f_k - t_k)
    return t_k, f_k, w_k, z_k, errors
end


stable_LR_cutting_planes (generic function with 1 method)

In [189]:
(t_opt, f_opt, w_opt, z_opt, deltas) = stable_LR_cutting_planes(y_trainvalid, X_trainvalid, 1.0, floor(Int, 0.9*size(X_trainvalid)[1]), 1.0)

0.0
319.18432278103467
439.80149192693585
491.24645217327316
496.61806600481435
511.5292570878663
533.3580918453265
585.4559255940593
623.9692366970115
657.3295545250897
684.2197805322998
712.9274320737553
760.214225194249
846.8284979382622
872.8989776379389
886.2979400244232
895.4862033501323
931.3711639017648
981.7544474565683
986.950268910253
1014.0669194872156
1024.9814127763977
1037.2569585044066
1064.1544143160017
1085.098390290637
1090.4602643354533
1092.3014117298221
1095.302013371834
1098.3108485043367
1106.6085881927052
1114.6494359066464
1125.2374271904466
1131.5282126069596
1132.6028515080397
1135.8791850762536
1145.537062671298
1146.1912912648197
1146.6803752008002
1149.9618613342043
1154.884079395701
1161.1935070253733
1161.2784130391312
1165.445426288458
1165.9028331452741
1166.0455458589418
1166.1629845332832
1170.7068407119152
1171.1607154399865
1172.5841502700446
1173.4729298679722
1174.3415635151764
1175.0323633222984
1176.1839822996603
1177.6537688961455
1178.049666

(1183.408458631857, 1184.1986037906315, [0.1, 0.0179555, -0.1, 0.0149135, 0.1, 0.1, 0.1, 0.1, -0.00305162, 0.0251707, -0.0325235, -0.0432265, -0.0239695, 0.00314173], [1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0  …  1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0], Any[33137.5, 2133.8, 2697.06, 3261.12, 1688.36, 1492.27, 1138.8, 1061.17, 1461.61, 803.694  …  1.83653, 2.8017, 2.13146, 1.29692, 1.14674, 1.37727, 1.67488, 1.00529, 1.25581, 0.790145])

In [190]:
w_opt

14-element Array{Float64,1}:
  0.1                  
  0.01795553849364385  
 -0.1                  
  0.014913457869924275 
  0.1                  
  0.1                  
  0.1                  
  0.1                  
 -0.0030516186419622133
  0.025170698237942202 
 -0.03252345585329899  
 -0.04322650210066299  
 -0.023969484443693987 
  0.003141725281295585 

In [191]:
t_opt

1183.408458631857

In [192]:
train_index = z_opt.>0
validation_index = z_opt.==0

X_train = X_trainvalid[train_index,:]
X_val = X_trainvalid[validation_index,:]
y_train = y_trainvalid[train_index,:]
y_val = y_trainvalid[validation_index,:]

293×1 Array{Int64,2}:
 -1
 -1
 -1
 -1
 -1
 -1
 -1
 -1
 -1
 -1
 -1
 -1
 -1
  ⋮
 -1
 -1
 -1
 -1
 -1
 -1
 -1
 -1
 -1
 -1
 -1
 -1

In [193]:
sum(y_val)

-293

In [194]:
sum(y_train)

-1731

In [195]:
length(y_train)

2633

In [196]:
length(y_val)

293