In [1]:
# Supertype for loss functions; the solver calls `grad` and `loss_eval` on values of this type.
abstract GradFunction
# Supertype for regularizers; the solver calls `prox` on values of this type.
abstract ProxFunction

# loss functions
# Squared-error loss. It has no fields and exists only to select the
# matching `grad` and `loss_eval` methods by dispatch.
type SquareLoss <: GradFunction end

# Gradient with respect to `theta` of the squared loss norm(X'*theta - y)^2.
#
# Arguments
#   X     : data matrix; X' must be multipliable with theta.
#   y     : vector with one entry per column of X.
#   theta : parameter vector with one entry per row of X.
#
# Returns 2*X*(X'*theta - y), a vector with the same length as theta.
function grad(::SquareLoss, X, y, theta)
    # Apply X to the residual instead of evaluating X*X'*theta - X*y: the two
    # are algebraically equal, but this form needs only matrix-vector products
    # and never builds the square matrix X*X'.
    residual = X'*theta - y
    return 2*(X*residual)
end
# Value of the squared loss at `theta`: the squared Euclidean norm of the
# residual X'*theta - y.
function loss_eval(::SquareLoss, X, y, theta)
    residual = X'*theta - y
    return norm(residual)^2
end

# Ready-made instance to pass as the `loss` of an `Erm` problem.
square_loss = SquareLoss()


# regularizers
# Quadratic regularizer. It has no fields and exists only to select the
# matching `prox` method by dispatch.
type SquareReg <: ProxFunction end

# Proximal step for SquareReg with step size `t`: scales `z` by 1/(t+1).
# This coincides with the proximal operator of t * (1/2) * norm(x)^2.
prox(::SquareReg, t, z) = z/(t+1)

# Ready-made instance of the quadratic regularizer.
square_reg = SquareReg()

# "No regularization" placeholder. It has no fields and exists only to
# select the identity `prox` method by dispatch.
type TrivialReg <: ProxFunction end

# Proximal step for TrivialReg: the step size `t` is ignored and `z` is
# handed back unchanged.
prox(::TrivialReg, t, z) = z

# Shared no-op regularizer; the three-argument `Erm` constructor uses it as its default.
trivial_reg = TrivialReg()

TrivialReg()

In [2]:
# Empirical risk minimization problem: the data, the loss and regularizer,
# and the slots in which `solve!` stores its result.
type Erm
    X::Matrix{Float64}        # data matrix; its row count fixes the length of opt_x
    y::Vector{Float64}        # observations the squared loss compares against X'*theta
    loss::GradFunction        # loss, used through `grad` and `loss_eval`
    reg::ProxFunction         # regularizer, used through `prox`
    opt_val::Float64          # loss at opt_x; constructors start it at 0, `solve!` overwrites it
    opt_x::Vector{Float64}    # solution; constructors start it at zeros, `solve!` overwrites it
end

# Constructor

# without regularizer
# Uses `trivial_reg` (whose prox returns its input unchanged) as the
# regularizer and hands off to the four-argument constructor.
function Erm(X::Array{Float64, 2}, y::Array{Float64, 1},
        loss::GradFunction)
    return Erm(X, y, loss, trivial_reg)
end

# with regularizer
# Build a problem from data, a loss and a regularizer. The result slots
# start out as opt_val = 0 and opt_x = zeros(size(X, 1)) until `solve!`
# overwrites them.
#
# Throws DimensionMismatch when the number of columns of X differs from
# length(y), so that inconsistent data is rejected here rather than deep
# inside the solver.
function Erm(X::Array{Float64, 2}, y::Array{Float64, 1},
        loss::GradFunction, reg::ProxFunction)
    n_features, n_samples = size(X)
    if n_samples != length(y)
        throw(DimensionMismatch(
            "X has $(n_samples) columns but y has $(length(y)) entries"))
    end
    return Erm(X, y, loss, reg, 0.0, zeros(n_features))
end

# Minimize the problem held in `erm` by proximal gradient steps with a
# backtracking line search, and store the result on `erm`.
#
# Arguments
#   erm      : problem to solve; its `opt_x` and `opt_val` fields are overwritten.
#   x0       : starting point; `nothing` (the default) means the zero vector.
#   tol      : stop once the loss values seen at the start of two consecutive
#              iterations differ by less than this amount.
#   beta     : factor in (0, 1) by which the step size shrinks on each backtrack.
#   max_iter : maximum number of outer iterations.
#   verbose  : when true, log a message on convergence.
#
# Returns the loss at the final iterate (the value also stored in
# `erm.opt_val`). Only the loss is evaluated; the regularizer acts through
# `prox` alone, so the returned value contains no regularization term.
#
# Throws ArgumentError when `beta` is outside (0, 1), DimensionMismatch when
# `x0` does not have one entry per row of `erm.X`, and ErrorException when the
# line search shrinks the step size all the way to zero.
function solve!(erm::Erm, x0=nothing, tol=1e-5, beta=.5, max_iter=300, verbose=true)
    # A factor outside (0, 1) never shrinks the step, so a rejected first
    # step would otherwise repeat forever.
    0 < beta < 1 || throw(ArgumentError("beta must lie in (0, 1), got $(beta)"))

    n_features = size(erm.X, 1)
    if x0 === nothing
        x0 = zeros(n_features)
    elseif length(x0) != n_features
        throw(DimensionMismatch(
            "x0 has $(length(x0)) entries but X has $(n_features) rows"))
    end

    converged = false
    x = x0
    prev_eval = nothing

    for i in 1:max_iter
        # Float literal so the step size keeps a single type while it shrinks.
        lambda = 1.0
        curr_grad = grad(erm.loss, erm.X, erm.y, x)
        curr_eval = loss_eval(erm.loss, erm.X, erm.y, x)

        # Prox iteration: shrink the step until the candidate's loss lies
        # below the quadratic upper model built at x.
        while true
            z = prox(erm.reg, lambda, x - lambda*curr_grad)
            delta = z - x
            z_loss = loss_eval(erm.loss, erm.X, erm.y, z)
            if z_loss <= curr_eval + dot(curr_grad, delta) + 1/(2*lambda)*norm(delta)^2
                x = z
                break
            end
            lambda *= beta
            # With NaN or infinite losses the test above can never pass;
            # fail loudly once the step size has underflowed instead of hanging.
            lambda > 0 || error("Line search failed: step size underflowed to zero")
        end

        if prev_eval !== nothing && abs(prev_eval - curr_eval) < tol
            converged = true
            if verbose
                info("Converged after $(i) iterations")
            end
            break
        end

        prev_eval = curr_eval
    end
    if !converged
        warn("Failed to converge after $(max_iter) iterations")
    end

    erm.opt_x = x
    opt_val = loss_eval(erm.loss, erm.X, erm.y, x)
    erm.opt_val = opt_val
    return opt_val
end

solve! (generic function with 6 methods)

In [9]:
# Random test problem: X is 4x400 and y has 400 entries, all drawn by `rand`.
X = rand(4, 400)
y = rand(400)
# Pair the data with the squared loss and the no-op regularizer.
erm = Erm(X, y, square_loss, trivial_reg)

Erm([0.146037 0.034239 … 0.0188043 0.858352; 0.860557 0.516917 … 0.121058 0.171835; 0.120473 0.444422 … 0.229493 0.804659; 0.901488 0.237072 … 0.0855929 0.0497682],[0.84662,0.29036,0.825389,0.0518828,0.325984,0.492935,0.773159,0.022638,0.951196,0.587353  …  0.246579,0.604908,0.615748,0.049061,0.563839,0.906984,0.199507,0.891373,0.898652,0.932034],SquareLoss(),TrivialReg(),0.0,[0.0,0.0,0.0,0.0])

In [10]:
# Run the solver with all default settings; the cell shows the returned final loss.
solve!(erm)

[1m[34mINFO: Converged after 24 iterations
[0m

38.05470684691566

In [11]:
# Reference value: squared residual at the least-squares solution obtained from the pseudoinverse.
norm(X'*(pinv(X')*y) - y)^2

38.0546922279277

In [12]:
# Reference solution from the pseudoinverse, for comparison with erm.opt_x.
pinv(X')*y

4-element Array{Float64,1}:
 0.190174
 0.23285 
 0.236116
 0.304969

In [13]:
# Solution stored on the problem by solve!.
erm.opt_x

4-element Array{Float64,1}:
 0.190587
 0.232987
 0.235945
 0.304529

In [14]:
# Re-evaluate the loss at the stored solution; this is the same computation solve! uses for erm.opt_val.
loss_eval(square_loss, erm.X, erm.y, erm.opt_x)

38.05470684691566