Mnist data

In [22]:
using LinearAlgebra, NonNegLeastSquares, MLDatasets

function alg_ours_with_restart(C::Matrix{Float64}, b::Matrix{Float64}, ϵ::Float64 )
    extra_term_nnls = 0.5*norm(b)^2
    m, n = size(C)
    #number of times we restart
    K = 50 #actually doesn't matter, since the restart_metric becomes really tiny pretty quickly 
    col_norm = norm.(eachcol(C))
    inv_col_norm_square = 1.0 ./(col_norm.^2)
    idx_seq = 1:n

    x0 = zeros(n)
    y0 = zeros(m)
    z0 = zeros(m)

    gamma = 30 #chosen after experiments on synthetic data
    obj = 0 
    
    init_time = time()
    for i=1:K

        xktilde, yktilde, zktilde, new_metric, obj = alg_our_core(C, x0, y0, z0, m, n, inv_col_norm_square, idx_seq, ϵ, gamma, init_time, extra_term_nnls)

        if (new_metric < ϵ)
            break
        end
        x0[:] = xktilde[:]
        y0[:] = yktilde[:]
        z0[:] = zktilde[:]

        gamma/= 2 # chosen after experiments on synthetic data 
    end
    return obj
end

function alg_our_core(C, x0, y0, z0, m, n, inv_col_norm_square, idx_seq, ϵ, gamma, init_time, extra_term_nnls)
        # reset all the scaling factors
        previous_A = 1.0/n
        previous_a = previous_A #a_1, A_1
        a = 1.0/(n*n) # a_2
        A = (n+1.0)/(n * n) # A_2


        # compute x1 using the input x0
        # we redefined phio(x) = 1/2 * ||x-x0||_A^2, hence updating x requires x0
        # the step p(j)+=1/||A:j||^2 implicitly assumes ybar_0 = 0
        # To allow for ybar_0 \neq 0, we change p(j) a bit

        ybar = copy(y0)
        j = rand(idx_seq)
        Aty0m = 1 - dot(ybar, C[:, j]) #                                     dot(̄ȳ, C[:, j])

        p = copy(x0)
        x = copy(x0)
        p[j] += inv_col_norm_square[j]*Aty0m
        x[j] = min(inv_col_norm_square[j], max(0, p[j])) #x and x0 differ only at j

        # compute y1
        # note that y0^(R) and y1^(R) are independent of each othre
        # y1^(R) = Ax1^(R) = Ax0^(R) + A*(x1^R - x0^R) = z0^R + A*(x1^R - x0^R)
        # y0^R may be chosen to be either ytildeK or 0 (our analysis uses 0)
        # Further note that if y0^R = ytildeK, then we must ALSO choose ybar_0 = ytildeK, and
        # this changes how x is init.
        previous_y = copy(y0)
        z = copy(z0)
        z += C[:, j] * (x[j] - x0[j]) # z_1 = A x_1 = A (x_0 + (x_1 - x_0))
        y = copy(z) # y_1 = A xtilde1 = A x_1 = z_1

        # compute ȳ, ỹ (because we need to return it), and some auxiliary variables
        ybar[:] = y[:] + previous_a/a * (y[:] - previous_y[:]) #ybar_1
        s = zeros(n) # need this so that xtildek = xk + sk/Ak; s_1 = 0 (see Chaobing's lemma for why this is needed)
        ỹ = copy(y) # ytildek = convex comb of yi's, so ytilde1 = y1

        # restart value init; -1^{\top}x+0.5\|Ax\|^{2}+.5*\|y\|^{2}+\frac{1}{2\epsilon}\|(-A^{\top}y+1)^{+}\|^{2}
        restart_coeff = 5000
        Atym = -C'*y0 .+ 1
        truncated_Atym = ((Atym) .> 0).*Atym
        restart_val_prev = -sum(x0)+ 0.5* norm(z0)^2 +0.5*norm(y0)^2 + restart_coeff*norm(truncated_Atym)^2
        restart_val_curr = restart_val_prev

        # inits for restart
        iter_count = 0
        Flag = true
        Ax0 = zeros(m)
        new_metric = 0 
        obj = 0 
    
        while (Flag)

            # updates related to x
            j = rand(idx_seq)
            p[j] += - n * inv_col_norm_square[j] * a * (sum(C[:,j] .* ybar) - 1)
            prev_xj = x[j]
            x[j] = min(inv_col_norm_square[j], max(0, p[j]))
            # update s so that we may return xtildek at only O(1) cost
            s[j] += ((n-1) * a -  previous_A) * (x[j] - prev_xj)

            # updates related to y
            previous_y[:] = y[:]
            z[:] += C[:, j] * (x[j] - prev_xj)
            y[:] = previous_A/A * y[:] + a/A * z[:] + (n-1) * a/A * (x[j] - prev_xj) * C[:,j]
            # need to update ytilde each time because that's what we want to return,
            # and we aren't saving all the yi's.
            ỹ[:] = previous_A/A * ỹ[:] + a/A * y[:]

            # update scaling factors
            previous_a, previous_A = a, A
            a = min(n * a/(n-1), sqrt(A)/(2*n))
            A += a

            # update ȳ (note that ȳ_k depends on a_k and a_{k+1})
            ybar[:] = y[:] + previous_a/a * (y[:] - previous_y[:])

            # restart stuff
            iter_count+=1
            # Since we are computing the restart condition without any optimizations,
            # and the restart condition likely involves (expensive) matrix-vector products,
            # we check it only after a certain number of iters have passed.

            if (iter_count % ceil(n*gamma) ==0)
                # compute the restart metric 
                # note that we DO use the restart metric to terminate the OUTER ALG, 
                # , even though we are doing fixed restarts in the INNER ALG 
                Atym = -C'*ỹ .+ 1
                truncated_Atym = ((Atym) .> 0).*Atym
                sumx0 = sum(x + (1.0/previous_A) * s)
                Ax0 = C*(x + (1.0/previous_A) * s)
                new_metric = norm(truncated_Atym)^2

                # For now, we are doing fixed restarts.
                obj = extra_term_nnls-sumx0+ 0.5* norm(Ax0)^2
                print("\n\n Obj = ", obj, ", time since init = ", time() - init_time, ", new metric = ", new_metric)
                print("\n")
                Flag = false
                # end
            end
        end
        return x + (1.0/previous_A) * s, ỹ, Ax0, new_metric, obj
end


train_x, train_y = MNIST.traindata()
A = Array{Int64}
b = Array{Int64}
A_init = reshape(train_x,7,4*28*60000)
#b = train_y
b=ones(7)
#test_x,  test_y  = MNIST.testdata()



7-element Array{Float64,1}:
 1.0
 1.0
 1.0
 1.0
 1.0
 1.0
 1.0

In [25]:
function alg_lawsonhanson(A, b)
    xnnls = nonneg_lsq(A,b;alg=:nnls)  # NNLS

    nnls_optval = 0.5*norm(A*xnnls - b)^2
    
    print("\n\n nnls package value is ", nnls_optval, ", and time is ")
end

function remove_col1(A,b)#Chenghui has an idea to optimize this for speed ("filter")
    s=A'*b # n*1
    B=A[:,vec(s.>0)] # m*b matrix where b is smaller than n
    s=s[vec(s.>0)] # s is b*1 in dimensions
    return B./s'
end


A= remove_col1(A_init,b)
A = Float64.(A)
b = Float64.(b)



@time begin
    xnnls = nonneg_lsq(A,b;alg=:nnls)  # NNLS
    nnls_optval = 0.5*norm( A*xnnls - b)^2
    print("\n\n nnls package value is ", nnls_optval, ", and time is ")
end





 nnls package value is 1.60237371373018e-31, and time is   0.781993 seconds (59 allocations: 207.629 MiB, 2.71% gc time)


In [26]:
epsilon = 0.00001 
@time begin
     our_result = alg_ours_with_restart(A, vcat(b'),epsilon)
     print("\n\n Our result with restart is ", our_result, ", and time is ")
 end



 Obj = -7.105427357601002e-15, time since init = 244.95700001716614, new metric = 7.085683213586454e-8


 Our result with restart is -7.105427357601002e-15, and time is 245.269954 seconds (1.86 G allocations: 249.524 GiB, 5.57% gc time)


In [24]:
size(A_init)

(7, 6720000)