# In [None]:
# Julia version code
"""
nonnegative linear regression 
"""

# Notes
# 1. Note we set min A_ij = 1. This can be done either by dividing A by min Aij or by just adding 1. 
# The first approach is kind of cheating because the scaling automatically makes the error small
# If min Aij is not 1, then init error can be LARGE 

#2. If all xj coordinates are updated in parallel, then comparable to scipy (update_v_parallel and update_X_parallel)

#3. If coordinatewise update, then scaling down by n is better than not scaling. 

#4. Our theoretical algorithm occasionally shows large jumps in error in experiments

#5. The init large error issue is fixed by running a full update step first

#6. x is very very very sparse

#7. TODO m << n case 

using LinearAlgebra, BenchmarkTools, Distributions

# Aliases for multivariate distribution types: one parameterised by its value
# support, plus the two concrete discrete / continuous specialisations.
# NOTE(review): Distributions.jl appears to export aliases with these same names;
# confirm these local definitions are needed. Nothing in the visible code uses them.
const MultivariateDistribution{S<:ValueSupport} = Distribution{Multivariate,S}

const DiscreteMultivariateDistribution   = Distribution{Multivariate, Discrete}
const ContinuousMultivariateDistribution = Distribution{Multivariate, Continuous}

"""
    init_all(eps, m, n)

Build the initial solver state.

# Arguments
- `eps`: target accuracy; only used to size the iteration budget `ktotal`.
- `m`: length of the `y`-type vectors (`ybar`, `ykm`, `y`) and of the running sum.
- `n`: length of the coordinate vector `v`; must be greater than 1 because the
  initial step sizes divide by `n - 1`.

# Returns
The 10-tuple `(ybar, ykm, y, ktotal, ak, akp, Ak, Akp, A_sum_aik_xi, v)`, in the
order the main script destructures it. `akp` / `Akp` are the "next" values of the
step size `ak` and of the cumulative step size `Ak`.
"""
function init_all(eps, m, n)
    ybar = zeros(m)
    ykm = zeros(m)
    y = zeros(m)
    # This is only an approximate iteration budget.
    ktotal = ceil(Int, n / sqrt(eps))
    ak = 1 / (n - 1)
    akp = 1 / (n - 1)^2
    Ak = 1 / (n - 1)
    Akp = n / (n - 1)^2  # equals Ak + akp
    # Running sum A * sum_i a_i * xbar_{i-1}. `update_y` rescales it to produce `y`,
    # so it starts as a zero vector with the same length as `y` (a scalar 0 cannot
    # be combined with a vector via `+`).
    A_sum_aik_xi = zeros(m)
    v = zeros(n)  # vector with same length as x, used to obtain x
    return ybar, ykm, y, ktotal, ak, akp, Ak, Akp, A_sum_aik_xi, v
end

# Entries of `A * y` restricted to the row selection `j`, without forming the
# full product. A single integer index yields a scalar; any other selection
# (index vector, range, mask) yields a vector.
_selected_row_products(A, j::Integer, y) = transpose(view(A, j, :)) * y
_selected_row_products(A, j, y) = A[j, :] * y

"""
    update_v(jk, pjk, A, ykm, v, ak)

Add `(ak / pjk) * ((A * ykm)[jk] - 1)` to `v[jk]` and return `v`.

`v` is modified in place; the returned value is the same array. `jk` may be a
single index or a collection of indices. Dividing by `pjk` scales the step up to
"compensate" for sequential versus parallel updates.
"""
function update_v(jk, pjk, A, ykm, v, ak)
    # Only the rows selected by `jk` are needed, so avoid the full `A * ykm`.
    residual = _selected_row_products(A, jk, ykm) .- 1
    v[jk] += (ak / pjk) * residual
    return v
end


"""
    compute_scaling(A)

Return `-1 ./ r`, where `r` holds the sum of squared magnitudes of each row of
`A`. The result keeps the reduced dimension, i.e. it has one column.
"""
function compute_scaling(A)
    row_energy = sum(abs2, A; dims=2)
    return -1 ./ row_energy
end
    
"""
    update_x(v, jk, xkm, scaling)

Set the `jk` coordinate(s) of `xkm` to `scaling * v[jk]` clamped to the interval
`[0, -scaling]`, and return the updated array.

`jk` may be a single index (with a scalar `scaling`) or a collection of indices
(with a scalar or matching-length `scaling`). The result aliases `xkm`: no copy
is made, so the caller's `xkm` is modified in place.
"""
function update_x(v, jk, xkm, scaling)
    x = xkm  # alias, not a copy
    # Broadcast so scalar and vector `jk` / `scaling` are both handled; `max` is
    # given two arguments (a single tuple argument is not accepted by Julia's `max`).
    x[jk] = min.(max.(scaling .* v[jk], 0), -scaling)  # n has appeared in v
    return x
end

"""
    update_ybar(y, ykm, ak, akp)

Extrapolate from `ykm` through `y`: return `y + (ak / akp) * (y - ykm)`.
"""
function update_ybar(y, ykm, ak, akp)
    weight = ak / akp
    return y + weight * (y - ykm)
end

"""
    update_ak_Ak(Ak, Akp, ak, akp, n)

Advance the step-size sequence by one position.

The new "next" step is `min(n * akp / (n - 1), sqrt(Akp) / 2 / n)`. Returns
`(Ak, Akp, ak, akp)` after the shift: the incoming `Akp` and `akp` become the
current values, and the new `Akp` is the incoming `Akp` plus the new step. The
incoming `Ak` and `ak` do not influence the result.
"""
function update_ak_Ak(Ak, Akp, ak, akp, n)
    growth_cap = n * akp / (n - 1)
    sqrt_cap = sqrt(Akp) / 2 / n
    next_step = min(growth_cap, sqrt_cap)
    return Akp, Akp + next_step, akp, next_step
end

# Placeholder with an empty body: calling it does nothing and returns `nothing`.
# Presumably intended to hold the `aik` update later — confirm with the author.
function update_aik() # Can be optimized
    
end

"""
    update_y(A_sum_aik_xi, A, aik, Ak, x)

Add `A * (aik * x)` to the running sum and return the rescaled sum.

# Arguments
- `A_sum_aik_xi`: running sum A * sum_{i=1}^{k} a_i * xbar_{i-1}; may be the scalar
  `0` before the first update or an array of matching shape afterwards.
- `A`: matrix applied to the weighted vector.
- `aik`: weight of the current term.
- `Ak`: normaliser; the sum is multiplied by `1 / Ak`.
- `x`: vector added to the sum.

# Returns
`(y, A_sum_aik_xi)`: the rescaled sum and the updated running sum. The input
accumulator is not modified; a new array is returned.
"""
function update_y(A_sum_aik_xi, A, aik, Ak, x)
    # Broadcast add so that a scalar-zero initial accumulator is accepted too
    # (plain `+` between a number and an array is not defined in Julia).
    A_sum_aik_xi = A_sum_aik_xi .+ A * (aik * x)
    y = (1 / Ak) * A_sum_aik_xi  # This is already larger than 0.
    return y, A_sum_aik_xi
end

# Oct 31: rescaling

"""
    remove_col1(A, b)

Keep the rows of `A` whose product with `b'` is strictly positive, and rescale
each kept row by that product.

# Returns
`(A, B, s)` where `B` holds the kept rows of the input, `s` is the vector of
their (positive) products with `b'`, and `A` is `B` with row `i` divided by `s[i]`.
"""
function remove_col1(A, b)
    scores = A * b'
    keep = vec(scores .> 0)
    B = A[keep, :]
    s = scores[keep]
    return B ./ s, B, s
end

# In [None]:
# Main loop
# Builds a random problem, rescales it with `remove_col1`, then runs `ktotal`
# randomized single-coordinate updates while recording diagnostics per iteration.
eps = 0.000001
n = 200 # input dimension
m = 20 # Number of data

#This is A is random positive and objective is max_x ||Ax||^2/2-1^T x case.
#A = np.random.rand(200, 20)

# b can also be random and negative. m>>n.
b = rand(1, m) .- 0.3
A = rand(n, m)
#A=A+1
(A, B, s) = remove_col1(A, b)
# remove_col1 can drop rows; every per-coordinate array below is indexed by a row
# of the rescaled A, so n must follow the number of rows that are left.
n = size(A, 1)

# Also need to scale b
xsum = zeros(n)  # running weighted sum of iterates (a vector, not a scalar)
xkm = zeros(n)   # previous iterate; must exist before the first update_x call

#%%
(ybar, ykm, y, ktotal, ak, akp, Ak, Akp, A_sum_aik_xi, v) = init_all(eps, m, n)
scaling_vector = compute_scaling(A)
our_result = zeros(ktotal)
x_norm = zeros(ktotal)
v_norm = zeros(ktotal)
xsum_norm = zeros(ktotal)
# Xmatrix = zeros((n, ktotal))
Akarray = zeros(ktotal)
# ktotal = 1
Akarray[1] = Ak
#%%

for k in 1:ktotal
    # These are rebound every iteration; declaring them global keeps the loop
    # working when the file is run as a script and not only in a notebook/REPL.
    global v, xkm, y, A_sum_aik_xi, ybar, Ak, Akp, ak, akp, xsum

    # sample jk uniformly from 1:n (same distribution as a single multinomial
    # draw with equal weights, without allocating a one-hot vector)
    jk = rand(1:n)
    pjk = 1 / n

    # update v
    # NOTE(review): ykm is never reassigned in this loop, so it keeps its initial
    # value from init_all — confirm whether it should follow y.
    v = update_v(jk, pjk, A, ykm, v, ak)

    # update x
    x = update_x(v, jk, xkm, scaling_vector[jk])

    # update u

    # Update y based on u
    # NOTE(review): update_y multiplies A by its last argument. ybar has length m
    # while A has m columns and n rows, so the resulting y has length n and the
    # update_ybar call below only conforms when n == m. The original marks this
    # step as unresolved — confirm the intended operand.
    (y, A_sum_aik_xi) = update_y(A_sum_aik_xi, A, ak, Ak, ybar) ### this step

    # update ybar
    ybar = update_ybar(y, ykm, ak, akp)

    # update a
    (Ak, Akp, ak, akp) = update_ak_Ak(Ak, Akp, ak, akp, n)
    Akarray[k] = Ak

    #%%
    # update xkm
    xkm = x
    #%%
    # compute running sum
    xsum += ak * x
    # number of coordinates of the running sum that are exactly zero
    xsum_norm[k] = count(iszero, xsum)
    # update ak
    ak = akp

    xsol_temp = (1 / (Ak - ak)) * xsum
    # Undo the row scaling element-wise (A = B ./ s, so A' * x == B' * (x ./ s))
    # and compare against b as a plain vector.
    our_result[k] = norm(B' * (xsol_temp ./ s) - vec(b), 2)^2
    #our_result[k] = norm(A.dot(xsol_temp),2)**2/2-sum(xsol_temp)
end

