In [1]:
using LinearAlgebra, ToeplitzMatrices, Random, IterativeSolvers, FunctionOperators, Printf

### Some helper functions

In [2]:
import Base.size
function Base.size(FO::FunctionOperator, d::Int)
    @assert d in [1, 2]
    prod(d == 1 ? FO.outDims : FO.inDims)
end

In [3]:
# This function randomly samples a $(d₁ \times d₂)$ sparse matrix with ones at $m$ randomly chosen
# coordinates (uniform without replacement). The output matrix has at least $r$ non-zero entries
# in each row and each column, where $r$ is a specified positive integer. The number of ones in the
# output matrix is exactly $m$.
function generateΦ(d₁, d₂, r, m)
    @assert max(d₁, d₂) * r ≤ m
    @assert m ≤ d₁ * d₂
    @assert r ≤ d₁
    @assert r ≤ d₂
    
    # generate a square matrix where each row and each column has exactly r ones
    initial = Circulant([fill(1, r)..., fill(0, min(d₁, d₂) - r)...])
    
    # Extend that matrix to a d₁×d₂ matrix where each row and each column has at least r ones
    # That is accomplished by repeating the "initial" matrix and then cropping
    if d₁ < d₂
        M = repeat(initial, outer = (1, ceil(Int, d₂ / d₁)))
    elseif d₁ > d₂
        M = repeat(initial, outer = (ceil(Int, d₁ / d₂), 1))
    else
        M = initial
    end
    M = M[1:d₁, 1:d₂]
    
    # Randomly switch zeros to ones until exactly m number of ones are in the matrix
    zero_places = findall(M .== 0)
    number_of_missing_ones = m - (d₁*d₂ - length(zero_places))
    number_of_missing_ones > 0 && (M[shuffle(zero_places)[1:number_of_missing_ones]] .= 1)
    
    # Then randomize matrix by permutating rows and columns a couple times
    for i in 1:10
        M .= M[shuffle(1:end), :] # shuffle rows
        M .= M[:, shuffle(1:end)] # shuffle columns
    end
    
    M
end

generateΦ (generic function with 1 method)

In [4]:
function maskToMatrix(Φᴹ)
    m = convert(Int, sum(Φᴹ))
    d₁, d₂ = size(Φᴹ)

    Φ = zeros(m, length(Φᴹ))
    non_zero_places = findall(vec(Φᴹ) .== 1)
    for i in 1:m
        Φ[i, non_zero_places[i]] = 1
    end
    return Φ
end

maskToMatrix (generic function with 1 method)

## Low-rank problem

In [12]:
Random.seed!(0)

d₁, d₂, r = 100, 100, 7
df_LR = r * (d₁ + d₂ - r) # Number of degrees of freedom of the setting
m = floor(Int, min(1.05 * df_LR, d₁ * d₂))

dType = ComplexF64

# Generate a matrix with rank = r
U, Σ, V = randn(dType, d₁, r), Diagonal(randn(r)), randn(dType, d₂, r)

# Ground Truth matrix
Xᴳᵀ = U * Σ * V'

@show size(Xᴳᵀ)
@show rank(Xᴳᵀ);

Φᴹ = generateΦ(d₁, d₂, r, m)
Φ = FunctionOperator{dType}(name = "Φ", inDims = (d₁, d₂), outDims = (d₁, d₂),
    forw = (b,x) -> b .= Φᴹ .* x, backw = (b,x) -> b .= x)
y = Φ * Xᴳᵀ
@show rank(y);

size(Xᴳᵀ) = (100, 100)
rank(Xᴳᵀ) = 7
rank(y) = 100


### Harmonic Mean Iteratively Reweighted Least Squares (HM-IRLS)

In [13]:
function HM_IRLS(
        Xᴳᵀ::AbstractArray,                     # ground truth for MSE evaluation
        y::AbstractArray,                       # under-sampled data
        Φ::FunctionOperator;                    # sampling operator
        img_size::NTuple = size(Xᴳᵀ),           # size of output matrix
        r̃::Int,                                 # rank estimate of solution
        λ::Real,
        maxIter::Union{Int, Nothing} = nothing, # number of CG iteration steps
        N::Int = 10,                            # number of iterations
        verbose::Bool = false)                  # print rank and loss value in each iteration
    
    # Initialize variables
    dType = eltype(y)
    d₁, d₂ = img_size
    ϵ = Inf
    X₀ = Φ' * y   # that's basically Pᵃ * P(M) as y = P(M)
    X = copy(X₀)
    ΦᵃΦ = Φ' * Φ  # i.e. Pᵃ_Omega * P_Omega
    W = I
    
    r, n, s, e = rank(X, atol = 1e-3), norm(Xᴳᵀ - X), svdvals(X)[1], ϵ
    n, s, e = @sprintf("%7.3f", n), @sprintf("%7.3f", s), @sprintf("%7.3f", e)
    verbose && println("k = 0,\trank(X) = $r,\t‖Xᴳᵀ - X‖₂ = $n,\tσ₁ = $s,\tϵ = $e")
    
    for k in 1:N

        # I don't know how to calculate inverses in (29), so I use the equation right after it:
        # ((λ/μ * Wₛ + P' * P) * Wₗ + λ * Wₛ * P' * P)(L) = λ * Wₛ * P' * P(M)
        # Assuming that μ = ∞ and Wₛ = I, that's what I get:
        A = ΦᵃΦ * W + λ * ΦᵃΦ
        b = λ * X₀
        cg!(vec(X), reshape(A, inDims=(d₁*d₂,), outDims=(d₁*d₂,)), vec(b),
            tol=1e-8) # Solve A⋅X = b for X
        
        F = svd(X)
        U, σ, V = F.U, F.S, F.V
        
        ϵ = min(ϵ, σ[r̃+1])
        
        d = min(d₁, d₂)
        H = [1 / (max(σ[i], ϵ) * max(σ[j], ϵ))  for i in 1:d, j in 1:d]
        W = FunctionOperator{dType}(name = "W", inDims = (d₁, d₂), outDims = (d₁, d₂),
            forw = Z -> convert.(dType, U * (H .* (U' * Z * V)) * V'))
        
        r, n, s, e = rank(X, atol = 1e-3), norm(Xᴳᵀ - X), σ[1], ϵ
        n, s, e = @sprintf("%7.3f", n), @sprintf("%7.3f", s), @sprintf("%7.3f", e)
        verbose && println("k = $k,\trank(X) = $r,\t‖Xᴳᵀ - X‖₂ = $n,\tσ₁ = $s,\tϵ = $e")
        
    end
    
end

HM_IRLS (generic function with 1 method)

In [14]:
function HM_IRLS_fancy(
        Xᴳᵀ::AbstractArray,                     # ground truth for MSE evaluation
        y::AbstractArray,                       # under-sampled data
        Φ::FunctionOperator;                    # sampling operator
        img_size::NTuple = size(Xᴳᵀ),           # size of output matrix
        r̃::Int = 0,                             # rank estimate of solution
        N::Int = 10,                            # number of iterations
        maxIter::Union{Int, Nothing} = nothing, # number of CG iteration steps
        verbose::Bool = false)                  # print rank and loss value in each iteration
    
    # Initialize variables
    dType = eltype(y)
    d₁, d₂ = img_size
    maxIter = maxIter isa Nothing ? r̃*(r̃+d₁+d₂) : maxIter
    ϵ = Inf
    X = Φ' * y
    
    r, n, s, e = rank(X, atol = 1e-3), norm(Xᴳᵀ - X), svdvals(X)[1], ϵ
    n, s, e = @sprintf("%.3f", n), @sprintf("%.3f", s), @sprintf("%.3f", e)
    verbose && println("k = 0,\trank(X) = $r,\t‖Xᴳᵀ - X‖₂ = $n,\tσ₁ = $s,\tϵ = $e")
    
    for k in 1:N
        
        F = svd(X)
        U, σ, V = F.U[:, 1:r̃], F.S, F.V[:, 1:r̃]
        
        ϵ = min(ϵ, σ[r̃+1])
        
        
        H = [1 / (max(σ[i], ϵ) * max(σ[j], ϵ))  for i in 1:r̃, j in 1:r̃]
        dH = reshape([1 / (max(σ[r̃+1], ϵ) * max(σ[j], ϵ))  for j in 1:r̃], :, 1)
        P = FunctionOperator{dType}(name="P", inDims = (r̃*(r̃+d₁+d₂),), outDims = (d₁, d₂),
            forw = γ -> begin
                    γ₁ = reshape(γ[1:r̃^2], r̃, r̃)
                    γ₂ = reshape(γ[r̃^2+1:r̃*(r̃+d₂)], r̃, d₂)
                    γ₃ = reshape(γ[r̃*(r̃+d₂)+1:r̃*(r̃+d₁+d₂)], d₁, r̃)
                    (U * γ₁ + γ₃) * V' + U * γ₂
                end,
            backw = Φᵃy -> begin
                    γ₁ = U' * Φᵃy * V
                    γ₂ = U' * Φᵃy * (I - V*V')
                    γ₃ = (I - U*U') * Φᵃy * V
                    vcat(vec(γ₁), vec(γ₂), vec(γ₃))
                end)
        b = P' * Φ' * y
        𝒟⁻¹ = I / Diagonal(vcat(vec(H), vec(kron(dH, ones(1, d₂))), vec(kron(dH, ones(1, d₁))')))
        CG_op = FunctionOperator{dType}(name = "CG_op", inDims = (r̃*(r̃+d₁+d₂),), outDims = (r̃*(r̃+d₁+d₂),),
            forw = γ ->  begin
                    (ϵ^2 * I / (𝒟⁻¹ - ϵ^2 * I)) * γ + P' * Φ' * Φ * P * γ
                end)
        γ = cg(CG_op, b, maxiter = maxIter)
        r = y - Φ * P * γ
        γ_tilde = (𝒟⁻¹ / (𝒟⁻¹ - ϵ^2 * I)) * γ - P' * Φ' * r
        X = Φ' * r + P * γ_tilde
        
        r, n, s, e = rank(X, atol = 1e-3), norm(Xᴳᵀ - X), σ[1], ϵ
        n, s, e = @sprintf("%.3f", n), @sprintf("%.3f", s), @sprintf("%.3f", e)
        verbose && println("k = $k,\trank(X) = $r,\t‖Xᴳᵀ - X‖₂ = $n,\tσ₁ = $s,\tϵ = $e")
        
    end
end

HM_IRLS_fancy (generic function with 1 method)

In [8]:
@time HM_IRLS_fancy(Xᴳᵀ, y, Φ, N = 60, r̃ = r, verbose = true);

k = 0,	rank(X) = 40,	‖Xᴳᵀ - X‖₂ = 168.674,	σ₁ = 48.873,	ϵ = Inf
k = 1,	rank(X) = 40,	‖Xᴳᵀ - X‖₂ = 138.253,	σ₁ = 48.873,	ϵ = 20.208
k = 2,	rank(X) = 40,	‖Xᴳᵀ - X‖₂ = 108.958,	σ₁ = 81.177,	ϵ = 15.392
k = 3,	rank(X) = 40,	‖Xᴳᵀ - X‖₂ = 90.981,	σ₁ = 115.474,	ϵ = 11.159
k = 4,	rank(X) = 40,	‖Xᴳᵀ - X‖₂ = 79.664,	σ₁ = 132.826,	ϵ = 7.877
k = 5,	rank(X) = 40,	‖Xᴳᵀ - X‖₂ = 70.671,	σ₁ = 139.739,	ϵ = 5.936
k = 6,	rank(X) = 40,	‖Xᴳᵀ - X‖₂ = 62.690,	σ₁ = 142.485,	ϵ = 4.350
k = 7,	rank(X) = 40,	‖Xᴳᵀ - X‖₂ = 55.334,	σ₁ = 143.829,	ϵ = 3.041
k = 8,	rank(X) = 40,	‖Xᴳᵀ - X‖₂ = 49.015,	σ₁ = 144.870,	ϵ = 2.205
k = 9,	rank(X) = 40,	‖Xᴳᵀ - X‖₂ = 43.881,	σ₁ = 145.794,	ϵ = 1.792
k = 10,	rank(X) = 40,	‖Xᴳᵀ - X‖₂ = 39.827,	σ₁ = 146.533,	ϵ = 1.480
k = 11,	rank(X) = 40,	‖Xᴳᵀ - X‖₂ = 36.846,	σ₁ = 147.158,	ϵ = 1.214
k = 12,	rank(X) = 40,	‖Xᴳᵀ - X‖₂ = 34.809,	σ₁ = 147.688,	ϵ = 0.996
k = 13,	rank(X) = 40,	‖Xᴳᵀ - X‖₂ = 33.450,	σ₁ = 148.119,	ϵ = 0.831
k = 14,	rank(X) = 40,	‖Xᴳᵀ - X‖₂ = 32.501,	σ₁ = 148.460,	ϵ = 0.716
k = 

*That's how it should work*

In [26]:
@time HM_IRLS(Xᴳᵀ, y, Φ, N = 60, r̃ = r, λ = 0.07, verbose = true);

k = 0,	rank(X) = 100,	‖Xᴳᵀ - X‖₂ = 185.544,	σ₁ =  22.706,	ϵ =     Inf
k = 1,	rank(X) = 100,	‖Xᴳᵀ - X‖₂ = 197.930,	σ₁ =   1.485,	ϵ =   0.884
k = 2,	rank(X) = 100,	‖Xᴳᵀ - X‖₂ = 198.001,	σ₁ =   1.732,	ϵ =   0.859
k = 3,	rank(X) = 100,	‖Xᴳᵀ - X‖₂ = 197.967,	σ₁ =   2.294,	ϵ =   0.833
k = 4,	rank(X) = 100,	‖Xᴳᵀ - X‖₂ = 197.865,	σ₁ =   3.552,	ϵ =   0.808
k = 5,	rank(X) = 100,	‖Xᴳᵀ - X‖₂ = 197.644,	σ₁ =   6.177,	ϵ =   0.808
k = 6,	rank(X) = 100,	‖Xᴳᵀ - X‖₂ = 197.259,	σ₁ =   9.950,	ϵ =   0.808
k = 7,	rank(X) = 100,	‖Xᴳᵀ - X‖₂ = 196.828,	σ₁ =  12.547,	ϵ =   0.808
k = 8,	rank(X) = 100,	‖Xᴳᵀ - X‖₂ = 196.519,	σ₁ =  13.884,	ϵ =   0.808
k = 9,	rank(X) = 100,	‖Xᴳᵀ - X‖₂ = 196.334,	σ₁ =  14.804,	ϵ =   0.808
k = 10,	rank(X) = 100,	‖Xᴳᵀ - X‖₂ = 196.190,	σ₁ =  15.471,	ϵ =   0.808
k = 11,	rank(X) = 100,	‖Xᴳᵀ - X‖₂ = 196.062,	σ₁ =  15.937,	ϵ =   0.808
k = 12,	rank(X) = 100,	‖Xᴳᵀ - X‖₂ = 195.927,	σ₁ =  16.273,	ϵ =   0.808
k = 13,	rank(X) = 100,	‖Xᴳᵀ - X‖₂ = 195.779,	σ₁ =  16.576,	ϵ =   0.808
k = 14,	rank(X) 

*Too low $\lambda$, so it only optimizes for rank, even beyond the original rank (which was 7).*

In [34]:
@time HM_IRLS(Xᴳᵀ, y, Φ, N = 60, r̃ = r, λ = 0.034, verbose = true);

k = 0,	rank(X) = 40,	‖Xᴳᵀ - X‖₂ = 168.674,	σ₁ =  48.873,	ϵ =     Inf
k = 1,	rank(X) = 40,	‖Xᴳᵀ - X‖₂ = 197.327,	σ₁ =   1.607,	ϵ =   0.664
k = 2,	rank(X) = 40,	‖Xᴳᵀ - X‖₂ = 197.578,	σ₁ =   2.371,	ϵ =   0.462
k = 3,	rank(X) = 40,	‖Xᴳᵀ - X‖₂ = 197.418,	σ₁ =   4.657,	ϵ =   0.404
k = 4,	rank(X) = 40,	‖Xᴳᵀ - X‖₂ = 196.124,	σ₁ =  13.316,	ϵ =   0.404
k = 5,	rank(X) = 40,	‖Xᴳᵀ - X‖₂ = 194.475,	σ₁ =  28.935,	ϵ =   0.404
k = 6,	rank(X) = 40,	‖Xᴳᵀ - X‖₂ = 193.457,	σ₁ =  35.118,	ϵ =   0.404
k = 7,	rank(X) = 40,	‖Xᴳᵀ - X‖₂ = 192.565,	σ₁ =  36.689,	ϵ =   0.404
k = 8,	rank(X) = 40,	‖Xᴳᵀ - X‖₂ = 191.701,	σ₁ =  37.068,	ϵ =   0.404
k = 9,	rank(X) = 40,	‖Xᴳᵀ - X‖₂ = 190.837,	σ₁ =  38.034,	ϵ =   0.404
k = 10,	rank(X) = 40,	‖Xᴳᵀ - X‖₂ = 190.108,	σ₁ =  39.172,	ϵ =   0.404
k = 11,	rank(X) = 40,	‖Xᴳᵀ - X‖₂ = 189.705,	σ₁ =  40.037,	ϵ =   0.404
k = 12,	rank(X) = 40,	‖Xᴳᵀ - X‖₂ = 189.472,	σ₁ =  40.230,	ϵ =   0.404
k = 13,	rank(X) = 40,	‖Xᴳᵀ - X‖₂ = 189.299,	σ₁ =  40.443,	ϵ =   0.404
k = 14,	rank(X) = 40,	‖Xᴳᵀ - X

*Too high $\lambda$, so rank and $\epsilon$ don't decrease (even after 1000 iterations there is no difference in $\epsilon$ or in the rank).*

## Low-rank + sparse problem

In [55]:
Random.seed!(1)

d₁, d₂, r, s = 100, 100, 7, 15
df_LR = r * (d₁ + d₂ - r) # Number of degrees of freedom of the setting
m = floor(Int, min(4 * df_LR, d₁ * d₂))

dType = Float64 #ComplexF64

# Generate a matrix with rank = r
U, Σ, V = randn(dType, d₁, r), Diagonal(randn(r)), randn(dType, d₂, r)

# Generate a sparse matrix with exactly s non-zero values
Sᴳᵀ = zeros(d₁, d₂)
Sᴳᵀ[randperm(d₁*d₂)[1:s]] .= 10 #rand(s)

# Ground Truth matrix
Lᴳᵀ = U * Σ * V'
Xᴳᵀ = Lᴳᵀ + Sᴳᵀ

@show size(Xᴳᵀ)
@show rank(Xᴳᵀ);

Φᴹ = generateΦ(d₁, d₂, r, m)
Φ = FunctionOperator{dType}(name = "Φ", inDims = (d₁, d₂), outDims = (d₁, d₂),
    forw = (b,x) -> b .= Φᴹ .* x, backw = (b,x) -> b .= x)
y = Φ * Xᴳᵀ
@show rank(y);

size(Xᴳᵀ) = (100, 100)
rank(Xᴳᵀ) = 19
rank(y) = 100


### Robust PCA with IRLS - Objective as it is in Christian's thesis

In [74]:
function RPCA_IRLS(
        Xᴳᵀ::AbstractArray,                     # ground truth for MSE evaluation
        Lᴳᵀ::AbstractArray,                     # ground truth for MSE evaluation
        Sᴳᵀ::AbstractArray,                     # ground truth for MSE evaluation
        y::AbstractArray,                       # under-sampled data
        Φ::FunctionOperator;                    # sampling operator (P_Omaga in the paper)
        img_size::NTuple = size(Xᴳᵀ),           # size of output matrix
        r̃::Int,                                 # rank estimate of low-rank part
        s̃::Int,                                 # sparsity estimate of sparse part
        λ::Real = 1,                            # regularization param. to balance sparsity and low-rankness
        μ::Real = 1e-3,                         # regularization param. for separation strictness
        δ::Real = 1e-3,                         # smoothing parameter for log (see eq. 5)
        maxIter::Union{Int, Nothing} = nothing, # number of CG iteration steps
        N::Int = 10,                            # number of iterations
        verbose::Bool = false)                  # print rank and loss value in each iteration
    
    # Initialize variables
    dType = eltype(y)
    d₁, d₂ = img_size
    maxIter = maxIter isa Nothing ? max(r̃*(r̃+d₁+d₂), s̃*(s̃+d₁+d₂)) : maxIter
    ϵ = Inf
    X₀ = Φ' * y # that's basically Pᵃ_Omega * P_Omega (M)
    Wₛ, Wₗ = I, I
    ΦᵃΦ = Φ' * Φ
    L, S = copy(X₀), rand(dType, size(X₀))
    X = L + S
    
    r, c, n, s, e = rank(L, atol = 1e-3), norm(y - Φ * X), norm(Xᴳᵀ - X), svdvals(L)[1], ϵ
    n, c, s, e = @sprintf("%7.3f", n), @sprintf("%7.3f", c), @sprintf("%7.3f", s), @sprintf("%7.3f", e)
    verbose && println("k = 0,\trank(L) = $r,\t‖y - Φ * X‖₂ = $c,\t‖Xᴳᵀ - X‖₂ = $n,\tσ₁ = $s,\tϵ = $e")
    
    for k in 1:N
        
        # Equation under (29)
        Aₗ = reshape((λ/μ*Wₛ + ΦᵃΦ) * Wₗ + λ * Wₛ * ΦᵃΦ, inDims=(d₁*d₂,), outDims=(d₁*d₂,))
        bₗ = λ * Wₛ * X₀
        cg!(vec(L), Aₗ, vec(bₗ))#, maxiter = maxIter) # solve Aₗ⋅L = bₗ for L
        
        # Equation 30
        Aₛ = reshape(λ/μ*Wₛ + ΦᵃΦ, inDims=(d₁*d₂,), outDims=(d₁*d₂,))
        bₛ = X₀ - ΦᵃΦ * L
        cg!(vec(S), Aₛ, vec(bₛ))#, maxiter = maxIter) # solve Aₛ⋅S = bₛ for S
        
        F = svd(L)
        U, σ, V = F.U, F.S, F.V
        
        # Equation 32
        Sₛ₊₁ = sort(abs.(vec(S)), rev=true)[s̃+1]
        println("\t\x1b[31m|S|₍ₛ₊₁₎/√(2λ) = ", @sprintf("%12.8g", Sₛ₊₁/sqrt(2λ)),
            ", σᵣ₊₁ = ", @sprintf("%12.8f", svdvals(L)[r̃+1]), "\x1b[0m")
        ϵ = min(max(Sₛ₊₁/sqrt(2λ), svdvals(L)[r̃+1]), ϵ)
        
        # Equation 18
        wₛ = [max(abs(S[i,j]), δ)^-2 for i in 1:d₁, j in 1:d₂]
        println(norm(wₛ))
        # Equation 24
        Wₛ = FunctionOperator{dType}(name = "Wₛ", inDims = (d₁, d₂), outDims = (d₁, d₂),
            forw = Z -> wₛ .* Z)
        # Equation 19
        σ̃ᵢ(i) = max(σ[i], ϵ)^-2
        # Equation between 20 and 21 (definition of H₁)
        # plus 15 with q = 0 (typo in def of H₁: mean of σ̃ᵢ and σ̃ᵢ ??)
        d = min(d₁, d₂)
        H₁ = [sqrt(σ̃ᵢ(i) * σ̃ᵢ(j)) for i in 1:d, j in 1:d]
        # Equation 25 (typo: second V⁽ᵏ⁾ should be adjoint)
        Wₗ = FunctionOperator{dType}(name = "Wₗ", inDims = (d₁, d₂), outDims = (d₁, d₂),
            forw = Z -> U * (H₁ .* (U' * Z * V)) * V')
        
        X = L + S

        # Print
        r, c, n, sig, e = rank(L, atol = 1e-3), norm(y - Φ * X), norm(Xᴳᵀ - X), σ[1], ϵ
        l, s = norm(Lᴳᵀ - L), norm(Sᴳᵀ - S)
        sig, e, c, n, l, s = @sprintf("%12.8f", sig), @sprintf("%12.8f", e),
            @sprintf("%7.3f", c), @sprintf("%7.3f", n), @sprintf("%7.3f", l), @sprintf("%7.3f", s)
        if verbose
            println("k = $k,\trank(L) = $r,\tσ₁(L) = $sig,\tϵ = $e,")
            println("\t‖y - Φ * X‖₂ = $c,\t‖Xᴳᵀ - X‖₂ = $n,\t‖Lᴳᵀ - L‖₂ = $l,\t‖Sᴳᵀ - S‖₂ = $s")
        end
    end
end

RPCA_IRLS (generic function with 2 methods)

In [76]:
@time RPCA_IRLS(Xᴳᵀ, Lᴳᵀ, Sᴳᵀ, y, Φ, N = 50, r̃ = r, s̃ = s, λ = 1, μ = Inf, δ = 1e-3, verbose = true);

k = 0,	rank(L) = 100,	‖y - Φ * X‖₂ =  42.614,	‖Xᴳᵀ - X‖₂ = 162.573,	σ₁ =  70.633,	ϵ =     Inf
	[31m|S|₍ₛ₊₁₎/√(2λ) =    3.1915505, σᵣ₊₁ =  12.00337546[0m
3.82019153659893e6
k = 1,	rank(L) = 100,	σ₁(L) =  35.31660566,	ϵ =  12.00337546,
	‖y - Φ * X‖₂ =   0.000,	‖Xᴳᵀ - X‖₂ = 156.888,	‖Lᴳᵀ - L‖₂ = 170.783,	‖Sᴳᵀ - S‖₂ =  90.071
	[31m|S|₍ₛ₊₁₎/√(2λ) =   0.70615925, σᵣ₊₁ =  23.29676036[0m
4.893557894944113e7
k = 2,	rank(L) = 100,	σ₁(L) =  69.45534116,	ϵ =  12.00337546,
	‖y - Φ * X‖₂ =   0.000,	‖Xᴳᵀ - X‖₂ = 156.888,	‖Lᴳᵀ - L‖₂ = 152.088,	‖Sᴳᵀ - S‖₂ =  39.344
	[31m|S|₍ₛ₊₁₎/√(2λ) =   0.70561428, σᵣ₊₁ =  23.98193126[0m
7.339397938533014e7
k = 3,	rank(L) = 100,	σ₁(L) =  70.62469194,	ϵ =  12.00337546,
	‖y - Φ * X‖₂ =   0.000,	‖Xᴳᵀ - X‖₂ = 156.888,	‖Lᴳᵀ - L‖₂ = 152.702,	‖Sᴳᵀ - S‖₂ =  38.906
	[31m|S|₍ₛ₊₁₎/√(2λ) =   0.70561428, σᵣ₊₁ =  24.00517353[0m
7.353453729703845e7
k = 4,	rank(L) = 100,	σ₁(L) =  70.63260867,	ϵ =  12.00337546,
	‖y - Φ * X‖₂ =   0.000,	‖Xᴳᵀ - X‖₂ = 156.888,	‖Lᴳᵀ - L‖₂ = 152.7

### Robust PCA with IRLS - "Plus epsilon" objective

In [38]:
function RPCA_IRLS_plus_epsilon(
        Xᴳᵀ::AbstractArray,                     # ground truth for MSE evaluation
        y::AbstractArray,                       # under-sampled data
        Φ::FunctionOperator;                    # sampling operator (P_Omaga in the paper)
        img_size::NTuple = size(Xᴳᵀ),           # size of output matrix
        r̃::Int,                                 # rank estimate of low-rank part
        s̃::Int,                                 # sparsity estimate of sparse part
        λ::Real = 1,                            # regularization param. to balance sparsity and low-rankness
        μ::Real = 1e-3,                         # regularization param. for separation strictness
        δ::Real = 1e-3,                         # smoothing parameter for log (see eq. 5)
        maxIter::Union{Int, Nothing} = nothing, # number of CG iteration steps
        N::Int = 10,                            # number of iterations
        verbose::Bool = false)                  # print rank and loss value in each iteration
    
    # Initialize variables
    dType = eltype(y)
    d₁, d₂ = img_size
    maxIter = maxIter isa Nothing ? max(r̃*(r̃+d₁+d₂), s̃*(s̃+d₁+d₂)) : maxIter
    ϵ = Inf
    X₀ = Φ' * y # that's basically Pᵃ_Omega * P_Omega (M)
    Wₛ, Wₗ = I, I
    ΦᵃΦ = Φ' * Φ
    L, S = copy(X₀), zeros(dType, size(X₀))
    X = L + S
    
    r, c, n, s, e = rank(L, atol = 1e-3), norm(y - Φ * X), norm(Xᴳᵀ - X), svdvals(L)[1], ϵ
    n, c, s, e = @sprintf("%7.3f", n), @sprintf("%7.3f", c), @sprintf("%7.3f", s), @sprintf("%7.3f", e)
    verbose && println("k = 0,\trank(L) = $r,\t‖y - Φ * X‖₂ = $c,\t‖Xᴳᵀ - X‖₂ = $n,\tσ₁ = $s,\tϵ = $e")
    
    for k in 1:N
        
        # Equation under (29)
        Aₗ = reshape((λ/μ*Wₛ + ΦᵃΦ) * Wₗ + λ * Wₛ * ΦᵃΦ, inDims=(d₁*d₂,), outDims=(d₁*d₂,))
        bₗ = λ * Wₛ * X₀
        cg!(vec(L), Aₗ, vec(bₗ))#, maxiter = maxIter) # solve Aₗ⋅L = bₗ for L
        
        # Equation 30
        Aₛ = reshape(λ/μ*Wₛ + ΦᵃΦ, inDims=(d₁*d₂,), outDims=(d₁*d₂,))
        bₛ = X₀ - ΦᵃΦ * L
        cg!(vec(S), Aₛ, vec(bₛ))#, maxiter = maxIter) # solve Aₛ⋅S = bₛ for S
        
        F = svd(L)
        U, σ, V = F.U, F.S, F.V
        
        # Equation 32
        Sₛ₊₁ = sort(abs.(vec(S)), rev=true)[s̃+1]
        println("\t\x1b[31m|S|₍ₛ₊₁₎/(2λ) = ", @sprintf("%7.3g", Sₛ₊₁/(2λ)),
            ", σᵣ₊₁ = ", @sprintf("%7.3f", σ[r̃+1]), "\x1b[0m")
        ϵ = min(max(Sₛ₊₁/(2λ), σ[r̃+1]), ϵ)
        
        # Equation 20
        wₛ = [1 / ((max(abs(S[i,j]), δ) + δ) * max(abs(S[i,j]), δ)) for i in 1:d₁, j in 1:d₂]
        # Equation 24
        Wₛ = FunctionOperator{dType}(name = "Wₛ", inDims = (d₁, d₂), outDims = (d₁, d₂),
            forw = Z -> wₛ .* Z)
        # Equation 21
        σ̃ᵢ(i) = 1 / ((max(σ[i], ϵ) + ϵ) * max(σ[i], ϵ))
        # Equation between 20 and 21 (definition of H₁)
        # plus 15 with q = 0 (typo in def of H₁: mean of σ̃ᵢ and σ̃ᵢ ??)
        d = min(d₁, d₂)
        H₁ = [sqrt(σ̃ᵢ(i) * σ̃ᵢ(j)) for i in 1:d, j in 1:d]
        # Equation 25 (typo: second V⁽ᵏ⁾ should be adjoint)
        Wₗ = FunctionOperator{dType}(name = "Wₗ", inDims = (d₁, d₂), outDims = (d₁, d₂),
            forw = Z -> U * (H₁ .* (U' * Z * V)) * V')
        
        X = L + S

        # Print
        r, c, n, s, e = rank(L, atol = 1e-3), norm(y - Φ * X), norm(Xᴳᵀ - X), σ[1], ϵ
        n, c, s, e = @sprintf("%7.3f", n), @sprintf("%7.3f", c), @sprintf("%7.3f", s), @sprintf("%7.3f", e)
        verbose && println("k = $k,\trank(L) = $r,\t‖y - Φ * X‖₂ = $c,\t‖Xᴳᵀ - X‖₂ = $n,\tσ₁ = $s,\tϵ = $e")
    end
end

RPCA_IRLS_plus_epsilon (generic function with 1 method)

In [39]:
@time RPCA_IRLS_plus_epsilon(Xᴳᵀ, y, Φ, N = 20, r̃ = r, s̃ = s, λ = 1, μ = 1, δ = 0.1, verbose = true);

k = 0,	rank(L) = 40,	‖y - Φ * X‖₂ =   0.000,	‖Xᴳᵀ - X‖₂ = 120.816,	σ₁ =  43.166,	ϵ =     Inf
	[31m|S|₍ₛ₊₁₎/(2λ) =    1.15, σᵣ₊₁ =   4.682[0m
k = 1,	rank(L) = 40,	‖y - Φ * X‖₂ =  25.693,	‖Xᴳᵀ - X‖₂ = 123.518,	σ₁ =  14.389,	ϵ =   4.682
	[31m|S|₍ₛ₊₁₎/(2λ) =    0.14, σᵣ₊₁ =  31.437[0m
k = 2,	rank(L) = 40,	‖y - Φ * X‖₂ =   0.609,	‖Xᴳᵀ - X‖₂ = 249.897,	σ₁ = 164.440,	ϵ =   4.682
	[31m|S|₍ₛ₊₁₎/(2λ) = 4.5e-05, σᵣ₊₁ = 140.792[0m
k = 3,	rank(L) = 40,	‖y - Φ * X‖₂ =   0.026,	‖Xᴳᵀ - X‖₂ = 872.408,	σ₁ = 489.011,	ϵ =   4.682
	[31m|S|₍ₛ₊₁₎/(2λ) = 8.38e-06, σᵣ₊₁ =  90.585[0m
k = 4,	rank(L) = 39,	‖y - Φ * X‖₂ =   0.007,	‖Xᴳᵀ - X‖₂ = 525.047,	σ₁ = 308.655,	ϵ =   4.682
	[31m|S|₍ₛ₊₁₎/(2λ) = 1.15e-05, σᵣ₊₁ =  69.417[0m
k = 5,	rank(L) = 35,	‖y - Φ * X‖₂ =   0.009,	‖Xᴳᵀ - X‖₂ = 337.502,	σ₁ = 156.272,	ϵ =   4.682
	[31m|S|₍ₛ₊₁₎/(2λ) = 8.32e-06, σᵣ₊₁ =  84.258[0m
k = 6,	rank(L) = 40,	‖y - Φ * X‖₂ =   0.007,	‖Xᴳᵀ - X‖₂ = 1066.271,	σ₁ = 971.150,	ϵ =   4.682
	[31m|S|₍ₛ₊₁₎/(2λ) = 7.83e-06, σᵣ₊₁ =  85.42

### Robust PCA with IRLS - "Plus epsilon squared" objective

In [40]:
function RPCA_IRLS_plus_epsilon_squared(
        Xᴳᵀ::AbstractArray,                     # ground truth for MSE evaluation
        y::AbstractArray,                       # under-sampled data
        Φ::FunctionOperator;                    # sampling operator (P_Omaga in the paper)
        img_size::NTuple = size(Xᴳᵀ),           # size of output matrix
        r̃::Int,                                 # rank estimate of low-rank part
        s̃::Int,                                 # sparsity estimate of sparse part
        λ::Real = 1,                            # regularization param. to balance sparsity and low-rankness
        μ::Real = 1e-3,                         # regularization param. for separation strictness
        δ::Real = 1e-3,                         # smoothing parameter for log (see eq. 5)
        maxIter::Union{Int, Nothing} = nothing, # number of CG iteration steps
        N::Int = 10,                            # number of iterations
        verbose::Bool = false)                  # print rank and loss value in each iteration
    
    # Initialize variables
    dType = eltype(y)
    d₁, d₂ = img_size
    maxIter = maxIter isa Nothing ? max(r̃*(r̃+d₁+d₂), s̃*(s̃+d₁+d₂)) : maxIter
    ϵ = Inf
    X₀ = Φ' * y # that's basically Pᵃ_Omega * P_Omega (M)
    Wₛ, Wₗ = I, I
    ΦᵃΦ = Φ' * Φ
    L, S = copy(X₀), zeros(dType, size(X₀))
    X = L + S
    
    r, c, n, s, e = rank(L, atol = 1e-3), norm(y - Φ * X), norm(Xᴳᵀ - X), svdvals(L)[1], ϵ
    n, c, s, e = @sprintf("%7.3f", n), @sprintf("%7.3f", c), @sprintf("%7.3f", s), @sprintf("%7.3f", e)
    verbose && println("k = 0,\trank(L) = $r,\t‖y - Φ * X‖₂ = $c,\t‖Xᴳᵀ - X‖₂ = $n,\tσ₁ = $s,\tϵ = $e")
    
    for k in 1:N
        
        # Equation under (29)
        Aₗ = reshape((λ/μ*Wₛ + ΦᵃΦ) * Wₗ + λ * Wₛ * ΦᵃΦ, inDims=(d₁*d₂,), outDims=(d₁*d₂,))
        bₗ = λ * Wₛ * X₀
        cg!(vec(L), Aₗ, vec(bₗ), maxiter = maxIter) # solve Aₗ⋅L = bₗ for L
        
        # Equation 30
        Aₛ = reshape(λ/μ*Wₛ + ΦᵃΦ, inDims=(d₁*d₂,), outDims=(d₁*d₂,))
        bₛ = X₀ - ΦᵃΦ * L
        cg!(vec(S), Aₛ, vec(bₛ), maxiter = maxIter) # solve Aₛ⋅S = bₛ for S
        
        F = svd(L)
        U, σ, V = F.U, F.S, F.V
        
        # Equation 32
        Sₛ₊₁ = sort(abs.(vec(S)), rev=true)[s̃+1]
        println("\t\x1b[31m|S|₍ₛ₊₁₎/(2λ) = ", @sprintf("%7.3g", Sₛ₊₁/(2λ)),
            ", σᵣ₊₁ = ", @sprintf("%7.3f", σ[r̃+1]), "\x1b[0m")
        ϵ = min(max(Sₛ₊₁/(2λ), σ[r̃+1]), ϵ)
        
        # Equation 22
        wₛ = [1 / (max(abs(S[i,j]), δ)^2 + δ^2) for i in 1:d₁, j in 1:d₂]
        # Equation 24
        Wₛ = FunctionOperator{dType}(name = "Wₛ", inDims = (d₁, d₂), outDims = (d₁, d₂),
            forw = Z -> wₛ .* Z)
        # Equation 23
        σ̃ᵢ(i) = 1 / (max(σ[i], ϵ)^2 + ϵ^2)
        # Equation between 22 and 23 (definition of H₁)
        # plus 15 with q = 0 (typo in def of H₁: mean of σ̃ᵢ and σ̃ᵢ ??)
        d = min(d₁, d₂)
        H₁ = [sqrt(σ̃ᵢ(i) * σ̃ᵢ(j)) for i in 1:d, j in 1:d]
        # Equation 25 (typo: second V⁽ᵏ⁾ should be adjoint)
        Wₗ = FunctionOperator{dType}(name = "Wₗ", inDims = (d₁, d₂), outDims = (d₁, d₂),
            forw = Z -> U * (H₁ .* (U' * Z * V)) * V')
        
        X = L + S

        # Print
        r, c, n, s, e = rank(L, atol = 1e-3), norm(y - Φ * X), norm(Xᴳᵀ - X), σ[1], ϵ
        n, c, s, e = @sprintf("%7.3f", n), @sprintf("%7.3f", c), @sprintf("%7.3f", s), @sprintf("%7.3f", e)
        verbose && println("k = $k,\trank(L) = $r,\t‖y - Φ * X‖₂ = $c,\t‖Xᴳᵀ - X‖₂ = $n,\tσ₁ = $s,\tϵ = $e")
    end
end

RPCA_IRLS_plus_epsilon_squared (generic function with 1 method)

In [41]:
@time RPCA_IRLS_plus_epsilon_squared(Xᴳᵀ, y, Φ, N = 20, r̃ = r, s̃ = s, λ = 1, μ = 0.5, δ = 0.5,
    verbose = true);

k = 0,	rank(L) = 40,	‖y - Φ * X‖₂ =   0.000,	‖Xᴳᵀ - X‖₂ = 120.816,	σ₁ =  43.166,	ϵ =     Inf
	[31m|S|₍ₛ₊₁₎/(2λ) =   0.863, σᵣ₊₁ =   3.512[0m
k = 1,	rank(L) = 40,	‖y - Φ * X‖₂ =  38.540,	‖Xᴳᵀ - X‖₂ = 126.814,	σ₁ =  10.792,	ϵ =   3.512
	[31m|S|₍ₛ₊₁₎/(2λ) =   0.159, σᵣ₊₁ =  29.843[0m
k = 2,	rank(L) = 40,	‖y - Φ * X‖₂ =   2.380,	‖Xᴳᵀ - X‖₂ = 266.112,	σ₁ = 210.569,	ϵ =   3.512
	[31m|S|₍ₛ₊₁₎/(2λ) = 0.00104, σᵣ₊₁ = 161.417[0m
k = 3,	rank(L) = 40,	‖y - Φ * X‖₂ =   0.063,	‖Xᴳᵀ - X‖₂ = 1062.786,	σ₁ = 720.538,	ϵ =   3.512
	[31m|S|₍ₛ₊₁₎/(2λ) = 0.000155, σᵣ₊₁ =  75.896[0m
k = 4,	rank(L) = 40,	‖y - Φ * X‖₂ =   0.009,	‖Xᴳᵀ - X‖₂ = 446.000,	σ₁ = 243.409,	ϵ =   3.512
	[31m|S|₍ₛ₊₁₎/(2λ) = 0.000134, σᵣ₊₁ =  59.192[0m
k = 5,	rank(L) = 40,	‖y - Φ * X‖₂ =   0.008,	‖Xᴳᵀ - X‖₂ = 329.649,	σ₁ = 180.848,	ϵ =   3.512
	[31m|S|₍ₛ₊₁₎/(2λ) = 0.000289, σᵣ₊₁ =  82.301[0m
k = 6,	rank(L) = 38,	‖y - Φ * X‖₂ =   0.018,	‖Xᴳᵀ - X‖₂ = 443.385,	σ₁ = 214.135,	ϵ =   3.512
	[31m|S|₍ₛ₊₁₎/(2λ) = 0.000207, σᵣ₊₁ =  75.33