In [1]:
using LinearAlgebra, ToeplitzMatrices, Random, IterativeSolvers, FunctionOperators,
    EllipsisNotation, Printf, BenchmarkTools
Random.seed!(1)
include("helper_functions.jl");

### IRLS for robust principal component analysis

In [204]:
# Print one line of diagnostics (rank of L, data residual, error against the ground truth,
# largest singular value σ₁ and the current ϵ) for iteration `k` of `RPCA_IRLS`.
function _rpca_irls_report(k, L, S, y, Φ, Xᴳᵀ, σ₁, ϵ)
    X = L + S
    r = rank(L, atol = 1e-3)
    c = @sprintf("%7.3f", norm(y - Φ * X))
    n = @sprintf("%7.3f", norm(Xᴳᵀ - X))
    s = @sprintf("%7.3f", σ₁)
    e = @sprintf("%7.3f", ϵ)
    println("k = $k,\trank(L) = $r,\t‖y - Φ * X‖₂ = $c,\t‖Xᴳᵀ - X‖₂ = $n,\tσ₁ = $s,\tϵ = $e")
    return nothing
end

"""
    RPCA_IRLS(Xᴳᵀ, y, Φ; r̃, s̃, img_size, λ, μ, δ, maxIter, N, verbose)

Run `N` iterations of an iteratively reweighted least squares (IRLS) scheme for robust
principal component analysis. Each iteration

1. solves `Aₗ⋅L = bₗ` for the low-rank estimate `L` with conjugate gradients (`cg!`),
2. solves `Aₛ⋅S = bₛ` for the sparse estimate `S` with conjugate gradients,
3. updates the smoothing value `ϵ` from the `(s̃+1)`-th largest magnitude of `S` and the
   `(r̃+1)`-th singular value of `L` (`ϵ` never increases),
4. rebuilds the weight operators `Wₛ` and `Wₗ` from `S` and from the SVD of `L`.

# Arguments
- `Xᴳᵀ`: ground truth, used only for the error printed when `verbose` is set (and for the
  default of `img_size`).
- `y`: under-sampled data.
- `Φ`: sampling operator.

# Keywords
- `img_size`: size `(d₁, d₂)` of the output matrix (default `size(Xᴳᵀ)`).
- `r̃`: rank estimate of the low-rank part (required).
- `s̃`: sparsity estimate of the sparse part (required).
- `λ`: regularization parameter balancing sparsity and low-rankness.
- `μ`: regularization parameter for separation strictness.
- `δ`: smoothing parameter used in the sparse weights.
- `maxIter`: number of CG steps per linear solve; when `nothing`, it defaults to
  `max(r̃*(r̃+d₁+d₂), s̃*(s̃+d₁+d₂))`.
- `N`: number of IRLS iterations.
- `verbose`: print diagnostics (including the red `ϵ`-candidate line) in each iteration.

# Returns
The tuple `(L, S)` holding the final low-rank and sparse estimates. (Earlier versions
computed these but returned `nothing`.)
"""
function RPCA_IRLS(
        Xᴳᵀ::AbstractArray,                     # ground truth for MSE evaluation
        y::AbstractArray,                       # under-sampled data
        Φ::FunctionOperator;                    # sampling operator (P_Omega in the paper)
        img_size::NTuple = size(Xᴳᵀ),           # size of output matrix
        r̃::Int,                                 # rank estimate of low-rank part
        s̃::Int,                                 # sparsity estimate of sparse part
        λ::Real = 1,                            # regularization param. to balance sparsity and low-rankness
        μ::Real = 1e-3,                         # regularization param. for separation strictness
        δ::Real = 1e-3,                         # smoothing parameter for log (see eq. 5)
        maxIter::Union{Int, Nothing} = nothing, # number of CG iteration steps
        N::Int = 10,                            # number of iterations
        verbose::Bool = false)                  # print rank and loss value in each iteration

    # Initialize variables
    dType = eltype(y)
    d₁, d₂ = img_size
    d = min(d₁, d₂)
    cgIter = isnothing(maxIter) ? max(r̃*(r̃+d₁+d₂), s̃*(s̃+d₁+d₂)) : maxIter
    ϵ = Inf
    X₀ = Φ' * y # that's basically Pᵃ_Omega * P_Omega (M)
    Wₛ, Wₗ = I, I # weights start as identities and become FunctionOperators after iteration 1
    ΦᵃΦ = Φ' * Φ
    L, S = copy(X₀), zeros(dType, size(X₀))

    # Diagnostics need an SVD and two norms, so they are only computed when requested
    verbose && _rpca_irls_report(0, L, S, y, Φ, Xᴳᵀ, svdvals(L)[1], ϵ)

    for k in 1:N

        # Equation under (29)
        Aₗ = reshape((λ/μ*Wₛ + ΦᵃΦ) * Wₗ + λ * Wₛ * ΦᵃΦ, inDims=(d₁*d₂,), outDims=(d₁*d₂,))
        bₗ = λ * Wₛ * X₀
        # vec(L) is passed as the solution buffer, so L itself is updated by the solve
        cg!(vec(L), Aₗ, vec(bₗ), maxiter = cgIter) # solve Aₗ⋅L = bₗ for L

        # Equation 30
        Aₛ = reshape(λ/μ*Wₛ + ΦᵃΦ, inDims=(d₁*d₂,), outDims=(d₁*d₂,))
        bₛ = X₀ - ΦᵃΦ * L
        cg!(vec(S), Aₛ, vec(bₛ), maxiter = cgIter) # solve Aₛ⋅S = bₛ for S

        F = svd(L)
        U, σ, V = F.U, F.S, F.V

        # Equation 32: ϵ is the running minimum of the larger of the two candidates
        Sₛ₊₁ = sort(abs.(vec(S)), rev=true)[s̃+1]
        if verbose
            println("\t\x1b[31m|S|₍ₛ₊₁₎/(2λ) = ", @sprintf("%7.3g", Sₛ₊₁/(2λ)),
                ", σᵣ₊₁ = ", @sprintf("%7.3f", σ[r̃+1]), "\x1b[0m")
        end
        ϵ = min(max(Sₛ₊₁/(2λ), σ[r̃+1]), ϵ)

        # Equation 22
        # NOTE(review): this parses as (1 / max(|S[i,j]|, δ)^2) + δ^2 — confirm against
        # eq. 22 that δ^2 is not meant to be inside the denominator.
        wₛ = [1 / max(abs(S[i,j]), δ)^2 + δ^2 for i in 1:d₁, j in 1:d₂]
        # Equation 24
        Wₛ = FunctionOperator{dType}(name = "Wₛ", inDims = (d₁, d₂), outDims = (d₁, d₂),
            forw = Z -> wₛ .* Z)
        # Equation 23 and 15 with q = 0 (typo in def of H₁: mean of σ̃ᵢ and σ̃ᵢ ??)
        σ̃ᵢ(i) = 1 / (max(σ[i], ϵ) + ϵ^2)
        H₁ = [sqrt(σ̃ᵢ(i) * σ̃ᵢ(j)) for i in 1:d, j in 1:d]
        # Equation 25 (typo: second V⁽ᵏ⁾ should be adjoint)
        Wₗ = FunctionOperator{dType}(name = "Wₗ", inDims = (d₁, d₂), outDims = (d₁, d₂),
            forw = Z -> U * (H₁ .* (U' * Z * V)) * V')

        # Print
        verbose && _rpca_irls_report(k, L, S, y, Φ, Xᴳᵀ, σ[1], ϵ)
    end

    return L, S
end

RPCA_IRLS (generic function with 2 methods)

In [205]:
# Time 50 IRLS iterations with per-iteration diagnostics; the rank and sparsity estimates
# r̃ and s̃ are taken from the globals r and s.
@time RPCA_IRLS(Xᴳᵀ, y, Φ, N = 50, r̃ = r, s̃ = s, λ = 2, μ = 1e-3, δ = 1e-5, verbose = true);

k = 0,	rank(L) = 40,	‖y - Φ * X‖₂ =   0.000,	‖Xᴳᵀ - X‖₂ = 125.133,	σ₁ =  28.903,	ϵ =     Inf
	[31m|S|₍ₛ₊₁₎/(2λ) = 0.000765, σᵣ₊₁ =   0.016[0m
k = 1,	rank(L) = 40,	‖y - Φ * X‖₂ =  78.646,	‖Xᴳᵀ - X‖₂ = 147.795,	σ₁ =   0.029,	ϵ =   0.016
	[31m|S|₍ₛ₊₁₎/(2λ) = 7.16e-09, σᵣ₊₁ =   0.141[0m
k = 2,	rank(L) = 31,	‖y - Φ * X‖₂ =  78.762,	‖Xᴳᵀ - X‖₂ = 147.859,	σ₁ =   0.318,	ϵ =   0.016
	[31m|S|₍ₛ₊₁₎/(2λ) = 1.69e-14, σᵣ₊₁ = 108.700[0m
k = 3,	rank(L) = 20,	‖y - Φ * X‖₂ =  10.521,	‖Xᴳᵀ - X‖₂ = 554.022,	σ₁ = 248.239,	ϵ =   0.016
	[31m|S|₍ₛ₊₁₎/(2λ) = 9.16e-15, σᵣ₊₁ =  56.141[0m
k = 4,	rank(L) = 30,	‖y - Φ * X‖₂ =   5.145,	‖Xᴳᵀ - X‖₂ = 277.919,	σ₁ = 131.074,	ϵ =   0.016
	[31m|S|₍ₛ₊₁₎/(2λ) = 2.41e-14, σᵣ₊₁ = 101.949[0m
k = 5,	rank(L) = 39,	‖y - Φ * X‖₂ =  17.590,	‖Xᴳᵀ - X‖₂ = 448.184,	σ₁ = 175.618,	ϵ =   0.016
	[31m|S|₍ₛ₊₁₎/(2λ) = 2.33e-14, σᵣ₊₁ =  69.889[0m
k = 6,	rank(L) = 40,	‖y - Φ * X‖₂ =  16.926,	‖Xᴳᵀ - X‖₂ = 355.214,	σ₁ = 137.865,	ϵ =   0.016
	[31m|S|₍ₛ₊₁₎/(2λ) = 1.25e-14, σᵣ₊₁ =  43.

### Some helper functions

In [10]:
import Base.size
# Size of a FunctionOperator along dimension `d`, treating it like a matrix:
# d == 1 gives the product of its output dimensions, d == 2 the product of its input
# dimensions. Any other `d` trips the assertion.
function Base.size(FO::FunctionOperator, d::Int)
    @assert d in [1, 2]
    dims = if d == 1
        FO.outDims
    else
        FO.inDims
    end
    return prod(dims)
end

In [11]:
"""
    generateΦ(d₁, d₂, r, m; rng = Random.default_rng())

Randomly sample a `d₁ × d₂` mask of zeros and ones (a `Matrix{Int}`) that contains exactly
`m` ones and has at least `r` ones in every row and every column.

The mask starts from a banded circulant pattern with `max(d₁, d₂) * r` ones, then zeros
chosen uniformly without replacement are switched to ones until `m` ones are present, and
finally rows and columns are randomly permuted.

# Keywords
- `rng`: random number generator used for all shuffles. With the default, results follow
  the global seed (`Random.seed!`) exactly as calls without the keyword do.

# Throws
- `ArgumentError` if `r > min(d₁, d₂)`, if `m < max(d₁, d₂) * r`, or if `m > d₁ * d₂`.
"""
function generateΦ(d₁, d₂, r, m; rng::AbstractRNG = Random.default_rng())
    # Explicit checks instead of @assert: assertions may be disabled, input validation not
    n = min(d₁, d₂)
    r ≤ n ||
        throw(ArgumentError("r = $r must not exceed min(d₁, d₂) = $n"))
    max(d₁, d₂) * r ≤ m ||
        throw(ArgumentError("m = $m must be at least max(d₁, d₂) * r = $(max(d₁, d₂) * r)"))
    m ≤ d₁ * d₂ ||
        throw(ArgumentError("m = $m must not exceed d₁ * d₂ = $(d₁ * d₂)"))

    # Banded pattern: an n×n circulant whose first column is r ones followed by zeros,
    # tiled along the longer dimension and cropped to d₁×d₂. Entry (i, j) of that tiling is
    # one exactly when mod(i - j, n) < r, so every row and column has at least r ones.
    M = [mod(i - j, n) < r ? 1 : 0 for i in 1:d₁, j in 1:d₂]

    # Randomly switch zeros to ones until exactly m ones are in the matrix
    zero_places = findall(iszero, M)
    number_of_missing_ones = m - (length(M) - length(zero_places))
    if number_of_missing_ones > 0
        M[shuffle(rng, zero_places)[1:number_of_missing_ones]] .= 1
    end

    # Randomize by permuting rows and columns. The 10 rounds are kept so that the sequence
    # of random draws (and hence seeded output) is unchanged.
    for _ in 1:10
        M = M[shuffle(rng, 1:d₁), :] # shuffle rows
        M = M[:, shuffle(rng, 1:d₂)] # shuffle columns
    end

    return M
end

generateΦ (generic function with 1 method)

In [12]:
"""
    maskToMatrix(Φᴹ)

Convert a 0/1 sampling mask into an explicit selection matrix.

The result is a `Float64` matrix with one row per mask entry equal to one and
`length(Φᴹ)` columns; row `i` has a single 1 in the column of the `i`-th such entry of
`vec(Φᴹ)`. Multiplying it with `vec(X)` therefore picks the sampled entries of `X`.
"""
function maskToMatrix(Φᴹ)
    # Row count and positions both come from the same index list, so they cannot disagree
    sampled = findall(==(1), vec(Φᴹ))

    Φ = zeros(length(sampled), length(Φᴹ))
    for (row, col) in enumerate(sampled)
        Φ[row, col] = 1
    end
    return Φ
end

maskToMatrix (generic function with 1 method)

### Generate data

#### That's how Christian generated the data to compare algorithms:

In [127]:
# Problem setup: a d₁×d₂ matrix built from r low-rank factors plus s non-zero sparse entries
d₁, d₂, r, s = 60, 40, 7, 15
df_LR = r * (d₁ + d₂ - r) # Number of degrees of freedom of the setting
# Number of sampled entries: 1.05 × the degrees of freedom, capped at the number of entries
m = floor(Int, min(1.05 * df_LR, d₁ * d₂))

dType = ComplexF64
# Low-rank factors: complex Gaussian U (d₁×r) and V (d₂×r), real Gaussian diagonal Σ (r×r)
U, Σ, V = randn(dType, d₁, r), Diagonal(randn(r)), randn(dType, d₂, r)
# Sparse part: s randomly chosen positions filled with values from rand
S = zeros(d₁, d₂)
S[randperm(d₁*d₂)[1:s]] = rand(s)
Xᴳᵀ = U * Σ * V' + S # Ground Truth matrix

@show size(Xᴳᵀ)
@show rank(Xᴳᵀ);

# Sampling mask with m ones and the operator that applies it element-wise
Φᴹ = generateΦ(d₁, d₂, r, m)
# NOTE(review): backw copies its input unchanged rather than masking again, so it matches
# the adjoint of forw only for inputs that are already zero outside the mask — confirm
# this is intended.
Φ = FunctionOperator{dType}(name = "Φ", inDims = (d₁, d₂), outDims = (d₁, d₂),
    forw = (b,x) -> b .= Φᴹ .* x, backw = (b,x) -> b .= x)
y = Φ * Xᴳᵀ # under-sampled data
@show rank(y);

size(Xᴳᵀ) = (60, 40)
rank(Xᴳᵀ) = 21
rank(y) = 40


In [128]:
# Sanity check: applying the operator Φ gives the same result as masking element-wise with Φᴹ
Φᴹ .* Xᴳᵀ == Φ * Xᴳᵀ

true

In [137]:
# Disabled alternative: use an explicit selection matrix instead of the FunctionOperator
#Φ = maskToMatrix(Φᴹ)
#y = Φ * vec(Xᴳᵀ)
# Time 70 IRLS iterations on the generated data with per-iteration diagnostics
@time RPCA_IRLS(Xᴳᵀ, y, Φ, N = 70, r̃ = r, s̃ = s, λ = 1, μ = 5e-4, δ = 1e-5, verbose = true);

k = 70,	rank(Xᵏ) = 40,	‖Xᴳᵀ - Xᵏ‖₂ = 125.133, σ₁ = 28.903, ϵᵏ = Inf
k = 1,	rank(L) = 39,	‖Xᴳᵀ - X‖₂ = 147.816, σ₁ = 0.014, ϵ = 0.008
k = 2,	rank(L) = 22,	‖Xᴳᵀ - X‖₂ = 147.858, σ₁ = 0.178, ϵ = 0.008
k = 3,	rank(L) = 20,	‖Xᴳᵀ - X‖₂ = 596.994, σ₁ = 308.556, ϵ = 0.008
k = 4,	rank(L) = 33,	‖Xᴳᵀ - X‖₂ = 365.009, σ₁ = 202.247, ϵ = 0.008
k = 5,	rank(L) = 39,	‖Xᴳᵀ - X‖₂ = 429.478, σ₁ = 167.384, ϵ = 0.008
k = 6,	rank(L) = 39,	‖Xᴳᵀ - X‖₂ = 342.315, σ₁ = 131.841, ϵ = 0.008
k = 7,	rank(L) = 39,	‖Xᴳᵀ - X‖₂ = 214.556, σ₁ = 88.125, ϵ = 0.008
k = 8,	rank(L) = 38,	‖Xᴳᵀ - X‖₂ = 344.309, σ₁ = 143.167, ϵ = 0.008
k = 9,	rank(L) = 39,	‖Xᴳᵀ - X‖₂ = 337.265, σ₁ = 122.195, ϵ = 0.008
k = 10,	rank(L) = 38,	‖Xᴳᵀ - X‖₂ = 235.070, σ₁ = 94.327, ϵ = 0.008
k = 11,	rank(L) = 38,	‖Xᴳᵀ - X‖₂ = 351.344, σ₁ = 145.260, ϵ = 0.008
k = 12,	rank(L) = 39,	‖Xᴳᵀ - X‖₂ = 321.393, σ₁ = 119.487, ϵ = 0.008
k = 13,	rank(L) = 38,	‖Xᴳᵀ - X‖₂ = 245.166, σ₁ = 93.440, ϵ = 0.008
k = 14,	rank(L) = 37,	‖Xᴳᵀ - X‖₂ = 314.612, σ₁ = 115.901, ϵ = 0.

InterruptException: InterruptException:

#### An easy problem:

In [16]:
# Small test case: a d×d outer product of a random vector with itself
d = 10
v = rand(d)
Xᴳᵀ = v * v'  # Ground Truth matrix
@show size(Xᴳᵀ)
@show rank(Xᴳᵀ)

# mask that erases 5 elements:
num_of_points_to_erase = 5
# Shuffle a vector of 5 zeros and d*d - 5 ones, then reshape it into a d×d mask
Φᴹ = reshape(shuffle!([fill(0, num_of_points_to_erase)...,
            fill(1, d*d - num_of_points_to_erase)...]), d, d)
# forw masks element-wise with Φᴹ; backw copies its input unchanged
Φ = FunctionOperator{Float64}(name = "Φ", inDims = (d, d), outDims = (d, d),
    forw = (b,x) -> b .= Φᴹ .* x, backw = (b,x) -> b .= x)

y = Φ * Xᴳᵀ # under-sampled data
@show rank(y);

size(Xᴳᵀ) = (10, 10)
rank(Xᴳᵀ) = 1
rank(y) = 6


In [17]:
# Time 10 iterations of HM_IRLS on the easy problem with diagnostics printed.
# NOTE(review): HM_IRLS is not defined in this part of the notebook — presumably it comes
# from helper_functions.jl or an earlier cell; confirm.
@time HM_IRLS(Xᴳᵀ, y, Φ, N = 10, verbose = true);

k = 0,	rank(Xᵏ) = 6,	‖Xᴳᵀ - Xᵏ‖₂ = 0.730, σ₁ = 4.028, ϵᵏ = 0.693
k = 1,	rank(Xᵏ) = 5,	‖Xᴳᵀ - Xᵏ‖₂ = 0.062, σ₁ = 4.252, ϵᵏ = 0.057
k = 2,	rank(Xᵏ) = 1,	‖Xᴳᵀ - Xᵏ‖₂ = 0.001, σ₁ = 4.272, ϵᵏ = 0.000
k = 3,	rank(Xᵏ) = 1,	‖Xᴳᵀ - Xᵏ‖₂ = 0.000, σ₁ = 4.272, ϵᵏ = 0.000
k = 4,	rank(Xᵏ) = 1,	‖Xᴳᵀ - Xᵏ‖₂ = 0.000, σ₁ = 4.272, ϵᵏ = 0.000
k = 5,	rank(Xᵏ) = 1,	‖Xᴳᵀ - Xᵏ‖₂ = 0.000, σ₁ = 4.272, ϵᵏ = 0.000
k = 6,	rank(Xᵏ) = 1,	‖Xᴳᵀ - Xᵏ‖₂ = 0.000, σ₁ = 4.272, ϵᵏ = 0.000
k = 7,	rank(Xᵏ) = 1,	‖Xᴳᵀ - Xᵏ‖₂ = 0.000, σ₁ = 4.272, ϵᵏ = 0.000
k = 8,	rank(Xᵏ) = 1,	‖Xᴳᵀ - Xᵏ‖₂ = 0.000, σ₁ = 4.272, ϵᵏ = 0.000
k = 9,	rank(Xᵏ) = 1,	‖Xᴳᵀ - Xᵏ‖₂ = 0.000, σ₁ = 4.272, ϵᵏ = 0.000
k = 10,	rank(Xᵏ) = 1,	‖Xᴳᵀ - Xᵏ‖₂ = 0.000, σ₁ = 4.272, ϵᵏ = 0.000
  3.282879 seconds (6.87 M allocations: 338.124 MiB, 3.60% gc time)
