In [1]:
using LinearAlgebra, ToeplitzMatrices, Random, IterativeSolvers, FunctionOperators,
    EllipsisNotation, Printf, MATLAB

### The algorithm itself

In [114]:
function HM_IRLS(
        X·¥≥·µÄ::AbstractArray,                   # ground truth for MSE evaluation
        y::AbstractArray,                     # under-sampled data
        Œ¶::FunctionOperator,                  # sampling operator
        Œ¶·¥π::AbstractArray;
        shape::NTuple = size(X·¥≥·µÄ),            # size of output matrix
        rÃÉ::Int = 0,                           # rank estimate of solution
        maxIter::Int = 3,                     # number of CG iteration steps
        N::Int = 10,                          # number of iterations
        verbose::Bool = false)                # print rank and loss value in each iteration
    
    dType = eltype(y)
    d‚ÇÅ, d‚ÇÇ = shape
    rÃÉ == 0 && (rÃÉ = rank(X·¥≥·µÄ))
    
    œµ·µè = Inf
    X·µè = Œ¶ * y
    œÉ = nothing # I just want to make it available outside of the loop
    
    for k in 1:N
"""
    2. Find best rank-(rÃÉ + 1) approximation of X·µè to obtain
        ùíØ·µ£(X·µè) = U·µè * diag(œÉ·µ¢·µè)·µ¢‚Çå‚ÇÅ ≥ * V·µè' and œÉ·µ£‚Çä‚ÇÅ·µè 
"""
        F = svd(X·µè)
        U·µè, œÉ, V·µè = F.U[:, 1:rÃÉ], F.S, F.V[:, 1:rÃÉ]
        
"""     update smoothing:                                 (2.91) """
        œµ·µè = min(œµ·µè, œÉ[rÃÉ+1])
        
        r, n, s, e = sum(svdvals(X·µè) .> 1e-3), norm‚ÇÇ(X·¥≥·µÄ - X·µè), œÉ[1], œµ·µè
        n, s, e = @sprintf("%.3f", n), @sprintf("%.3f", s), @sprintf("%.3f", e)
        println("k = $(k-1),\trank(X·µè) = $r,\t‚ÄñX·¥≥·µÄ - X·µè‚Äñ‚ÇÇ = $n, œÉ‚ÇÅ = $s, œµ·µè = $e")
        
"""
    3. Update W·µè as in (2.57), using parameters œµ = œµ·µè and p in (2.58) and (2.59), and the
        information U·µè , V·µè and œÉ‚ÇÅ·µè, ..., œÉ·µ£‚Çä‚ÇÅ·µè from item 2.

        (Lines below are based on Remark 2.3.2, the special case for p = 0)
"""
        # H·µè = [1 / (max(œÉ[i], œµ·µè) * max(œÉ[j], œµ·µè))  for i in 1:rÃÉ+1, j in 1:rÃÉ+1]
        #W·µè = FunctionOperator{dType}(name = "W·µè", inDims = (d‚ÇÅ, d‚ÇÇ), outDims = (d‚ÇÅ, d‚ÇÇ),
        #    forw = Z -> U·µè * (H·µè .* (U·µè' * Z * V·µè)) * V·µè')
        
"""
    1. Use a conjugate gradient method to solve linearly constrained quadratic program
         X·µè = arg min‚Çì ‚ü®X,W·µè‚Åª¬π(X)‚ü© s.t. Œ¶(X) = y         (2.90)
"""
        
        # the upper-left (r √ó r) block of (d‚ÇÅ √ó d‚ÇÇ) H·µè matrix:
        H·µè·µ§·µ• = [1 / (max(œÉ[i], œµ·µè) * max(œÉ[j], œµ·µè))  for i in 1:rÃÉ, j in 1:rÃÉ]
        # the first column of H·µè·µ§·µ•‚üÇ:
        dH·µè = reshape([1 / (max(œÉ[rÃÉ+1], œµ·µè) * max(œÉ[j], œµ·µè))  for j in 1:rÃÉ], :, 1)
        P·µè = FunctionOperator{dType}(name="P·µè", inDims = (rÃÉ*(rÃÉ+d‚ÇÅ+d‚ÇÇ),), outDims = (d‚ÇÅ, d‚ÇÇ),
            forw = Œ≥ -> begin
                    Œ≥‚ÇÅ = reshape(Œ≥[1:rÃÉ^2], rÃÉ, rÃÉ)
                    Œ≥‚ÇÇ = reshape(Œ≥[rÃÉ^2+1:rÃÉ*(rÃÉ+d‚ÇÅ)], d‚ÇÅ, rÃÉ)
                    Œ≥‚ÇÉ = reshape(Œ≥[rÃÉ*(rÃÉ+d‚ÇÅ)+1:rÃÉ*(rÃÉ+d‚ÇÅ+d‚ÇÇ)], rÃÉ, d‚ÇÇ)
                    # According to (2.169), the equation would be:
                    # U·µè * Œ≥‚ÇÅ * V·µè' + U·µè * Œ≥‚ÇÇ' * (I - V·µè*V·µè') + (I - U·µè*U·µè') * Œ≥‚ÇÉ' * V·µè'
                    # But as the columns of Œ≥‚ÇÉ are orthogonal to the ones in U·µè,
                    # the rows of Œ≥‚ÇÇ are orthogonal to the columns of V·µè,
                    # the expression can be simplified:
                    (U·µè * Œ≥‚ÇÅ + Œ≥‚ÇÇ) * V·µè' + U·µè * Œ≥‚ÇÉ
                end,
            backw = Œ¶·µÉy -> begin
                    Œ≥‚ÇÅ = U·µè' * Œ¶·µÉy * V·µè
                    Œ≥‚ÇÇ = (I - U·µè*U·µè') * Œ¶·µÉy * V·µè
                    Œ≥‚ÇÉ = U·µè' * Œ¶·µÉy * (I - V·µè*V·µè')
                    vcat(vec(Œ≥‚ÇÅ), vec(Œ≥‚ÇÇ), vec(Œ≥‚ÇÉ))
                end)
        b = P·µè' * Œ¶' * y
        ùíü‚Åª¬π = I / Diagonal(vcat(vec(H·µè·µ§·µ•), vec(kron(dH·µè, ones(1, d‚ÇÅ))'), vec(kron(dH·µè, ones(1, d‚ÇÇ)))))
        CG_op = FunctionOperator{dType}(name = "CG_op", inDims = (rÃÉ*(rÃÉ+d‚ÇÅ+d‚ÇÇ),), outDims = (rÃÉ*(rÃÉ+d‚ÇÅ+d‚ÇÇ),),
            forw = Œ≥ ->  begin
                    (œµ·µè^2 * I / (ùíü‚Åª¬π - œµ·µè^2 * I)) * Œ≥ + P·µè' * Œ¶' * Œ¶ * P·µè * Œ≥
                end)
        Œ≥·µè = cg(CG_op, b, maxiter = maxIter) # 2.167
        r·µè = y - Œ¶ * P·µè * Œ≥·µè
        Œ≥·µè_tilde = (ùíü‚Åª¬π / (ùíü‚Åª¬π - œµ·µè^2 * I)) * Œ≥·µè - P·µè' * Œ¶' * r·µè
        X·µè = Œ¶' * r·µè + P·µè * Œ≥·µè_tilde   # 2.168
    end
    
    r, n, s, e = sum(svdvals(X·µè) .> 1e-3), norm‚ÇÇ(X·¥≥·µÄ - X·µè), œÉ[1], œµ·µè
    n, s, e = @sprintf("%.3f", n), @sprintf("%.3f", s), @sprintf("%.3f", e)
    println("k = $N,\trank(X·µè) = $r,\t‚ÄñX·¥≥·µÄ - X·µè‚Äñ‚ÇÇ = $n, œÉ‚ÇÅ = $s, œµ·µè = $e")
    
    X·µè
end

HM_IRLS (generic function with 2 methods)

### Some helper functions

In [115]:
import Base.size
function Base.size(FO::FunctionOperator, d::Int)
    @assert d in [1, 2]
    prod(d == 1 ? FO.outDims : FO.inDims)
end

In [116]:
norm‚ÇÇ = (A) -> svdvals(A)[1]

#234 (generic function with 1 method)

In [117]:
# This function randomly samples a $(d‚ÇÅ \times d‚ÇÇ)$ sparse matrix with ones at $m$ randomly chosen
# coordinates (uniform without replacement). The output matrix has at least $r$ non-zero entries
# in each row and each column, where $r$ is a specified positive integer. The number of ones in the
# output matrix is exactly $m$.
function generateŒ¶(d‚ÇÅ, d‚ÇÇ, r, m)
    @assert max(d‚ÇÅ, d‚ÇÇ) * r ‚â§ m
    @assert m ‚â§ d‚ÇÅ * d‚ÇÇ
    @assert r ‚â§ d‚ÇÅ
    @assert r ‚â§ d‚ÇÇ
    
    # generate a square matrix where each row and each column has exactly r ones
    initial = Circulant([fill(1, r)..., fill(0, min(d‚ÇÅ, d‚ÇÇ) - r)...])
    
    # Extend that matrix to a d‚ÇÅ√ód‚ÇÇ matrix where each row and each column has at least r ones
    # That is accomplished by repeating the "initial" matrix and then cropping
    if d‚ÇÅ < d‚ÇÇ
        M = repeat(initial, outer = (1, ceil(Int, d‚ÇÇ / d‚ÇÅ)))
    elseif d‚ÇÅ > d‚ÇÇ
        M = repeat(initial, outer = (ceil(Int, d‚ÇÅ / d‚ÇÇ), 1))
    else
        M = initial
    end
    M = M[1:d‚ÇÅ, 1:d‚ÇÇ]
    
    # Randomly switch zeros to ones until exactly m number of ones are in the matrix
    zero_places = findall(M .== 0)
    number_of_missing_ones = m - (d‚ÇÅ*d‚ÇÇ - length(zero_places))
    number_of_missing_ones > 0 && (M[shuffle(zero_places)[1:number_of_missing_ones]] .= 1)
    
    # Then randomize matrix by permutating rows and columns a couple times
    for i in 1:10
        M .= M[shuffle(1:end), :] # shuffle rows
        M .= M[:, shuffle(1:end)] # shuffle columns
    end
    
    M
end

generateŒ¶ (generic function with 1 method)

In [118]:
function maskToMatrix(Œ¶·¥π)
    m = convert(Int, sum(Œ¶·¥π))
    d‚ÇÅ, d‚ÇÇ = size(Œ¶·¥π)

    Œ¶ = zeros(m, length(Œ¶·¥π))
    non_zero_places = findall(vec(Œ¶·¥π) .== 1)
    for i in 1:m
        Œ¶[i, non_zero_places[i]] = 1
    end
    return Œ¶
end

maskToMatrix (generic function with 1 method)

### Generate data

#### That's how Chirstian generated the data to compare algorithms:

In [119]:
d‚ÇÅ, d‚ÇÇ, r = 40, 40, 7
df_LR = r * (d‚ÇÅ + d‚ÇÇ - r) # Number of degrees of freedom of the setting
m = floor(Int, min(1.05 * df_LR, d‚ÇÅ * d‚ÇÇ))

dType = ComplexF64
U, S, V = randn(dType, d‚ÇÅ, r), Diagonal(randn(r)), randn(dType, d‚ÇÇ, r)
X·¥≥·µÄ = U * S * V' # Ground Truth matrix

@show size(X·¥≥·µÄ)
@show rank(X·¥≥·µÄ);

Œ¶·¥π = generateŒ¶(d‚ÇÅ, d‚ÇÇ, r, m)
Œ¶ = FunctionOperator{dType}(name = "Œ¶", inDims = (d‚ÇÅ, d‚ÇÇ), outDims = (d‚ÇÅ, d‚ÇÇ),
    forw = (b,x) -> b .= Œ¶·¥π .* x, backw = (b,x) -> b .= x)
y = Œ¶ * X·¥≥·µÄ
@show rank(y);

size(X·¥≥·µÄ) = (40, 40)
rank(X·¥≥·µÄ) = 7
rank(y) = 40


In [120]:
Œ¶·¥π .* X·¥≥·µÄ == Œ¶ * X·¥≥·µÄ

true

In [121]:
@time HM_IRLS(X·¥≥·µÄ, y, Œ¶, maskToMatrix(Œ¶·¥π), maxIter = 1000, N = 60);

k = 0,	rank(X·µè) = 40,	‚ÄñX·¥≥·µÄ - X·µè‚Äñ‚ÇÇ = 44.486, œÉ‚ÇÅ = 22.911, œµ·µè = 11.187
k = 1,	rank(X·µè) = 40,	‚ÄñX·¥≥·µÄ - X·µè‚Äñ‚ÇÇ = 35.378, œÉ‚ÇÅ = 36.407, œµ·µè = 7.509
k = 2,	rank(X·µè) = 40,	‚ÄñX·¥≥·µÄ - X·µè‚Äñ‚ÇÇ = 26.843, œÉ‚ÇÅ = 48.331, œµ·µè = 4.962
k = 3,	rank(X·µè) = 40,	‚ÄñX·¥≥·µÄ - X·µè‚Äñ‚ÇÇ = 21.862, œÉ‚ÇÅ = 53.713, œµ·µè = 3.306
k = 4,	rank(X·µè) = 40,	‚ÄñX·¥≥·µÄ - X·µè‚Äñ‚ÇÇ = 18.263, œÉ‚ÇÅ = 56.239, œµ·µè = 2.367
k = 5,	rank(X·µè) = 40,	‚ÄñX·¥≥·µÄ - X·µè‚Äñ‚ÇÇ = 15.126, œÉ‚ÇÅ = 57.794, œµ·µè = 1.714
k = 6,	rank(X·µè) = 40,	‚ÄñX·¥≥·µÄ - X·µè‚Äñ‚ÇÇ = 12.406, œÉ‚ÇÅ = 58.860, œµ·µè = 1.186
k = 7,	rank(X·µè) = 40,	‚ÄñX·¥≥·µÄ - X·µè‚Äñ‚ÇÇ = 10.097, œÉ‚ÇÅ = 59.621, œµ·µè = 0.779
k = 8,	rank(X·µè) = 40,	‚ÄñX·¥≥·µÄ - X·µè‚Äñ‚ÇÇ = 8.099, œÉ‚ÇÅ = 60.140, œµ·µè = 0.499
k = 9,	rank(X·µè) = 40,	‚ÄñX·¥≥·µÄ - X·µè‚Äñ‚ÇÇ = 6.361, œÉ‚ÇÅ = 60.519, œµ·µè = 0.334
k = 10,	rank(X·µè) = 40,	‚ÄñX·¥≥·µÄ - X·µè‚Äñ‚ÇÇ = 4.942, œÉ‚ÇÅ = 60.855, œµ·µè = 0.245
k = 11,	rank(X·µè) = 40,	‚ÄñX·¥≥·

#### An easy problem:

In [69]:
d = 10
v = rand(d)
X·¥≥·µÄ = v * v'  # Ground Truth matrix
@show size(X·¥≥·µÄ)
@show rank(X·¥≥·µÄ)

# mask that erases 5 elements:
num_of_points_to_erase = 5
Œ¶·¥π = reshape(shuffle!([fill(0, num_of_points_to_erase)...,
            fill(1, d*d - num_of_points_to_erase)...]), d, d)
Œ¶ = FunctionOperator{Float64}(name = "Œ¶", inDims = (d, d), outDims = (d, d),
    forw = (b,x) -> b .= Œ¶·¥π .* x, backw = (b,x) -> b .= x)

y = Œ¶ * X·¥≥·µÄ
@show rank(y);

size(X·¥≥·µÄ) = (10, 10)
rank(X·¥≥·µÄ) = 1
rank(y) = 5


In [71]:
@time HM_IRLS(X·¥≥·µÄ, y, Œ¶, maxIter = 100, N = 10);

k = 0,	rank(X·µè) = 5,	‚ÄñX·¥≥·µÄ - X·µè‚Äñ‚ÇÇ = 0.553, œÉ‚ÇÅ = 2.966, œµ·µè = 0.459
k = 1,	rank(X·µè) = 5,	‚ÄñX·¥≥·µÄ - X·µè‚Äñ‚ÇÇ = 0.944, œÉ‚ÇÅ = 2.891, œµ·µè = 0.459
k = 2,	rank(X·µè) = 5,	‚ÄñX·¥≥·µÄ - X·µè‚Äñ‚ÇÇ = 0.743, œÉ‚ÇÅ = 2.921, œµ·µè = 0.459
k = 3,	rank(X·µè) = 5,	‚ÄñX·¥≥·µÄ - X·µè‚Äñ‚ÇÇ = 0.830, œÉ‚ÇÅ = 2.908, œµ·µè = 0.459
k = 4,	rank(X·µè) = 5,	‚ÄñX·¥≥·µÄ - X·µè‚Äñ‚ÇÇ = 0.789, œÉ‚ÇÅ = 2.914, œµ·µè = 0.459
k = 5,	rank(X·µè) = 5,	‚ÄñX·¥≥·µÄ - X·µè‚Äñ‚ÇÇ = 0.808, œÉ‚ÇÅ = 2.912, œµ·µè = 0.459
k = 6,	rank(X·µè) = 5,	‚ÄñX·¥≥·µÄ - X·µè‚Äñ‚ÇÇ = 0.799, œÉ‚ÇÅ = 2.913, œµ·µè = 0.459
k = 7,	rank(X·µè) = 5,	‚ÄñX·¥≥·µÄ - X·µè‚Äñ‚ÇÇ = 0.803, œÉ‚ÇÅ = 2.912, œµ·µè = 0.459
k = 8,	rank(X·µè) = 5,	‚ÄñX·¥≥·µÄ - X·µè‚Äñ‚ÇÇ = 0.801, œÉ‚ÇÅ = 2.912, œµ·µè = 0.459
k = 9,	rank(X·µè) = 5,	‚ÄñX·¥≥·µÄ - X·µè‚Äñ‚ÇÇ = 0.802, œÉ‚ÇÅ = 2.912, œµ·µè = 0.459
  0.023148 seconds (47.02 k allocations: 3.222 MiB)
