In [17]:
using LinearAlgebra, ToeplitzMatrices, Random, IterativeSolvers, FunctionOperators, EllipsisNotation

### The algorithm itself

In [93]:
function HM_IRLS(
        Xᴳᵀ::AbstractArray,                   # ground truth for MSE evaluation
        y::AbstractArray,                     # under-sampled data
        Φ::FunctionOperator;                  # sampling operator
        shape::NTuple = size(Xᴳᵀ),            # size of output matrix
        r̃::Int = 0,                           # rank estimate of solution
        maxIter::Int = 3,                     # number of CG iteration steps
        N::Int = 10,                          # number of iterations
        verbose::Bool = false)                # print rank and loss value in each iteration
    
    dType = eltype(y)
    d₁, d₂ = shape
    
    X⁰ = Φ' * y
    Wᵏ = I
    b = [y; zeros(dType, (d₁, d₂))]
    ϵᵏ = Inf
    Xᵏ = copy(X⁰)
    
    println("k = 0,\trank(Xᵏ) = $(rank(Xᵏ)),\t‖Xᴳᵀ - Xᵏ‖₂ = $(norm(Xᴳᵀ - Xᵏ))")
    
    for k in 1:N
        
"""
    1. Use a conjugate gradient method to solve linearly constrained quadratic program
         Xᵏ = arg minₓ ⟨X,Wᵏ⁻¹(X)⟩ s.t. Φ(X) = y         (2.90)
"""
        CG_op = FunctionOperator{dType}(name = "CG_op", inDims = (d₁*d₂*2,), outDims = (d₁*d₂*2,),
            forw = x_ext -> begin
                x = reshape(x_ext, 2, d₁, d₂)[1, ..]
                vec([Φ'Φ * x; Wᵏ * x])
            end)
        Xᵏ_ext = [Xᵏ; zeros(dType, (d₁, d₂))]
        bicgstabl!(vec(Xᵏ_ext), CG_op, vec(b), max_mv_products = maxIter) # stabilized biconjugate gradients
        Xᵏ = Xᵏ_ext[1:end÷2, :] # crop the the part we need
        
"""
    2. Find best rank-(r̃ + 1) approximation of Xᵏ to obtain
        𝒯ᵣ(Xᵏ) = Uᵏ * diag(σᵢᵏ)ᵢ₌₁ʳ * Vᵏ' and σᵣ₊₁ᵏ 
"""
        F = svd(Xᵏ)
        Uᵏ, σ, Vᵏ = F.U[:, 1:r̃+1], F.S, F.V[:, 1:r̃+1]
"""     update smoothing:                                 (2.91) """
        ϵᵏ = min(ϵᵏ, σ[r̃+1])
        
"""
    3. Update Wᵏ as in (2.57), using parameters ϵ = ϵᵏ and p in (2.58) and (2.59), and the
        information Uᵏ , Vᵏ and σ₁ᵏ, ..., σᵣ₊₁ᵏ from item 2.

        (Lines below are based on Remark 2.3.2, the special case for p = 0)
"""
        Hᵏ = [1 / (max(σ[i], ϵᵏ) * max(σ[j], ϵᵏ))  for i in 1:r̃+1, j in 1:r̃+1] # 
        Wᵏ = FunctionOperator{dType}(name = "Wᵏ", inDims = (d₁, d₂), outDims = (d₁, d₂),
            forw = Z -> Uᵏ * (Hᵏ .* (Uᵏ' * Z * Vᵏ)) * Vᵏ')
        
        println("k = $k,\trank(Xᵏ) = $(rank(Xᵏ)),\t‖Xᴳᵀ - Xᵏ‖₂ = $(norm(Xᴳᵀ - Xᵏ))")
    end
    
    Xᵏ
end

HM_IRLS (generic function with 2 methods)

### Some helper functions

In [34]:
import Base.size
function Base.size(FO::FunctionOperator, d::Int)
    @assert d in [1, 2]
    prod(d == 1 ? FO.outDims : FO.inDims)
end

In [9]:
# This function randomly samples a $(d₁ \times d₂)$ sparse matrix with ones at $m$ randomly chosen
# coordinates (uniform without replacement). The output matrix has at least $r$ non-zero entries
# in each row and each column, where $r$ is a specified positive integer. The number of ones in the
# output matrix is exactly $m$.
function generateΦ(d₁, d₂, r, m)
    @assert max(d₁, d₂) * r ≤ m
    @assert m ≤ d₁ * d₂
    @assert r ≤ d₁
    @assert r ≤ d₂
    
    # generate a square matrix where each row and each column has exactly r ones
    initial = Circulant([fill(1, r)..., fill(0, min(d₁, d₂) - r)...])
    
    # Extend that matrix to a d₁×d₂ matrix where each row and each column has at least r ones
    # That is accomplished by repeating the "initial" matrix and then cropping
    if d₁ < d₂
        M = repeat(initial, outer = (1, ceil(Int, d₂ / d₁)))
    elseif d₁ > d₂
        M = repeat(initial, outer = (ceil(Int, d₁ / d₂), 1))
    else
        M = initial
    end
    M = M[1:d₁, 1:d₂]
    
    # Randomly switch zeros to ones until exactly m number of ones are in the matrix
    zero_places = findall(M .== 0)
    number_of_missing_ones = m - (d₁*d₂ - length(zero_places))
    number_of_missing_ones > 0 && (M[shuffle(zero_places)[1:number_of_missing_ones]] .= 1)
    
    # Then randomize matrix by permutating rows and columns a couple times
    for i in 1:10
        M .= M[shuffle(1:end), :] # shuffle rows
        M .= M[:, shuffle(1:end)] # shuffle columns
    end
    
    M
end

generateΦ (generic function with 1 method)

### Generate data

#### That's how Chirstian generated the data to compare algorithms:

In [91]:
d₁, d₂, r = 100, 100, 7
df_LR = r * (d₁ + d₂ - r) # Number of degrees of freedom of the setting
m = floor(Int, min(1.05 * df_LR, d₁ * d₂))

dType = ComplexF64
U, S, V = randn(dType, d₁, r), Diagonal(randn(r)), randn(dType, d₂, r)
Xᴳᵀ = U * S * V' # Ground Truth matrix

@show size(Xᴳᵀ)
@show rank(Xᴳᵀ);

Φᴹ = generateΦ(d₁, d₂, r, m)
Φ = FunctionOperator{dType}(name = "Φ", inDims = (d₁, d₂), outDims = (d₁, d₂),
    forw = (b,x) -> b .= Φᴹ .* x, backw = (b,x) -> b .= x)
y = Φ * Xᴳᵀ
@show rank(y);

size(Xᴳᵀ) = (100, 100)
rank(Xᴳᵀ) = 7
rank(y) = 100


In [94]:
@time HM_IRLS(Xᴳᵀ, y, Φ, N = 10);

k = 0,	rank(Xᵏ) = 100,	‖Xᴳᵀ - Xᵏ‖₂ = 176.8729774834078
k = 1,	rank(Xᵏ) = 100,	‖Xᴳᵀ - Xᵏ‖₂ = 185.564976608905
k = 2,	rank(Xᵏ) = 100,	‖Xᴳᵀ - Xᵏ‖₂ = 632.2462503242906
k = 3,	rank(Xᵏ) = 100,	‖Xᴳᵀ - Xᵏ‖₂ = 1229.4714490421254
k = 4,	rank(Xᵏ) = 100,	‖Xᴳᵀ - Xᵏ‖₂ = 1644.5586340854968
k = 5,	rank(Xᵏ) = 100,	‖Xᴳᵀ - Xᵏ‖₂ = 1826.7171304714955
k = 6,	rank(Xᵏ) = 100,	‖Xᴳᵀ - Xᵏ‖₂ = 2167.988608432281
k = 7,	rank(Xᵏ) = 100,	‖Xᴳᵀ - Xᵏ‖₂ = 2305.884006078355
k = 8,	rank(Xᵏ) = 100,	‖Xᴳᵀ - Xᵏ‖₂ = 3032.853767438843
k = 9,	rank(Xᵏ) = 100,	‖Xᴳᵀ - Xᵏ‖₂ = 3111.2634495222196
k = 10,	rank(Xᵏ) = 100,	‖Xᴳᵀ - Xᵏ‖₂ = 3312.686043584227
  0.485198 seconds (543.03 k allocations: 145.019 MiB, 8.62% gc time)


#### An easy problem:

In [103]:
d = 10
v = rand(d)
Xᴳᵀ = v * v'  # Ground Truth matrix
@show size(Xᴳᵀ)
@show rank(Xᴳᵀ)

# mask that erases 5 elements:
num_of_points_to_erase = 5
Φᴹ = reshape(shuffle!([fill(0, num_of_points_to_erase)...,
            fill(1, d*d - num_of_points_to_erase)...]), d, d)
Φ = FunctionOperator{Float64}(name = "Φ", inDims = (d, d), outDims = (d, d),
    forw = (b,x) -> b .= Φᴹ .* x, backw = (b,x) -> b .= x)

y = Φ * Xᴳᵀ
@show rank(y);

size(Xᴳᵀ) = (10, 10)
rank(Xᴳᵀ) = 1
rank(y) = 4


In [104]:
@time HM_IRLS(Xᴳᵀ, y, Φ, N = 10);

k = 0,	rank(Xᵏ) = 4,	‖Xᴳᵀ - Xᵏ‖₂ = 0.55469945286125
k = 1,	rank(Xᵏ) = 4,	‖Xᴳᵀ - Xᵏ‖₂ = 5.020881565879722
k = 2,	rank(Xᵏ) = 4,	‖Xᴳᵀ - Xᵏ‖₂ = 5.030105996305296
k = 3,	rank(Xᵏ) = 4,	‖Xᴳᵀ - Xᵏ‖₂ = 6.3359699325930965
k = 4,	rank(Xᵏ) = 4,	‖Xᴳᵀ - Xᵏ‖₂ = 77.82130976070576
k = 5,	rank(Xᵏ) = 4,	‖Xᴳᵀ - Xᵏ‖₂ = 133.4250873941241
k = 6,	rank(Xᵏ) = 4,	‖Xᴳᵀ - Xᵏ‖₂ = 183.80576126779502
k = 7,	rank(Xᵏ) = 4,	‖Xᴳᵀ - Xᵏ‖₂ = 254.0177948525734
k = 8,	rank(Xᵏ) = 4,	‖Xᴳᵀ - Xᵏ‖₂ = 293.25797761437633
k = 9,	rank(Xᵏ) = 4,	‖Xᴳᵀ - Xᵏ‖₂ = 354.2763504623122
k = 10,	rank(Xᵏ) = 4,	‖Xᴳᵀ - Xᵏ‖₂ = 413.53799784140466
  0.043236 seconds (34.08 k allocations: 2.203 MiB)
