In [51]:
using LinearAlgebra, ToeplitzMatrices, Random, IterativeSolvers, FunctionOperators,
    EllipsisNotation, Printf

### The algorithm itself

In [85]:
"""
    HM_IRLS(Xᴳᵀ, y, Φ; shape, r̃, maxIter, N, verbose)

Iteratively re-estimate a matrix `Xᵏ` from the under-sampled data `y` and the sampling
operator `Φ`, starting from `Φ' * y`.

Each of the `N` outer iterations
1. takes the SVD of the current iterate and keeps the `r̃` leading singular vectors,
2. updates the smoothing parameter `ϵᵏ = min(ϵᵏ, σ[r̃+1])`,
3. builds the operators `Pᵏ` and `CG_op` from these quantities, runs `cg` on `CG_op`
   (at most `maxIter` steps) and forms the next iterate from the result.

The equation numbers in the comments ((2.57), (2.90), (2.91), (2.167), (2.168), Remark
2.3.2) refer to the text the algorithm was taken from, which is not part of this file.

# Arguments
- `Xᴳᵀ`: ground-truth matrix; used for the default rank estimate and for the diagnostic
  error printed when `verbose` is set.
- `y`: under-sampled data.
- `Φ`: sampling operator.

# Keywords
- `shape`: `(d₁, d₂)` size of the output matrix (default `size(Xᴳᵀ)`).
- `r̃`: rank estimate of the solution; `0` means "use `rank(Xᴳᵀ)`".
- `maxIter`: value passed to `cg` as `maxiter`.
- `N`: number of outer iterations.
- `verbose`: when `true`, print rank, spectral-norm error, largest singular value and
  `ϵᵏ` of the current iterate at the start of each outer iteration.

# Returns
The iterate `Xᵏ` after `N` outer iterations.
"""
function HM_IRLS(
        Xᴳᵀ::AbstractArray,                   # ground truth (rank default + diagnostics)
        y::AbstractArray,                     # under-sampled data
        Φ::FunctionOperator;                  # sampling operator
        shape::NTuple = size(Xᴳᵀ),            # size of output matrix
        r̃::Int = 0,                           # rank estimate of solution
        maxIter::Int = 3,                     # number of CG iteration steps
        N::Int = 10,                          # number of iterations
        verbose::Bool = false)                # print rank and loss value in each iteration

    dType = eltype(y)
    d₁, d₂ = shape
    r̃ == 0 && (r̃ = rank(Xᴳᵀ))

    ϵᵏ = Inf
    Xᵏ = Φ' * y

    for k in 1:N
        # 2. Find best rank-(r̃ + 1) approximation of Xᵏ to obtain
        #    𝒯ᵣ(Xᵏ) = Uᵏ * diag(σᵢᵏ)ᵢ₌₁ʳ * Vᵏ' and σᵣ₊₁ᵏ
        F = svd(Xᵏ)
        Uᵏ, σ, Vᵏ = F.U[:, 1:r̃], F.S, F.V[:, 1:r̃]

        # update smoothing:                                 (2.91)
        ϵᵏ = min(ϵᵏ, σ[r̃+1])

        # Diagnostics need an extra rank computation and an extra SVD (inside norm₂),
        # so they are only evaluated when the caller asked for them.
        if verbose
            r, n, s, e = rank(Xᵏ), norm₂(Xᴳᵀ - Xᵏ), σ[1], ϵᵏ
            n, s, e = @sprintf("%.3f", n), @sprintf("%.3f", s), @sprintf("%.3f", e)
            println("k = $(k-1),\trank(Xᵏ) = $r,\t‖Xᴳᵀ - Xᵏ‖₂ = $n, σ₁ = $s, ϵᵏ = $e")
        end

        # 3. Update Wᵏ as in (2.57), using parameters ϵ = ϵᵏ and p in (2.58) and (2.59),
        #    and the information Uᵏ, Vᵏ and σ₁ᵏ, ..., σᵣ₊₁ᵏ from item 2.
        #    (Lines below are based on Remark 2.3.2, the special case for p = 0)
        #
        #    Wᵏ is never formed explicitly; for reference it would act as
        #        Hᵏ = [1 / (max(σ[i], ϵᵏ) * max(σ[j], ϵᵏ))  for i in 1:r̃+1, j in 1:r̃+1]
        #        Wᵏ : Z -> Uᵏ * (Hᵏ .* (Uᵏ' * Z * Vᵏ)) * Vᵏ'

        # 1. Use a conjugate gradient method to solve linearly constrained quadratic
        #    program  Xᵏ = arg minₓ ⟨X,Wᵏ⁻¹(X)⟩ s.t. Φ(X) = y         (2.90)

        # the upper-left (r × r) block of (d₁ × d₂) Hᵏ matrix
        Hᵏᵤᵥ = [1 / (max(σ[i], ϵᵏ) * max(σ[j], ϵᵏ))  for i in 1:r̃, j in 1:r̃]
        # the first column of Hᵏᵤᵥ⟂
        dHᵏ = reshape([1 / (max(σ[r̃+1], ϵᵏ) * max(σ[j], ϵᵏ))  for j in 1:r̃], :, 1)

        # Pᵏ maps a stacked coefficient vector γ = [vec(γ₁); vec(γ₂); vec(γ₃)] with
        # γ₁ (r̃ × r̃), γ₂ (d₁ × r̃), γ₃ (r̃ × d₂) to a (d₁ × d₂) matrix.
        Pᵏ = FunctionOperator{dType}(name="Pᵏ", inDims = (r̃*(r̃+d₁+d₂),), outDims = (d₁, d₂),
            forw = γ -> begin
                    γ₁ = reshape(γ[1:r̃^2], r̃, r̃)
                    γ₂ = reshape(γ[r̃^2+1:r̃*(r̃+d₁)], d₁, r̃)
                    γ₃ = reshape(γ[r̃*(r̃+d₁)+1:r̃*(r̃+d₁+d₂)], r̃, d₂)
                    (Uᵏ * γ₁ + γ₂) * Vᵏ' + Uᵏ * γ₃
                end,
            backw = Φᵃy -> begin
                    γ₁ = Uᵏ' * Φᵃy * Vᵏ
                    γ₂ = (I - Uᵏ*Uᵏ') * Φᵃy * Vᵏ
                    γ₃ = Uᵏ' * Φᵃy * (I - Vᵏ*Vᵏ')
                    vcat(vec(γ₁), vec(γ₂), vec(γ₃)) # is it ok to transpose γ₂ ??
                end)
        b = Pᵏ' * Φ' * y
        # Diagonal weights laid out in the same order as the stacked vector γ:
        # r̃² entries for γ₁, then d₁·r̃ entries for γ₂, then r̃·d₂ entries for γ₃.
        𝒟⁻¹ = Diagonal(vcat(vec(Hᵏᵤᵥ), vec(kron(dHᵏ, ones(1, d₁))'), vec(kron(dHᵏ, ones(1, d₂)))))
        CG_op = FunctionOperator{dType}(name = "CG_op", inDims = (r̃*(r̃+d₁+d₂),), outDims = (r̃*(r̃+d₁+d₂),),
            forw = γ ->  begin
                    (ϵᵏ^2 * I / (𝒟⁻¹ - ϵᵏ^2 * I)) * γ + Pᵏ' * Φ' * Φ * Pᵏ * γ
                end)
        γᵏ = cg(CG_op, b, maxiter = maxIter) # 2.167
        rᵏ = y - Φ * Pᵏ * γᵏ
        γᵏ_tilde = (𝒟⁻¹ / (𝒟⁻¹ - ϵᵏ^2 * I)) * γᵏ - Pᵏ' * Φ' * rᵏ
        Xᵏ = Φ' * rᵏ + Pᵏ * γᵏ_tilde   # 2.168
    end

    return Xᵏ
end

HM_IRLS (generic function with 1 method)

### Some helper functions

In [74]:
import Base.size
# Size of a FunctionOperator along dimension `d`, viewed as a matrix:
# d == 1 gives the total number of output elements (product of `outDims`),
# d == 2 gives the total number of input elements (product of `inDims`).
# NOTE(review): presumably required so the operator can be handed to `cg` — confirm.
function Base.size(FO::FunctionOperator, d::Int)
    @assert d in [1, 2]
    if d == 1
        return prod(FO.outDims)
    end
    return prod(FO.inDims)
end

In [75]:
# Spectral norm of a matrix, i.e. its largest singular value.
# Defined as a named function (rather than an anonymous function bound to a non-const
# global) so that calls from inside other functions are not untyped global lookups;
# `opnorm(A, 2)` returns the same value as `svdvals(A)[1]` and yields 0 for an empty matrix
# instead of throwing a BoundsError.
norm₂(A) = opnorm(A, 2)

#310 (generic function with 1 method)

In [76]:
"""
    generateΦ(d₁, d₂, r, m; rng = Random.default_rng())

Randomly sample a `d₁ × d₂` matrix of zeros and ones that contains exactly `m` ones and
has at least `r` ones in every row and every column.

# Arguments
- `d₁`, `d₂`: number of rows and columns.
- `r`: minimum number of ones per row and per column (`0 ≤ r ≤ min(d₁, d₂)`).
- `m`: total number of ones (`max(d₁, d₂) * r ≤ m ≤ d₁ * d₂`).

# Keywords
- `rng`: random number generator used for all random choices; pass a seeded generator
  for reproducible masks.

# Returns
A `Matrix{Int}` with entries in `{0, 1}`.

# Throws
- `ArgumentError` if the constraints on `r` and `m` listed above are violated.
"""
function generateΦ(d₁, d₂, r, m; rng::AbstractRNG = Random.default_rng())
    # Explicit checks instead of `@assert`: assertions may be disabled, and these are
    # conditions on caller-supplied input.
    0 ≤ r ||
        throw(ArgumentError("r must be non-negative, got r = $r"))
    r ≤ d₁ ||
        throw(ArgumentError("r must not exceed d₁, got r = $r, d₁ = $d₁"))
    r ≤ d₂ ||
        throw(ArgumentError("r must not exceed d₂, got r = $r, d₂ = $d₂"))
    max(d₁, d₂) * r ≤ m ||
        throw(ArgumentError("m must be at least max(d₁, d₂) * r, got m = $m"))
    m ≤ d₁ * d₂ ||
        throw(ArgumentError("m must not exceed d₁ * d₂, got m = $m"))

    # Start from a circulant band pattern tiled to d₁ × d₂: with n = min(d₁, d₂), entry
    # (i, j) is one iff (i - j) mod n < r. The n × n pattern has exactly r ones in each
    # row and column, so the tiled-and-cropped matrix has at least r ones in each.
    n = min(d₁, d₂)
    M = [mod(i - j, n) < r ? 1 : 0 for i in 1:d₁, j in 1:d₂]

    # Randomly switch zeros to ones until exactly m ones are in the matrix
    zero_places = findall(iszero, M)
    number_of_missing_ones = m - (length(M) - length(zero_places))
    if number_of_missing_ones > 0
        M[shuffle(rng, zero_places)[1:number_of_missing_ones]] .= 1
    end

    # Randomize by permuting rows and columns. One uniformly random permutation of each
    # is enough: composing several uniform permutations is again a uniform permutation.
    return M[shuffle(rng, 1:d₁), shuffle(rng, 1:d₂)]
end

generateΦ (generic function with 1 method)

In [77]:
"""
    maskToMatrix(Φᴹ)

Convert a 0/1 sampling mask `Φᴹ` into the explicit selection matrix `Φ` acting on
`vec(X)`: row `i` of `Φ` contains a single one, in the column given by the linear
(column-major) index of the `i`-th entry of `Φᴹ` that equals one.

# Returns
A `Float64` matrix of size `(number of ones in Φᴹ, length(Φᴹ))`.
"""
function maskToMatrix(Φᴹ)
    non_zero_places = findall(isone, vec(Φᴹ))

    # One row per sampled entry; counting the ones directly keeps the row count and the
    # index list consistent.
    Φ = zeros(length(non_zero_places), length(Φᴹ))
    for (i, j) in enumerate(non_zero_places)
        Φ[i, j] = 1
    end

    # The original ended on the `for` loop and therefore returned `nothing`.
    return Φ
end

maskToMatrix (generic function with 1 method)

### Generate data

#### That's how Christian generated the data to compare algorithms:

In [78]:
# Problem size (d₁ × d₂) and rank r of the ground-truth matrix
d₁, d₂, r = 100, 100, 7
df_LR = r * (d₁ + d₂ - r) # Number of degrees of freedom of the setting
# Number of sampled entries: 5% more than the degrees of freedom, capped at d₁ * d₂
m = floor(Int, min(1.05 * df_LR, d₁ * d₂))

dType = ComplexF64
# Random factors of a rank-r matrix (complex U and V, real diagonal S)
U, S, V = randn(dType, d₁, r), Diagonal(randn(r)), randn(dType, d₂, r)
Xᴳᵀ = U * S * V' # Ground Truth matrix

@show size(Xᴳᵀ)
@show rank(Xᴳᵀ);

# Sampling mask and the corresponding entry-wise masking operator.
# The adjoint of multiplying entry-wise by a real 0/1 mask is the same masking, so
# `backw` applies the mask too (an identity `backw` only agrees with the adjoint on
# inputs that are already masked, such as `y`).
Φᴹ = generateΦ(d₁, d₂, r, m)
Φ = FunctionOperator{dType}(name = "Φ", inDims = (d₁, d₂), outDims = (d₁, d₂),
    forw = (b,x) -> b .= Φᴹ .* x, backw = (b,x) -> b .= Φᴹ .* x)
y = Φ * Xᴳᵀ
@show rank(y);

size(Xᴳᵀ) = (100, 100)
rank(Xᴳᵀ) = 7
rank(y) = 100


In [79]:
# Sanity check: applying the operator Φ equals entry-wise multiplication by the mask Φᴹ
(Φ * Xᴳᵀ) == (Φᴹ .* Xᴳᵀ)

true

In [87]:
# Request the per-iteration diagnostics explicitly through the `verbose` keyword
@time HM_IRLS(Xᴳᵀ, y, Φ, maxIter = 100, N = 20, verbose = true);

k = 0,	rank(Xᵏ) = 100,	‖Xᴳᵀ - Xᵏ‖₂ = 113.320, σ₁ = 22.528, ϵᵏ = 16.717
k = 1,	rank(Xᵏ) = 100,	‖Xᴳᵀ - Xᵏ‖₂ = 129.023, σ₁ = 70.068, ϵᵏ = 16.717
k = 2,	rank(Xᵏ) = 100,	‖Xᴳᵀ - Xᵏ‖₂ = 109.729, σ₁ = 30.946, ϵᵏ = 16.717
k = 3,	rank(Xᵏ) = 100,	‖Xᴳᵀ - Xᵏ‖₂ = 129.067, σ₁ = 61.219, ϵᵏ = 16.717
k = 4,	rank(Xᵏ) = 100,	‖Xᴳᵀ - Xᵏ‖₂ = 109.693, σ₁ = 30.558, ϵᵏ = 16.717
k = 5,	rank(Xᵏ) = 100,	‖Xᴳᵀ - Xᵏ‖₂ = 129.116, σ₁ = 60.795, ϵᵏ = 16.717
k = 6,	rank(Xᵏ) = 100,	‖Xᴳᵀ - Xᵏ‖₂ = 109.743, σ₁ = 30.543, ϵᵏ = 16.717
k = 7,	rank(Xᵏ) = 100,	‖Xᴳᵀ - Xᵏ‖₂ = 129.124, σ₁ = 60.836, ϵᵏ = 16.717
k = 8,	rank(Xᵏ) = 100,	‖Xᴳᵀ - Xᵏ‖₂ = 109.812, σ₁ = 30.599, ϵᵏ = 16.717
k = 9,	rank(Xᵏ) = 100,	‖Xᴳᵀ - Xᵏ‖₂ = 129.112, σ₁ = 60.988, ϵᵏ = 16.717
k = 10,	rank(Xᵏ) = 100,	‖Xᴳᵀ - Xᵏ‖₂ = 109.836, σ₁ = 30.741, ϵᵏ = 16.717
k = 11,	rank(Xᵏ) = 100,	‖Xᴳᵀ - Xᵏ‖₂ = 129.108, σ₁ = 60.991, ϵᵏ = 16.717
k = 12,	rank(Xᵏ) = 100,	‖Xᴳᵀ - Xᵏ‖₂ = 109.839, σ₁ = 30.792, ϵᵏ = 16.717
k = 13,	rank(Xᵏ) = 100,	‖Xᴳᵀ - Xᵏ‖₂ = 129.106, σ₁ = 60.978, ϵᵏ = 16.717
k 

#### An easy problem:

In [69]:
d = 10
v = rand(d)
Xᴳᵀ = v * v'  # Ground Truth matrix (outer product of v with itself)
@show size(Xᴳᵀ)
@show rank(Xᴳᵀ)

# mask that erases 5 elements:
num_of_points_to_erase = 5
Φᴹ = reshape(shuffle!(vcat(fill(0, num_of_points_to_erase),
            fill(1, d*d - num_of_points_to_erase))), d, d)
# The adjoint of multiplying entry-wise by a real 0/1 mask is the same masking, so
# `backw` applies the mask too (an identity `backw` only agrees with the adjoint on
# inputs that are already masked, such as `y`).
Φ = FunctionOperator{Float64}(name = "Φ", inDims = (d, d), outDims = (d, d),
    forw = (b,x) -> b .= Φᴹ .* x, backw = (b,x) -> b .= Φᴹ .* x)

y = Φ * Xᴳᵀ
@show rank(y);

size(Xᴳᵀ) = (10, 10)
rank(Xᴳᵀ) = 1
rank(y) = 5


In [71]:
# Request the per-iteration diagnostics explicitly through the `verbose` keyword
@time HM_IRLS(Xᴳᵀ, y, Φ, maxIter = 100, N = 10, verbose = true);

k = 0,	rank(Xᵏ) = 5,	‖Xᴳᵀ - Xᵏ‖₂ = 0.553, σ₁ = 2.966, ϵᵏ = 0.459
k = 1,	rank(Xᵏ) = 5,	‖Xᴳᵀ - Xᵏ‖₂ = 0.944, σ₁ = 2.891, ϵᵏ = 0.459
k = 2,	rank(Xᵏ) = 5,	‖Xᴳᵀ - Xᵏ‖₂ = 0.743, σ₁ = 2.921, ϵᵏ = 0.459
k = 3,	rank(Xᵏ) = 5,	‖Xᴳᵀ - Xᵏ‖₂ = 0.830, σ₁ = 2.908, ϵᵏ = 0.459
k = 4,	rank(Xᵏ) = 5,	‖Xᴳᵀ - Xᵏ‖₂ = 0.789, σ₁ = 2.914, ϵᵏ = 0.459
k = 5,	rank(Xᵏ) = 5,	‖Xᴳᵀ - Xᵏ‖₂ = 0.808, σ₁ = 2.912, ϵᵏ = 0.459
k = 6,	rank(Xᵏ) = 5,	‖Xᴳᵀ - Xᵏ‖₂ = 0.799, σ₁ = 2.913, ϵᵏ = 0.459
k = 7,	rank(Xᵏ) = 5,	‖Xᴳᵀ - Xᵏ‖₂ = 0.803, σ₁ = 2.912, ϵᵏ = 0.459
k = 8,	rank(Xᵏ) = 5,	‖Xᴳᵀ - Xᵏ‖₂ = 0.801, σ₁ = 2.912, ϵᵏ = 0.459
k = 9,	rank(Xᵏ) = 5,	‖Xᴳᵀ - Xᵏ‖₂ = 0.802, σ₁ = 2.912, ϵᵏ = 0.459
  0.023148 seconds (47.02 k allocations: 3.222 MiB)
