In [1]:
using LinearAlgebra, ToeplitzMatrices, Random, IterativeSolvers, FunctionOperators,
    EllipsisNotation, Printf
include("helper_functions.jl")

_svd! (generic function with 1 method)

### The algorithm itself

In [16]:
"""
    HM_IRLS(Xᴳᵀ, y, Φ; shape = size(Xᴳᵀ), r̃ = 0, maxIter = 3, N = 10, verbose = false)

Run `N` outer reweighting iterations starting from `Xᵏ = Φ' * y` and return the final
iterate `Xᵏ`.

Each outer iteration
1. takes an SVD of the current iterate and keeps the leading `r̃` singular vectors,
2. shrinks the smoothing parameter `ϵᵏ` to `min(ϵᵏ, σ[r̃+1])`,
3. builds the weight entries from `σ` and `ϵᵏ`,
4. runs `cg` (at most `maxIter` steps, zero initial guess) on the reduced system and
   assembles the next iterate from the CG solution.

Equation numbers in the comments, e.g. (2.90), refer to the text the original author
followed; that text is not part of this file.

# Arguments
- `Xᴳᵀ`: ground truth; supplies the default `shape`, the default rank estimate and the
  error printed in the progress lines.
- `y`: under-sampled data.
- `Φ`: sampling operator.

# Keywords
- `shape`: `(d₁, d₂)`, used to size the operators.
- `r̃`: rank estimate; `0` means "use `rank(Xᴳᵀ)`".
- `maxIter`: maximum number of CG steps per outer iteration.
- `N`: number of outer iterations.
- `verbose`: NOTE(review): currently has no effect; a progress line is printed on every
  iteration regardless. Left unchanged because existing notebook cells rely on the
  output while passing the default.

# Throws
- `ArgumentError` if `r̃` is not strictly smaller than the number of singular values of
  `Φ' * y` (the loop reads `σ[r̃+1]`).
"""
function HM_IRLS(
        Xᴳᵀ::AbstractArray,                   # ground truth for MSE evaluation
        y::AbstractArray,                     # under-sampled data
        Φ::FunctionOperator;                  # sampling operator
        shape::NTuple = size(Xᴳᵀ),            # size of output matrix
        r̃::Int = 0,                           # rank estimate of solution
        maxIter::Int = 3,                     # number of CG iteration steps
        N::Int = 10,                          # number of iterations
        verbose::Bool = false)                # print rank and loss value in each iteration
    
    dType = eltype(y)
    d₁, d₂ = shape
    r̃ == 0 && (r̃ = rank(Xᴳᵀ))
    
    ϵᵏ = Inf
    Xᵏ = Φ' * y
    
    # σ[r̃+1] is needed for the smoothing update, so r̃+1 singular values must exist.
    r̃ < minimum(size(Xᵏ)) || throw(ArgumentError(
        "rank estimate r̃ = $r̃ must be smaller than min(size(Φ' * y)) = $(minimum(size(Xᵏ)))"))
    
    for k in 1:N
        # 2. Find best rank-(r̃ + 1) approximation of Xᵏ to obtain
        #        𝒯ᵣ(Xᵏ) = Uᵏ * diag(σᵢᵏ)ᵢ₌₁ʳ * Vᵏ' and σᵣ₊₁ᵏ
        F = svd(Xᵏ)
        Uᵏ, σ, Vᵏ = F.U[:, 1:r̃], F.S, F.V[:, 1:r̃]
        
        # update smoothing:                                 (2.91)
        ϵᵏ = min(ϵᵏ, σ[r̃+1])
        
        # Progress line; σ already holds the singular values of Xᵏ, so no second SVD.
        r, n, s, e = count(>(1e-3), σ), opnorm(Xᴳᵀ - Xᵏ, 2), σ[1], ϵᵏ
        n, s, e = @sprintf("%.3f", n), @sprintf("%.3f", s), @sprintf("%.3f", e)
        println("k = $(k-1),\trank(Xᵏ) = $r,\t‖Xᴳᵀ - Xᵏ‖₂ = $n, σ₁ = $s, ϵᵏ = $e")
        
        # 3. Update Wᵏ as in (2.57), using parameters ϵ = ϵᵏ and p in (2.58) and (2.59),
        #    and the information Uᵏ, Vᵏ and σ₁ᵏ, ..., σᵣ₊₁ᵏ from item 2.
        #    (Lines below are based on Remark 2.3.2, the special case for p = 0.)
        #    Wᵏ is never formed explicitly; only the entries of Hᵏ that the reduced
        #    system needs are computed.
        
        # 1. Use a conjugate gradient method to solve linearly constrained quadratic program
        #      Xᵏ = arg minₓ ⟨X,Wᵏ⁻¹(X)⟩ s.t. Φ(X) = y         (2.90)
        
        # the upper-left (r × r) block of (d₁ × d₂) Hᵏ matrix:
        Hᵏᵤᵥ = [1 / (max(σ[i], ϵᵏ) * max(σ[j], ϵᵏ))  for i in 1:r̃, j in 1:r̃]
        # the first column of Hᵏᵤᵥ⟂:
        dHᵏ = reshape([1 / (max(σ[r̃+1], ϵᵏ) * max(σ[j], ϵᵏ))  for j in 1:r̃], :, 1)
        # Pᵏ maps the stacked coefficient vector (γ₁: r̃×r̃, γ₂: d₁×r̃, γ₃: r̃×d₂) to a
        # d₁×d₂ matrix; its backward map produces the three blocks from a matrix.
        Pᵏ = FunctionOperator{dType}(name="Pᵏ", inDims = (r̃*(r̃+d₁+d₂),), outDims = (d₁, d₂),
            forw = γ -> begin
                    γ₁ = reshape(γ[1:r̃^2], r̃, r̃)
                    γ₂ = reshape(γ[r̃^2+1:r̃*(r̃+d₁)], d₁, r̃)
                    γ₃ = reshape(γ[r̃*(r̃+d₁)+1:r̃*(r̃+d₁+d₂)], r̃, d₂)
                    # According to (2.169), the equation would be:
                    # Uᵏ * γ₁ * Vᵏ' + Uᵏ * γ₂' * (I - Vᵏ*Vᵏ') + (I - Uᵏ*Uᵏ') * γ₃' * Vᵏ'
                    # But as the columns of γ₃ are orthogonal to the ones in Uᵏ,
                    # the rows of γ₂ are orthogonal to the columns of Vᵏ,
                    # the expression can be simplified:
                    (Uᵏ * γ₁ + γ₂) * Vᵏ' + Uᵏ * γ₃
                end,
            backw = Φᵃy -> begin
                    γ₁ = Uᵏ' * Φᵃy * Vᵏ
                    γ₂ = (I - Uᵏ*Uᵏ') * Φᵃy * Vᵏ
                    γ₃ = Uᵏ' * Φᵃy * (I - Vᵏ*Vᵏ')
                    vcat(vec(γ₁), vec(γ₂), vec(γ₃))
                end)
        b = Pᵏ' * Φ' * y
        # Diagonal of reciprocal weights, ordered like the blocks (γ₁, γ₂, γ₃) of γ.
        𝒟⁻¹ = I / Diagonal(vcat(vec(Hᵏᵤᵥ), vec(kron(dHᵏ, ones(1, d₁))'), vec(kron(dHᵏ, ones(1, d₂)))))
        # Diagonal part of the CG system matrix. It does not depend on γ, so it is built
        # once per outer iteration instead of on every operator application.
        𝒟_cg = ϵᵏ^2 * I / (𝒟⁻¹ - ϵᵏ^2 * I)
        CG_op = FunctionOperator{dType}(name = "CG_op", inDims = (r̃*(r̃+d₁+d₂),), outDims = (r̃*(r̃+d₁+d₂),),
            forw = γ ->  begin
                    𝒟_cg * γ + Pᵏ' * Φ' * Φ * Pᵏ * γ
                end)
        γᵏ = cg(CG_op, b, maxiter = maxIter) # 2.167
        rᵏ = y - Φ * Pᵏ * γᵏ
        γᵏ_tilde = (𝒟⁻¹ / (𝒟⁻¹ - ϵᵏ^2 * I)) * γᵏ - Pᵏ' * Φ' * rᵏ
        Xᵏ = Φ' * rᵏ + Pᵏ * γᵏ_tilde   # 2.168
    end
    
    # Final report: use the spectrum of the returned iterate for both the rank and σ₁
    # (the in-loop σ belongs to the iterate *before* the last update, and does not
    # exist at all when N == 0).
    σ_final = svdvals(Xᵏ)
    r, n, s, e = count(>(1e-3), σ_final), opnorm(Xᴳᵀ - Xᵏ, 2), σ_final[1], ϵᵏ
    n, s, e = @sprintf("%.3f", n), @sprintf("%.3f", s), @sprintf("%.3f", e)
    println("k = $N,\trank(Xᵏ) = $r,\t‖Xᴳᵀ - Xᵏ‖₂ = $n, σ₁ = $s, ϵᵏ = $e")
    
    Xᵏ
end

HM_IRLS (generic function with 1 method)

In [3]:
# Overwrite every entry of the matrix `H` in place with
#     H[i, j] = 1 / (max(σ[i], ϵᵏ) * max(σ[j], ϵᵏ)).
# Singular values below the smoothing level ϵᵏ are clamped to ϵᵏ before inverting.
# Returns `nothing`.
function update_H!(H, σ, ϵᵏ)
    # Column index in the outer loop so memory is walked in column-major order.
    for col in axes(H, 2), row in axes(H, 1)
        row_level = max(σ[row], ϵᵏ)
        col_level = max(σ[col], ϵᵏ)
        H[row, col] = 1 / (row_level * col_level)
    end
end

# Overwrite `dH` in place with
#     dH[j] = 1 / (max(σ[r̃+1], ϵᵏ) * max(σ[j], ϵᵏ))
# for every linear index j of `dH`. Returns `nothing`.
function update_dH!(dH, σ, ϵᵏ, r̃)
    foreach(eachindex(dH)) do j
        tail_level = max(σ[r̃+1], ϵᵏ)   # clamped (r̃+1)-th singular value
        head_level = max(σ[j], ϵᵏ)     # clamped j-th singular value
        dH[j] = 1 / (tail_level * head_level)
    end
end

# Fill the vector `𝒟⁻¹` (length r̃*(r̃+d₁+d₂)) in place and return it.
# Layout before inversion:
#   1. first r̃*r̃ entries      – `vec(H)`
#   2. next  r̃*d₁ entries     – the r̃×d₁ expansion of `dH`, transposed, then flattened
#   3. last  r̃*d₂ entries     – the r̃×d₂ expansion of `dH`, flattened
# Finally every entry is replaced by its reciprocal.
function update_𝒟⁻¹!(𝒟⁻¹, H, dH, r̃, d₁, d₂)
    stop₁ = r̃ * r̃
    stop₂ = r̃ * (r̃ + d₁)
    stop₃ = r̃ * (r̃ + d₁ + d₂)

    tall_block = kron(dH, ones(1, d₁))'   # d₁ × r̃
    wide_block = kron(dH, ones(1, d₂))    # r̃ × d₂

    𝒟⁻¹[1:stop₁] .= vec(H)
    𝒟⁻¹[stop₁+1:stop₂] .= vec(tall_block)
    𝒟⁻¹[stop₂+1:stop₃] .= vec(wide_block)

    @. 𝒟⁻¹ = 1 / 𝒟⁻¹
    return 𝒟⁻¹
end

update_𝒟⁻¹! (generic function with 1 method)

In [4]:
# Build the in-place operator Pᵏ that maps a stacked coefficient vector γ of length
# r̃*(r̃+d₁+d₂) to a d₁×d₂ matrix (forward) and back (backward).
#
# Arguments
#   Uᵏ, Vᵏ, Vtᵏ  – d₁×r̃, d₂×r̃ and r̃×d₂ arrays; they are captured by reference, so
#                  the caller may overwrite them in place between applications.
#   tempᵈ¹ˣᵈ²    – d₁×d₂ scratch buffer, overwritten by every forward application.
#   r̃, d₁, d₂    – block sizes.
#
# Layout of γ (see `split` below): γ₁ (r̃×r̃) first, then γ₃ (d₁×r̃), then γ₂ (r̃×d₂).
#
# The element type of the operator and of the internal scratch buffers is taken from
# `tempᵈ¹ˣᵈ²` rather than from a global `dType`, so the function does not depend on
# notebook state.
function get_P_operator(Uᵏ, Vᵏ, Vtᵏ, tempᵈ¹ˣᵈ², r̃, d₁, d₂)
    
    dType = eltype(tempᵈ¹ˣᵈ²)
    tempᵈ¹ˣʳ, tempʳˣᵈ² = Array{dType}(undef, d₁, r̃), Array{dType}(undef, r̃, d₂)
    
    # Views (no copies) onto the three blocks of γ.
    split(γ) = @views begin
        γ₁ = reshape(γ[1:r̃^2], r̃, r̃)
        γ₂ = reshape(γ[r̃*(r̃+d₁)+1:r̃*(r̃+d₁+d₂)], r̃, d₂)
        γ₃ = reshape(γ[r̃^2+1:r̃*(r̃+d₁)], d₁, r̃)
        γ₁, γ₂, γ₃
    end
    
    # Buffers for the projectors I - Vᵏ*Vᵏ' and I - Uᵏ*Uᵏ'; refilled on every backward
    # application because Uᵏ and Vᵏ may have been updated in place by the caller.
    I_VV, I_UU = Array{dType}(undef, d₂, d₂), Array{dType}(undef, d₁, d₁)
    Iᵈ¹ˣᵈ¹, Iᵈ²ˣᵈ² = Diagonal(ones(d₁)), Diagonal(ones(d₂))
    
    Pᵏ = FunctionOperator{dType}(name="Pᵏ", inDims = (r̃*(r̃+d₁+d₂),), outDims = (d₁, d₂),
        forw = (b,γ) -> begin
                γ₁, γ₂, γ₃ = split(γ)
                # According to (2.169), the equation would be:
                # Uᵏ * γ₁ * Vᵏ' + Uᵏ * γ₂' * (I - Vᵏ*Vᵏ') + (I - Uᵏ*Uᵏ') * γ₃' * Vᵏ'
                # But as the columns of γ₂ are orthogonal to the ones in Uᵏ,
                # the rows of γ₃ are orthogonal to the columns of Vᵏ,
                # the expression can be simplified:
                # (Uᵏ * γ₁ + γ₃) * Vᵏ' + Uᵏ * γ₂
                # And this is implemented avoiding array re-allocations:
                mul!(tempᵈ¹ˣʳ, Uᵏ, γ₁)
                tempᵈ¹ˣʳ .+= γ₃
                mul!(b, tempᵈ¹ˣʳ, Vtᵏ)
                mul!(tempᵈ¹ˣᵈ², Uᵏ, γ₂)
                b .+= tempᵈ¹ˣᵈ²
            end,
        backw = (γ,Φᵃy) -> begin
                γ₁, γ₂, γ₃ = split(γ)
                # Things to do:
                # γ₁ .= Uᵏ' * Φᵃy * Vᵏ
                # γ₂ .= Uᵏ' * Φᵃy * (I - Vᵏ*Vᵏ')
                # γ₃ .= (I - Uᵏ*Uᵏ') * Φᵃy * Vᵏ
                # Efficient implementation:
                I_VV .= Iᵈ²ˣᵈ² .- mul!(I_VV, Vᵏ, Vtᵏ) # same as I - Vᵏ*Vtᵏ
                I_UU .= Iᵈ¹ˣᵈ¹ .- mul!(I_UU, Uᵏ, Uᵏ') # same as I - Uᵏ*Uᵏ'
                mul!(tempᵈ¹ˣʳ, Φᵃy, Vᵏ)
                mul!(γ₁, Uᵏ', tempᵈ¹ˣʳ)
                mul!(γ₃, I_UU, tempᵈ¹ˣʳ)
                mul!(tempʳˣᵈ², Uᵏ', Φᵃy)
                mul!(γ₂, tempʳˣᵈ², I_VV)
                # γ₁, γ₂, γ₃ are views into γ, so γ now holds the stacked result.
                γ
            end)
    
    Pᵏ
end

get_P_operator (generic function with 1 method)

In [5]:
# Build the in-place operator applied inside CG:
#     b .= Diagonal(𝒟_weighting) * γ + PᵃΦᵃΦP * γ
#
# Arguments
#   PᵃΦᵃΦP          – operator supporting `mul!(out, PᵃΦᵃΦP, γ)`.
#   𝒟_weighting     – vector of diagonal weights; captured by reference, so the caller
#                     can refresh its contents in place between solves.
#   tempʳ⁽ʳ⁺ᵈ¹⁺ᵈ²⁾  – scratch vector of length r̃*(r̃+d₁+d₂), overwritten on every call.
#   r̃, d₁, d₂       – block sizes fixing the operator dimension.
#
# The operator's element type is taken from the scratch vector instead of a global
# `dType`, so the function does not depend on notebook state.
function get_CG_operator(PᵃΦᵃΦP, 𝒟_weighting, tempʳ⁽ʳ⁺ᵈ¹⁺ᵈ²⁾, r̃, d₁, d₂)
    dType = eltype(tempʳ⁽ʳ⁺ᵈ¹⁺ᵈ²⁾)
    FunctionOperator{dType}(name = "CG_op", inDims = (r̃*(r̃+d₁+d₂),), outDims = (r̃*(r̃+d₁+d₂),),
        forw = (b,γ) ->  begin
            # An efficient implementation for:
            # b .= (ϵᵏ^2 * I / (𝒟⁻¹ - ϵᵏ^2 * I)) * γ + Pᵏ' * Φ' * Φ * Pᵏ * γ
            mul!(tempʳ⁽ʳ⁺ᵈ¹⁺ᵈ²⁾, PᵃΦᵃΦP, γ)
            mul!(b, Diagonal(𝒟_weighting), γ)
            b .+= tempʳ⁽ʳ⁺ᵈ¹⁺ᵈ²⁾
        end)
end

get_CG_operator (generic function with 1 method)

In [6]:
"""
    HM_IRLS_optimized(Xᴳᵀ, y, Φ; shape = size(Xᴳᵀ), r̃ = 0, maxIter = nothing, N = 10,
                      verbose = false)

Preallocating variant of `HM_IRLS`: buffers and operators are created once, and each of
the `N` outer iterations updates them in place. Returns the final iterate `Xᵏ`.

Visible differences from `HM_IRLS`: CG is started from `Pᵏ' * Xᵏ` instead of from zero,
and `maxIter` defaults to the size of the CG system, `r̃*(r̃+d₁+d₂)`.

# Arguments
- `Xᴳᵀ`: ground truth; supplies the default `shape`, the default rank estimate and the
  error printed in the progress lines.
- `y`: under-sampled data.
- `Φ`: sampling operator.

# Keywords
- `shape`: `(d₁, d₂)`, used to size buffers and operators.
- `r̃`: rank estimate; `0` means "use `rank(Xᴳᵀ)`".
- `maxIter`: maximum number of CG steps; `nothing` selects `r̃*(r̃+d₁+d₂)`.
- `N`: number of outer iterations.
- `verbose`: NOTE(review): currently has no effect; a progress line is printed on every
  iteration regardless. Left unchanged because existing notebook cells rely on the
  output while passing the default.

# Throws
- `ArgumentError` if `r̃` is not strictly smaller than the number of singular values of
  `Φ' * y` (the loop reads `σ[r̃+1]`).
"""
function HM_IRLS_optimized(
        Xᴳᵀ::AbstractArray,                   # ground truth for MSE evaluation
        y::AbstractArray,                     # under-sampled data
        Φ::FunctionOperator;                  # sampling operator
        shape::NTuple = size(Xᴳᵀ),            # size of output matrix
        r̃::Int = 0,                           # rank estimate of solution
        maxIter::Union{Int, Nothing} = nothing, # number of CG iteration steps
        N::Int = 10,                          # number of iterations
        verbose::Bool = false)                # print rank and loss value in each iteration
    
    # Initialize variables
    dType = eltype(y)
    d₁, d₂ = shape
    r̃ == 0 && (r̃ = rank(Xᴳᵀ))
    maxIter = maxIter isa Nothing ? r̃*(r̃+d₁+d₂) : maxIter
    ϵᵏ = Inf
    Xᵏ = Φ' * y
    
    # σ[r̃+1] is needed for the smoothing update, so r̃+1 singular values must exist.
    r̃ < minimum(size(Xᵏ)) || throw(ArgumentError(
        "rank estimate r̃ = $r̃ must be smaller than min(size(Φ' * y)) = $(minimum(size(Xᵏ)))"))
    
    # Preallocate arrays
    F = svd(Xᵏ)
    Uᵏ, σ, Vᵏ, Vtᵏ = F.U[:, 1:r̃], F.S, F.V[:, 1:r̃], F.Vt[1:r̃, :]
    Hᵏᵤᵥ = Array{dType}(undef, r̃, r̃)
    dHᵏ = Array{dType}(undef, r̃, 1)
    𝒟⁻¹, 𝒟_weighting, b, γᵏ, tempʳ⁽ʳ⁺ᵈ¹⁺ᵈ²⁾ = [Vector{dType}(undef, r̃*(r̃+d₁+d₂)) for _ in 1:5]
    tempᵈ¹ˣᵈ² = Array{dType}(undef, d₁, d₂)
    rᵏ, γᵏ_tilde = similar(y), similar(γᵏ)
    # Work vectors handed to cg! so it does not allocate its own on every call.
    statevars = IterativeSolvers.CGStateVariables(similar(γᵏ), similar(γᵏ), similar(γᵏ))
    
    # Create operators (they capture Uᵏ, Vᵏ, Vtᵏ and 𝒟_weighting by reference, so the
    # in-place updates inside the loop are seen by the operators)
    Pᵏ= get_P_operator(Uᵏ, Vᵏ, Vtᵏ, tempᵈ¹ˣᵈ², r̃, d₁, d₂)
    PᵃΦᵃΦP = Pᵏ' * Φ' * Φ * Pᵏ
    ΦP, PᵃΦᵃ = Φ * Pᵏ, Pᵏ' * Φ'
    CG_op = get_CG_operator(PᵃΦᵃΦP, 𝒟_weighting, tempʳ⁽ʳ⁺ᵈ¹⁺ᵈ²⁾, r̃, d₁, d₂)
    
    for k in 1:N
        
        # NOTE(review): this two-argument `svd!` is not defined in this file; presumably
        # it comes from helper_functions.jl and refreshes F (and therefore σ, which
        # aliases F.S) in place — confirm.
        svd!(tempᵈ¹ˣᵈ² .= Xᵏ, F)
        @views begin Uᵏ .= F.U[:, 1:r̃]; Vᵏ .=  F.V[:, 1:r̃]; Vtᵏ .= F.Vt[1:r̃, :]; end
        
        ϵᵏ = min(ϵᵏ, σ[r̃+1])
        
        r, n, s, e = sum(σ .> 1e-3), opnorm(tempᵈ¹ˣᵈ² .= Xᴳᵀ .- Xᵏ, 2), σ[1], ϵᵏ
        n, s, e = @sprintf("%.3f", n), @sprintf("%.3f", s), @sprintf("%.3f", e)
        println("k = $(k-1),\trank(Xᵏ) = $r,\t‖Xᴳᵀ - Xᵏ‖₂ = $n, σ₁ = $s, ϵᵏ = $e")
        
        update_H!(Hᵏᵤᵥ, σ, ϵᵏ)
        update_dH!(dHᵏ, σ, ϵᵏ, r̃)
        update_𝒟⁻¹!(𝒟⁻¹, Hᵏᵤᵥ, dHᵏ, r̃, d₁, d₂)
        
        # An efficient implementation of 𝒟_weighting = ϵᵏ^2 * I / (𝒟⁻¹ - ϵᵏ^2 * I):
        𝒟_weighting .= ϵᵏ^2 ./ (𝒟⁻¹ .- ϵᵏ^2)
        
        mul!(b, PᵃΦᵃ, y) # right hand side for CG
        mul!(γᵏ, Pᵏ', Xᵏ) # initial value for CG
        
        cg!(γᵏ, CG_op, b, maxiter = maxIter, statevars = statevars) # 2.167
        
        # An efficient implementation of rᵏ = y - Φ * Pᵏ * γᵏ:
        rᵏ .= y .- mul!(rᵏ, ΦP,  γᵏ)
        
        # An efficient implementation of γᵏ_tilde = Diagonal(𝒟⁻¹ ./ (𝒟⁻¹ .- ϵᵏ^2)) * γᵏ - Pᵏ' * Φ' * rᵏ
        # (𝒟_weighting is reused as scratch here; CG has finished, and it is refilled
        # at the top of the next iteration)
        𝒟_weighting .= 𝒟⁻¹ ./ (𝒟⁻¹ .- ϵᵏ^2) # same as Diagonal(𝒟⁻¹ ./ (𝒟⁻¹ .- ϵᵏ^2))
        mul!(tempʳ⁽ʳ⁺ᵈ¹⁺ᵈ²⁾, PᵃΦᵃ, rᵏ)
        mul!(γᵏ_tilde, Diagonal(𝒟_weighting), γᵏ)
        γᵏ_tilde .-= tempʳ⁽ʳ⁺ᵈ¹⁺ᵈ²⁾
        
        # An efficient implementation of Xᵏ = Φ' * rᵏ + Pᵏ * γᵏ_tilde
        mul!(Xᵏ, Pᵏ, γᵏ_tilde)
        Xᵏ .+= mul!(tempᵈ¹ˣᵈ², Φ', rᵏ)   # 2.168
        
    end
    
    # Final report: use the spectrum of the returned iterate for both the rank and σ₁
    # (σ still belongs to the iterate *before* the last update).
    σ_final = svdvals(Xᵏ)
    r, n, s, e = sum(σ_final .> 1e-3), opnorm(Xᴳᵀ - Xᵏ, 2), σ_final[1], ϵᵏ
    n, s, e = @sprintf("%.3f", n), @sprintf("%.3f", s), @sprintf("%.3f", e)
    println("k = $N,\trank(Xᵏ) = $r,\t‖Xᴳᵀ - Xᵏ‖₂ = $n, σ₁ = $s, ϵᵏ = $e")
    
    Xᵏ
end

HM_IRLS_optimized (generic function with 1 method)

### Some helper functions

In [7]:
import Base.size
# Matrix-like size of a FunctionOperator along dimension `d`:
# d == 1 gives the total number of output elements (product of `outDims`),
# d == 2 gives the total number of input elements (product of `inDims`).
# Any other `d` fails the assertion.
function Base.size(FO::FunctionOperator, d::Int)
    @assert d in [1, 2]
    dims = if d == 1
        FO.outDims
    else
        FO.inDims
    end
    return prod(dims)
end

In [8]:
# Randomly sample a $(d₁ \times d₂)$ 0/1 mask that contains exactly $m$ ones.
# Every row and every column of the result has at least $r$ ones, where $r$ is a given
# positive integer. Construction: start from a circulant pattern with exactly $r$ ones
# per row and column, tile and crop it to the requested shape, flip randomly chosen
# zeros to ones until $m$ ones are present, and finally permute rows and columns.
function generateΦ(d₁, d₂, r, m)
    @assert max(d₁, d₂) * r ≤ m
    @assert m ≤ d₁ * d₂
    @assert r ≤ d₁
    @assert r ≤ d₂
    
    # square min(d₁, d₂) pattern: exactly r ones in each row and each column
    side = min(d₁, d₂)
    initial = Circulant(vcat(fill(1, r), fill(0, side - r)))
    
    # tile the square pattern along the longer dimension, then crop to d₁×d₂;
    # rows and columns keep at least r ones
    tiled = if d₁ < d₂
        repeat(initial, outer = (1, ceil(Int, d₂ / d₁)))
    elseif d₁ > d₂
        repeat(initial, outer = (ceil(Int, d₁ / d₂), 1))
    else
        initial
    end
    M = tiled[1:d₁, 1:d₂]
    
    # flip randomly selected zeros until the mask holds exactly m ones
    zero_places = findall(iszero, M)
    ones_present = d₁ * d₂ - length(zero_places)
    deficit = m - ones_present
    if deficit > 0
        M[shuffle(zero_places)[1:deficit]] .= 1
    end
    
    # hide the regular structure: ten rounds of row and column permutations
    for _ in 1:10
        M .= M[shuffle(1:size(M, 1)), :]
        M .= M[:, shuffle(1:size(M, 2))]
    end
    
    M
end

generateΦ (generic function with 1 method)

In [9]:
# Convert a 0/1 mask into an explicit selection matrix.
#
# The result has one row per entry of `Φᴹ` that equals 1 and one column per entry of
# `Φᴹ` (in column-major order, i.e. the order of `vec(Φᴹ)`). Row i contains a single 1
# in the column of the i-th selected entry, so `maskToMatrix(Φᴹ) * vec(X)` picks the
# sampled entries of `X`.
#
# The mask may have any number of dimensions; only its flattened form is used.
function maskToMatrix(Φᴹ)
    m = convert(Int, sum(Φᴹ))

    Φ = zeros(m, length(Φᴹ))
    non_zero_places = findall(==(1), vec(Φᴹ))
    for i in 1:m
        Φ[i, non_zero_places[i]] = 1
    end
    return Φ
end

maskToMatrix (generic function with 1 method)

### Generate data

#### This is how Christian generated the data to compare algorithms:

In [17]:
# Problem size: a d₁ × d₂ matrix of rank r.
d₁, d₂, r = 60, 40, 7
df_LR = r * (d₁ + d₂ - r) # Number of degrees of freedom of the setting
# Number of sampled entries: 5 % more than the degrees of freedom, capped at d₁ * d₂.
m = floor(Int, min(1.05 * df_LR, d₁ * d₂))

# NOTE: `dType` is a notebook-level global that other cells may read.
dType = ComplexF64
# Random complex factors and a random real diagonal; the product has rank at most r.
U, S, V = randn(dType, d₁, r), Diagonal(randn(r)), randn(dType, d₂, r)
Xᴳᵀ = U * S * V' # Ground Truth matrix

@show size(Xᴳᵀ)
@show rank(Xᴳᵀ);

# 0/1 sampling mask with exactly m ones.
Φᴹ = generateΦ(d₁, d₂, r, m)
# Sampling operator: forward multiplies element-wise by the mask, backward copies its
# input unchanged.
Φ = FunctionOperator{dType}(name = "Φ", inDims = (d₁, d₂), outDims = (d₁, d₂),
    forw = (b,x) -> b .= Φᴹ .* x, backw = (b,x) -> b .= x)
# Observed data: the ground truth with the unsampled entries zeroed.
y = Φ * Xᴳᵀ
@show rank(y);

size(Xᴳᵀ) = (60, 40)
rank(Xᴳᵀ) = 7
rank(y) = 40


In [18]:
# Sanity check: applying the operator Φ equals element-wise masking with Φᴹ.
Φᴹ .* Xᴳᵀ == Φ * Xᴳᵀ

true

In [19]:
# Time 50 outer iterations of the reference implementation (default maxIter = 3).
@time HM_IRLS(Xᴳᵀ, y, Φ, N = 50);

k = 0,	rank(Xᵏ) = 40,	‖Xᴳᵀ - Xᵏ‖₂ = 52.467, σ₁ = 23.527, ϵᵏ = 13.725
k = 1,	rank(Xᵏ) = 40,	‖Xᴳᵀ - Xᵏ‖₂ = 48.094, σ₁ = 28.085, ϵᵏ = 10.400
k = 2,	rank(Xᵏ) = 40,	‖Xᴳᵀ - Xᵏ‖₂ = 45.267, σ₁ = 33.368, ϵᵏ = 8.350
k = 3,	rank(Xᵏ) = 40,	‖Xᴳᵀ - Xᵏ‖₂ = 43.444, σ₁ = 38.757, ϵᵏ = 7.685
k = 4,	rank(Xᵏ) = 40,	‖Xᴳᵀ - Xᵏ‖₂ = 44.804, σ₁ = 42.666, ϵᵏ = 7.660
k = 5,	rank(Xᵏ) = 40,	‖Xᴳᵀ - Xᵏ‖₂ = 45.863, σ₁ = 45.539, ϵᵏ = 7.660
k = 6,	rank(Xᵏ) = 40,	‖Xᴳᵀ - Xᵏ‖₂ = 48.779, σ₁ = 55.074, ϵᵏ = 7.660
k = 7,	rank(Xᵏ) = 40,	‖Xᴳᵀ - Xᵏ‖₂ = 100.285, σ₁ = 122.456, ϵᵏ = 7.660
k = 8,	rank(Xᵏ) = 40,	‖Xᴳᵀ - Xᵏ‖₂ = 3013.551, σ₁ = 3032.981, ϵᵏ = 7.660
k = 9,	rank(Xᵏ) = 40,	‖Xᴳᵀ - Xᵏ‖₂ = 140087.650, σ₁ = 140101.683, ϵᵏ = 7.660
k = 10,	rank(Xᵏ) = 40,	‖Xᴳᵀ - Xᵏ‖₂ = 35780.224, σ₁ = 35791.145, ϵᵏ = 7.660
k = 11,	rank(Xᵏ) = 40,	‖Xᴳᵀ - Xᵏ‖₂ = 145750.577, σ₁ = 145761.218, ϵᵏ = 7.660
k = 12,	rank(Xᵏ) = 40,	‖Xᴳᵀ - Xᵏ‖₂ = 169341.913, σ₁ = 169340.733, ϵᵏ = 7.660
k = 13,	rank(Xᵏ) = 40,	‖Xᴳᵀ - Xᵏ‖₂ = 6154.689, σ₁ = 6156.734, ϵᵏ = 7.660
k 

In [13]:
# Time 50 outer iterations of the preallocating implementation (default maxIter).
@time HM_IRLS_optimized(Xᴳᵀ, y, Φ, N = 50);

k = 0,	rank(Xᵏ) = 40,	‖Xᴳᵀ - Xᵏ‖₂ = 68.011, σ₁ = 30.281, ϵᵏ = 16.058
k = 1,	rank(Xᵏ) = 40,	‖Xᴳᵀ - Xᵏ‖₂ = 52.576, σ₁ = 49.147, ϵᵏ = 12.027
k = 2,	rank(Xᵏ) = 40,	‖Xᴳᵀ - Xᵏ‖₂ = 39.174, σ₁ = 70.810, ϵᵏ = 8.638
k = 3,	rank(Xᵏ) = 40,	‖Xᴳᵀ - Xᵏ‖₂ = 34.841, σ₁ = 81.528, ϵᵏ = 6.350
k = 4,	rank(Xᵏ) = 40,	‖Xᴳᵀ - Xᵏ‖₂ = 32.662, σ₁ = 85.622, ϵᵏ = 4.738
k = 5,	rank(Xᵏ) = 40,	‖Xᴳᵀ - Xᵏ‖₂ = 29.900, σ₁ = 87.653, ϵᵏ = 3.608
k = 6,	rank(Xᵏ) = 40,	‖Xᴳᵀ - Xᵏ‖₂ = 27.277, σ₁ = 88.888, ϵᵏ = 2.656
k = 7,	rank(Xᵏ) = 40,	‖Xᴳᵀ - Xᵏ‖₂ = 25.348, σ₁ = 89.893, ϵᵏ = 2.017
k = 8,	rank(Xᵏ) = 40,	‖Xᴳᵀ - Xᵏ‖₂ = 24.184, σ₁ = 90.703, ϵᵏ = 1.563
k = 9,	rank(Xᵏ) = 40,	‖Xᴳᵀ - Xᵏ‖₂ = 23.371, σ₁ = 91.255, ϵᵏ = 1.257
k = 10,	rank(Xᵏ) = 40,	‖Xᴳᵀ - Xᵏ‖₂ = 22.527, σ₁ = 91.560, ϵᵏ = 1.061
k = 11,	rank(Xᵏ) = 40,	‖Xᴳᵀ - Xᵏ‖₂ = 21.583, σ₁ = 91.733, ϵᵏ = 0.902
k = 12,	rank(Xᵏ) = 40,	‖Xᴳᵀ - Xᵏ‖₂ = 20.573, σ₁ = 91.886, ϵᵏ = 0.771
k = 13,	rank(Xᵏ) = 40,	‖Xᴳᵀ - Xᵏ‖₂ = 19.513, σ₁ = 92.070, ϵᵏ = 0.661
k = 14,	rank(Xᵏ) = 40,	‖Xᴳᵀ - Xᵏ‖₂ = 18.41

#### An easy problem:

In [14]:
# Small real-valued test case: a d × d outer product v * v'.
d = 10
v = rand(d)
Xᴳᵀ = v * v'  # Ground Truth matrix
@show size(Xᴳᵀ)
@show rank(Xᴳᵀ)

# mask that erases 5 elements:
num_of_points_to_erase = 5
# Shuffle a vector holding the required number of zeros and ones, then reshape to d × d.
Φᴹ = reshape(shuffle!([fill(0, num_of_points_to_erase)...,
            fill(1, d*d - num_of_points_to_erase)...]), d, d)
# Sampling operator: forward multiplies element-wise by the mask, backward copies its
# input unchanged.
Φ = FunctionOperator{Float64}(name = "Φ", inDims = (d, d), outDims = (d, d),
    forw = (b,x) -> b .= Φᴹ .* x, backw = (b,x) -> b .= x)

# Observed data: the ground truth with the erased entries zeroed.
y = Φ * Xᴳᵀ
@show rank(y);

size(Xᴳᵀ) = (10, 10)
rank(Xᴳᵀ) = 1
rank(y) = 4


In [15]:
# Time 10 outer iterations of the reference implementation on the easy problem.
@time HM_IRLS(Xᴳᵀ, y, Φ, N = 10);

k = 0,	rank(Xᵏ) = 4,	‖Xᴳᵀ - Xᵏ‖₂ = 0.408, σ₁ = 2.158, ϵᵏ = 0.314
k = 1,	rank(Xᵏ) = 3,	‖Xᴳᵀ - Xᵏ‖₂ = 0.051, σ₁ = 2.228, ϵᵏ = 0.038
k = 2,	rank(Xᵏ) = 1,	‖Xᴳᵀ - Xᵏ‖₂ = 0.002, σ₁ = 2.238, ϵᵏ = 0.001
k = 3,	rank(Xᵏ) = 1,	‖Xᴳᵀ - Xᵏ‖₂ = 0.001, σ₁ = 2.238, ϵᵏ = 0.000
k = 4,	rank(Xᵏ) = 1,	‖Xᴳᵀ - Xᵏ‖₂ = 0.001, σ₁ = 2.238, ϵᵏ = 0.000
k = 5,	rank(Xᵏ) = 1,	‖Xᴳᵀ - Xᵏ‖₂ = 0.001, σ₁ = 2.238, ϵᵏ = 0.000
k = 6,	rank(Xᵏ) = 1,	‖Xᴳᵀ - Xᵏ‖₂ = 0.001, σ₁ = 2.238, ϵᵏ = 0.000
k = 7,	rank(Xᵏ) = 1,	‖Xᴳᵀ - Xᵏ‖₂ = 0.001, σ₁ = 2.238, ϵᵏ = 0.000
k = 8,	rank(Xᵏ) = 1,	‖Xᴳᵀ - Xᵏ‖₂ = 0.001, σ₁ = 2.238, ϵᵏ = 0.000
k = 9,	rank(Xᵏ) = 1,	‖Xᴳᵀ - Xᵏ‖₂ = 0.001, σ₁ = 2.238, ϵᵏ = 0.000
k = 10,	rank(Xᵏ) = 1,	‖Xᴳᵀ - Xᵏ‖₂ = 0.001, σ₁ = 2.238, ϵᵏ = 0.000
  3.114524 seconds (6.84 M allocations: 335.284 MiB, 4.30% gc time)
