In [1]:
using LinearAlgebra, NBInclude, IterativeSolvers, FunctionOperators, Printf

In [1]:
# Evaluate the helper notebook so that its definitions become available in this session
@nbinclude "helper_functions.ipynb"

LoadError: LoadError: UndefVarError: @nbinclude not defined
in expression starting at In[1]:1

In [3]:
# `Random` is not part of the notebook's `using` list, so bring it into scope here
# before seeding the global RNG to make the experiments below reproducible.
using Random
Random.seed!(123);

### Fancy MatrixIRLS for matrix completion (PCA) with p = 0

https://mediatum.ub.tum.de/doc/1521436/1521436.pdf

_**Note:** Here "fancy" means that the conjugate gradient step (2.90) is optimized by computing it in a lower-dimensional projection space._

In [4]:
# Print one progress line: iteration index, numerical rank of the iterate (atol = 1e-3),
# spectral-norm error against the ground truth, leading singular value and smoothing ϵ.
function _print_irls_status(k, Xᵏ, Xᴳᵀ, σ₁, ϵᵏ)
    r = rank(Xᵏ, atol = 1e-3)
    n = @sprintf("%7.3f", opnorm(Xᴳᵀ - Xᵏ, 2))
    s = @sprintf("%7.3f", σ₁)
    e = @sprintf("%7.3f", ϵᵏ)
    println("k = $k,\trank(Xᵏ) = $r,\t‖Xᴳᵀ - Xᵏ‖₂ = $n, σ₁ = $s, ϵᵏ = $e")
end

"""
    fancy_MatrixIRLS_for_PCA(Xᴳᵀ, y, Φ; img_size, r̃, maxIter, N, verbose)

Run `N` iterations of MatrixIRLS (p = 0) for matrix completion and return the final
iterate `Xᵏ`. The weighted least-squares step (2.90) is solved with conjugate gradients
on a coefficient vector of length `r̃*(r̃+d₁+d₂)` instead of on the full matrix.

# Arguments
- `Xᴳᵀ`: ground truth; used for the error shown in the progress report and, when
  `r̃ == 0`, to derive the rank estimate.
- `y`: under-sampled data.
- `Φ`: sampling operator.

# Keywords
- `img_size`: size `(d₁, d₂)` of the output matrix (default `size(Xᴳᵀ)`).
- `r̃`: rank estimate of the solution; `0` (default) means `rank(Xᴳᵀ)`.
- `maxIter`: maximum number of CG steps per outer iteration; `nothing` (default)
  means `r̃*(r̃+d₁+d₂)`.
- `N`: number of outer iterations.
- `verbose`: print rank, error, leading singular value and smoothing parameter
  before every iteration and once after the last one.

# Throws
- `ArgumentError` if the rank estimate is not in `0:min(d₁, d₂)-1`; the smoothing
  update needs the `(r̃+1)`-th singular value.
"""
function fancy_MatrixIRLS_for_PCA(
        Xᴳᵀ::AbstractArray,                     # ground truth for MSE evaluation
        y::AbstractArray,                       # under-sampled data
        Φ::FunctionOperator;                    # sampling operator
        img_size::NTuple = size(Xᴳᵀ),           # size of output matrix
        r̃::Int = 0,                             # rank estimate of solution
        maxIter::Union{Int, Nothing} = nothing, # number of CG iteration steps
        N::Int = 10,                            # number of iterations
        verbose::Bool = false)                  # print rank and loss value in each iteration

    # Initialize variables
    dType = eltype(y)
    d₁, d₂ = img_size
    r̃ == 0 && (r̃ = rank(Xᴳᵀ))
    0 ≤ r̃ < min(d₁, d₂) || throw(ArgumentError(
        "rank estimate r̃ = $r̃ must satisfy 0 ≤ r̃ < min(d₁, d₂) = $(min(d₁, d₂))"))
    γ_len = r̃*(r̃+d₁+d₂)                  # length of the coefficient vector γ
    cg_maxiter = something(maxIter, γ_len)
    ϵᵏ = Inf
    Xᵏ = Φ' * y
    σ = nothing # singular values of the last decomposed iterate; needed after the loop

    for k in 1:N
        # p. 84, step 2: find the best rank-(r̃ + 1) approximation of Xᵏ to obtain
        #   𝒯ᵣ(Xᵏ) = Uᵏ * diag(σᵢᵏ)ᵢ₌₁ʳ * Vᵏ' and σᵣ₊₁ᵏ
        F = svd(Xᵏ)
        Uᵏ, σ, Vᵏ = F.U[:, 1:r̃], F.S, F.V[:, 1:r̃]

        # p. 84: update the smoothing parameter (2.91)
        ϵᵏ = min(ϵᵏ, σ[r̃+1])

        # Print iteration info (describes the iterate *before* this iteration's update)
        verbose && _print_irls_status(k - 1, Xᵏ, Xᴳᵀ, σ[1], ϵᵏ)

        # p. 84
        #   3. Update Wᵏ as in (2.57), using parameters ϵ = ϵᵏ and p in (2.58) and (2.59),
        #      and the information Uᵏ, Vᵏ and σ₁ᵏ, ..., σᵣ₊₁ᵏ from item 2.
        #   1. Use a conjugate gradient method to solve the linearly constrained
        #      quadratic program
        #        Xᵏ = arg minₓ ⟨X,Wᵏ⁻¹(X)⟩ s.t. Φ(X) = y         (2.90)
#=
p.118
    Therefore, we can summarize the following simplified outline of an implementation of
        step (2.90) of MatrixIRLS for matrix completion:
        1. Calculate P*_Tₖ Φ*(y) ∈ Tₖ
        2. Solve ((ϵₖ²⁻ᵖ I) / (𝒟⁻¹ - ϵₖ²⁻ᵖ I) + P*_Tₖ Φ* Φ P_Tₖ)γₖ = P*_Tₖ Φ*(y)
            for γₖ ∈ Tₖ by conjugate gradient method [HS52], [QSS10, Chapter 4].
        3. Calculate residual rₖ := y − Φ P_Tₖ γₖ ∈ ℝᵐ
        4. Calculate γ̃ₖ = (𝒟⁻¹ / (𝒟⁻¹ - ϵₖ²⁻ᵖ I))γₖ - P*_Tₖ Φ*(rₖ) ∈ Tₖ.
=#

        # the upper-left (r × r) block of (d₁ × d₂) Hᵏ matrix:
        Hᵏᵤᵥ = [1 / (max(σ[i], ϵᵏ) * max(σ[j], ϵᵏ))  for i in 1:r̃, j in 1:r̃]
        # the first column of Hᵏᵤᵥ⟂:
        dHᵏ = reshape([1 / (max(σ[r̃+1], ϵᵏ) * max(σ[j], ϵᵏ))  for j in 1:r̃], :, 1)
        # Pᵏ maps a coefficient vector γ = [vec(γ₁); vec(γ₂); vec(γ₃)] to a (d₁ × d₂)
        # matrix; its adjoint extracts the three coefficient blocks from a matrix.
        Pᵏ = FunctionOperator{dType}(name="Pᵏ", inDims = (γ_len,), outDims = (d₁, d₂),
            forw = γ -> begin
                    γ₁ = reshape(γ[1:r̃^2], r̃, r̃)
                    γ₂ = reshape(γ[r̃^2+1:r̃*(r̃+d₂)], r̃, d₂)
                    γ₃ = reshape(γ[r̃*(r̃+d₂)+1:r̃*(r̃+d₁+d₂)], d₁, r̃)
                    # According to (2.169), the equation would be:
                    # Uᵏ * γ₁ * Vᵏ' + Uᵏ * γ₂' * (I - Vᵏ*Vᵏ') + (I - Uᵏ*Uᵏ') * γ₃' * Vᵏ'
                    # But as the columns of γ₃ are orthogonal to the ones in Uᵏ,
                    # the rows of γ₂ are orthogonal to the columns of Vᵏ,
                    # the expression can be simplified:
                    (Uᵏ * γ₁ + γ₃) * Vᵏ' + Uᵏ * γ₂
                end,
            backw = Φᵃy -> begin
                    γ₁ = Uᵏ' * Φᵃy * Vᵏ
                    γ₂ = Uᵏ' * Φᵃy * (I - Vᵏ*Vᵏ')
                    γ₃ = (I - Uᵏ*Uᵏ') * Φᵃy * Vᵏ
                    vcat(vec(γ₁), vec(γ₂), vec(γ₃))
                end)
        b = Pᵏ' * Φ' * y
        # Diagonal weights laid out in the same order as the blocks of γ
        𝒟⁻¹ = I / Diagonal(vcat(vec(Hᵏᵤᵥ), vec(kron(dHᵏ, ones(1, d₂))), vec(kron(dHᵏ, ones(1, d₁))')))
        # Both diagonal factors depend only on ϵᵏ and 𝒟⁻¹, so build them once per outer
        # iteration instead of once per CG step.
        Δᵏ = 𝒟⁻¹ - ϵᵏ^2 * I
        Rᵏ = ϵᵏ^2 * I / Δᵏ
        Sᵏ = 𝒟⁻¹ / Δᵏ
        CG_op = FunctionOperator{dType}(name = "CG_op", inDims = (γ_len,), outDims = (γ_len,),
            forw = γ ->  begin
                    Rᵏ * γ + Pᵏ' * Φ' * Φ * Pᵏ * γ
                end)
        γᵏ = cg(CG_op, b, maxiter = cg_maxiter) # 2.167
        rᵏ = y - Φ * Pᵏ * γᵏ
        γ̃ₖ = Sᵏ * γᵏ - Pᵏ' * Φ' * rᵏ
        Xᵏ = Φ' * rᵏ + Pᵏ * γ̃ₖ   # 2.168
    end

    # Final report. Rank and error describe the returned iterate; σ₁ and ϵᵏ are the values
    # from the last decomposition inside the loop. With N < 1 no decomposition exists,
    # so σ₁ is computed from the initial iterate.
    if verbose
        σ₁ = σ === nothing ? opnorm(Xᵏ, 2) : σ[1]
        _print_irls_status(N, Xᵏ, Xᴳᵀ, σ₁, ϵᵏ)
    end

    return Xᵏ
end

fancy_MatrixIRLS_for_PCA (generic function with 1 method)

# Numerical Experiments

### General parameters

In [5]:
# Number of rows and columns of the test matrix
d₁ = d₂ = 50
# Smaller of the two dimensions
n = min(d₁, d₂)
# Rank of the low-rank component
r = 7
# Number of non-zero elements in the sparse component
k = 0
# Element type of all generated matrices
dType = ComplexF64;

## Understanding and Enhancing Data Recovery Algorithms

*From Noise-Blind Sparse Recovery to Reweighted Methods for Low-Rank Matrix Optimization*

*by Christian Kümmerle*

https://mediatum.ub.tum.de/doc/1521436/1521436.pdf

### Generate Data

#### Gaussian Low Rank Matrix
Corresponding Matlab function: https://github.com/ckuemmerle/hm_irls/blob/master/sample_X0_lowrank.m

In [6]:
# Generate the (d₁ × d₂) ground-truth matrix with the requested rank and element type,
# then report its size and rank.
L₀ = generateLowRankComponent_Christian(d₁, d₂, r, dType)
@show size(L₀) rank(L₀);

size(L₀) = (50, 50)
rank(L₀) = 7


#### Sampling Mask ($\Phi$)
Corresponding Matlab function: https://github.com/ckuemmerle/hm_irls/blob/master/sample_phi_MatrixCompletion.m

_**Note:** Christian's Matlab function and my Julia function differ in how they satisfy the requirement of having at least $r$ non-zero entries in each row and each column._

In [7]:
# Number of degrees of freedom of the setting
df = r * (d₁ + d₂ - r)
# Number of sampled entries: 5 % more than `df`, capped at the total number of entries
m = floor(Int, min(1.05 * df, d₁ * d₂))
Φᴹ = generateΦ(d₁, d₂, r, m)
# Wrap the mask as an operator: the forward map multiplies entrywise by Φᴹ, the backward
# map copies its input unchanged.
# NOTE(review): the backward map equals the true adjoint only for inputs that are already
# zero outside the mask (as is the case for `y` and the residuals) — confirm this is intended.
Φ = FunctionOperator{dType}(name = "Φ", inDims = (d₁, d₂), outDims = (d₁, d₂),
    forw = (b,x) -> b .= Φᴹ .* x, backw = (b,x) -> b .= x)
@show r
# `dims=1` sums down each column, `dims=2` sums along each row
println("minimum number of non-zero entries in each column: ", Int(minimum(sum(Φᴹ, dims=1))))
println("minimum number of non-zero entries in each row: ", Int(minimum(sum(Φᴹ, dims=2))))

r = 7
minimum number of non-zero entries in each column: 9
minimum number of non-zero entries in each column: 8


#### Subsampling The Ground Truth Matrix

In [8]:
# Apply the sampling operator to the ground truth to obtain the observed data
y = Φ * L₀
@show(rank(y));

rank(y) = 50


### Running The Reconstruction

In [9]:
# Time a 48-iteration reconstruction with progress output (rank estimate taken from L₀)
@time fancy_MatrixIRLS_for_PCA(L₀, y, Φ; N = 48, verbose = true);

k = 0,	rank(Xᵏ) = 50,	‖Xᴳᵀ - Xᵏ‖₂ = 174.811, σ₁ =  74.748, ϵᵏ =  33.581
k = 1,	rank(Xᵏ) = 50,	‖Xᴳᵀ - Xᵏ‖₂ = 125.170, σ₁ = 136.996, ϵᵏ =  23.829
k = 2,	rank(Xᵏ) = 50,	‖Xᴳᵀ - Xᵏ‖₂ =  88.464, σ₁ = 197.778, ϵᵏ =  15.661
k = 3,	rank(Xᵏ) = 50,	‖Xᴳᵀ - Xᵏ‖₂ =  66.096, σ₁ = 220.748, ϵᵏ =  10.665
k = 4,	rank(Xᵏ) = 50,	‖Xᴳᵀ - Xᵏ‖₂ =  55.641, σ₁ = 227.255, ϵᵏ =   7.563
k = 5,	rank(Xᵏ) = 50,	‖Xᴳᵀ - Xᵏ‖₂ =  45.792, σ₁ = 229.624, ϵᵏ =   5.333
k = 6,	rank(Xᵏ) = 50,	‖Xᴳᵀ - Xᵏ‖₂ =  36.322, σ₁ = 231.461, ϵᵏ =   3.952
k = 7,	rank(Xᵏ) = 50,	‖Xᴳᵀ - Xᵏ‖₂ =  29.960, σ₁ = 233.286, ϵᵏ =   3.179
k = 8,	rank(Xᵏ) = 50,	‖Xᴳᵀ - Xᵏ‖₂ =  26.327, σ₁ = 234.681, ϵᵏ =   2.635
k = 9,	rank(Xᵏ) = 50,	‖Xᴳᵀ - Xᵏ‖₂ =  24.101, σ₁ = 235.525, ϵᵏ =   2.262
k = 10,	rank(Xᵏ) = 50,	‖Xᴳᵀ - Xᵏ‖₂ =  22.422, σ₁ = 235.946, ϵᵏ =   1.951
k = 11,	rank(Xᵏ) = 50,	‖Xᴳᵀ - Xᵏ‖₂ =  21.012, σ₁ = 236.123, ϵᵏ =   1.633
k = 12,	rank(Xᵏ) = 50,	‖Xᴳᵀ - Xᵏ‖₂ =  19.781, σ₁ = 236.170, ϵᵏ =   1.327
k = 13,	rank(Xᵏ) = 50,	‖Xᴳᵀ - Xᵏ‖₂ =  18.693, σ₁ = 236.176, ϵ

# Robust Principal Component Analysis?
*by Emmanuel J. Candès, Xiaodong Li, Yi Ma, and John Wright*  
https://arxiv.org/pdf/0912.3599.pdf

#### 4.1 Exact recovery from varying fractions of error

We first verify the correct recovery phenomenon of Theorem 1.1 on randomly generated problems. We consider square matrices of varying dimension $n = 500, \ldots , 3000$. We generate a rank-$r$ matrix $L_0$ as a product $L_0 = XY^*$ where $X$ and $Y$ are $n \times r$ matrices with entries independently sampled
from a $\mathcal{N}(0,1/n)$ distribution. $S_0$ is generated by choosing a support set $\Omega$ of size $k$ uniformly at random, and setting $S_0 = \mathcal{P}_\Omega E$, where $E$ is a matrix with independent Bernoulli $\pm 1$ entries. Table 1 (top) reports the results with $r = rank(L_0) = 0.05 \times n$ and $k = \Vert S_0 \Vert_0 = 0.05 \times n^2$. Table 1 (bottom) reports the results for a more challenging scenario, $rank(L_0) = 0.05 \times n$ and $k = 0.10 \times n^2$. In all cases, we set $\lambda = 1 / \sqrt{n}$. Notice that in all cases, solving the convex PCP gives a result $(L, S)$ with the correct rank and sparsity. Moreover, the relative error $\frac{\Vert L - L_0 \Vert_F}{\Vert L_0 \Vert_F}$ is small, less than $10^{-5}$ in all examples considered.

<center><img src="table_1.png" /></center>

### Generate Data

_**Note:** In this notebook we deal only with PCA (simple matrix completion); therefore, there is no sparse component in the ground truth matrix._

In [13]:
# Generate the (n × n) ground-truth matrix with the requested rank and element type,
# then report its size and rank.
L₀ = generateLowRankComponent_Candes(n, r, dType)
@show size(L₀) rank(L₀);

size(L₀) = (50, 50)
rank(L₀) = 7


#### Sampling Mask ($\Phi$)

Using the earlier generated sampling mask

#### Subsampling The Ground Truth Matrix

In [14]:
# Apply the sampling operator to the ground truth to obtain the observed data
y = Φ * L₀
@show(rank(y));

rank(y) = 50


### Running The Reconstruction

In [15]:
# Time an 80-iteration reconstruction with progress output (rank estimate taken from L₀)
@time fancy_MatrixIRLS_for_PCA(L₀, y, Φ; N = 80, verbose = true);

k = 0,	rank(Xᵏ) = 44,	‖Xᴳᵀ - Xᵏ‖₂ =   0.044, σ₁ =   0.020, ϵᵏ =   0.011
k = 1,	rank(Xᵏ) = 43,	‖Xᴳᵀ - Xᵏ‖₂ =   0.039, σ₁ =   0.028, ϵᵏ =   0.009
k = 2,	rank(Xᵏ) = 42,	‖Xᴳᵀ - Xᵏ‖₂ =   0.034, σ₁ =   0.039, ϵᵏ =   0.007
k = 3,	rank(Xᵏ) = 40,	‖Xᴳᵀ - Xᵏ‖₂ =   0.031, σ₁ =   0.047, ϵᵏ =   0.005
k = 4,	rank(Xᵏ) = 36,	‖Xᴳᵀ - Xᵏ‖₂ =   0.030, σ₁ =   0.050, ϵᵏ =   0.004
k = 5,	rank(Xᵏ) = 31,	‖Xᴳᵀ - Xᵏ‖₂ =   0.030, σ₁ =   0.052, ϵᵏ =   0.003
k = 6,	rank(Xᵏ) = 27,	‖Xᴳᵀ - Xᵏ‖₂ =   0.030, σ₁ =   0.053, ϵᵏ =   0.002
k = 7,	rank(Xᵏ) = 21,	‖Xᴳᵀ - Xᵏ‖₂ =   0.030, σ₁ =   0.054, ϵᵏ =   0.002
k = 8,	rank(Xᵏ) = 17,	‖Xᴳᵀ - Xᵏ‖₂ =   0.030, σ₁ =   0.054, ϵᵏ =   0.002
k = 9,	rank(Xᵏ) = 14,	‖Xᴳᵀ - Xᵏ‖₂ =   0.030, σ₁ =   0.055, ϵᵏ =   0.002
k = 10,	rank(Xᵏ) = 12,	‖Xᴳᵀ - Xᵏ‖₂ =   0.030, σ₁ =   0.055, ϵᵏ =   0.001
k = 11,	rank(Xᵏ) = 11,	‖Xᴳᵀ - Xᵏ‖₂ =   0.030, σ₁ =   0.055, ϵᵏ =   0.001
k = 12,	rank(Xᵏ) = 10,	‖Xᴳᵀ - Xᵏ‖₂ =   0.030, σ₁ =   0.056, ϵᵏ =   0.001
k = 13,	rank(Xᵏ) = 9,	‖Xᴳᵀ - Xᵏ‖₂ =   0.030, σ₁ =   0.056, ϵᵏ