In [1]:
using LinearAlgebra, NBInclude, IterativeSolvers, FunctionOperators, Printf

In [1]:
# Evaluate the companion notebook so that its definitions become available here.
# `@nbinclude` is only defined once the `using ... NBInclude ...` cell has been run;
# the recorded error below is what happens when this cell is executed first.
# NOTE(review): generateLowRankComponent_Christian, generateLowRankComponent_Candes and
# the mask generator are called later but never defined in this file — presumably they
# come from this notebook; confirm.
@nbinclude("helper_functions.ipynb")

LoadError: LoadError: UndefVarError: @nbinclude not defined
in expression starting at In[1]:1

In [3]:
# Seed the default random number generator so that later random draws are reproducible.
# `Random` is a standard-library module that is not loaded by the `using` cell above,
# so it is brought into scope here before its qualified name is used.
using Random
Random.seed!(123);

### Fancy MatrixIRLS for matrix completion (PCA) with p = 0

https://mediatum.ub.tum.de/doc/1521436/1521436.pdf

_**Note:** "Fancy" means that the conjugate gradient step (2.90) is optimized by computing it in a lower-dimensional projection space._

In [4]:
# Print one line of convergence diagnostics for the iteration labelled `iter`:
# the numerical rank of `X` (absolute tolerance 1e-3), the spectral-norm distance
# between `X_true` and `X`, and the supplied values `s1` and `eps_k`.
function _print_MatrixIRLS_status(iter, X, X_true, s1, eps_k)
    r = rank(X, atol = 1e-3)
    n = @sprintf("%7.3f", opnorm(X_true - X, 2))
    s = @sprintf("%7.3f", s1)
    e = @sprintf("%7.3f", eps_k)
    println("k = $iter,\trank(X·µè) = $r,\t‚ÄñX·¥≥·µÄ - X·µè‚Äñ‚ÇÇ = $n, œÉ‚ÇÅ = $s, œµ·µè = $e")
    return nothing
end

"""
    fancy_MatrixIRLS_for_PCA(X·¥≥·µÄ, y, Œ¶; img_size, rÃÉ, maxIter, N, verbose)

Run `N` iterations of the MatrixIRLS scheme for matrix completion, solving the
weighted least-squares step with conjugate gradients in a lower-dimensional
coordinate space of size `rÃÉ*(rÃÉ+d1+d2)`, where `(d1, d2) = img_size`.

The iterate is initialised as `Œ¶' * y`. Each iteration takes an SVD of the current
iterate, keeps the leading `rÃÉ` singular vectors, lowers the smoothing parameter to
at most singular value number `rÃÉ + 1`, solves the projected linear system with `cg`,
and forms the next iterate from the solution and the data residual.

# Arguments
- `X·¥≥·µÄ`: reference matrix; used for the default rank estimate and for the printed error.
- `y`: sampled data; its element type is used for the internal operators.
- `Œ¶`: sampling operator (its adjoint `Œ¶'` is used as well).

# Keywords
- `img_size`: size of the reconstructed matrix (default `size(X·¥≥·µÄ)`).
- `rÃÉ`: rank estimate; `0` (default) means "use `rank(X·¥≥·µÄ)`".
- `maxIter`: maximum number of CG steps per iteration; `nothing` (default) uses
  `rÃÉ*(rÃÉ+d1+d2)`.
- `N`: number of outer iterations.
- `verbose`: print one diagnostic line before every iteration and one after the last.
  In the last line the singular value and smoothing parameter are those of the most
  recent SVD, i.e. of the iterate before the final update.

# Returns
The iterate after `N` iterations (`Œ¶' * y` when `N == 0`).

# Throws
- `ArgumentError` if the rank estimate is negative or not strictly smaller than
  `min(img_size[1], img_size[2])`.
"""
function fancy_MatrixIRLS_for_PCA(
        X·¥≥·µÄ::AbstractArray,                     # ground truth (default rank estimate, printed error)
        y::AbstractArray,                       # under-sampled data
        Œ¶::FunctionOperator;                    # sampling operator
        img_size::NTuple = size(X·¥≥·µÄ),           # size of output matrix
        rÃÉ::Int = 0,                             # rank estimate of solution
        maxIter::Union{Int, Nothing} = nothing, # number of CG iteration steps
        N::Int = 10,                            # number of iterations
        verbose::Bool = false)                  # print rank and loss value in each iteration

    # Initialize variables
    dType = eltype(y)
    d‚ÇÅ, d‚ÇÇ = img_size
    rÃÉ == 0 && (rÃÉ = rank(X·¥≥·µÄ))
    # The smoothing update reads singular value number (rank estimate + 1), so the
    # estimate has to stay strictly below the smaller matrix dimension.
    0 <= rÃÉ < min(d‚ÇÅ, d‚ÇÇ) || throw(ArgumentError(
        "rank estimate must satisfy 0 <= rank < min(img_size), got $(rÃÉ) for img_size = $(img_size)"))
    maxIter = isnothing(maxIter) ? rÃÉ*(rÃÉ+d‚ÇÅ+d‚ÇÇ) : maxIter
    œµ·µè = Inf
    X·µè = Œ¶' * y
    œÉ = nothing # kept outside the loop so the final report can read the last singular values

    for k in 1:N
        # Thesis p. 84, step 2: find the best rank-(r + 1) approximation of the current
        # iterate, i.e. T_r(X) = U * diag(sigma_1, ..., sigma_r) * V' and sigma_(r+1).
        F = svd(X·µè)
        U·µè, œÉ, V·µè = F.U[:, 1:rÃÉ], F.S, F.V[:, 1:rÃÉ]

        # Thesis p. 84, smoothing update (2.91): the smoothing parameter never increases.
        œµ·µè = min(œµ·µè, œÉ[rÃÉ+1])

        # Print iteration info (the label counts completed iterations, hence k - 1)
        verbose && _print_MatrixIRLS_status(k - 1, X·µè, X·¥≥·µÄ, œÉ[1], œµ·µè)

        # Thesis p. 84:
        #   3. Update the weight operator W as in (2.57), using the current smoothing
        #      parameter and p in (2.58) and (2.59), and the information U, V and
        #      sigma_1, ..., sigma_(r+1) from step 2.
        #   1. Use a conjugate gradient method to solve the linearly constrained
        #      quadratic program
        #          X = arg min_X <X, W^-1(X)>  s.t.  Phi(X) = y            (2.90)
        #
        # Thesis p. 118, simplified outline of step (2.90) for matrix completion
        # (p = 0 here, so eps^(2-p) = eps^2; P is the map from tangent-space
        # coordinates to matrices, D^-1 the inverse diagonal weight operator):
        #   1. Calculate P* Phi*(y).
        #   2. Solve (eps^2 I / (D^-1 - eps^2 I) + P* Phi* Phi P) gamma = P* Phi*(y)
        #      for gamma by the conjugate gradient method [HS52], [QSS10, Chapter 4].
        #   3. Calculate the residual r := y - Phi P gamma.
        #   4. Calculate gamma~ = (D^-1 / (D^-1 - eps^2 I)) gamma - P* Phi*(r).

        # the upper-left (r √ó r) block of (d‚ÇÅ √ó d‚ÇÇ) H·µè matrix:
        H·µè·µ§·µ• = [1 / (max(œÉ[i], œµ·µè) * max(œÉ[j], œµ·µè))  for i in 1:rÃÉ, j in 1:rÃÉ]
        # the first column of H·µè·µ§·µ•‚üÇ:
        dH·µè = reshape([1 / (max(œÉ[rÃÉ+1], œµ·µè) * max(œÉ[j], œµ·µè))  for j in 1:rÃÉ], :, 1)
        # Coordinate vector layout: an (r x r) block, then an (r x d2) block, then a (d1 x r) block.
        P·µè = FunctionOperator{dType}(name="P·µè", inDims = (rÃÉ*(rÃÉ+d‚ÇÅ+d‚ÇÇ),), outDims = (d‚ÇÅ, d‚ÇÇ),
            forw = Œ≥ -> begin
                    Œ≥‚ÇÅ = reshape(Œ≥[1:rÃÉ^2], rÃÉ, rÃÉ)
                    Œ≥‚ÇÇ = reshape(Œ≥[rÃÉ^2+1:rÃÉ*(rÃÉ+d‚ÇÇ)], rÃÉ, d‚ÇÇ)
                    Œ≥‚ÇÉ = reshape(Œ≥[rÃÉ*(rÃÉ+d‚ÇÇ)+1:rÃÉ*(rÃÉ+d‚ÇÅ+d‚ÇÇ)], d‚ÇÅ, rÃÉ)
                    # According to (2.169), the equation would be:
                    # U·µè * Œ≥‚ÇÅ * V·µè' + U·µè * Œ≥‚ÇÇ' * (I - V·µè*V·µè') + (I - U·µè*U·µè') * Œ≥‚ÇÉ' * V·µè'
                    # But as the columns of Œ≥‚ÇÉ are orthogonal to the ones in U·µè,
                    # the rows of Œ≥‚ÇÇ are orthogonal to the columns of V·µè,
                    # the expression can be simplified:
                    (U·µè * Œ≥‚ÇÅ + Œ≥‚ÇÉ) * V·µè' + U·µè * Œ≥‚ÇÇ
                end,
            backw = Œ¶·µÉy -> begin
                    Œ≥‚ÇÅ = U·µè' * Œ¶·µÉy * V·µè
                    Œ≥‚ÇÇ = U·µè' * Œ¶·µÉy * (I - V·µè*V·µè')
                    Œ≥‚ÇÉ = (I - U·µè*U·µè') * Œ¶·µÉy * V·µè
                    vcat(vec(Œ≥‚ÇÅ), vec(Œ≥‚ÇÇ), vec(Œ≥‚ÇÉ))
                end)
        b = P·µè' * Œ¶' * y
        ùíü‚Åª¬π = I / Diagonal(vcat(vec(H·µè·µ§·µ•), vec(kron(dH·µè, ones(1, d‚ÇÇ))), vec(kron(dH·µè, ones(1, d‚ÇÅ))')))
        # These diagonal operators do not change during the CG solve, so build them
        # once per outer iteration instead of on every application of CG_op.
        gap = ùíü‚Åª¬π - œµ·µè^2 * I
        reg = œµ·µè^2 * I / gap
        CG_op = FunctionOperator{dType}(name = "CG_op", inDims = (rÃÉ*(rÃÉ+d‚ÇÅ+d‚ÇÇ),), outDims = (rÃÉ*(rÃÉ+d‚ÇÅ+d‚ÇÇ),),
            forw = Œ≥ ->  begin
                    reg * Œ≥ + P·µè' * Œ¶' * Œ¶ * P·µè * Œ≥
                end)
        Œ≥·µè = cg(CG_op, b, maxiter = maxIter) # 2.167
        r·µè = y - Œ¶ * P·µè * Œ≥·µè
        Œ≥ÃÉ‚Çñ = (ùíü‚Åª¬π / gap) * Œ≥·µè - P·µè' * Œ¶' * r·µè
        X·µè = Œ¶' * r·µè + P·µè * Œ≥ÃÉ‚Çñ   # 2.168
    end

    # Final report; skipped entirely when not verbose so no rank/norm is computed in vain.
    if verbose
        # Without any iteration there are no stored singular values yet, so fall back
        # to the spectral norm of the initial iterate.
        s1 = isnothing(œÉ) ? opnorm(X·µè, 2) : œÉ[1]
        _print_MatrixIRLS_status(N, X·µè, X·¥≥·µÄ, s1, œµ·µè)
    end

    return X·µè
end

fancy_MatrixIRLS_for_PCA (generic function with 1 method)

# Numerical Experiments

### General parameters

In [5]:
# Matrix dimensions
d‚ÇÅ, d‚ÇÇ = 50, 50
n = min(d‚ÇÅ, d‚ÇÇ)
# Rank and number of non-zero elements in sparse component
r, k = 7, 0
# Type of matrix elements
dType = ComplexF64;

## Understanding and Enhancing Data Recovery Algorithms

*From Noise-Blind Sparse Recovery to Reweighted Methods for Low-Rank Matrix Optimization*

*by Christian Kümmerle*

https://mediatum.ub.tum.de/doc/1521436/1521436.pdf

### Generate Data

#### Gaussian Low Rank Matrix
Corresponding Matlab function: https://github.com/ckuemmerle/hm_irls/blob/master/sample_X0_lowrank.m

In [6]:
# Build the d1 x d2 ground-truth matrix of rank r with element type dType,
# then print its size and rank as a sanity check.
# NOTE(review): generateLowRankComponent_Christian is not defined in this file —
# presumably it is loaded from helper_functions.ipynb; confirm.
L‚ÇÄ = generateLowRankComponent_Christian(d‚ÇÅ, d‚ÇÇ, r, dType)
@show size(L‚ÇÄ)
@show rank(L‚ÇÄ);

size(L‚ÇÄ) = (50, 50)
rank(L‚ÇÄ) = 7


#### Sampling Mask ($\Phi$)
Corresponding Matlab function: https://github.com/ckuemmerle/hm_irls/blob/master/sample_phi_MatrixCompletion.m

_**Note:** Christian's Matlab function and my Julia function differ in how they satisfy the requirement of having at least $r$ non-zero entries in each row and each column._

In [7]:
# Sampling budget: 1.05 times the number of degrees of freedom, rounded down and
# capped at the total number of matrix entries.
df = r * (d‚ÇÅ + d‚ÇÇ - r) # Number of degrees of freedom of the setting
m = floor(Int, min(1.05 * df, d‚ÇÅ * d‚ÇÇ))
# NOTE(review): the mask generator is not defined in this file — presumably it comes
# from helper_functions.ipynb and returns a d1 x d2 mask of zeros and ones; confirm.
Œ¶·¥π = generateŒ¶(d‚ÇÅ, d‚ÇÇ, r, m)
# Wrap the mask as an operator: the forward map multiplies elementwise by the mask,
# the backward map copies its input unchanged.
Œ¶ = FunctionOperator{dType}(name = "Œ¶", inDims = (d‚ÇÅ, d‚ÇÇ), outDims = (d‚ÇÅ, d‚ÇÇ),
    forw = (b,x) -> b .= Œ¶·¥π .* x, backw = (b,x) -> b .= x)
@show r
# dims=1 sums down each column (one total per column); dims=2 sums along each row
# (one total per row), so the second line reports the per-row minimum.
println("minimum number of non-zero entries in each column: ", Int(minimum(sum(Œ¶·¥π, dims=1))))
println("minimum number of non-zero entries in each row: ", Int(minimum(sum(Œ¶·¥π, dims=2))))

r = 7
minimum number of non-zero entries in each column: 9
minimum number of non-zero entries in each column: 8


#### Subsampling The Ground Truth Matrix

In [8]:
# Apply the sampling operator to the ground-truth matrix to obtain the observed data,
# then print the rank of the observed data.
y = Œ¶ * L‚ÇÄ
@show rank(y);

rank(y) = 50


### Running The Reconstruction

In [9]:
# Run 48 outer iterations with per-iteration diagnostics switched on; @time reports the
# elapsed time and the trailing `;` keeps the returned matrix from being displayed.
@time fancy_MatrixIRLS_for_PCA(L‚ÇÄ, y, Œ¶, N = 48, verbose = true);

k = 0,	rank(X·µè) = 50,	‚ÄñX·¥≥·µÄ - X·µè‚Äñ‚ÇÇ = 174.811, œÉ‚ÇÅ =  74.748, œµ·µè =  33.581
k = 1,	rank(X·µè) = 50,	‚ÄñX·¥≥·µÄ - X·µè‚Äñ‚ÇÇ = 125.170, œÉ‚ÇÅ = 136.996, œµ·µè =  23.829
k = 2,	rank(X·µè) = 50,	‚ÄñX·¥≥·µÄ - X·µè‚Äñ‚ÇÇ =  88.464, œÉ‚ÇÅ = 197.778, œµ·µè =  15.661
k = 3,	rank(X·µè) = 50,	‚ÄñX·¥≥·µÄ - X·µè‚Äñ‚ÇÇ =  66.096, œÉ‚ÇÅ = 220.748, œµ·µè =  10.665
k = 4,	rank(X·µè) = 50,	‚ÄñX·¥≥·µÄ - X·µè‚Äñ‚ÇÇ =  55.641, œÉ‚ÇÅ = 227.255, œµ·µè =   7.563
k = 5,	rank(X·µè) = 50,	‚ÄñX·¥≥·µÄ - X·µè‚Äñ‚ÇÇ =  45.792, œÉ‚ÇÅ = 229.624, œµ·µè =   5.333
k = 6,	rank(X·µè) = 50,	‚ÄñX·¥≥·µÄ - X·µè‚Äñ‚ÇÇ =  36.322, œÉ‚ÇÅ = 231.461, œµ·µè =   3.952
k = 7,	rank(X·µè) = 50,	‚ÄñX·¥≥·µÄ - X·µè‚Äñ‚ÇÇ =  29.960, œÉ‚ÇÅ = 233.286, œµ·µè =   3.179
k = 8,	rank(X·µè) = 50,	‚ÄñX·¥≥·µÄ - X·µè‚Äñ‚ÇÇ =  26.327, œÉ‚ÇÅ = 234.681, œµ·µè =   2.635
k = 9,	rank(X·µè) = 50,	‚ÄñX·¥≥·µÄ - X·µè‚Äñ‚ÇÇ =  24.101, œÉ‚ÇÅ = 235.525, œµ·µè =   2.262
k = 10,	rank(X·µè) = 50,	‚ÄñX·¥≥·µÄ - X·µè‚Äñ‚ÇÇ =  22.422, œÉ‚ÇÅ = 235.946, œµ·

# Robust Principal Component Analysis?
*by Emmanuel J. Candès, Xiaodong Li, Yi Ma, and John Wright*  
https://arxiv.org/pdf/0912.3599.pdf

#### 4.1 Exact recovery from varying fractions of error

We first verify the correct recovery phenomenon of Theorem 1.1 on randomly generated problems. We consider square matrices of varying dimension $n = 500, \ldots , 3000$. We generate a rank-$r$ matrix $L_0$ as a product $L_0 = XY^*$ where $X$ and $Y$ are $n \times r$ matrices with entries independently sampled
from a $\mathcal{N}(0,1/n)$ distribution. $S_0$ is generated by choosing a support set $\Omega$ of size $k$ uniformly at random, and setting $S_0 = \mathcal{P}_\Omega E$, where $E$ is a matrix with independent Bernoulli $\pm 1$ entries. Table 1 (top) reports the results with $r = rank(L_0) = 0.05 \times n$ and $k = \Vert S_0 \Vert_0 = 0.05 \times n^2$. Table 1 (bottom) reports the results for a more challenging scenario, $rank(L_0) = 0.05 \times n$ and $k = 0.10 \times n^2$. In all cases, we set $\lambda = 1 \cdot \sqrt{n}$. Notice that in all cases, solving the convex PCP gives a result $(L, S)$ with the correct rank and sparsity. Moreover, the relative error $\frac{\Vert L - L_0 \Vert_F}{\Vert L_0 \Vert_F}$ is small, less than $10^{-5}$ in all examples considered.

<center><img src="table_1.png" /></center>

### Generate Data

_**Note:** In this notebook we deal only with PCA (simple matrix completion); therefore, there is no sparse component in the ground truth matrix._

In [13]:
# Replace the ground-truth matrix with one built by the second generator (size n,
# rank r, element type dType), then print its size and rank as a sanity check.
# NOTE(review): generateLowRankComponent_Candes is not defined in this file —
# presumably it is loaded from helper_functions.ipynb; confirm.
L‚ÇÄ = generateLowRankComponent_Candes(n, r, dType)
@show size(L‚ÇÄ)
@show rank(L‚ÇÄ);

size(L‚ÇÄ) = (50, 50)
rank(L‚ÇÄ) = 7


#### Sampling Mask ($\Phi$)

Using the earlier generated sampling mask

#### Subsampling The Ground Truth Matrix

In [14]:
# Re-sample the new ground-truth matrix with the sampling operator defined earlier,
# then print the rank of the observed data.
y = Œ¶ * L‚ÇÄ
@show rank(y);

rank(y) = 50


### Running The Reconstruction

In [15]:
# Run 80 outer iterations with per-iteration diagnostics switched on; @time reports the
# elapsed time and the trailing `;` keeps the returned matrix from being displayed.
@time fancy_MatrixIRLS_for_PCA(L‚ÇÄ, y, Œ¶, N = 80, verbose = true);

k = 0,	rank(X·µè) = 44,	‚ÄñX·¥≥·µÄ - X·µè‚Äñ‚ÇÇ =   0.044, œÉ‚ÇÅ =   0.020, œµ·µè =   0.011
k = 1,	rank(X·µè) = 43,	‚ÄñX·¥≥·µÄ - X·µè‚Äñ‚ÇÇ =   0.039, œÉ‚ÇÅ =   0.028, œµ·µè =   0.009
k = 2,	rank(X·µè) = 42,	‚ÄñX·¥≥·µÄ - X·µè‚Äñ‚ÇÇ =   0.034, œÉ‚ÇÅ =   0.039, œµ·µè =   0.007
k = 3,	rank(X·µè) = 40,	‚ÄñX·¥≥·µÄ - X·µè‚Äñ‚ÇÇ =   0.031, œÉ‚ÇÅ =   0.047, œµ·µè =   0.005
k = 4,	rank(X·µè) = 36,	‚ÄñX·¥≥·µÄ - X·µè‚Äñ‚ÇÇ =   0.030, œÉ‚ÇÅ =   0.050, œµ·µè =   0.004
k = 5,	rank(X·µè) = 31,	‚ÄñX·¥≥·µÄ - X·µè‚Äñ‚ÇÇ =   0.030, œÉ‚ÇÅ =   0.052, œµ·µè =   0.003
k = 6,	rank(X·µè) = 27,	‚ÄñX·¥≥·µÄ - X·µè‚Äñ‚ÇÇ =   0.030, œÉ‚ÇÅ =   0.053, œµ·µè =   0.002
k = 7,	rank(X·µè) = 21,	‚ÄñX·¥≥·µÄ - X·µè‚Äñ‚ÇÇ =   0.030, œÉ‚ÇÅ =   0.054, œµ·µè =   0.002
k = 8,	rank(X·µè) = 17,	‚ÄñX·¥≥·µÄ - X·µè‚Äñ‚ÇÇ =   0.030, œÉ‚ÇÅ =   0.054, œµ·µè =   0.002
k = 9,	rank(X·µè) = 14,	‚ÄñX·¥≥·µÄ - X·µè‚Äñ‚ÇÇ =   0.030, œÉ‚ÇÅ =   0.055, œµ·µè =   0.002
k = 10,	rank(X·µè) = 12,	‚ÄñX·¥≥·µÄ - X·µè‚Äñ‚ÇÇ =   0.030, œÉ‚ÇÅ =   0.055, œµ·