Skip to content

Commit b938b8a

Browse files
author
Viktor Petukhov
committed
Renamed eigSVD, fixed types, exact_svd parameter
1 parent a5f2dc1 commit b938b8a

File tree

4 files changed

+47
-28
lines changed

4 files changed

+47
-28
lines changed

Project.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,14 +4,14 @@ authors = ["Viktor Petukhov <viktor.petukhov+gh@pm.me> and contributors"]
44
version = "0.1.0"
55

66
[deps]
7-
IterativeSolvers = "42fd0dbc-a981-5370-80f2-aaf504508153"
7+
KrylovKit = "0b1a1467-8014-51b9-945f-bf0ae24f4b77"
88
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
99
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
1010
SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
1111

1212
[compat]
13-
IterativeSolvers = "0.9"
1413
julia = "1"
14+
KrylovKit = "0.6"
1515

1616
[extras]
1717
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
Fast randomized PCA algorithms for Sparse Data. It's a julia re-implementation of Matlab [frPCA_sparse](https://github.com/XuFengthucs/frPCA_sparse).
66

7-
The package includes an implementation of two functions `eigSVD` and `pca`, which are designed to work with sparse matrices (`SparseMatrixCSC`), though can also be applied with normal ones (`Matrix`).
7+
The package includes an implementation of two functions `eig_svd` and `pca`, which are designed to work with sparse matrices (`SparseMatrixCSC`), though can also be applied with normal ones (`Matrix`).
88

99
## References
1010

src/FastRandPCA.jl

Lines changed: 34 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,32 +1,39 @@
11
module FastRandPCA
22

3-
import IterativeSolvers: lobpcg
3+
using KrylovKit: svdsolve, KrylovDefaults
44

55
using SparseArrays
66
using LinearAlgebra
77
using Random
88

9-
export eigSVD, pca
9+
export eig_svd, pca
1010

1111
"""
12-
Perform truncated singular value decomposition of a matrix `A` to return `k` first values.
12+
`eig_svd(A, [k]; [kwargs...])`
1313
14-
Designed to work with sparse matrices. If `k` is not specified, all values are returned.
14+
Perform singular value decomposition of a matrix `A` by estimating `svd(A' * A)`. If `k` is provided,
15+
Krylov iterative solver is used to return only `k` largest singular values and corresponding singular vectors.
16+
17+
Designed to work with sparse matrices. Keyword arguments are forwarded to `KrylovKit.svdsolve`.
1518
1619
# Examples
20+
1721
```julia-repl
1822
using SparseArrays
19-
eigSVD(sprand(100, 100, 0.2), 10)
23+
eig_svd(sprand(100, 100, 0.2), 10)
2024
```
2125
"""
22-
function eigSVD(A::AbstractMatrix{<:Number}, k::Int=-1)
26+
function eig_svd(A::AbstractMatrix{<:Number}, k::Int=-1; krylovdim::Int=-1, kwargs...)
2327
B = A' * A;
24-
if k == -1
28+
if (k == -1) || (k == size(B, 1))
2529
res = eigen(Matrix(B))
2630
V, λ = res.vectors, res.values
2731
else
28-
r = lobpcg(B, true, k)
29-
V, λ = r.X, r.λ
32+
if krylovdim <= 0
33+
krylovdim = max(KrylovDefaults.krylovdim, k + 2)
34+
end
35+
r = svdsolve(B, size(B, 1), k, krylovdim=krylovdim, kwargs...)
36+
V, λ = hcat(r[2]...), r[1]
3037
end
3138

3239
λ[λ .< 0] .= 0 # Some of zero values become negative due to numerical errors
@@ -36,49 +43,59 @@ function eigSVD(A::AbstractMatrix{<:Number}, k::Int=-1)
3643
return SVD(U, d, V)
3744
end
3845

46+
svd_wrap(A::AbstractMatrix{<:Number}, exact_svd::Bool) = (exact_svd ? svd(A) : eig_svd(A))
47+
3948
"""
49+
`pca(A, k; [q=10], [exact_svd=false], [s=5])`
50+
4051
Perform fast randomized PCA of a matrix `A` to return `k` first components.
4152
4253
Designed to work with sparse matrices.
4354
44-
Parameter `q` is the number of pass over `A`, `q` should larger than 1 and `q` times pass eqauls to `(q-2)/2` times power iteration.
55+
# Keyword arguments
56+
57+
- `exact_svd` - if `true`, use exact SVD on random projections, otherwise use `eig_svd` approximation.
58+
Setting it to `true` would improve precision of the algorithm on small eigenvalues, but would drastically decrease performance
59+
on large matrices (>1M rows).
60+
- `q` is the number of pass over `A`, `q` should larger than 1 and `q` times pass eqauls to `(q-2)/2` times power iteration
61+
- `s` is the excess dimension for the random matrix. This parameter is not supposed to be changed normally.
4562
4663
# Examples
64+
4765
```julia-repl
4866
using SparseArrays
4967
pca(sprand(100, 100, 0.2), 10; q=3)
5068
```
5169
"""
52-
function pca(A::AbstractMatrix{<:Number}, k::Int; q::Int=10)
70+
function pca(A::AbstractMatrix{<:Number}, k::Int; q::Int=10, exact_svd::Bool=false, s=5)
5371
q >= 2 || error("Pass parameter q must be larger than 1 !");
5472

55-
s = 5;
5673
if size(A, 1) > size(A, 2)
5774
r = pca(A', k; q=q)
5875
return SVD(Matrix(r.Vt), r.S, Matrix(r.U))
5976
end
6077

6178
m,n = size(A);
6279
if q % 2 == 0
63-
Q = randn(n, k+s);
80+
Q = randn(promote_type(Float32, eltype(A)), n, k+s);
6481
Q = A*Q;
6582
if q == 2
66-
Q = eigSVD(Q).U;
83+
Q = svd_wrap(Q, exact_svd).U;
6784
else
6885
Q = lu(Q).L;
6986
end
7087
else
71-
Q = randn(m, k+s);
88+
Q = randn(promote_type(Float32, eltype(A)), m, k+s);
7289
end
7390
upper = floor((q-1)/2);
7491
for i = 1:upper
7592
if i == upper
76-
Q = eigSVD(A*(A'*Q)).U;
93+
Q = svd_wrap(A*(A'*Q), exact_svd).U;
7794
else
7895
Q = lu(A*(A'*Q)).L;
7996
end
8097
end
81-
V,S,U = eigSVD(A'*Q);
98+
V,S,U = svd_wrap(A'*Q, exact_svd);
8299
ind = s+1:k+s;
83100
U = (Q * U[:, ind])';
84101
V = V[:, ind]';

test/runtests.jl

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6,18 +6,20 @@ using SparseArrays
66
for (n,m) in [[100, 20], [20, 100], [1000, 1000]]
77
mat = sprand(n, m, 0.2)
88

9-
eigs = eigSVD(mat)
9+
eigs = eig_svd(mat)
1010
@test all(size(eigs.V) .== size(mat, 2))
1111
@test length(eigs.S) == size(mat, 2)
1212
@test all(size(eigs.U) .== size(mat))
1313

1414
k = 10
15-
pcs = pca(mat, k; q=3)
16-
@test size(pcs.Vt, 2) == size(mat, 2)
17-
@test size(pcs.Vt, 1) == k
18-
@test length(pcs.S) == k
19-
@test size(pcs.U, 2) == size(mat, 1)
20-
@test size(pcs.U, 1) == k
21-
@test all(size(pcs.U * mat) .== size(pcs.Vt))
15+
for exact in [false, true]
16+
pcs = pca(mat, k; q=3, exact_svd=exact)
17+
@test size(pcs.Vt, 2) == size(mat, 2)
18+
@test size(pcs.Vt, 1) == k
19+
@test length(pcs.S) == k
20+
@test size(pcs.U, 2) == size(mat, 1)
21+
@test size(pcs.U, 1) == k
22+
@test all(size(pcs.U * mat) .== size(pcs.Vt))
23+
end
2224
end
2325
end

0 commit comments

Comments
 (0)