-
Notifications
You must be signed in to change notification settings - Fork 9
/
pca.jl
30 lines (25 loc) · 976 Bytes
/
pca.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
"""
    PCA{T<:Real} <: DimensionReducer
        tol :: T

Use SVD to compute the PCA of the design matrix of descriptors. (using Force descriptors TBA)
If `tol` is a float, the number of components to keep is the smallest `n` such that the relative fraction of variance explained by the leading `n` principal components is greater than `1 - tol`. If `tol` is an int, the components corresponding to the `tol` largest eigenvalues are returned.
"""
struct PCA{T<:Real} <: DimensionReducer
# Component-selection threshold, stored with its concrete numeric type `T`.
# `fit` forwards this value unchanged to `select_eigendirections`.
tol::T
end
"""
    PCA(; tol = 0.01)

Keyword constructor: build a `PCA` whose `tol` field is the supplied `tol`
(`0.01` when omitted).
"""
PCA(; tol = 0.01) = PCA(tol)
"""
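    fit(ds::DataSet, pca::PCA)

Fit a linear dimension reduction using PCA on the global descriptors of the dataset `ds`, where the global descriptor of each entry is the sum of that entry's local descriptors. Throws an error if the local descriptors cannot be collected.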
"""
function fit(ds::DataSet, pca::PCA)
    # One summed value per entry of `ds`: the sum over that entry's local
    # descriptor values. Any failure while collecting them is reported with
    # the single missing-descriptor message below.
    summed_descriptors = try
        [sum(get_values(get_local_descriptors(entry))) for entry in ds]
    catch
        error("No local descriptors found in DataSet")
    end

    # Return the result of `select_eigendirections` directly instead of binding
    # it to locals that are never read. The original named the two parts
    # `λ, W` — presumably eigenvalues and projection directions; TODO confirm
    # against `select_eigendirections`.
    return select_eigendirections(summed_descriptors, pca.tol)
end