From 4ccaa88279bafe04a9e6111a62373782a00ab709 Mon Sep 17 00:00:00 2001 From: kortschak Date: Wed, 21 Dec 2016 16:02:10 +1030 Subject: [PATCH] stat: address PR comments --- cca_test.go | 63 +++++++++++++++++++++++++++++------------------------ pca_cca.go | 31 +++++++++++++------------- pca_test.go | 54 +++++++++++++++++++++++++++++---------------- 3 files changed, 86 insertions(+), 62 deletions(-) diff --git a/cca_test.go b/cca_test.go index ed32412..92c2dbf 100644 --- a/cca_test.go +++ b/cca_test.go @@ -13,6 +13,7 @@ import ( ) func TestCanonicalCorrelations(t *testing.T) { +tests: for i, test := range []struct { xdata mat64.Matrix ydata mat64.Matrix @@ -149,36 +150,42 @@ func TestCanonicalCorrelations(t *testing.T) { }, } { var cc stat.CC - err := cc.CanonicalCorrelations(test.xdata, test.ydata, test.weights) - if err != nil { - t.Errorf("%d: unexpected error: %v", i, err) - continue - } + var corrs []float64 + var pVecs, qVecs *mat64.Dense + var phiVs, psiVs *mat64.Dense + for j := 0; j < 2; j++ { + err := cc.CanonicalCorrelations(test.xdata, test.ydata, test.weights) + if err != nil { + t.Errorf("%d use %d: unexpected error: %v", i, j, err) + continue tests + } - corrs := cc.Corrs(nil) - pVecs := cc.Left(nil, true) - qVecs := cc.Right(nil, true) - phiVs := cc.Left(nil, false) - psiVs := cc.Right(nil, false) + corrs = cc.Corrs(corrs) + pVecs = cc.Left(pVecs, true) + qVecs = cc.Right(qVecs, true) + phiVs = cc.Left(phiVs, false) + psiVs = cc.Right(psiVs, false) - if !floats.EqualApprox(corrs, test.wantCorrs, test.epsilon) { - t.Errorf("%d: unexpected variance result got:%v, want:%v", i, corrs, test.wantCorrs) - } - if !mat64.EqualApprox(pVecs, test.wantpVecs, test.epsilon) { - t.Errorf("%d: unexpected CCA result got:\n%v\nwant:\n%v", - i, mat64.Formatted(pVecs), mat64.Formatted(test.wantpVecs)) - } - if !mat64.EqualApprox(qVecs, test.wantqVecs, test.epsilon) { - t.Errorf("%d: unexpected CCA result got:\n%v\nwant:\n%v", - i, mat64.Formatted(qVecs), mat64.Formatted(test.wantqVecs)) - } - if !mat64.EqualApprox(phiVs, test.wantphiVs, test.epsilon) { - t.Errorf("%d: unexpected CCA result got:\n%v\nwant:\n%v", - i, mat64.Formatted(phiVs), mat64.Formatted(test.wantphiVs)) - } - if !mat64.EqualApprox(psiVs, test.wantpsiVs, test.epsilon) { - t.Errorf("%d: unexpected CCA result got:\n%v\nwant:\n%v", - i, mat64.Formatted(psiVs), mat64.Formatted(test.wantpsiVs)) + if !floats.EqualApprox(corrs, test.wantCorrs, test.epsilon) { + t.Errorf("%d use %d: unexpected variance result got:%v, want:%v", + i, j, corrs, test.wantCorrs) + } + if !mat64.EqualApprox(pVecs, test.wantpVecs, test.epsilon) { + t.Errorf("%d use %d: unexpected CCA result got:\n%v\nwant:\n%v", + i, j, mat64.Formatted(pVecs), mat64.Formatted(test.wantpVecs)) + } + if !mat64.EqualApprox(qVecs, test.wantqVecs, test.epsilon) { + t.Errorf("%d use %d: unexpected CCA result got:\n%v\nwant:\n%v", + i, j, mat64.Formatted(qVecs), mat64.Formatted(test.wantqVecs)) + } + if !mat64.EqualApprox(phiVs, test.wantphiVs, test.epsilon) { + t.Errorf("%d use %d: unexpected CCA result got:\n%v\nwant:\n%v", + i, j, mat64.Formatted(phiVs), mat64.Formatted(test.wantphiVs)) + } + if !mat64.EqualApprox(psiVs, test.wantpsiVs, test.epsilon) { + t.Errorf("%d use %d: unexpected CCA result got:\n%v\nwant:\n%v", + i, j, mat64.Formatted(psiVs), mat64.Formatted(test.wantpsiVs)) + } } } } diff --git a/pca_cca.go b/pca_cca.go index 79c08f5..6de8947 100644 --- a/pca_cca.go +++ b/pca_cca.go @@ -13,9 +13,9 @@ import ( "github.com/gonum/matrix/mat64" ) -// PC is the result of a principal components analysis. The results of a principal -// components analysis held by a PC are only valid after a call to PrincipalComponents -// has returned true. +// PC is a type for computing and extracting the principal components of a +// matrix. The results of the principal components analysis are only valid +// if the call to PrincipalComponents was successful. type PC struct { n, d int weights []float64 @@ -29,11 +29,11 @@ type PC struct { // // PrincipalComponents centers the variables but does not scale the variance. // -// The slice weights is used to weight the observations. If weights is nil, each +// The weights slice is used to weight the observations. If weights is nil, each // weight is considered to have a value of one, otherwise the length of weights // must match the number of observations or PrincipalComponents will panic. // -// PrincipleComponents returns whether the analysis was successful. +// PrincipalComponents returns whether the analysis was successful. func (c *PC) PrincipalComponents(a mat64.Matrix, weights []float64) (ok bool) { c.n, c.d = a.Dims() if weights != nil && len(weights) != c.n { @@ -48,17 +48,17 @@ func (c *PC) PrincipalComponents(a mat64.Matrix, weights []float64) (ok bool) { } // Vectors returns the component direction vectors of a principal components -// analysis. The vectors are returned in the columns of a d×n matrix. -// If dst is not nil it must either be zero-sized or be a d×n matrix. dst will -// be used as the destination for the direction vector data. If dst is nil, a -// new mat64.Dense is allocated for the destination. +// analysis. The vectors are returned in the columns of a d×min(n, d) matrix. +// If dst is not nil it must either be zero-sized or be a d×min(n, d) matrix. +// dst will be used as the destination for the direction vector data. If dst +// is nil, a new mat64.Dense is allocated for the destination. func (c *PC) Vectors(dst *mat64.Dense) *mat64.Dense { if !c.ok { panic("stat: use of unsuccessful principal components analysis") } if dst == nil { dst = &mat64.Dense{} - } else if d, n := dst.Dims(); (n != 0 || d != 0) && (n != c.n || d != c.d) { + } else if d, n := dst.Dims(); (n != 0 || d != 0) && (d != c.d || n != min(c.n, c.d)) { panic(matrix.ErrShape) } dst.VFromSVD(c.svd) @@ -66,9 +66,9 @@ func (c *PC) Vectors(dst *mat64.Dense) *mat64.Dense { } // Vars returns the column variances of the principal component scores, -// b * vecs where b is a matrix with centered columns. Variances are returned -// in descending sort order. -// If dst is not nil is is used to store the variances and returned. +// b * vecs, where b is a matrix with centered columns. Variances are returned +// in descending order. +// If dst is not nil it is used to store the variances and returned. // Vars will panic if the receiver has not successfully performed a principal // components analysis or dst is not nil and the length of dst is not min(n, d). func (c *PC) Vars(dst []float64) []float64 { @@ -98,8 +98,9 @@ func min(a, b int) int { return b } -// CC is the result of a canonical correlation analysis. A CC should not be used -// unless it has been returned by a successful call to CanonicalCorrelations. +// CC is a type for computing the canonical correlations of a pair of matrices. +// The results of the canonical correlation analysis are only valid +// if the call to CanonicalCorrelations was successful. type CC struct { // n is the number of observations used to // construct the canonical correlations. diff --git a/pca_test.go b/pca_test.go index 48d18d1..06618f5 100644 --- a/pca_test.go +++ b/pca_test.go @@ -12,6 +12,7 @@ import ( ) func TestPrincipalComponents(t *testing.T) { +tests: for i, test := range []struct { data mat64.Matrix weights []float64 @@ -56,6 +57,21 @@ func TestPrincipalComponents(t *testing.T) { wantVars: []float64{0.1665786313282786, 0.02065509475412993, 0.007944620317765855, 0.0019327647109368329}, epsilon: 1e-12, }, + { // Truncated iris data to form wide matrix. + data: mat64.NewDense(3, 4, []float64{ + 5.1, 3.5, 1.4, 0.2, + 4.9, 3.0, 1.4, 0.2, + 4.7, 3.2, 1.3, 0.2, + }), + wantVecs: mat64.NewDense(4, 3, []float64{ + -0.5705187254552365, -0.7505979435049239, 0.08084520834544455, + -0.8166537769529318, 0.5615147645527523, -0.032338083338177705, + -0.08709186238359454, -0.3482870890450082, -0.22636658336724505, + 0, 0, -0.9701425001453315, + }), + wantVars: []float64{0.0844692361537822, 0.022197430512884326, 0}, + epsilon: 1e-12, + }, { // Truncated iris data transposed to check for operation on fat input. data: mat64.NewDense(10, 4, []float64{ 5.1, 3.5, 1.4, 0.2, @@ -131,29 +147,29 @@ func TestPrincipalComponents(t *testing.T) { epsilon: 1e-12, }, } { - vecs, vars, ok := principalComponents(test.data, test.weights) - if !ok { - t.Errorf("unexpected SVD failure for test %d", i) - continue - } - if !mat64.EqualApprox(vecs, test.wantVecs, test.epsilon) { - t.Errorf("%d: unexpected PCA result got:\n%v\nwant:\n%v", - i, mat64.Formatted(vecs), mat64.Formatted(test.wantVecs)) - } - if !approxEqual(vars, test.wantVars, test.epsilon) { - t.Errorf("%d: unexpected variance result got:%v, want:%v", i, vars, test.wantVars) + var pc PC + var vecs *mat64.Dense + var vars []float64 + for j := 0; j < 2; j++ { + ok := pc.PrincipalComponents(test.data, test.weights) + vecs = pc.Vectors(vecs) + vars = pc.Vars(vars) + if !ok { + t.Errorf("unexpected SVD failure for test %d use %d", i, j) + continue tests + } + if !mat64.EqualApprox(vecs, test.wantVecs, test.epsilon) { + t.Errorf("%d use %d: unexpected PCA result got:\n%v\nwant:\n%v", + i, j, mat64.Formatted(vecs), mat64.Formatted(test.wantVecs)) + } + if !approxEqual(vars, test.wantVars, test.epsilon) { + t.Errorf("%d use %d: unexpected variance result got:%v, want:%v", + i, j, vars, test.wantVars) + } } } } -func principalComponents(a mat64.Matrix, weights []float64) (vecs *mat64.Dense, vars []float64, ok bool) { - var pc PC - if !pc.PrincipalComponents(a, weights) { - return nil, nil, false - } - return pc.Vectors(nil), pc.Vars(nil), true -} - func approxEqual(a, b []float64, epsilon float64) bool { if len(a) != len(b) { return false