From 0331cab04a2fdad0a210263069fa143f6f7d038f Mon Sep 17 00:00:00 2001 From: btracey Date: Mon, 3 Aug 2015 10:21:37 -0600 Subject: [PATCH 1/2] Add QR factorization to lapack64 interface. --- cgo/lapack.go | 65 ++++++++++++++++++++++++++++++++++++++++++++ cgo/lapack_test.go | 8 ++++++ lapack.go | 1 + lapack64/lapack64.go | 26 ++++++++++++++++++ native/dgeqr2.go | 2 +- testlapack/dgeqrf.go | 5 +++- 6 files changed, 105 insertions(+), 2 deletions(-) diff --git a/cgo/lapack.go b/cgo/lapack.go index cc5e0ef..c76dcb1 100644 --- a/cgo/lapack.go +++ b/cgo/lapack.go @@ -77,6 +77,71 @@ func (impl Implementation) Dpotrf(ul blas.Uplo, n int, a []float64, lda int) (ok return clapack.Dpotrf(ul, n, a, lda) } +// Dgeqr2 computes a QR factorization of the m×n matrix A. +// +// In a QR factorization, Q is an m×m orthonormal matrix, and R is an +// upper triangular m×n matrix. +// +// During Dgeqr2, a is modified to contain the information to construct Q and R. +// The upper triangle of a contains the matrix R. The lower triangular elements +// (not including the diagonal) contain the elementary reflectors. Tau is modified +// to contain the reflector scales. Tau must have length at least k = min(m,n), and +// this function will panic otherwise. +// +// The ith elementary reflector can be explicitly constructed by first extracting +// the +// v[j] = 0 j < i +// v[j] = i j == i +// v[j] = a[i*lda+j] j > i +// and computing h_i = I - tau[i] * v * v^T. +// +// The orthonormal matrix Q can be constucted from a product of these elementary +// reflectors, Q = H_1*H_2 ... H_k, where k = min(m,n). +// +// Work is temporary storage of length at least n and this function will panic otherwise. +func (impl Implementation) Dgeqr2(m, n int, a []float64, lda int, tau, work []float64) { + // TODO(btracey): This is oriented such that columns of a are eliminated. + // This likely could be re-arranged to take better advantage of row-major + // storage. + checkMatrix(m, n, a, lda) + if len(work) < n { + panic(badWork) + } + k := min(m, n) + if len(tau) < k { + panic(badTau) + } + clapack.Dgeqr2(m, n, a, lda, tau) +} + +// Dgeqrf computes the QR factorization of the m×n matrix A using a blocked +// algorithm. Please see the documentation for Dgeqr2 for a description of the +// parameters at entry and exit. +// +// The C interface does not support providing temporary storage. To provide compatibility +// with native, lwork == -1 will not run Dgeqrf but will instead write the minimum +// work necessary to work[0]. If len(work) < lwork, Dgels will panic. +// +// tau must be at least len min(m,n), and this function will panic otherwise. +func (impl Implementation) Dgeqrf(m, n int, a []float64, lda int, tau, work []float64, lwork int) { + if lwork == -1 { + work[0] = float64(n) + return + } + checkMatrix(m, n, a, lda) + if len(work) < lwork { + panic(shortWork) + } + if lwork < n { + panic(badWork) + } + k := min(m, n) + if len(tau) < k { + panic(badTau) + } + clapack.Dgeqrf(m, n, a, lda, tau) +} + // Dgetf2 computes the LU decomposition of the m×n matrix A. // The LU decomposition is a factorization of a into // A = P * L * U diff --git a/cgo/lapack_test.go b/cgo/lapack_test.go index 7a69d7e..9f96c5a 100644 --- a/cgo/lapack_test.go +++ b/cgo/lapack_test.go @@ -16,6 +16,14 @@ func TestDpotrf(t *testing.T) { testlapack.DpotrfTest(t, impl) } +func TestDgeqr2(t *testing.T) { + testlapack.Dgeqr2Test(t, impl) +} + +func TestDgeqrf(t *testing.T) { + testlapack.DgeqrfTest(t, impl) +} + func TestDgetf2(t *testing.T) { testlapack.Dgetf2Test(t, impl) } diff --git a/lapack.go b/lapack.go index f6fbb51..4ca3372 100644 --- a/lapack.go +++ b/lapack.go @@ -23,6 +23,7 @@ type Complex128 interface{} // Float64 defines the public float64 LAPACK API supported by gonum/lapack. type Float64 interface { + Dgeqrf(m, n int, a []float64, lda int, tau, work []float64, lwork int) Dpotrf(ul blas.Uplo, n int, a []float64, lda int) (ok bool) } diff --git a/lapack64/lapack64.go b/lapack64/lapack64.go index fe13d58..c2d58eb 100644 --- a/lapack64/lapack64.go +++ b/lapack64/lapack64.go @@ -47,3 +47,29 @@ func Potrf(a blas64.Symmetric) (t blas64.Triangular, ok bool) { t.Diag = blas.NonUnit return } + +// Geqrf computes the QR factorization of the m×n matrix A using a blocked +// algorithm. During Dgeqr2, A is modified to contain the information to construct Q and R. +// The upper triangle of a contains the matrix R. The lower triangular elements +// (not including the diagonal) contain the elementary reflectors. Tau is modified +// to contain the reflector scales. Tau must have length at least k = min(m,n), and +// this function will panic otherwise. +// +// The ith elementary reflector can be explicitly constructed by first extracting +// the +// v[j] = 0 j < i +// v[j] = i j == i +// v[j] = a[i*lda+j] j > i +// and computing h_i = I - tau[i] * v * v^T. +// +// The orthonormal matrix Q can be constucted from a product of these elementary +// reflectors, Q = H_1*H_2 ... H_k, where k = min(m,n). +// +// Work is temporary storage, and lwork specifies the usable memory length. +// At minimum, lwork >= m and this function will panic otherwise. +// Dgeqrf is a blocked LQ factorization, but the block size is limited +// by the temporary space available. If lwork == -1, instead of performing Dgelqf, +// the optimal work length will be stored into work[0]. +func Geqrf(a blas64.General, tau, work []float64, lwork int) { + lapack64.Dgeqrf(a.Rows, a.Cols, a.Data, a.Stride, tau, work, lwork) +} diff --git a/native/dgeqr2.go b/native/dgeqr2.go index efae4a7..a1bb4d7 100644 --- a/native/dgeqr2.go +++ b/native/dgeqr2.go @@ -6,7 +6,7 @@ package native import "github.com/gonum/blas" -// Dgeqr2 computes a QR factorization of the m×n matrix a. +// Dgeqr2 computes a QR factorization of the m×n matrix A. // // In a QR factorization, Q is an m×m orthonormal matrix, and R is an // upper triangular m×n matrix. diff --git a/testlapack/dgeqrf.go b/testlapack/dgeqrf.go index c596415..cbd5ba4 100644 --- a/testlapack/dgeqrf.go +++ b/testlapack/dgeqrf.go @@ -66,7 +66,7 @@ func DgeqrfTest(t *testing.T, impl Dgeqrfer) { impl.Dgeqr2(m, n, ans, lda, tau, work) // Compute blocked QR with small work. impl.Dgeqrf(m, n, a, lda, tau, work, len(work)) - if !floats.EqualApprox(ans, a, 1e-14) { + if !floats.EqualApprox(ans, a, 1e-12) { t.Errorf("Case %v, mismatch small work.", c) } // Try the full length of work. @@ -80,6 +80,9 @@ func DgeqrfTest(t *testing.T, impl Dgeqrfer) { } // Try a slightly smaller version of work to test blocking. + if len(work) <= n { + continue + } work = work[1:] lwork-- copy(a, aCopy) From aa76326a1e015d74b7ff4a1d60faa56fbde858ba Mon Sep 17 00:00:00 2001 From: btracey Date: Mon, 3 Aug 2015 16:59:00 -0600 Subject: [PATCH 2/2] Responded to PR comments --- cgo/lapack.go | 8 ++++---- lapack64/lapack64.go | 4 ++-- native/dgeqr2.go | 4 ++-- native/dgeqrf.go | 4 ++-- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/cgo/lapack.go b/cgo/lapack.go index c76dcb1..ef92d75 100644 --- a/cgo/lapack.go +++ b/cgo/lapack.go @@ -82,10 +82,10 @@ func (impl Implementation) Dpotrf(ul blas.Uplo, n int, a []float64, lda int) (ok // In a QR factorization, Q is an m×m orthonormal matrix, and R is an // upper triangular m×n matrix. // -// During Dgeqr2, a is modified to contain the information to construct Q and R. +// A is modified to contain the information to construct Q and R. // The upper triangle of a contains the matrix R. The lower triangular elements // (not including the diagonal) contain the elementary reflectors. Tau is modified -// to contain the reflector scales. Tau must have length at least k = min(m,n), and +// to contain the reflector scales. tau must have length at least min(m,n), and // this function will panic otherwise. // // The ith elementary reflector can be explicitly constructed by first extracting @@ -115,14 +115,14 @@ func (impl Implementation) Dgeqr2(m, n int, a []float64, lda int, tau, work []fl } // Dgeqrf computes the QR factorization of the m×n matrix A using a blocked -// algorithm. Please see the documentation for Dgeqr2 for a description of the +// algorithm. See the documentation for Dgeqr2 for a description of the // parameters at entry and exit. // // The C interface does not support providing temporary storage. To provide compatibility // with native, lwork == -1 will not run Dgeqrf but will instead write the minimum // work necessary to work[0]. If len(work) < lwork, Dgels will panic. // -// tau must be at least len min(m,n), and this function will panic otherwise. +// tau must have length at least min(m,n), and this function will panic otherwise. func (impl Implementation) Dgeqrf(m, n int, a []float64, lda int, tau, work []float64, lwork int) { if lwork == -1 { work[0] = float64(n) diff --git a/lapack64/lapack64.go b/lapack64/lapack64.go index c2d58eb..1474153 100644 --- a/lapack64/lapack64.go +++ b/lapack64/lapack64.go @@ -49,10 +49,10 @@ func Potrf(a blas64.Symmetric) (t blas64.Triangular, ok bool) { } // Geqrf computes the QR factorization of the m×n matrix A using a blocked -// algorithm. During Dgeqr2, A is modified to contain the information to construct Q and R. +// algorithm. A is modified to contain the information to construct Q and R. // The upper triangle of a contains the matrix R. The lower triangular elements // (not including the diagonal) contain the elementary reflectors. Tau is modified -// to contain the reflector scales. Tau must have length at least k = min(m,n), and +// to contain the reflector scales. Tau must have length at least min(m,n), and // this function will panic otherwise. // // The ith elementary reflector can be explicitly constructed by first extracting diff --git a/native/dgeqr2.go b/native/dgeqr2.go index a1bb4d7..38678b6 100644 --- a/native/dgeqr2.go +++ b/native/dgeqr2.go @@ -11,10 +11,10 @@ import "github.com/gonum/blas" // In a QR factorization, Q is an m×m orthonormal matrix, and R is an // upper triangular m×n matrix. // -// During Dgeqr2, a is modified to contain the information to construct Q and R. +// A is modified to contain the information to construct Q and R. // The upper triangle of a contains the matrix R. The lower triangular elements // (not including the diagonal) contain the elementary reflectors. Tau is modified -// to contain the reflector scales. Tau must have length at least k = min(m,n), and +// to contain the reflector scales. tau must have length at least min(m,n), and // this function will panic otherwise. // // The ith elementary reflector can be explicitly constructed by first extracting diff --git a/native/dgeqrf.go b/native/dgeqrf.go index 4745a08..94c8b64 100644 --- a/native/dgeqrf.go +++ b/native/dgeqrf.go @@ -10,7 +10,7 @@ import ( ) // Dgeqrf computes the QR factorization of the m×n matrix A using a blocked -// algorithm. Please see the documentation for Dgeqr2 for a description of the +// algorithm. See the documentation for Dgeqr2 for a description of the // parameters at entry and exit. // // Work is temporary storage, and lwork specifies the usable memory length. @@ -19,7 +19,7 @@ import ( // by the temporary space available. If lwork == -1, instead of performing Dgelqf, // the optimal work length will be stored into work[0]. // -// tau must be at least len min(m,n), and this function will panic otherwise. +// tau must have length at least min(m,n), and this function will panic otherwise. func (impl Implementation) Dgeqrf(m, n int, a []float64, lda int, tau, work []float64, lwork int) { // nb is the optimal blocksize, i.e. the number of columns transformed at a time. nb := impl.Ilaenv(1, "DGEQRF", " ", m, n, -1, -1)