Skip to content
This repository has been archived by the owner on Nov 24, 2018. It is now read-only.

Commit

Permalink
Add dorgqr and tests, update dorg2r tests, and fix cgo dorgqr
Browse files Browse the repository at this point in the history
  • Loading branch information
btracey committed Oct 27, 2015
1 parent 676338c commit be045bb
Show file tree
Hide file tree
Showing 12 changed files with 520 additions and 36 deletions.
77 changes: 66 additions & 11 deletions cgo/lapack.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,11 @@ const (
badWorkStride = "lapack: insufficient working array stride"
kGTM = "lapack: k > m"
kGTN = "lapack: k > n"
kLT0 = "lapack: k < 0"
mLTN = "lapack: m < n"
negDimension = "lapack: negative matrix dimension"
nLT0 = "lapack: n < 0"
nLTM = "lapack: n < m"
shortWork = "lapack: working array shorter than declared"
)

Expand Down Expand Up @@ -465,34 +467,87 @@ func (impl Implementation) Dgetrs(trans blas.Transpose, n, nrhs int, a []float64
clapack.Dgetrs(trans, n, nrhs, a, lda, ipiv32, b, ldb)
}

func (impl Implementation) Dorglq(m, n, k int, a []float64, lda int, tau, work []float64) {
// Dorglq generates an m×n matrix Q with orthonormal rows defined by the
// product of elementary reflectors as computed by Dgelqf.
// Q = H(0) * H(2) * ... * H(k-1)
// Dorglq is the blocked version of dorgl2 that makes greater use of level-3 BLAS
// routines.
//
// len(tau) >= k, 0 <= k <= n, and 0 <= m <= n.
//
// Work is temporary storage, and lwork specifies the usable memory length.
// The C interface does not support providing temporary storage. To provide compatibility
// with native, lwork == -1 will not run Dorglq but will instead write the minimum
// work necessary to work[0]. If len(work) < lwork, Dgeqrf will panic, and at minimum
// lwork >= m.
//
// Dorgqr will panic if the conditions on input values are not met.
func (impl Implementation) Dorglq(m, n, k int, a []float64, lda int, tau, work []float64, lwork int) {
if lwork == -1 {
work[0] = float64(m)
return
}
checkMatrix(m, n, a, lda)
if len(tau) < k {
panic(badTau)
if k < 0 {
panic(kLT0)
}
if k > m {
panic(kGTM)
}
if k > m {
panic(kGTM)
if m > n {
panic(nLTM)
}
clapack.Dorglq(m, n, k, a, lda, tau)
}

func (impl Implementation) Dorgqr(m, n, k int, a []float64, lda int, tau, work []float64) {
checkMatrix(m, n, a, lda)
if len(tau) < k {
panic(badTau)
}
if len(work) < n {
if len(work) < lwork {
panic(shortWork)
}
if lwork < m {
panic(badWork)
}
clapack.Dorglq(m, n, k, a, lda, tau)
}

// Dorgqr generates an m×n matrix Q with orthonormal columns defined by the
// product of elementary reflectors as computed by Dgeqrf.
// Q = H(0) * H(2) * ... * H(k-1)
// Dorgqr is the blocked version of dorg2r that makes greater use of level-3 BLAS
// routines.
//
// len(tau) >= k, 0 <= k <= n, and 0 <= n <= m.
//
// Work is temporary storage, and lwork specifies the usable memory length.
// The C interface does not support providing temporary storage. To provide compatibility
// with native, lwork == -1 will not run Dorgqr but will instead write the minimum
// work necessary to work[0]. If len(work) < lwork, Dgeqrf will panic, and at minimum
// lwork >= n.
//
// Dorgqr will panic if the conditions on input values are not met.
func (impl Implementation) Dorgqr(m, n, k int, a []float64, lda int, tau, work []float64, lwork int) {
if lwork == -1 {
work[0] = float64(n)
return
}
checkMatrix(m, n, a, lda)
if k < 0 {
panic(kLT0)
}
if k > n {
panic(kGTN)
}
if n > m {
panic(mLTN)
}
if len(tau) < k {
panic(badTau)
}
if len(work) < lwork {
panic(shortWork)
}
if lwork < n {
panic(badWork)
}
clapack.Dorgqr(m, n, k, a, lda, tau)
}

Expand Down
14 changes: 11 additions & 3 deletions cgo/lapack_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,18 +85,26 @@ func (d blockedTranslate) Dorml2(side blas.Side, trans blas.Transpose, m, n, k i
}

func (d blockedTranslate) Dorg2r(m, n, k int, a []float64, lda int, tau, work []float64) {
impl.Dorgqr(m, n, k, a, lda, tau, work)
impl.Dorgqr(m, n, k, a, lda, tau, work, len(work))
}

func (d blockedTranslate) Dorgl2(m, n, k int, a []float64, lda int, tau, work []float64) {
impl.Dorglq(m, n, k, a, lda, tau, work)
impl.Dorglq(m, n, k, a, lda, tau, work, len(work))
}

func TestDorglq(t *testing.T) {
testlapack.Dorgl2Test(t, blockedTranslate{impl})
testlapack.DorglqTest(t, blockedTranslate{impl})
}

func TestDorgqr(t *testing.T) {
testlapack.DorgqrTest(t, blockedTranslate{impl})
}

func TestDorgl2(t *testing.T) {
testlapack.Dorgl2Test(t, blockedTranslate{impl})
}

func TestDorg2r(t *testing.T) {
testlapack.Dorg2rTest(t, blockedTranslate{impl})
}

Expand Down
10 changes: 6 additions & 4 deletions native/dorg2r.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,11 @@ import (
"github.com/gonum/blas/blas64"
)

// Dorg2r generates an m×n matrix Q with orthonormal columns defined by the
// Dorgl2 generates an m×n matrix Q with orthonormal columns defined by the
// product of elementary reflectors as computed by Dgeqrf.
// Q = H(0) * H(2) * ... * H(k-1)
// The length of tau must be equal to k, and the length of work must be at least n.
// It also must be that 0 <= k <= n. m >= n >= 0. Dorg2r will panic if these
// conditions are not met.
// len(tau) >= k, 0 <= k <= n, 0 <= n <= m, len(work) >= n.
// Dorg2r will panic if these conditions are not met.
func (impl Implementation) Dorg2r(m, n, k int, a []float64, lda int, tau []float64, work []float64) {
checkMatrix(m, n, a, lda)
if len(tau) < k {
Expand All @@ -29,6 +28,9 @@ func (impl Implementation) Dorg2r(m, n, k int, a []float64, lda int, tau []float
if n > m {
panic(mLTN)
}
if len(work) < n {
panic(badWork)
}
if n == 0 {
return
}
Expand Down
11 changes: 8 additions & 3 deletions native/dorgl2.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,8 @@ import (
// Dorgl2 generates an m×n matrix Q with orthonormal rows defined by the
// first m rows product of elementary reflectors as computed by Dgelqf.
// Q = H(0) * H(2) * ... * H(k-1)
// The length of tau must be equal to k, and the length of work must be at least n.
// It also must be that 0 <= k <= m. n >= m >= 0. Dorgl2 will panic if these
// conditions are not met.
// len(tau) >= k, 0 <= k <= m, 0 <= m <= n, len(work) >= m.
// Dorgl2 will panic if these conditions are not met.
func (impl Implementation) Dorgl2(m, n, k int, a []float64, lda int, tau, work []float64) {
checkMatrix(m, n, a, lda)
if len(tau) < k {
Expand All @@ -26,6 +25,12 @@ func (impl Implementation) Dorgl2(m, n, k int, a []float64, lda int, tau, work [
if k > m {
panic(kGTM)
}
if m > n {
panic(nLTM)
}
if len(work) < m {
panic(badWork)
}
if m == 0 {
return
}
Expand Down
111 changes: 111 additions & 0 deletions native/dorglq.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
package native

import (
"github.com/gonum/blas"
"github.com/gonum/lapack"
)

// Dorglq generates an m×n matrix Q with orthonormal columns defined by the
// product of elementary reflectors as computed by Dgelqf.
// Q = H(0) * H(2) * ... * H(k-1)
// Dorglq is the blocked version of dorgl2 that makes greater use of level-3 BLAS
// routines.
//
// len(tau) >= k, 0 <= k <= n, and 0 <= n <= m.
//
// Work is temporary storage, and lwork specifies the usable memory length. At minimum,
// lwork >= m, and the amount of blocking is limited by the usable length.
// If lwork == -1, instead of computing Dorglq the optimal work length is stored
// into work[0].
//
// Dorglq will panic if the conditions on input values are not met.
func (impl Implementation) Dorglq(m, n, k int, a []float64, lda int, tau, work []float64, lwork int) {
nb := impl.Ilaenv(1, "DORGLQ", " ", m, n, k, -1)
// work is treated as an n×nb matrix
if lwork == -1 {
work[0] = float64(max(1, m) * nb)
return
}
checkMatrix(m, n, a, lda)
if k < 0 {
panic(kLT0)
}
if k > m {
panic(kGTM)
}
if m > n {
panic(nLTM)
}
if len(tau) < k {
panic(badTau)
}
if len(work) < lwork {
panic(shortWork)
}
if lwork < m {
panic(badWork)
}
if m == 0 {
return
}
nbmin := 2 // Minimum number of blocks
var nx int // Minimum number of rows
iws := m // Length of work needed
var ldwork int
if nb > 1 && nb < k {
nx = max(0, impl.Ilaenv(3, "DORGLQ", " ", m, n, k, -1))
if nx < k {
ldwork = nb
iws = m * ldwork
if lwork < iws {
nb = lwork / m
ldwork = nb
nbmin = max(2, impl.Ilaenv(2, "DORGLQ", " ", m, n, k, -1))
}
}
}
var ki, kk int
if nb >= nbmin && nb < k && nx < k {
// The first kk rows are handled by the blocked method.
// Note: lapack has nx here, but this means the last nx rows are handled
// serially which could be quite different than nb.
ki = ((k - nb - 1) / nb) * nb
kk = min(k, ki+nb)
for i := kk; i < m; i++ {
for j := 0; j < kk; j++ {
a[i*lda+j] = 0
}
}
}
if kk < m {
// Perform the operation on colums kk to the end.
impl.Dorgl2(m-kk, n-kk, k-kk, a[kk*lda+kk:], lda, tau[kk:], work)
}
if kk == 0 {
return
}
// Perform the operation on column-blocks
for i := ki; i >= 0; i -= nb {
ib := min(nb, k-i)
if i+ib < m {
impl.Dlarft(lapack.Forward, lapack.RowWise,
n-i, ib,
a[i*lda+i:], lda,
tau[i:],
work, ldwork)

impl.Dlarfb(blas.Right, blas.Trans, lapack.Forward, lapack.RowWise,
m-i-ib, n-i, ib,
a[i*lda+i:], lda,
work, ldwork,
a[(i+ib)*lda+i:], lda,
work[ib*ldwork:], ldwork)
}
impl.Dorgl2(ib, n-i, ib, a[i*lda+i:], lda, tau[i:], work)
for l := i; l < i+ib; l++ {
for j := 0; j < i; j++ {
a[l*lda+j] = 0
}
}
}
}
Loading

0 comments on commit be045bb

Please sign in to comment.