From be045bbf9ac3b28e7bfb00e90d85e6909ad900e2 Mon Sep 17 00:00:00 2001 From: btracey Date: Tue, 27 Oct 2015 11:36:46 -0600 Subject: [PATCH] Add dorgqr and tests, update dorg2r tests, and fix cgo dorgqr --- cgo/lapack.go | 77 ++++++++++++++++++++++++---- cgo/lapack_test.go | 14 +++-- native/dorg2r.go | 10 ++-- native/dorgl2.go | 11 ++-- native/dorglq.go | 111 ++++++++++++++++++++++++++++++++++++++++ native/dorgqr.go | 115 ++++++++++++++++++++++++++++++++++++++++++ native/general.go | 2 + native/lapack_test.go | 24 ++++++--- testlapack/dorg2r.go | 20 +++++--- testlapack/dorglq.go | 82 ++++++++++++++++++++++++++++++ testlapack/dorgqr.go | 82 ++++++++++++++++++++++++++++++ testlapack/general.go | 8 ++- 12 files changed, 520 insertions(+), 36 deletions(-) create mode 100644 native/dorglq.go create mode 100644 native/dorgqr.go create mode 100644 testlapack/dorglq.go create mode 100644 testlapack/dorgqr.go diff --git a/cgo/lapack.go b/cgo/lapack.go index 2b11de8..4e67841 100644 --- a/cgo/lapack.go +++ b/cgo/lapack.go @@ -30,9 +30,11 @@ const ( badWorkStride = "lapack: insufficient working array stride" kGTM = "lapack: k > m" kGTN = "lapack: k > n" + kLT0 = "lapack: k < 0" mLTN = "lapack: m < n" negDimension = "lapack: negative matrix dimension" nLT0 = "lapack: n < 0" + nLTM = "lapack: n < m" shortWork = "lapack: working array shorter than declared" ) @@ -465,34 +467,87 @@ func (impl Implementation) Dgetrs(trans blas.Transpose, n, nrhs int, a []float64 clapack.Dgetrs(trans, n, nrhs, a, lda, ipiv32, b, ldb) } -func (impl Implementation) Dorglq(m, n, k int, a []float64, lda int, tau, work []float64) { +// Dorglq generates an m×n matrix Q with orthonormal rows defined by the +// product of elementary reflectors as computed by Dgelqf. +// Q = H(0) * H(2) * ... * H(k-1) +// Dorglq is the blocked version of dorgl2 that makes greater use of level-3 BLAS +// routines. +// +// len(tau) >= k, 0 <= k <= n, and 0 <= m <= n. +// +// Work is temporary storage, and lwork specifies the usable memory length. +// The C interface does not support providing temporary storage. To provide compatibility +// with native, lwork == -1 will not run Dorglq but will instead write the minimum +// work necessary to work[0]. If len(work) < lwork, Dgeqrf will panic, and at minimum +// lwork >= m. +// +// Dorgqr will panic if the conditions on input values are not met. +func (impl Implementation) Dorglq(m, n, k int, a []float64, lda int, tau, work []float64, lwork int) { + if lwork == -1 { + work[0] = float64(m) + return + } checkMatrix(m, n, a, lda) - if len(tau) < k { - panic(badTau) + if k < 0 { + panic(kLT0) } if k > m { panic(kGTM) } - if k > m { - panic(kGTM) + if m > n { + panic(nLTM) } - clapack.Dorglq(m, n, k, a, lda, tau) -} - -func (impl Implementation) Dorgqr(m, n, k int, a []float64, lda int, tau, work []float64) { - checkMatrix(m, n, a, lda) if len(tau) < k { panic(badTau) } - if len(work) < n { + if len(work) < lwork { + panic(shortWork) + } + if lwork < m { panic(badWork) } + clapack.Dorglq(m, n, k, a, lda, tau) +} + +// Dorgqr generates an m×n matrix Q with orthonormal columns defined by the +// product of elementary reflectors as computed by Dgeqrf. +// Q = H(0) * H(2) * ... * H(k-1) +// Dorgqr is the blocked version of dorg2r that makes greater use of level-3 BLAS +// routines. +// +// len(tau) >= k, 0 <= k <= n, and 0 <= n <= m. +// +// Work is temporary storage, and lwork specifies the usable memory length. +// The C interface does not support providing temporary storage. To provide compatibility +// with native, lwork == -1 will not run Dorgqr but will instead write the minimum +// work necessary to work[0]. If len(work) < lwork, Dgeqrf will panic, and at minimum +// lwork >= n. +// +// Dorgqr will panic if the conditions on input values are not met. +func (impl Implementation) Dorgqr(m, n, k int, a []float64, lda int, tau, work []float64, lwork int) { + if lwork == -1 { + work[0] = float64(n) + return + } + checkMatrix(m, n, a, lda) + if k < 0 { + panic(kLT0) + } if k > n { panic(kGTN) } if n > m { panic(mLTN) } + if len(tau) < k { + panic(badTau) + } + if len(work) < lwork { + panic(shortWork) + } + if lwork < n { + panic(badWork) + } clapack.Dorgqr(m, n, k, a, lda, tau) } diff --git a/cgo/lapack_test.go b/cgo/lapack_test.go index 5934c7e..3f0b116 100644 --- a/cgo/lapack_test.go +++ b/cgo/lapack_test.go @@ -85,18 +85,26 @@ func (d blockedTranslate) Dorml2(side blas.Side, trans blas.Transpose, m, n, k i } func (d blockedTranslate) Dorg2r(m, n, k int, a []float64, lda int, tau, work []float64) { - impl.Dorgqr(m, n, k, a, lda, tau, work) + impl.Dorgqr(m, n, k, a, lda, tau, work, len(work)) } func (d blockedTranslate) Dorgl2(m, n, k int, a []float64, lda int, tau, work []float64) { - impl.Dorglq(m, n, k, a, lda, tau, work) + impl.Dorglq(m, n, k, a, lda, tau, work, len(work)) } func TestDorglq(t *testing.T) { - testlapack.Dorgl2Test(t, blockedTranslate{impl}) + testlapack.DorglqTest(t, blockedTranslate{impl}) } func TestDorgqr(t *testing.T) { + testlapack.DorgqrTest(t, blockedTranslate{impl}) +} + +func TestDorgl2(t *testing.T) { + testlapack.Dorgl2Test(t, blockedTranslate{impl}) +} + +func TestDorg2r(t *testing.T) { testlapack.Dorg2rTest(t, blockedTranslate{impl}) } diff --git a/native/dorg2r.go b/native/dorg2r.go index 7342ddc..1d53f15 100644 --- a/native/dorg2r.go +++ b/native/dorg2r.go @@ -9,12 +9,11 @@ import ( "github.com/gonum/blas/blas64" ) -// Dorg2r generates an m×n matrix Q with orthonormal columns defined by the +// Dorgl2 generates an m×n matrix Q with orthonormal columns defined by the // product of elementary reflectors as computed by Dgeqrf. // Q = H(0) * H(2) * ... * H(k-1) -// The length of tau must be equal to k, and the length of work must be at least n. -// It also must be that 0 <= k <= n. m >= n >= 0. Dorg2r will panic if these -// conditions are not met. +// len(tau) >= k, 0 <= k <= n, 0 <= n <= m, len(work) >= n. +// Dorg2r will panic if these conditions are not met. func (impl Implementation) Dorg2r(m, n, k int, a []float64, lda int, tau []float64, work []float64) { checkMatrix(m, n, a, lda) if len(tau) < k { @@ -29,6 +28,9 @@ func (impl Implementation) Dorg2r(m, n, k int, a []float64, lda int, tau []float if n > m { panic(mLTN) } + if len(work) < n { + panic(badWork) + } if n == 0 { return } diff --git a/native/dorgl2.go b/native/dorgl2.go index e64f403..8bb3f3f 100644 --- a/native/dorgl2.go +++ b/native/dorgl2.go @@ -12,9 +12,8 @@ import ( // Dorgl2 generates an m×n matrix Q with orthonormal rows defined by the // first m rows product of elementary reflectors as computed by Dgelqf. // Q = H(0) * H(2) * ... * H(k-1) -// The length of tau must be equal to k, and the length of work must be at least n. -// It also must be that 0 <= k <= m. n >= m >= 0. Dorgl2 will panic if these -// conditions are not met. +// len(tau) >= k, 0 <= k <= m, 0 <= m <= n, len(work) >= m. +// Dorgl2 will panic if these conditions are not met. func (impl Implementation) Dorgl2(m, n, k int, a []float64, lda int, tau, work []float64) { checkMatrix(m, n, a, lda) if len(tau) < k { @@ -26,6 +25,12 @@ func (impl Implementation) Dorgl2(m, n, k int, a []float64, lda int, tau, work [ if k > m { panic(kGTM) } + if m > n { + panic(nLTM) + } + if len(work) < m { + panic(badWork) + } if m == 0 { return } diff --git a/native/dorglq.go b/native/dorglq.go new file mode 100644 index 0000000..c5ac1a9 --- /dev/null +++ b/native/dorglq.go @@ -0,0 +1,111 @@ +package native + +import ( + "github.com/gonum/blas" + "github.com/gonum/lapack" +) + +// Dorglq generates an m×n matrix Q with orthonormal columns defined by the +// product of elementary reflectors as computed by Dgelqf. +// Q = H(0) * H(2) * ... * H(k-1) +// Dorglq is the blocked version of dorgl2 that makes greater use of level-3 BLAS +// routines. +// +// len(tau) >= k, 0 <= k <= n, and 0 <= n <= m. +// +// Work is temporary storage, and lwork specifies the usable memory length. At minimum, +// lwork >= m, and the amount of blocking is limited by the usable length. +// If lwork == -1, instead of computing Dorglq the optimal work length is stored +// into work[0]. +// +// Dorglq will panic if the conditions on input values are not met. +func (impl Implementation) Dorglq(m, n, k int, a []float64, lda int, tau, work []float64, lwork int) { + nb := impl.Ilaenv(1, "DORGLQ", " ", m, n, k, -1) + // work is treated as an n×nb matrix + if lwork == -1 { + work[0] = float64(max(1, m) * nb) + return + } + checkMatrix(m, n, a, lda) + if k < 0 { + panic(kLT0) + } + if k > m { + panic(kGTM) + } + if m > n { + panic(nLTM) + } + if len(tau) < k { + panic(badTau) + } + if len(work) < lwork { + panic(shortWork) + } + if lwork < m { + panic(badWork) + } + if m == 0 { + return + } + nbmin := 2 // Minimum number of blocks + var nx int // Minimum number of rows + iws := m // Length of work needed + var ldwork int + if nb > 1 && nb < k { + nx = max(0, impl.Ilaenv(3, "DORGLQ", " ", m, n, k, -1)) + if nx < k { + ldwork = nb + iws = m * ldwork + if lwork < iws { + nb = lwork / m + ldwork = nb + nbmin = max(2, impl.Ilaenv(2, "DORGLQ", " ", m, n, k, -1)) + } + } + } + var ki, kk int + if nb >= nbmin && nb < k && nx < k { + // The first kk rows are handled by the blocked method. + // Note: lapack has nx here, but this means the last nx rows are handled + // serially which could be quite different than nb. + ki = ((k - nb - 1) / nb) * nb + kk = min(k, ki+nb) + for i := kk; i < m; i++ { + for j := 0; j < kk; j++ { + a[i*lda+j] = 0 + } + } + } + if kk < m { + // Perform the operation on colums kk to the end. + impl.Dorgl2(m-kk, n-kk, k-kk, a[kk*lda+kk:], lda, tau[kk:], work) + } + if kk == 0 { + return + } + // Perform the operation on column-blocks + for i := ki; i >= 0; i -= nb { + ib := min(nb, k-i) + if i+ib < m { + impl.Dlarft(lapack.Forward, lapack.RowWise, + n-i, ib, + a[i*lda+i:], lda, + tau[i:], + work, ldwork) + + impl.Dlarfb(blas.Right, blas.Trans, lapack.Forward, lapack.RowWise, + m-i-ib, n-i, ib, + a[i*lda+i:], lda, + work, ldwork, + a[(i+ib)*lda+i:], lda, + work[ib*ldwork:], ldwork) + } + impl.Dorgl2(ib, n-i, ib, a[i*lda+i:], lda, tau[i:], work) + for l := i; l < i+ib; l++ { + for j := 0; j < i; j++ { + a[l*lda+j] = 0 + } + } + } +} diff --git a/native/dorgqr.go b/native/dorgqr.go new file mode 100644 index 0000000..a7eb5c7 --- /dev/null +++ b/native/dorgqr.go @@ -0,0 +1,115 @@ +// Copyright ©2015 The gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package native + +import ( + "github.com/gonum/blas" + "github.com/gonum/lapack" +) + +// Dorgqr generates an m×n matrix Q with orthonormal columns defined by the +// product of elementary reflectors as computed by Dgeqrf. +// Q = H(0) * H(2) * ... * H(k-1) +// Dorgqr is the blocked version of dorg2r that makes greater use of level-3 BLAS +// routines. +// +// The length of tau must be equal to k, and the length of work must be at least n. +// It also must be that 0 <= k <= n and 0 <= n <= m. Work is temporary storage, +// and lwork specifies the usable memory length. At minimum, lwork >= n, and the +// amount of blocking is limited by the usable length. If lwork == -1, instead of +// computing Dorgqr the optimal work length is stored into work[0]. +// +// Dorgqr will panic if the conditions on input values are not met. +func (impl Implementation) Dorgqr(m, n, k int, a []float64, lda int, tau, work []float64, lwork int) { + nb := impl.Ilaenv(1, "DORGQR", " ", m, n, k, -1) + // work is treated as an n×nb matrix + if lwork == -1 { + work[0] = float64(max(1, n) * nb) + return + } + checkMatrix(m, n, a, lda) + if k < 0 { + panic(kLT0) + } + if k > n { + panic(kGTN) + } + if n > m { + panic(mLTN) + } + if len(tau) < k { + panic(badTau) + } + if len(work) < lwork { + panic(shortWork) + } + if lwork < n { + panic(badWork) + } + if n == 0 { + return + } + nbmin := 2 // Minimum number of blocks + var nx int // Minimum number of rows + iws := n // Length of work needed + var ldwork int + if nb > 1 && nb < k { + nx = max(0, impl.Ilaenv(3, "DORGQR", " ", m, n, k, -1)) + if nx < k { + ldwork = nb + iws = n * ldwork + if lwork < iws { + nb = lwork / n + ldwork = nb + nbmin = max(2, impl.Ilaenv(2, "DORGQR", " ", m, n, k, -1)) + } + } + } + var ki, kk int + if nb >= nbmin && nb < k && nx < k { + // The first kk columns are handled by the blocked method. + // Note: lapack has nx here, but this means the last nx rows are handled + // serially which could be quite different than nb. + ki = ((k - nb - 1) / nb) * nb + kk = min(k, ki+nb) + for j := kk; j < n; j++ { + for i := 0; i < kk; i++ { + a[i*lda+j] = 0 + } + } + } + if kk < n { + // Perform the operation on colums kk to the end. + impl.Dorg2r(m-kk, n-kk, k-kk, a[kk*lda+kk:], lda, tau[kk:], work) + } + if kk == 0 { + return + } + // Perform the operation on column-blocks + for i := ki; i >= 0; i -= nb { + ib := min(nb, k-i) + if i+ib < n { + impl.Dlarft(lapack.Forward, lapack.ColumnWise, + m-i, ib, + a[i*lda+i:], lda, + tau[i:], + work, ldwork) + + impl.Dlarfb(blas.Left, blas.NoTrans, lapack.Forward, lapack.ColumnWise, + m-i, n-i-ib, ib, + a[i*lda+i:], lda, + work, ldwork, + a[i*lda+i+ib:], lda, + work[ib*ldwork:], ldwork) + } + impl.Dorg2r(m-i, ib, ib, a[i*lda+i:], lda, tau[i:], work) + // Set rows 0:i-1 of current block to zero + for j := i; j < i+ib; j++ { + for l := 0; l < i; l++ { + a[l*lda+j] = 0 + } + } + } +} diff --git a/native/general.go b/native/general.go index d2bb83a..2fddeb9 100644 --- a/native/general.go +++ b/native/general.go @@ -36,9 +36,11 @@ const ( badWorkStride = "lapack: insufficient working array stride" kGTM = "lapack: k > m" kGTN = "lapack: k > n" + kLT0 = "lapack: k < 0" mLTN = "lapack: m < n" negDimension = "lapack: negative matrix dimension" nLT0 = "lapack: n < 0" + nLTM = "lapack: n < m" shortWork = "lapack: working array shorter than declared" ) diff --git a/native/lapack_test.go b/native/lapack_test.go index 929179e..c17255a 100644 --- a/native/lapack_test.go +++ b/native/lapack_test.go @@ -104,6 +104,22 @@ func TestDlasv2(t *testing.T) { testlapack.Dlasv2Test(t, impl) } +func TestDorg2r(t *testing.T) { + testlapack.Dorg2rTest(t, impl) +} + +func TestDorgl2(t *testing.T) { + testlapack.Dorgl2Test(t, impl) +} + +func TestDorglq(t *testing.T) { + testlapack.DorglqTest(t, impl) +} + +func TestDorgqr(t *testing.T) { + testlapack.DorgqrTest(t, impl) +} + func TestDorml2(t *testing.T) { testlapack.Dorml2Test(t, impl) } @@ -120,14 +136,6 @@ func TestDorm2r(t *testing.T) { testlapack.Dorm2rTest(t, impl) } -func TestDorg2r(t *testing.T) { - testlapack.Dorg2rTest(t, impl) -} - -func TestDorgl2(t *testing.T) { - testlapack.Dorgl2Test(t, impl) -} - func TestDpocon(t *testing.T) { testlapack.DpoconTest(t, impl) } diff --git a/testlapack/dorg2r.go b/testlapack/dorg2r.go index a7a951f..4c17715 100644 --- a/testlapack/dorg2r.go +++ b/testlapack/dorg2r.go @@ -19,13 +19,17 @@ type Dorg2rer interface { func Dorg2rTest(t *testing.T, impl Dorg2rer) { for _, test := range []struct { - m, n, lda int + m, n, k, lda int }{ - {3, 3, 0}, - {4, 3, 0}, + {3, 3, 0, 0}, + {4, 3, 0, 0}, + {3, 3, 2, 0}, + {4, 3, 2, 0}, - {5, 5, 20}, - {10, 5, 20}, + {5, 5, 0, 20}, + {5, 5, 3, 20}, + {10, 5, 0, 20}, + {10, 5, 2, 20}, } { m := test.m n := test.n @@ -44,7 +48,11 @@ func Dorg2rTest(t *testing.T, impl Dorg2rer) { work = make([]float64, int(work[0])) impl.Dgeqrf(m, n, a, lda, tau, work, len(work)) - q := constructQ("QR", m, n, a, lda, tau) + k = test.k + if k == 0 { + k = n + } + q := constructQK("QR", m, n, k, a, lda, tau) impl.Dorg2r(m, n, k, a, lda, tau, work) diff --git a/testlapack/dorglq.go b/testlapack/dorglq.go new file mode 100644 index 0000000..5f2eb11 --- /dev/null +++ b/testlapack/dorglq.go @@ -0,0 +1,82 @@ +// Copyright ©2015 The gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package testlapack + +import ( + "math" + "math/rand" + "testing" + + "github.com/gonum/floats" +) + +type Dorglqer interface { + Dorgl2er + Dorglq(m, n, k int, a []float64, lda int, tau, work []float64, lwork int) +} + +func DorglqTest(t *testing.T, impl Dorglqer) { + // TODO(btracey): Base tests off of nb and nx. + for _, test := range []struct{ m, n, k, lda int }{ + {10, 10, 10, 0}, + {10, 10, 10, 20}, + {10, 30, 10, 0}, + {20, 30, 10, 0}, + + {100, 100, 100, 0}, + {100, 100, 50, 0}, + {100, 130, 100, 0}, + {100, 130, 50, 0}, + {100, 100, 100, 150}, + {100, 100, 50, 150}, + {100, 130, 100, 150}, + {100, 130, 50, 150}, + + {200, 200, 200, 0}, + {200, 200, 150, 0}, + {200, 230, 200, 0}, + {200, 230, 150, 0}, + {200, 200, 200, 250}, + {200, 200, 150, 250}, + {200, 230, 200, 250}, + {200, 230, 150, 250}, + } { + m := test.m + n := test.n + k := test.k + lda := test.lda + if lda == 0 { + lda = n + } + a := make([]float64, m*lda) + for i := range a { + a[i] = rand.Float64() + } + work := make([]float64, 1) + tau := make([]float64, m) + for i := range tau { + tau[i] = math.NaN() + } + // Compute LQ factorization. + impl.Dgelqf(m, n, a, lda, tau, work, -1) + work = make([]float64, int(work[0])) + impl.Dgelqf(m, n, a, lda, tau, work, len(work)) + + aUnblocked := make([]float64, len(a)) + copy(aUnblocked, a) + for i := range work { + work[i] = math.NaN() + } + impl.Dorgl2(m, n, k, aUnblocked, lda, tau, work) + // make sure work isn't used before initialized + for i := range work { + work[i] = math.NaN() + } + impl.Dorglq(m, n, k, a, lda, tau, work, len(work)) + if !floats.EqualApprox(a, aUnblocked, 1e-10) { + t.Errorf("Q Mismatch. m = %d, n = %d, k = %d, lda = %d", m, n, k, lda) + } + } +} diff --git a/testlapack/dorgqr.go b/testlapack/dorgqr.go new file mode 100644 index 0000000..9986277 --- /dev/null +++ b/testlapack/dorgqr.go @@ -0,0 +1,82 @@ +// Copyright ©2015 The gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package testlapack + +import ( + "math" + "math/rand" + "testing" + + "github.com/gonum/floats" +) + +type Dorgqrer interface { + Dorg2rer + Dorgqr(m, n, k int, a []float64, lda int, tau, work []float64, lwork int) +} + +func DorgqrTest(t *testing.T, impl Dorgqrer) { + // TODO(btracey): Base tests off of nb and nx. + for _, test := range []struct{ m, n, k, lda int }{ + {10, 10, 10, 0}, + {10, 10, 10, 20}, + {30, 10, 10, 0}, + {30, 20, 10, 0}, + + {100, 100, 100, 0}, + {100, 100, 50, 0}, + {130, 100, 100, 0}, + {130, 100, 50, 0}, + {100, 100, 100, 150}, + {100, 100, 50, 150}, + {130, 100, 100, 150}, + {130, 100, 50, 150}, + + {200, 200, 200, 0}, + {200, 200, 150, 0}, + {230, 200, 200, 0}, + {230, 200, 150, 0}, + {200, 200, 200, 250}, + {200, 200, 150, 250}, + {230, 200, 200, 250}, + {230, 200, 150, 250}, + } { + m := test.m + n := test.n + k := test.k + lda := test.lda + if lda == 0 { + lda = n + } + a := make([]float64, m*lda) + for i := range a { + a[i] = rand.Float64() + } + work := make([]float64, 1) + tau := make([]float64, n) + for i := range tau { + tau[i] = math.NaN() + } + // Compute QR factorization. + impl.Dgeqrf(m, n, a, lda, tau, work, -1) + work = make([]float64, int(work[0])) + impl.Dgeqrf(m, n, a, lda, tau, work, len(work)) + + aUnblocked := make([]float64, len(a)) + copy(aUnblocked, a) + for i := range work { + work[i] = math.NaN() + } + impl.Dorg2r(m, n, k, aUnblocked, lda, tau, work) + // make sure work isn't used before initialized + for i := range work { + work[i] = math.NaN() + } + impl.Dorgqr(m, n, k, a, lda, tau, work, len(work)) + if !floats.EqualApprox(a, aUnblocked, 1e-10) { + t.Errorf("Q Mismatch. m = %d, n = %d, k = %d, lda = %d", m, n, k, lda) + } + } +} diff --git a/testlapack/general.go b/testlapack/general.go index 46b21aa..a229ec4 100644 --- a/testlapack/general.go +++ b/testlapack/general.go @@ -226,9 +226,15 @@ func constructH(tau []float64, v blas64.General, store lapack.StoreV, direct lap return h } -// constructQ constructs the Q matrix from the result of dgeqrf and dgeqr2 +// constructQ constructs the Q matrix from the result of dgeqrf and dgeqr2. func constructQ(kind string, m, n int, a []float64, lda int, tau []float64) blas64.General { k := min(m, n) + return constructQK(kind, m, n, k, a, lda, tau) +} + +// constructQK constructs the Q matrix from the result of dgeqrf and dgeqr2 using +// the first k reflectors. +func constructQK(kind string, m, n, k int, a []float64, lda int, tau []float64) blas64.General { var sz int switch kind { case "QR":