gonum · btracey · Jul 15, 2015 · Jul 3, 2015 · vladimir-ch · Jul 15, 2015
diff --git a/lapack.go b/lapack.go
@@ -10,23 +10,51 @@ const None = 'N'
 
 type Job byte
 
-const (
-	All       (Job) = 'A'
-	Slim      (Job) = 'S'
-	Overwrite (Job) = 'O'
-)
-
+// CompSV determines if the singular values are to be computed in compact form.
 type CompSV byte
 
 const (
-	Compact  (CompSV) = 'P'
-	Explicit (CompSV) = 'I'
+	Compact  CompSV = 'P'
+	Explicit CompSV = 'I'
 )
 
-// Float64 defines the float64 interface for the Lapack function. This interface
-// contains the functions needed in the gonum suite.
+// Complex128 defines the public complex128 LAPACK API supported by gonum/lapack.
+type Complex128 interface{}
+
+// Float64 defines the public float64 LAPACK API supported by gonum/lapack.
 type Float64 interface {
 	Dpotrf(ul blas.Uplo, n int, a []float64, lda int) (ok bool)
 }
 
-type Complex128 interface{}
+// Direct specifies the direction of the multiplication for the Householder matrix.
+type Direct byte
+
+const (
+	Forward  Direct = 'F' // Reflectors are right-multiplied, H_1 * H_2 * ... * H_k
+	Backward Direct = 'B' // Reflectors are left-multiplied, H_k * ... * H_2 * H_1
+)
+
+// StoreV indicates the storage direction of elementary reflectors.
+type StoreV byte
+
+const (
+	ColumnWise StoreV = 'C' // Reflector stored in a column of the matrix.
+	RowWise    StoreV = 'R' // Reflector stored in a row of the matrix.
+)
+
+// MatrixNorm represents the kind of matrix norm to compute.
+type MatrixNorm byte
+
+const (
+	MaxAbs       MatrixNorm = 'M' // max(abs(A(i,j)))  ('M')
+	MaxColumnSum MatrixNorm = 'O' // Maximum column sum (one norm) ('1', 'O')
+	MaxRowSum    MatrixNorm = 'I' // Maximum row sum (infinity norm) ('I', 'i')
+	NormFrob     MatrixNorm = 'F' // Frobenium norm (sqrt of sum of squares) ('F', 'f', E, 'e')
+)
+
+// MatrixType represents the kind of matrix represented in the data.
+type MatrixType byte
+
+const (
+	General MatrixType = 'G' // A dense matrix (like blas64.General).
+)
diff --git a/native/dgelq2.go b/native/dgelq2.go
@@ -0,0 +1,44 @@
+// Copyright ©2015 The gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package native
+
+import "github.com/gonum/blas"
+
+// Dgelq2 computes the LQ factorization of the m×n matrix a.
+//
+// During Dgelq2, a is modified to contain the information to construct Q and L.
+// The lower triangle of a contains the matrix L. The upper triangular elements
+// (not including the diagonal) contain the elementary reflectors. Tau is modified
+// to contain the reflector scales. Tau must have length of at least k = min(m,n)
+// and this function will panic otherwise.
+//
+// See Dgeqr2 for a description of the elementary reflectors and orthonormal
+// matrix Q. Q is constructed as a product of these elementary reflectors,
+// Q = H_k ... H_2*H_1.
+//
+// Work is temporary storage of length at least m and this function will panic otherwise.
+func (impl Implementation) Dgelq2(m, n int, a []float64, lda int, tau, work []float64) {
+	checkMatrix(m, n, a, lda)
+	k := min(m, n)
+	if len(tau) < k {
+		panic(badTau)
+	}
+	if len(work) < m {
+		panic(badWork)
+	}
+	for i := 0; i < k; i++ {
+		a[i*lda+i], tau[i] = impl.Dlarfg(n-i, a[i*lda+i], a[i*lda+min(i+1, n-1):], 1)
+		if i < m-1 {
+			aii := a[i*lda+i]
+			a[i*lda+i] = 1
+			impl.Dlarf(blas.Right, m-i-1, n-i,
+				a[i*lda+i:], 1,
+				tau[i],
+				a[(i+1)*lda+i:], lda,
+				work)
+			a[i*lda+i] = aii
+		}
+	}
+}
diff --git a/native/dgelqf.go b/native/dgelqf.go
@@ -0,0 +1,84 @@
+// Copyright ©2015 The gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package native
+
+import (
+	"github.com/gonum/blas"
+	"github.com/gonum/lapack"
+)
+
+// Dgelqf computes the LQ factorization of the m×n matrix a using a blocked
+// algorithm. Please see the documentation for Dgelq2 for a description of the
+// parameters at entry and exit.
+//
+// Work is temporary storage, and lwork specifies the usable memory length.
+// At minimum, lwork >= m, and this function will panic otherwise.
+// Dgelqf is a blocked LQ factorization, but the block size is limited
+// by the temporary space available. If lwork == -1, instead of performing Dgelqf,
+// the optimal work length will be stored into work[0].
+//
+// tau must have length at least min(m,n), and this function will panic otherwise.
+func (impl Implementation) Dgelqf(m, n int, a []float64, lda int, tau, work []float64, lwork int) {
+	nb := impl.Ilaenv(1, "DGELQF", " ", m, n, -1, -1)
+	lworkopt := m * max(nb, 1)
+	if lwork == -1 {
+		work[0] = float64(lworkopt)
+		return
+	}
+	checkMatrix(m, n, a, lda)
+	if len(work) < lwork {
+		panic(shortWork)
+	}
+	if lwork < m {
+		panic(badWork)
+	}
+	k := min(m, n)
+	if len(tau) < k {
+		panic(badTau)
+	}
+	if k == 0 {
+		return
+	}
+	// Find the optimal blocking size based on the size of available memory
+	// and optimal machine parameters.
+	nbmin := 2
+	var nx int
+	iws := m
+	ldwork := nb
+	if nb > 1 && k > nb {
+		nx = max(0, impl.Ilaenv(3, "DGELQF", " ", m, n, -1, -1))
+		if nx < k {
+			iws = m * nb
+			if lwork < iws {
+				nb = lwork / m
+				nbmin = max(2, impl.Ilaenv(2, "DGELQF", " ", m, n, -1, -1))
+			}
+		}
+	}
+	// Computed blocked LQ factorization.
+	var i int
+	if nb >= nbmin && nb < k && nx < k {
+		for i = 0; i < k-nx; i += nb {
+			ib := min(k-i, nb)
+			impl.Dgelq2(ib, n-i, a[i*lda+i:], lda, tau[i:], work)
+			if i+ib < m {
+				impl.Dlarft(lapack.Forward, lapack.RowWise, n-i, ib,
+					a[i*lda+i:], lda,
+					tau[i:],
+					work, ldwork)
+				impl.Dlarfb(blas.Right, blas.NoTrans, lapack.Forward, lapack.RowWise,
+					m-i-ib, n-i, ib,
+					a[i*lda+i:], lda,
+					work, ldwork,
+					a[(i+ib)*lda+i:], lda,
+					work[ib*ldwork:], ldwork)
+			}
+		}
+	}
+	// Perform unblocked LQ factorization on the remainder.
+	if i < k {
+		impl.Dgelq2(m-i, n-i, a[i*lda+i:], lda, tau[i:], work)
+	}
+}
diff --git a/native/dgels.go b/native/dgels.go
@@ -0,0 +1,200 @@
+// Copyright ©2015 The gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package native
+
+import (
+	"github.com/gonum/blas"
+	"github.com/gonum/lapack"
+)
+
+// Dgels finds a minimum-norm solution based on the matrices a and b using the
+// QR or LQ factorization. Dgels returns false if the matrix
+// A is singular, and true if this solution was successfully found.
+//
+// The minimization problem solved depends on the input parameters.
+//
+//  1. If m >= n and trans == blas.NoTrans, Dgels finds X such that || A*X - B||_2
+//  is minimized.
+//  2. If m < n and trans == blas.NoTrans, Dgels finds the minimum norm solution of
+//  A * X = B.
+//  3. If m >= n and trans == blas.Trans, Dgels finds the minimum norm solution of
+//  A^T * X = B.
+//  4. If m < n and trans == blas.Trans, Dgels finds X such that || A*X - B||_2
+//  is minimized.
+// Note that the least-squares solutions (cases 1 and 3) perform the minimization
+// per column of B. This is not the same as finding the minimum-norm matrix.
+//
+// The matrix a is a general matrix of size m×n and is modified during this call.
+// The input matrix b is of size max(m,n)×nrhs, and serves two purposes. On entry,
+// the elements of b specify the input matrix B. B has size m×nrhs if
+// trans == blas.NoTrans, and n×nrhs if trans == blas.Trans. On exit, the
+// leading submatrix of b contains the solution vectors X. If trans == blas.NoTrans,
+// this submatrix is of size n×nrhs, and of size m×nrhs otherwise.
+//
+// Work is temporary storage, and lwork specifies the usable memory length.
+// At minimum, lwork >= max(m,n) + max(m,n,nrhs), and this function will panic
+// otherwise. A longer work will enable blocked algorithms to be called.
+// In the special case that lwork == -1, work[0] will be set to the optimal working
+// length.
+func (impl Implementation) Dgels(trans blas.Transpose, m, n, nrhs int, a []float64, lda int, b []float64, ldb int, work []float64, lwork int) bool {
+	notran := trans == blas.NoTrans
+	checkMatrix(m, n, a, lda)
+	mn := min(m, n)
+	checkMatrix(mn, nrhs, b, ldb)
+
+	// Find optimal block size.
+	tpsd := true
+	if notran {
+		tpsd = false
+	}
+	var nb int
+	if m >= n {
+		nb = impl.Ilaenv(1, "DGEQRF", " ", m, n, -1, -1)
+		if tpsd {
+			nb = max(nb, impl.Ilaenv(1, "DORMQR", "LN", m, nrhs, n, -1))
+		} else {
+			nb = max(nb, impl.Ilaenv(1, "DORMQR", "LT", m, nrhs, n, -1))
+		}
+	} else {
+		nb = impl.Ilaenv(1, "DGELQF", " ", m, n, -1, -1)
+		if tpsd {
+			nb = max(nb, impl.Ilaenv(1, "DORMLQ", "LT", n, nrhs, m, -1))
+		} else {
+			nb = max(nb, impl.Ilaenv(1, "DORMLQ", "LN", n, nrhs, m, -1))
+		}
+	}
+	if lwork == -1 {
+		work[0] = float64(max(1, mn+max(mn, nrhs)*nb))
+		return true
+	}
+
+	if len(work) < lwork {
+		panic(shortWork)
+	}
+	if lwork < mn+max(mn, nrhs) {
+		panic(badWork)
+	}
+	if m == 0 || n == 0 || nrhs == 0 {
+		impl.Dlaset(blas.All, max(m, n), nrhs, 0, 0, b, ldb)
+		return true
+	}
+
+	// Scale the input matrices if they contain extreme values.
+	smlnum := dlamchS / dlamchP
+	bignum := 1 / smlnum
+	anrm := impl.Dlange(lapack.MaxAbs, m, n, a, lda, nil)
+	var iascl int
+	if anrm > 0 && anrm < smlnum {
+		impl.Dlascl(lapack.General, 0, 0, anrm, smlnum, m, n, a, lda)
+		iascl = 1
+	} else if anrm > bignum {
+		impl.Dlascl(lapack.General, 0, 0, anrm, bignum, m, n, a, lda)
+	} else if anrm == 0 {
+		// Matrix all zeros
+		impl.Dlaset(blas.All, max(m, n), nrhs, 0, 0, b, ldb)
+		return true
+	}
+	brow := m
+	if tpsd {
+		brow = n
+	}
+	bnrm := impl.Dlange(lapack.MaxAbs, brow, nrhs, b, ldb, nil)
+	ibscl := 0
+	if bnrm > 0 && bnrm < smlnum {
+		impl.Dlascl(lapack.General, 0, 0, bnrm, smlnum, brow, nrhs, b, ldb)
+		ibscl = 1
+	} else if bnrm > bignum {
+		impl.Dlascl(lapack.General, 0, 0, bnrm, bignum, brow, nrhs, b, ldb)
+		ibscl = 2
+	}
+
+	// Solve the minimization problem using a QR or an LQ decomposition.
+	var scllen int
+	if m >= n {
+		impl.Dgeqrf(m, n, a, lda, work, work[mn:], lwork-mn)
+		if !tpsd {
+			impl.Dormqr(blas.Left, blas.Trans, m, nrhs, n,
+				a, lda,
+				work,
+				b, ldb,
+				work[mn:], lwork-mn)
+			ok := impl.Dtrtrs(blas.Upper, blas.NoTrans, blas.NonUnit, n, nrhs,
+				a, lda,
+				b, ldb)
+			if !ok {
+				return false
+			}
+			scllen = n
+		} else {
+			ok := impl.Dtrtrs(blas.Upper, blas.Trans, blas.NonUnit, n, nrhs,
+				a, lda,
+				b, ldb)
+			if !ok {
+				return false
+			}
+			for i := n; i < m; i++ {
+				for j := 0; j < nrhs; j++ {
+					b[i*ldb+j] = 0
+				}
+			}
+			impl.Dormqr(blas.Left, blas.NoTrans, m, nrhs, n,
+				a, lda,
+				work,
+				b, ldb,
+				work[mn:], lwork-mn)
+			scllen = m
+		}
+	} else {
+		impl.Dgelqf(m, n, a, lda, work, work[mn:], lwork-mn)
+		if !tpsd {
+			ok := impl.Dtrtrs(blas.Lower, blas.NoTrans, blas.NonUnit,
+				m, nrhs,
+				a, lda,
+				b, ldb)
+			if !ok {
+				return false
+			}
+			for i := m; i < n; i++ {
+				for j := 0; j < nrhs; j++ {
+					b[i*ldb+j] = 0
+				}
+			}
+			impl.Dormlq(blas.Left, blas.Trans, n, nrhs, m,
+				a, lda,
+				work,
+				b, ldb,
+				work[mn:], lwork-mn)
+			scllen = n
+		} else {
+			impl.Dormlq(blas.Left, blas.NoTrans, n, nrhs, m,
+				a, lda,
+				work,
+				b, ldb,
+				work[mn:], lwork-mn)
+			ok := impl.Dtrtrs(blas.Lower, blas.Trans, blas.NonUnit,
+				m, nrhs,
+				a, lda,
+				b, ldb)
+			if !ok {
+				return false
+			}
+		}
+	}
+
+	// Adjust answer vector based on scaling.
+	if iascl == 1 {
+		impl.Dlascl(lapack.General, 0, 0, anrm, smlnum, scllen, nrhs, b, ldb)
+	}
+	if iascl == 2 {
+		impl.Dlascl(lapack.General, 0, 0, anrm, bignum, scllen, nrhs, b, ldb)
+	}
+	if ibscl == 1 {
+		impl.Dlascl(lapack.General, 0, 0, smlnum, bnrm, scllen, nrhs, b, ldb)
+	}
+	if ibscl == 2 {
+		impl.Dlascl(lapack.General, 0, 0, bignum, bnrm, scllen, nrhs, b, ldb)
+	}
+	return true
+}