native: change Dlaqr2 into Dlaqr23

gonum · Aug 29, 2016 · ebab9ed · ebab9ed
1 parent 595f628
commit ebab9ed
Show file tree

Hide file tree

Showing 4 changed files with 79 additions and 50 deletions.
diff --git a/native/dlaqr2.go → native/dlaqr23.go b/native/dlaqr2.go → native/dlaqr23.go
@@ -12,7 +12,7 @@ import (
 	"github.com/gonum/lapack"
 )
 
-// Dlaqr2 performs the orthogonal similarity transformation of an n×n upper
+// Dlaqr23 performs the orthogonal similarity transformation of an n×n upper
 // Hessenberg matrix to detect and deflate fully converged eigenvalues from a
 // trailing principal submatrix using aggressive early deflation [1].
 //
@@ -34,28 +34,31 @@ import (
 // and the block must be isolated, that is, it must hold that
 //  ktop == 0   or H[ktop,ktop-1] == 0,
 //  kbot == n-1 or H[kbot+1,kbot] == 0,
-// otherwise Dlaqr2 will panic.
+// otherwise Dlaqr23 will panic.
 //
 // nw is the deflation window size. It must hold that
 //  0 <= nw <= kbot-ktop+1,
-// otherwise Dlaqr2 will panic.
+// otherwise Dlaqr23 will panic.
 //
 // iloz and ihiz specify the rows of the n×n matrix Z to which transformations
 // will be applied if wantz is true. It must hold that
 //  0 <= iloz <= ktop,  and  kbot <= ihiz < n,
-// otherwise Dlaqr2 will panic.
+// otherwise Dlaqr23 will panic.
 //
-// sr and si must have length kbot+1, otherwise Dlaqr2 will panic.
+// sr and si must have length kbot+1, otherwise Dlaqr23 will panic.
 //
 // v and ldv represent an nw×nw work matrix.
 // t and ldt represent an nw×nh work matrix, and nh must be at least nw.
 // wv and ldwv represent an nv×nw work matrix.
 //
 // work must have length at least lwork and lwork must be at least max(1,2*nw),
-// otherwise Dlaqr2 will panic. Larger values of lwork may result in greater
+// otherwise Dlaqr23 will panic. Larger values of lwork may result in greater
 // efficiency. On return, work[0] will contain the optimal value of lwork.
 //
-// If lwork is -1, instead of performing Dlaqr2, the function only estimates the
+// recurse is the non-negative recursion depth. For recurse > 0, Dlaqr23 behaves
+// as DLAQR3, for recurse == 0 it behaves as DLAQR2.
+//
+// If lwork is -1, instead of performing Dlaqr23, the function only estimates the
 // optimal workspace size and stores it into work[0]. Neither h nor z are
 // accessed.
 //
@@ -75,7 +78,7 @@ import (
 //      Aggressive Early Deflation. SIAM J. Matrix Anal. Appl 23(4) (2002), pp. 948—973
 //      URL: http://dx.doi.org/10.1137/S0895479801384585
 //
-func (impl Implementation) Dlaqr2(wantt, wantz bool, n, ktop, kbot, nw int, h []float64, ldh int, iloz, ihiz int, z []float64, ldz int, sr, si []float64, v []float64, ldv int, nh int, t []float64, ldt int, nv int, wv []float64, ldwv int, work []float64, lwork int) (ns, nd int) {
+func (impl Implementation) Dlaqr23(wantt, wantz bool, n, ktop, kbot, nw int, h []float64, ldh int, iloz, ihiz int, z []float64, ldz int, sr, si []float64, v []float64, ldv int, nh int, t []float64, ldt int, nv int, wv []float64, ldwv int, work []float64, lwork int, recurse int) (ns, nd int) {
 	switch {
 	case ktop < 0 || max(0, n-1) < ktop:
 		panic("lapack: invalid value of ktop")
@@ -98,6 +101,9 @@ func (impl Implementation) Dlaqr2(wantt, wantz bool, n, ktop, kbot, nw int, h []
 			panic("lapack: invalid value of ihiz")
 		}
 	}
+	if recurse < 0 {
+		panic("lapack: invalid value of recurse")
+	}
 
 	// LAPACK code does not enforce the documented behavior
 	//  nw <= kbot-ktop+1
@@ -106,13 +112,21 @@ func (impl Implementation) Dlaqr2(wantt, wantz bool, n, ktop, kbot, nw int, h []
 	lwkopt := max(1, 2*nw)
 	if jw > 2 {
 		// Workspace query call to Dgehrd.
-		impl.Dgehrd(jw, 0, jw-2, t, ldt, nil, work, -1)
+		impl.Dgehrd(jw, 0, jw-2, nil, 0, nil, work, -1)
 		lwk1 := int(work[0])
 		// Workspace query call to Dormhr.
-		impl.Dormhr(blas.Right, blas.NoTrans, jw, jw, 0, jw-2, t, ldt, work, v, ldv, work, -1)
+		impl.Dormhr(blas.Right, blas.NoTrans, jw, jw, 0, jw-2, nil, 0, nil, nil, 0, work, -1)
 		lwk2 := int(work[0])
-		// Optimal workspace.
-		lwkopt = jw + max(lwk1, lwk2)
+		if recurse > 0 {
+			// Workspace query call to Dlaqr4.
+			impl.Dlaqr4(true, true, jw, 0, jw-1, nil, 0, nil, nil, 0, jw-1, nil, 0, work, -1)
+			lwk3 := int(work[0])
+			// Optimal workspace.
+			lwkopt = max(jw+max(lwk1, lwk2), lwk3)
+		} else {
+			// Optimal workspace.
+			lwkopt = jw + max(lwk1, lwk2)
+		}
 	}
 
 	// Quick return in case of workspace query.
@@ -179,7 +193,13 @@ func (impl Implementation) Dlaqr2(wantt, wantz bool, n, ktop, kbot, nw int, h []
 	bi := blas64.Implementation()
 	bi.Dcopy(jw-1, h[(kwtop+1)*ldh+kwtop:], ldh+1, t[ldt:], ldt+1)
 	impl.Dlaset(blas.All, jw, jw, 0, 1, v, ldv)
-	infqr := impl.Dlahqr(true, true, jw, 0, jw-1, t, ldt, sr[kwtop:], si[kwtop:], 0, jw-1, v, ldv)
+	nmin := impl.Ilaenv(12, "DLAQR3", "SV", jw, 0, jw-1, lwork)
+	var infqr int
+	if recurse > 0 && jw > nmin {
+		infqr = impl.Dlaqr4(true, true, jw, 0, jw-1, t, ldt, sr[kwtop:], si[kwtop:], 0, jw-1, v, ldv, work, lwork)
+	} else {
+		infqr = impl.Dlahqr(true, true, jw, 0, jw-1, t, ldt, sr[kwtop:], si[kwtop:], 0, jw-1, v, ldv)
+	}
 	// Note that ilo == 0 which conveniently coincides with the success
 	// value of infqr, that is, infqr as an index always points to the first
 	// converged eigenvalue.

diff --git a/native/dlaqr4.go b/native/dlaqr4.go
@@ -128,7 +128,7 @@ func (impl Implementation) Dlaqr4(wantt, wantz bool, n, ilo, ihi int, h []float6
 		panic(badWork)
 	// TODO(vladimir-ch): Enable if and when we figure out what the minimum
 	// necessary lwork value is. Dlaqr4 says that the minimum is n which
-	// clashes with Dlaqr2's opinion about optimal work when nw <= 2
+	// clashes with Dlaqr23's opinion about optimal work when nw <= 2
 	// (independent of n).
 	// case lwork < n && n > ntiny && lwork != -1:
 	// 	panic(badWork)
@@ -205,10 +205,10 @@ func (impl Implementation) Dlaqr4(wantt, wantz bool, n, ilo, ihi int, h []float6
 	nsr = min(nsr, min((n+6)/9, ihi-ilo))
 	nsr = max(2, nsr&^1)
 
-	// Workspace query call to Dlaqr2.
-	impl.Dlaqr2(wantt, wantz, n, ilo, ihi, nwr+1, nil, 0, iloz, ihiz, nil, 0,
-		nil, nil, nil, 0, n, nil, 0, n, nil, 0, work, -1)
-	// Optimal workspace is max(Dlaqr5, Dlaqr2).
+	// Workspace query call to Dlaqr23.
+	impl.Dlaqr23(wantt, wantz, n, ilo, ihi, nwr+1, nil, 0, iloz, ihiz, nil, 0,
+		nil, nil, nil, 0, n, nil, 0, n, nil, 0, work, -1, 0)
+	// Optimal workspace is max(Dlaqr5, Dlaqr23).
 	lwkopt := max(3*nsr/2, int(work[0]))
 	// Quick return in case of workspace query.
 	if lwork == -1 {
@@ -313,9 +313,9 @@ func (impl Implementation) Dlaqr4(wantt, wantz bool, n, ilo, ihi int, h []float6
 		kwv := nw + 1
 		nhv := n - kwv - kt
 		// Aggressive early deflation.
-		ls, ld := impl.Dlaqr2(wantt, wantz, n, ktop, kbot, nw,
+		ls, ld := impl.Dlaqr23(wantt, wantz, n, ktop, kbot, nw,
 			h, ldh, iloz, ihiz, z, ldz, wr[:kbot+1], wi[:kbot+1],
-			h[kv*ldh:], ldh, nhv, h[kv*ldh+kt:], ldh, nhv, h[kwv*ldh:], ldh, work, lwork)
+			h[kv*ldh:], ldh, nhv, h[kv*ldh+kt:], ldh, nhv, h[kwv*ldh:], ldh, work, lwork, 0)
 
 		// Adjust kbot accounting for new deflations.
 		kbot -= ld
@@ -333,13 +333,13 @@ func (impl Implementation) Dlaqr4(wantt, wantz bool, n, ilo, ihi int, h []float6
 		}
 
 		// ns is the nominal number of simultaneous shifts. This may be
-		// lowered (slightly) if Dlaqr2 did not provide that many
+		// lowered (slightly) if Dlaqr23 did not provide that many
 		// shifts.
 		ns := min(min(nsmax, nsr), max(2, kbot-ktop)) &^ 1
 
 		// If there have been no deflations in a multiple of kexsh
 		// iterations, then try exceptional shifts. Otherwise use shifts
-		// provided by Dlaqr2 above or from the eigenvalues of a
+		// provided by Dlaqr23 above or from the eigenvalues of a
 		// trailing principal submatrix.
 		if ndfl%kexsh == 0 {
 			ks = kbot - ns + 1

diff --git a/native/lapack_test.go b/native/lapack_test.go
@@ -144,8 +144,8 @@ func TestDlaqr1(t *testing.T) {
 	testlapack.Dlaqr1Test(t, impl)
 }
 
-func TestDlaqr2(t *testing.T) {
-	testlapack.Dlaqr2Test(t, impl)
+func TestDlaqr23(t *testing.T) {
+	testlapack.Dlaqr23Test(t, impl)
 }
 
 func TestDlaqr4(t *testing.T) {

diff --git a/testlapack/dlaqr2.go → testlapack/dlaqr23.go b/testlapack/dlaqr2.go → testlapack/dlaqr23.go
@@ -13,11 +13,11 @@ import (
 	"github.com/gonum/blas/blas64"
 )
 
-type Dlaqr2er interface {
-	Dlaqr2(wantt, wantz bool, n, ktop, kbot, nw int, h []float64, ldh int, iloz, ihiz int, z []float64, ldz int, sr, si []float64, v []float64, ldv int, nh int, t []float64, ldt int, nv int, wv []float64, ldwv int, work []float64, lwork int) (ns, nd int)
+type Dlaqr23er interface {
+	Dlaqr23(wantt, wantz bool, n, ktop, kbot, nw int, h []float64, ldh int, iloz, ihiz int, z []float64, ldz int, sr, si []float64, v []float64, ldv int, nh int, t []float64, ldt int, nv int, wv []float64, ldwv int, work []float64, lwork int, recurse int) (ns, nd int)
 }
 
-type dlaqr2Test struct {
+type dlaqr23Test struct {
 	wantt, wantz bool
 	ktop, kbot   int
 	nw           int
@@ -27,20 +27,23 @@ type dlaqr2Test struct {
 	evWant []complex128 // Optional slice with known eigenvalues.
 }
 
-func Dlaqr2Test(t *testing.T, impl Dlaqr2er) {
+func Dlaqr23Test(t *testing.T, impl Dlaqr23er) {
 	rnd := rand.New(rand.NewSource(1))
 
 	for _, wantt := range []bool{true, false} {
 		for _, wantz := range []bool{true, false} {
-			for _, n := range []int{1, 2, 3, 4, 5, 6, 10, 18, 31, 53} {
-				for _, extra := range []int{0, 1, 11} {
-					for cas := 0; cas < 100; cas++ {
-						ktop := rnd.Intn(n)
-						kbot := rnd.Intn(n)
-						if ktop > kbot {
-							ktop, kbot = kbot, ktop
+			for _, n := range []int{1, 2, 3, 4, 5, 6, 10, 18, 31, 100} {
+				for _, extra := range []int{0, 11} {
+					for cas := 0; cas < 30; cas++ {
+						var nw int
+						if nw <= 75 {
+							nw = rnd.Intn(n) + 1
+						} else {
+							nw = 76 + rnd.Intn(n-75)
 						}
-						nw := rnd.Intn(kbot-ktop+1) + 1
+						ktop := rnd.Intn(n - nw + 1)
+						kbot := ktop + nw - 1
+						kbot += rnd.Intn(n - kbot)
 						h := randomHessenberg(n, n+extra, rnd)
 						if ktop-1 >= 0 {
 							h.Data[ktop*h.Stride+ktop-1] = 0
@@ -50,7 +53,7 @@ func Dlaqr2Test(t *testing.T, impl Dlaqr2er) {
 						}
 						iloz := rnd.Intn(ktop + 1)
 						ihiz := kbot + rnd.Intn(n-kbot)
-						test := dlaqr2Test{
+						test := dlaqr23Test{
 							wantt: wantt,
 							wantz: wantz,
 							ktop:  ktop,
@@ -60,8 +63,10 @@ func Dlaqr2Test(t *testing.T, impl Dlaqr2er) {
 							iloz:  iloz,
 							ihiz:  ihiz,
 						}
-						testDlaqr2(t, impl, test, false, rnd)
-						testDlaqr2(t, impl, test, true, rnd)
+						testDlaqr23(t, impl, test, false, 1, rnd)
+						testDlaqr23(t, impl, test, true, 1, rnd)
+						testDlaqr23(t, impl, test, false, 0, rnd)
+						testDlaqr23(t, impl, test, true, 0, rnd)
 					}
 				}
 			}
@@ -72,7 +77,7 @@ func Dlaqr2Test(t *testing.T, impl Dlaqr2er) {
 	for _, wantt := range []bool{true, false} {
 		for _, wantz := range []bool{true, false} {
 			for _, extra := range []int{0, 1, 11} {
-				test := dlaqr2Test{
+				test := dlaqr23Test{
 					wantt: wantt,
 					wantz: wantz,
 					h:     randomHessenberg(0, extra, rnd),
@@ -82,14 +87,16 @@ func Dlaqr2Test(t *testing.T, impl Dlaqr2er) {
 					ihiz:  -1,
 					nw:    0,
 				}
-				testDlaqr2(t, impl, test, true, rnd)
-				testDlaqr2(t, impl, test, false, rnd)
+				testDlaqr23(t, impl, test, true, 1, rnd)
+				testDlaqr23(t, impl, test, false, 1, rnd)
+				testDlaqr23(t, impl, test, true, 0, rnd)
+				testDlaqr23(t, impl, test, false, 0, rnd)
 			}
 		}
 	}
 
 	// Tests with explicit eigenvalues computed by Octave.
-	for _, test := range []dlaqr2Test{
+	for _, test := range []dlaqr23Test{
 		{
 			h: blas64.General{
 				Rows:   1,
@@ -201,12 +208,14 @@ func Dlaqr2Test(t *testing.T, impl Dlaqr2er) {
 		test.wantt = true
 		test.wantz = true
 		test.nw = test.kbot - test.ktop + 1
-		testDlaqr2(t, impl, test, true, rnd)
-		testDlaqr2(t, impl, test, false, rnd)
+		testDlaqr23(t, impl, test, true, 1, rnd)
+		testDlaqr23(t, impl, test, false, 1, rnd)
+		testDlaqr23(t, impl, test, true, 0, rnd)
+		testDlaqr23(t, impl, test, false, 0, rnd)
 	}
 }
 
-func testDlaqr2(t *testing.T, impl Dlaqr2er, test dlaqr2Test, opt bool, rnd *rand.Rand) {
+func testDlaqr23(t *testing.T, impl Dlaqr23er, test dlaqr23Test, opt bool, recurse int, rnd *rand.Rand) {
 	const tol = 1e-14
 
 	h := cloneGeneral(test.h)
@@ -244,15 +253,15 @@ func testDlaqr2(t *testing.T, impl Dlaqr2er, test dlaqr2Test, opt bool, rnd *ran
 	var work []float64
 	if opt {
 		work = nanSlice(1)
-		impl.Dlaqr2(wantt, wantz, n, ktop, kbot, nw, nil, h.Stride, iloz, ihiz, nil, z.Stride,
-			nil, nil, nil, v.Stride, tmat.Cols, nil, tmat.Stride, wv.Rows, nil, wv.Stride, work, -1)
+		impl.Dlaqr23(wantt, wantz, n, ktop, kbot, nw, nil, h.Stride, iloz, ihiz, nil, z.Stride,
+			nil, nil, nil, v.Stride, tmat.Cols, nil, tmat.Stride, wv.Rows, nil, wv.Stride, work, -1, recurse)
 		work = nanSlice(int(work[0]))
 	} else {
 		work = nanSlice(max(1, 2*nw))
 	}
 
-	ns, nd := impl.Dlaqr2(wantt, wantz, n, ktop, kbot, nw, h.Data, h.Stride, iloz, ihiz, z.Data, z.Stride,
-		sr, si, v.Data, v.Stride, tmat.Cols, tmat.Data, tmat.Stride, wv.Rows, wv.Data, wv.Stride, work, len(work))
+	ns, nd := impl.Dlaqr23(wantt, wantz, n, ktop, kbot, nw, h.Data, h.Stride, iloz, ihiz, z.Data, z.Stride,
+		sr, si, v.Data, v.Stride, tmat.Cols, tmat.Data, tmat.Stride, wv.Rows, wv.Data, wv.Stride, work, len(work), recurse)
 
 	prefix := fmt.Sprintf("Case wantt=%v, wantz=%v, n=%v, ktop=%v, kbot=%v, nw=%v, iloz=%v, ihiz=%v, extra=%v",
 		wantt, wantz, n, ktop, kbot, nw, iloz, ihiz, extra)