Skip to content
This repository has been archived by the owner on Nov 24, 2018. It is now read-only.

Commit

Permalink
native: change Dlaqr2 into Dlaqr23
Browse files Browse the repository at this point in the history
  • Loading branch information
vladimir-ch committed Aug 29, 2016
1 parent 595f628 commit ebab9ed
Show file tree
Hide file tree
Showing 4 changed files with 79 additions and 50 deletions.
46 changes: 33 additions & 13 deletions native/dlaqr2.go → native/dlaqr23.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ import (
"github.com/gonum/lapack"
)

// Dlaqr2 performs the orthogonal similarity transformation of an n×n upper
// Dlaqr23 performs the orthogonal similarity transformation of an n×n upper
// Hessenberg matrix to detect and deflate fully converged eigenvalues from a
// trailing principal submatrix using aggressive early deflation [1].
//
Expand All @@ -34,28 +34,31 @@ import (
// and the block must be isolated, that is, it must hold that
// ktop == 0 or H[ktop,ktop-1] == 0,
// kbot == n-1 or H[kbot+1,kbot] == 0,
// otherwise Dlaqr2 will panic.
// otherwise Dlaqr23 will panic.
//
// nw is the deflation window size. It must hold that
// 0 <= nw <= kbot-ktop+1,
// otherwise Dlaqr2 will panic.
// otherwise Dlaqr23 will panic.
//
// iloz and ihiz specify the rows of the n×n matrix Z to which transformations
// will be applied if wantz is true. It must hold that
// 0 <= iloz <= ktop, and kbot <= ihiz < n,
// otherwise Dlaqr2 will panic.
// otherwise Dlaqr23 will panic.
//
// sr and si must have length kbot+1, otherwise Dlaqr2 will panic.
// sr and si must have length kbot+1, otherwise Dlaqr23 will panic.
//
// v and ldv represent an nw×nw work matrix.
// t and ldt represent an nw×nh work matrix, and nh must be at least nw.
// wv and ldwv represent an nv×nw work matrix.
//
// work must have length at least lwork and lwork must be at least max(1,2*nw),
// otherwise Dlaqr2 will panic. Larger values of lwork may result in greater
// otherwise Dlaqr23 will panic. Larger values of lwork may result in greater
// efficiency. On return, work[0] will contain the optimal value of lwork.
//
// If lwork is -1, instead of performing Dlaqr2, the function only estimates the
// recurse is the non-negative recursion depth. For recurse > 0, Dlaqr23 behaves
// as DLAQR3, for recurse == 0 it behaves as DLAQR2.
//
// If lwork is -1, instead of performing Dlaqr23, the function only estimates the
// optimal workspace size and stores it into work[0]. Neither h nor z are
// accessed.
//
Expand All @@ -75,7 +78,7 @@ import (
// Aggressive Early Deflation. SIAM J. Matrix Anal. Appl 23(4) (2002), pp. 948—973
// URL: http://dx.doi.org/10.1137/S0895479801384585
//
func (impl Implementation) Dlaqr2(wantt, wantz bool, n, ktop, kbot, nw int, h []float64, ldh int, iloz, ihiz int, z []float64, ldz int, sr, si []float64, v []float64, ldv int, nh int, t []float64, ldt int, nv int, wv []float64, ldwv int, work []float64, lwork int) (ns, nd int) {
func (impl Implementation) Dlaqr23(wantt, wantz bool, n, ktop, kbot, nw int, h []float64, ldh int, iloz, ihiz int, z []float64, ldz int, sr, si []float64, v []float64, ldv int, nh int, t []float64, ldt int, nv int, wv []float64, ldwv int, work []float64, lwork int, recurse int) (ns, nd int) {
switch {
case ktop < 0 || max(0, n-1) < ktop:
panic("lapack: invalid value of ktop")
Expand All @@ -98,6 +101,9 @@ func (impl Implementation) Dlaqr2(wantt, wantz bool, n, ktop, kbot, nw int, h []
panic("lapack: invalid value of ihiz")
}
}
if recurse < 0 {
panic("lapack: invalid value of recurse")
}

// LAPACK code does not enforce the documented behavior
// nw <= kbot-ktop+1
Expand All @@ -106,13 +112,21 @@ func (impl Implementation) Dlaqr2(wantt, wantz bool, n, ktop, kbot, nw int, h []
lwkopt := max(1, 2*nw)
if jw > 2 {
// Workspace query call to Dgehrd.
impl.Dgehrd(jw, 0, jw-2, t, ldt, nil, work, -1)
impl.Dgehrd(jw, 0, jw-2, nil, 0, nil, work, -1)
lwk1 := int(work[0])
// Workspace query call to Dormhr.
impl.Dormhr(blas.Right, blas.NoTrans, jw, jw, 0, jw-2, t, ldt, work, v, ldv, work, -1)
impl.Dormhr(blas.Right, blas.NoTrans, jw, jw, 0, jw-2, nil, 0, nil, nil, 0, work, -1)
lwk2 := int(work[0])
// Optimal workspace.
lwkopt = jw + max(lwk1, lwk2)
if recurse > 0 {
// Workspace query call to Dlaqr4.
impl.Dlaqr4(true, true, jw, 0, jw-1, nil, 0, nil, nil, 0, jw-1, nil, 0, work, -1)
lwk3 := int(work[0])
// Optimal workspace.
lwkopt = max(jw+max(lwk1, lwk2), lwk3)
} else {
// Optimal workspace.
lwkopt = jw + max(lwk1, lwk2)
}
}

// Quick return in case of workspace query.
Expand Down Expand Up @@ -179,7 +193,13 @@ func (impl Implementation) Dlaqr2(wantt, wantz bool, n, ktop, kbot, nw int, h []
bi := blas64.Implementation()
bi.Dcopy(jw-1, h[(kwtop+1)*ldh+kwtop:], ldh+1, t[ldt:], ldt+1)
impl.Dlaset(blas.All, jw, jw, 0, 1, v, ldv)
infqr := impl.Dlahqr(true, true, jw, 0, jw-1, t, ldt, sr[kwtop:], si[kwtop:], 0, jw-1, v, ldv)
nmin := impl.Ilaenv(12, "DLAQR3", "SV", jw, 0, jw-1, lwork)
var infqr int
if recurse > 0 && jw > nmin {
infqr = impl.Dlaqr4(true, true, jw, 0, jw-1, t, ldt, sr[kwtop:], si[kwtop:], 0, jw-1, v, ldv, work, lwork)
} else {
infqr = impl.Dlahqr(true, true, jw, 0, jw-1, t, ldt, sr[kwtop:], si[kwtop:], 0, jw-1, v, ldv)
}
// Note that ilo == 0 which conveniently coincides with the success
// value of infqr, that is, infqr as an index always points to the first
// converged eigenvalue.
Expand Down
18 changes: 9 additions & 9 deletions native/dlaqr4.go
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ func (impl Implementation) Dlaqr4(wantt, wantz bool, n, ilo, ihi int, h []float6
panic(badWork)
// TODO(vladimir-ch): Enable if and when we figure out what the minimum
// necessary lwork value is. Dlaqr4 says that the minimum is n which
// clashes with Dlaqr2's opinion about optimal work when nw <= 2
// clashes with Dlaqr23's opinion about optimal work when nw <= 2
// (independent of n).
// case lwork < n && n > ntiny && lwork != -1:
// panic(badWork)
Expand Down Expand Up @@ -205,10 +205,10 @@ func (impl Implementation) Dlaqr4(wantt, wantz bool, n, ilo, ihi int, h []float6
nsr = min(nsr, min((n+6)/9, ihi-ilo))
nsr = max(2, nsr&^1)

// Workspace query call to Dlaqr2.
impl.Dlaqr2(wantt, wantz, n, ilo, ihi, nwr+1, nil, 0, iloz, ihiz, nil, 0,
nil, nil, nil, 0, n, nil, 0, n, nil, 0, work, -1)
// Optimal workspace is max(Dlaqr5, Dlaqr2).
// Workspace query call to Dlaqr23.
impl.Dlaqr23(wantt, wantz, n, ilo, ihi, nwr+1, nil, 0, iloz, ihiz, nil, 0,
nil, nil, nil, 0, n, nil, 0, n, nil, 0, work, -1, 0)
// Optimal workspace is max(Dlaqr5, Dlaqr23).
lwkopt := max(3*nsr/2, int(work[0]))
// Quick return in case of workspace query.
if lwork == -1 {
Expand Down Expand Up @@ -313,9 +313,9 @@ func (impl Implementation) Dlaqr4(wantt, wantz bool, n, ilo, ihi int, h []float6
kwv := nw + 1
nhv := n - kwv - kt
// Aggressive early deflation.
ls, ld := impl.Dlaqr2(wantt, wantz, n, ktop, kbot, nw,
ls, ld := impl.Dlaqr23(wantt, wantz, n, ktop, kbot, nw,
h, ldh, iloz, ihiz, z, ldz, wr[:kbot+1], wi[:kbot+1],
h[kv*ldh:], ldh, nhv, h[kv*ldh+kt:], ldh, nhv, h[kwv*ldh:], ldh, work, lwork)
h[kv*ldh:], ldh, nhv, h[kv*ldh+kt:], ldh, nhv, h[kwv*ldh:], ldh, work, lwork, 0)

// Adjust kbot accounting for new deflations.
kbot -= ld
Expand All @@ -333,13 +333,13 @@ func (impl Implementation) Dlaqr4(wantt, wantz bool, n, ilo, ihi int, h []float6
}

// ns is the nominal number of simultaneous shifts. This may be
// lowered (slightly) if Dlaqr2 did not provide that many
// lowered (slightly) if Dlaqr23 did not provide that many
// shifts.
ns := min(min(nsmax, nsr), max(2, kbot-ktop)) &^ 1

// If there have been no deflations in a multiple of kexsh
// iterations, then try exceptional shifts. Otherwise use shifts
// provided by Dlaqr2 above or from the eigenvalues of a
// provided by Dlaqr23 above or from the eigenvalues of a
// trailing principal submatrix.
if ndfl%kexsh == 0 {
ks = kbot - ns + 1
Expand Down
4 changes: 2 additions & 2 deletions native/lapack_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -144,8 +144,8 @@ func TestDlaqr1(t *testing.T) {
testlapack.Dlaqr1Test(t, impl)
}

func TestDlaqr2(t *testing.T) {
testlapack.Dlaqr2Test(t, impl)
func TestDlaqr23(t *testing.T) {
testlapack.Dlaqr23Test(t, impl)
}

func TestDlaqr4(t *testing.T) {
Expand Down
61 changes: 35 additions & 26 deletions testlapack/dlaqr2.go → testlapack/dlaqr23.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,11 @@ import (
"github.com/gonum/blas/blas64"
)

type Dlaqr2er interface {
Dlaqr2(wantt, wantz bool, n, ktop, kbot, nw int, h []float64, ldh int, iloz, ihiz int, z []float64, ldz int, sr, si []float64, v []float64, ldv int, nh int, t []float64, ldt int, nv int, wv []float64, ldwv int, work []float64, lwork int) (ns, nd int)
type Dlaqr23er interface {
Dlaqr23(wantt, wantz bool, n, ktop, kbot, nw int, h []float64, ldh int, iloz, ihiz int, z []float64, ldz int, sr, si []float64, v []float64, ldv int, nh int, t []float64, ldt int, nv int, wv []float64, ldwv int, work []float64, lwork int, recurse int) (ns, nd int)
}

type dlaqr2Test struct {
type dlaqr23Test struct {
wantt, wantz bool
ktop, kbot int
nw int
Expand All @@ -27,20 +27,23 @@ type dlaqr2Test struct {
evWant []complex128 // Optional slice with known eigenvalues.
}

func Dlaqr2Test(t *testing.T, impl Dlaqr2er) {
func Dlaqr23Test(t *testing.T, impl Dlaqr23er) {
rnd := rand.New(rand.NewSource(1))

for _, wantt := range []bool{true, false} {
for _, wantz := range []bool{true, false} {
for _, n := range []int{1, 2, 3, 4, 5, 6, 10, 18, 31, 53} {
for _, extra := range []int{0, 1, 11} {
for cas := 0; cas < 100; cas++ {
ktop := rnd.Intn(n)
kbot := rnd.Intn(n)
if ktop > kbot {
ktop, kbot = kbot, ktop
for _, n := range []int{1, 2, 3, 4, 5, 6, 10, 18, 31, 100} {
for _, extra := range []int{0, 11} {
for cas := 0; cas < 30; cas++ {
var nw int
if nw <= 75 {
nw = rnd.Intn(n) + 1
} else {
nw = 76 + rnd.Intn(n-75)
}
nw := rnd.Intn(kbot-ktop+1) + 1
ktop := rnd.Intn(n - nw + 1)
kbot := ktop + nw - 1
kbot += rnd.Intn(n - kbot)
h := randomHessenberg(n, n+extra, rnd)
if ktop-1 >= 0 {
h.Data[ktop*h.Stride+ktop-1] = 0
Expand All @@ -50,7 +53,7 @@ func Dlaqr2Test(t *testing.T, impl Dlaqr2er) {
}
iloz := rnd.Intn(ktop + 1)
ihiz := kbot + rnd.Intn(n-kbot)
test := dlaqr2Test{
test := dlaqr23Test{
wantt: wantt,
wantz: wantz,
ktop: ktop,
Expand All @@ -60,8 +63,10 @@ func Dlaqr2Test(t *testing.T, impl Dlaqr2er) {
iloz: iloz,
ihiz: ihiz,
}
testDlaqr2(t, impl, test, false, rnd)
testDlaqr2(t, impl, test, true, rnd)
testDlaqr23(t, impl, test, false, 1, rnd)
testDlaqr23(t, impl, test, true, 1, rnd)
testDlaqr23(t, impl, test, false, 0, rnd)
testDlaqr23(t, impl, test, true, 0, rnd)
}
}
}
Expand All @@ -72,7 +77,7 @@ func Dlaqr2Test(t *testing.T, impl Dlaqr2er) {
for _, wantt := range []bool{true, false} {
for _, wantz := range []bool{true, false} {
for _, extra := range []int{0, 1, 11} {
test := dlaqr2Test{
test := dlaqr23Test{
wantt: wantt,
wantz: wantz,
h: randomHessenberg(0, extra, rnd),
Expand All @@ -82,14 +87,16 @@ func Dlaqr2Test(t *testing.T, impl Dlaqr2er) {
ihiz: -1,
nw: 0,
}
testDlaqr2(t, impl, test, true, rnd)
testDlaqr2(t, impl, test, false, rnd)
testDlaqr23(t, impl, test, true, 1, rnd)
testDlaqr23(t, impl, test, false, 1, rnd)
testDlaqr23(t, impl, test, true, 0, rnd)
testDlaqr23(t, impl, test, false, 0, rnd)
}
}
}

// Tests with explicit eigenvalues computed by Octave.
for _, test := range []dlaqr2Test{
for _, test := range []dlaqr23Test{
{
h: blas64.General{
Rows: 1,
Expand Down Expand Up @@ -201,12 +208,14 @@ func Dlaqr2Test(t *testing.T, impl Dlaqr2er) {
test.wantt = true
test.wantz = true
test.nw = test.kbot - test.ktop + 1
testDlaqr2(t, impl, test, true, rnd)
testDlaqr2(t, impl, test, false, rnd)
testDlaqr23(t, impl, test, true, 1, rnd)
testDlaqr23(t, impl, test, false, 1, rnd)
testDlaqr23(t, impl, test, true, 0, rnd)
testDlaqr23(t, impl, test, false, 0, rnd)
}
}

func testDlaqr2(t *testing.T, impl Dlaqr2er, test dlaqr2Test, opt bool, rnd *rand.Rand) {
func testDlaqr23(t *testing.T, impl Dlaqr23er, test dlaqr23Test, opt bool, recurse int, rnd *rand.Rand) {
const tol = 1e-14

h := cloneGeneral(test.h)
Expand Down Expand Up @@ -244,15 +253,15 @@ func testDlaqr2(t *testing.T, impl Dlaqr2er, test dlaqr2Test, opt bool, rnd *ran
var work []float64
if opt {
work = nanSlice(1)
impl.Dlaqr2(wantt, wantz, n, ktop, kbot, nw, nil, h.Stride, iloz, ihiz, nil, z.Stride,
nil, nil, nil, v.Stride, tmat.Cols, nil, tmat.Stride, wv.Rows, nil, wv.Stride, work, -1)
impl.Dlaqr23(wantt, wantz, n, ktop, kbot, nw, nil, h.Stride, iloz, ihiz, nil, z.Stride,
nil, nil, nil, v.Stride, tmat.Cols, nil, tmat.Stride, wv.Rows, nil, wv.Stride, work, -1, recurse)
work = nanSlice(int(work[0]))
} else {
work = nanSlice(max(1, 2*nw))
}

ns, nd := impl.Dlaqr2(wantt, wantz, n, ktop, kbot, nw, h.Data, h.Stride, iloz, ihiz, z.Data, z.Stride,
sr, si, v.Data, v.Stride, tmat.Cols, tmat.Data, tmat.Stride, wv.Rows, wv.Data, wv.Stride, work, len(work))
ns, nd := impl.Dlaqr23(wantt, wantz, n, ktop, kbot, nw, h.Data, h.Stride, iloz, ihiz, z.Data, z.Stride,
sr, si, v.Data, v.Stride, tmat.Cols, tmat.Data, tmat.Stride, wv.Rows, wv.Data, wv.Stride, work, len(work), recurse)

prefix := fmt.Sprintf("Case wantt=%v, wantz=%v, n=%v, ktop=%v, kbot=%v, nw=%v, iloz=%v, ihiz=%v, extra=%v",
wantt, wantz, n, ktop, kbot, nw, iloz, ihiz, extra)
Expand Down

0 comments on commit ebab9ed

Please sign in to comment.