Skip to content

Commit

Permalink
lapack/gonum: implement optimal packing in Dlaqr5
Browse files Browse the repository at this point in the history
...and update workspace sizes in Dhseqr and Dlaqr04.
  • Loading branch information
vladimir-ch committed Jul 6, 2023
1 parent edd0155 commit efb7471
Show file tree
Hide file tree
Showing 5 changed files with 323 additions and 411 deletions.
2 changes: 1 addition & 1 deletion lapack/gonum/dhseqr.go
Expand Up @@ -191,7 +191,7 @@ func (impl Implementation) Dhseqr(job lapack.SchurJob, compz lapack.SchurComp, n
// Matrices of order ntiny or smaller must be processed by
// Dlahqr because of insufficient subdiagonal scratch space.
// This is a hard limit.
ntiny = 11
ntiny = 15

// nl is the size of a local workspace to help small matrices
// through a rare Dlahqr failure. nl > ntiny is required and
Expand Down
20 changes: 10 additions & 10 deletions lapack/gonum/dlaqr04.go
Expand Up @@ -118,7 +118,7 @@ func (impl Implementation) Dlaqr04(wantt, wantz bool, n, ilo, ihi int, h []float
// Matrices of order ntiny or smaller must be processed by
// Dlahqr because of insufficient subdiagonal scratch space.
// This is a hard limit.
ntiny = 11
ntiny = 15
// Exceptional deflation windows: try to cure rare slow
// convergence by varying the size of the deflation window after
// kexnw iterations.
Expand Down Expand Up @@ -218,20 +218,20 @@ func (impl Implementation) Dlaqr04(wantt, wantz bool, n, ilo, ihi int, h []float
} else {
fname = "DLAQR4"
}
// nwr is the recommended deflation window size. n is greater than 11,
// nwr is the recommended deflation window size. n is greater than ntiny,
// so there is enough subdiagonal workspace for nwr >= 2 as required.
// (In fact, there is enough subdiagonal space for nwr >= 3.)
// TODO(vladimir-ch): If there is enough space for nwr >= 3, should we
// (In fact, there is enough subdiagonal space for nwr >= 4.)
// TODO(vladimir-ch): If there is enough space for nwr >= 4, should we
// use it?
nwr := impl.Ilaenv(13, fname, jbcmpz, n, ilo, ihi, lwork)
nwr = max(2, nwr)
nwr = min(ihi-ilo+1, min((n-1)/3, nwr))

// nsr is the recommended number of simultaneous shifts. n is greater
// than 11, so there is enough subdiagonal workspace for nsr to be even
// and greater than or equal to two as required.
// nsr is the recommended number of simultaneous shifts. n is greater than
// ntiny, so there is enough subdiagonal workspace for nsr to be even and
// greater than or equal to two as required.
nsr := impl.Ilaenv(15, fname, jbcmpz, n, ilo, ihi, lwork)
nsr = min(nsr, min((n+6)/9, ihi-ilo))
nsr = min(nsr, min((n-3)/6, ihi-ilo))
nsr = max(2, nsr&^1)

// Workspace query call to Dlaqr23.
Expand Down Expand Up @@ -264,7 +264,7 @@ func (impl Implementation) Dlaqr04(wantt, wantz bool, n, ilo, ihi int, h []float

// nsmax is the largest number of simultaneous shifts for which there is
// sufficient workspace.
nsmax := min((n+6)/9, 2*lwork/3) &^ 1
nsmax := min((n-3)/6, 2*lwork/3) &^ 1

ndfl := 1 // Number of iterations since last deflation.
ndec := 0 // Deflation window size decrement.
Expand Down Expand Up @@ -470,7 +470,7 @@ func (impl Implementation) Dlaqr04(wantt, wantz bool, n, ilo, ihi int, h []float
// (nhv must be at least kdu but more is better),
// - an nhv×kdu vertical work array WV along the left-hand-edge
// (nhv must be at least kdu but more is better).
kdu := 3*ns - 3
kdu := 2 * ns
ku := n - kdu
kwh := kdu
kwv = kdu + 3
Expand Down

0 comments on commit efb7471

Please sign in to comment.