Skip to content
This repository has been archived by the owner on Dec 9, 2018. It is now read-only.

Commit

Permalink
Fixed implementation of dgbmv and added tests.
Browse files Browse the repository at this point in the history
The old implementation of dgbmv was written in column-major style and had a few bugs. This writes it as row-major and adds a more robust test suite
  • Loading branch information
btracey committed Dec 2, 2014
1 parent 33f78a2 commit fb08479
Show file tree
Hide file tree
Showing 5 changed files with 250 additions and 110 deletions.
4 changes: 4 additions & 0 deletions cblas/level2double_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,7 @@ func TestDger(t *testing.T) {
func TestDtxmv(t *testing.T) {
testblas.DtxmvTest(t, blasser)
}

func TestDgbmv(t *testing.T) {
testblas.DgbmvTest(t, blasser)
}
134 changes: 73 additions & 61 deletions goblas/level2double.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@ const (
badLdaRow string = "lda must be greater than max(1,n) for row major"
badLdaCol string = "lda must be greater than max(1,m) for col major"
badLda string = "lda must be greater than max(1,n)"

badLdaGenBand string = "goblas: lda must be greater than 1 + kL + kU for general banded"
kLLT0 string = "goblas: kL < 0"
kULT0 string = "goblas: kU < 0"
)

func max(a, b int) int {
Expand Down Expand Up @@ -183,16 +187,11 @@ func (Blas) Dger(m, n int, alpha float64, x []float64, incX int, y []float64, in

}

// Dgbmv performs y = alpha*A*x + beta*y where a is an mxn band matrix with
// kl subdiagonals and ku super-diagonals. m and n refer to the size of the full
// dense matrix, not the actual number of rows and columns in the contracted banded
// matrix.
func (b Blas) Dgbmv(tA blas.Transpose, m, n, kL, kU int, alpha float64, a []float64, lda int, x []float64, incX int, beta float64, y []float64, incY int) {
// Transform for row major
m, n = n, m
kU, kL = kL, kU
if tA == blas.NoTrans {
tA = blas.Trans
} else {
tA = blas.NoTrans
}

if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans {
panic(badTranspose)
}
Expand All @@ -202,10 +201,15 @@ func (b Blas) Dgbmv(tA blas.Transpose, m, n, kL, kU int, alpha float64, a []floa
if n < 0 {
panic(nLT0)
}
if lda < max(1, n) {
panic(badLdaRow)
if kL < 0 {
panic(kLLT0)
}
if kL < 0 {
panic(kULT0)
}
if lda < kL+kU+1 {
panic(badLdaGenBand)
}

if incX == 0 {
panic(zeroInc)
}
Expand Down Expand Up @@ -248,64 +252,72 @@ func (b Blas) Dgbmv(tA blas.Transpose, m, n, kL, kU int, alpha float64, a []floa
return
}

// i and j are indices of the compacted banded matrix.
// off is the offset into the dense matrix (off + j = densej)
ld := min(m, n)
nCol := kU + 1 + kL
if tA == blas.NoTrans {
jx := kx
if incY == 1 {
for j := 0; j < n; j++ {
if x[jx] != 0 {
temp := alpha * x[jx]
k := kU - j
for i := max(0, j-kU); i < min(m, j+kL+1); i++ {
y[i] += temp * a[k+i+j*lda]
}
iy := ky
if incX == 1 {
for i := 0; i < m; i++ {
l := max(0, kL-i)
u := min(nCol, ld+kL-i)
off := max(0, i-kL)
atmp := a[i*lda+l : i*lda+u]
xtmp := x[off : off+u-l]
var sum float64
for j, v := range atmp {
sum += xtmp[j] * v
}
jx += incX
y[iy] += sum * alpha
iy += incY
}
} else {
for j := 0; j < n; j++ {
if x[jx] != 0 {
temp := alpha * x[jx]
iy := ky
k := kU - j
for i := max(0, j-kU); i < min(m, j+kL+1); i++ {
y[iy] += temp * a[k+i+j*lda]
iy += incY
}
}
return
}
for i := 0; i < m; i++ {
l := max(0, kL-i)
u := min(nCol, ld+kL-i)
off := max(0, i-kL)
atmp := a[i*lda+l : i*lda+u]
jx := kx
var sum float64
for _, v := range atmp {
sum += x[off*incX+jx] * v
jx += incX
if j >= kU {
ky += incY
}
}
y[iy] += sum * alpha
iy += incY
}
} else {
jy := ky
if incX == 1 {
for j := 0; j < n; j++ {
temp := 0.0
k := kU - j
for i := max(0, j-kU); i < min(m, j+kL+1); i++ {
temp += a[k+i+j*lda] * x[i]
}
y[jy] += alpha * temp
jy += incY
}
} else {
for j := 0; j < n; j++ {
temp := 0.0
ix := kx
k := kU - j
for i := max(0, j-kU); i < min(m, j+kL+1); i++ {
temp += a[k+i+j*lda] * x[ix]
ix += incX
}
y[jy] += alpha * temp
return
}
if incX == 1 {
for i := 0; i < m; i++ {
l := max(0, kL-i)
u := min(nCol, ld+kL-i)
off := max(0, i-kL)
atmp := a[i*lda+l : i*lda+u]
tmp := alpha * x[i]
jy := ky
for _, v := range atmp {
y[jy+off*incY] += tmp * v
jy += incY
if j > kU {
kx += incX
}
}
}
return
}
ix := kx
for i := 0; i < m; i++ {
l := max(0, kL-i)
u := min(nCol, ld+kL-i)
off := max(0, i-kL)
atmp := a[i*lda+l : i*lda+u]
tmp := alpha * x[ix]
jy := ky
for _, v := range atmp {
y[jy+off*incY] += tmp * v
jy += incY
}
ix += incX
}
}

Expand Down
4 changes: 4 additions & 0 deletions goblas/level2double_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,7 @@ func TestDger(t *testing.T) {
func TestDtxmv(t *testing.T) {
testblas.DtxmvTest(t, blasser)
}

func TestDgbmv(t *testing.T) {
testblas.DgbmvTest(t, blasser)
}
76 changes: 76 additions & 0 deletions testblas/common.go
Original file line number Diff line number Diff line change
Expand Up @@ -137,3 +137,79 @@ func unflatten(a []float64, m, n int) [][]float64 {
}
return s
}

// flattenBanded turns a dense banded slice of slice into the compact banded matrix format
func flattenBanded(a [][]float64, ku, kl int) []float64 {
m := len(a)
n := len(a[0])
if ku < 0 || kl < 0 {
panic("testblas: negative band length")
}
// banded size is minimum of m and n because otherwise just have a bunch of zeros
nRows := m
if m < n {
nRows = n
}
nCols := (ku + kl + 1)
aflat := make([]float64, nRows*nCols)
for i := range aflat {
aflat[i] = math.NaN()
}
// loop over the rows, and then the bands
// elements in the ith row stay in the ith row
// order in bands is kept
for i := 0; i < nRows; i++ {
min := -kl
if i-kl < 0 {
min = -i
}
max := ku
if i+ku >= n {
max = n - i - 1
}
for j := min; j <= max; j++ {
col := kl + j
aflat[i*nCols+col] = a[i][i+j]
}
}
return aflat
}

// makeIncremented takes a slice with inc == 1 and makes an incremented version
// and adds extra values on the end
func makeIncremented(x []float64, inc int, extra int) []float64 {
if inc == 0 {
panic("zero inc")
}
absinc := inc
if absinc < 0 {
absinc = -inc
}
xcopy := make([]float64, len(x))
if inc > 0 {
copy(xcopy, x)
} else {
for i := 0; i < len(x); i++ {
xcopy[i] = x[len(x)-i-1]
}
}

// don't use NaN because it makes comparison hard
// Do use a weird unique value for easier debugging
counter := 100.0
var xnew []float64
for i, v := range xcopy {
xnew = append(xnew, v)
if i != len(x)-1 {
for j := 0; j < absinc-1; j++ {
xnew = append(xnew, counter)
counter++
}
}
}
for i := 0; i < extra; i++ {
xnew = append(xnew, counter)
counter++
}
return xnew
}

0 comments on commit fb08479

Please sign in to comment.