Fixed implementation of dgbmv and added tests.

The old implementation of dgbmv was written in column-major style and had a few bugs. This writes it as row-major and adds a more robust test suite
gonum · Dec 2, 2014 · fb08479 · fb08479
1 parent 33f78a2
commit fb08479
Show file tree

Hide file tree

Showing 5 changed files with 250 additions and 110 deletions.
diff --git a/cblas/level2double_test.go b/cblas/level2double_test.go
@@ -17,3 +17,7 @@ func TestDger(t *testing.T) {
 func TestDtxmv(t *testing.T) {
 	testblas.DtxmvTest(t, blasser)
 }
+
+func TestDgbmv(t *testing.T) {
+	testblas.DgbmvTest(t, blasser)
+}
diff --git a/goblas/level2double.go b/goblas/level2double.go
@@ -20,6 +20,10 @@ const (
 	badLdaRow    string = "lda must be greater than max(1,n) for row major"
 	badLdaCol    string = "lda must be greater than max(1,m) for col major"
 	badLda       string = "lda must be greater than max(1,n)"
+
+	badLdaGenBand string = "goblas: lda must be greater than 1 + kL + kU for general banded"
+	kLLT0         string = "goblas: kL < 0"
+	kULT0         string = "goblas: kU < 0"
 )
 
 func max(a, b int) int {
@@ -183,16 +187,11 @@ func (Blas) Dger(m, n int, alpha float64, x []float64, incX int, y []float64, in
 
 }
 
+// Dgbmv performs y = alpha*A*x + beta*y where a is an mxn band matrix with
+// kl subdiagonals and ku super-diagonals. m and n refer to the size of the full
+// dense matrix, not the actual number of rows and columns in the contracted banded
+// matrix.
 func (b Blas) Dgbmv(tA blas.Transpose, m, n, kL, kU int, alpha float64, a []float64, lda int, x []float64, incX int, beta float64, y []float64, incY int) {
-	// Transform for row major
-	m, n = n, m
-	kU, kL = kL, kU
-	if tA == blas.NoTrans {
-		tA = blas.Trans
-	} else {
-		tA = blas.NoTrans
-	}
-
 	if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans {
 		panic(badTranspose)
 	}
@@ -202,10 +201,15 @@ func (b Blas) Dgbmv(tA blas.Transpose, m, n, kL, kU int, alpha float64, a []floa
 	if n < 0 {
 		panic(nLT0)
 	}
-	if lda < max(1, n) {
-		panic(badLdaRow)
+	if kL < 0 {
+		panic(kLLT0)
+	}
+	if kL < 0 {
+		panic(kULT0)
+	}
+	if lda < kL+kU+1 {
+		panic(badLdaGenBand)
 	}
-
 	if incX == 0 {
 		panic(zeroInc)
 	}
@@ -248,64 +252,72 @@ func (b Blas) Dgbmv(tA blas.Transpose, m, n, kL, kU int, alpha float64, a []floa
 		return
 	}
 
+	// i and j are indices of the compacted banded matrix.
+	// off is the offset into the dense matrix (off + j = densej)
+	ld := min(m, n)
+	nCol := kU + 1 + kL
 	if tA == blas.NoTrans {
-		jx := kx
-		if incY == 1 {
-			for j := 0; j < n; j++ {
-				if x[jx] != 0 {
-					temp := alpha * x[jx]
-					k := kU - j
-					for i := max(0, j-kU); i < min(m, j+kL+1); i++ {
-						y[i] += temp * a[k+i+j*lda]
-					}
+		iy := ky
+		if incX == 1 {
+			for i := 0; i < m; i++ {
+				l := max(0, kL-i)
+				u := min(nCol, ld+kL-i)
+				off := max(0, i-kL)
+				atmp := a[i*lda+l : i*lda+u]
+				xtmp := x[off : off+u-l]
+				var sum float64
+				for j, v := range atmp {
+					sum += xtmp[j] * v
 				}
-				jx += incX
+				y[iy] += sum * alpha
+				iy += incY
 			}
-		} else {
-			for j := 0; j < n; j++ {
-				if x[jx] != 0 {
-					temp := alpha * x[jx]
-					iy := ky
-					k := kU - j
-					for i := max(0, j-kU); i < min(m, j+kL+1); i++ {
-						y[iy] += temp * a[k+i+j*lda]
-						iy += incY
-					}
-				}
+			return
+		}
+		for i := 0; i < m; i++ {
+			l := max(0, kL-i)
+			u := min(nCol, ld+kL-i)
+			off := max(0, i-kL)
+			atmp := a[i*lda+l : i*lda+u]
+			jx := kx
+			var sum float64
+			for _, v := range atmp {
+				sum += x[off*incX+jx] * v
 				jx += incX
-				if j >= kU {
-					ky += incY
-				}
 			}
+			y[iy] += sum * alpha
+			iy += incY
 		}
-	} else {
-		jy := ky
-		if incX == 1 {
-			for j := 0; j < n; j++ {
-				temp := 0.0
-				k := kU - j
-				for i := max(0, j-kU); i < min(m, j+kL+1); i++ {
-					temp += a[k+i+j*lda] * x[i]
-				}
-				y[jy] += alpha * temp
-				jy += incY
-			}
-		} else {
-			for j := 0; j < n; j++ {
-				temp := 0.0
-				ix := kx
-				k := kU - j
-				for i := max(0, j-kU); i < min(m, j+kL+1); i++ {
-					temp += a[k+i+j*lda] * x[ix]
-					ix += incX
-				}
-				y[jy] += alpha * temp
+		return
+	}
+	if incX == 1 {
+		for i := 0; i < m; i++ {
+			l := max(0, kL-i)
+			u := min(nCol, ld+kL-i)
+			off := max(0, i-kL)
+			atmp := a[i*lda+l : i*lda+u]
+			tmp := alpha * x[i]
+			jy := ky
+			for _, v := range atmp {
+				y[jy+off*incY] += tmp * v
 				jy += incY
-				if j > kU {
-					kx += incX
-				}
 			}
 		}
+		return
+	}
+	ix := kx
+	for i := 0; i < m; i++ {
+		l := max(0, kL-i)
+		u := min(nCol, ld+kL-i)
+		off := max(0, i-kL)
+		atmp := a[i*lda+l : i*lda+u]
+		tmp := alpha * x[ix]
+		jy := ky
+		for _, v := range atmp {
+			y[jy+off*incY] += tmp * v
+			jy += incY
+		}
+		ix += incX
 	}
 }
 

diff --git a/goblas/level2double_test.go b/goblas/level2double_test.go
@@ -17,3 +17,7 @@ func TestDger(t *testing.T) {
 func TestDtxmv(t *testing.T) {
 	testblas.DtxmvTest(t, blasser)
 }
+
+func TestDgbmv(t *testing.T) {
+	testblas.DgbmvTest(t, blasser)
+}
diff --git a/testblas/common.go b/testblas/common.go
@@ -137,3 +137,79 @@ func unflatten(a []float64, m, n int) [][]float64 {
 	}
 	return s
 }
+
+// flattenBanded turns a dense banded slice of slice into the compact banded matrix format
+func flattenBanded(a [][]float64, ku, kl int) []float64 {
+	m := len(a)
+	n := len(a[0])
+	if ku < 0 || kl < 0 {
+		panic("testblas: negative band length")
+	}
+	// banded size is minimum of m and n because otherwise just have a bunch of zeros
+	nRows := m
+	if m < n {
+		nRows = n
+	}
+	nCols := (ku + kl + 1)
+	aflat := make([]float64, nRows*nCols)
+	for i := range aflat {
+		aflat[i] = math.NaN()
+	}
+	// loop over the rows, and then the bands
+	// elements in the ith row stay in the ith row
+	// order in bands is kept
+	for i := 0; i < nRows; i++ {
+		min := -kl
+		if i-kl < 0 {
+			min = -i
+		}
+		max := ku
+		if i+ku >= n {
+			max = n - i - 1
+		}
+		for j := min; j <= max; j++ {
+			col := kl + j
+			aflat[i*nCols+col] = a[i][i+j]
+		}
+	}
+	return aflat
+}
+
+// makeIncremented takes a slice with inc == 1 and makes an incremented version
+// and adds extra values on the end
+func makeIncremented(x []float64, inc int, extra int) []float64 {
+	if inc == 0 {
+		panic("zero inc")
+	}
+	absinc := inc
+	if absinc < 0 {
+		absinc = -inc
+	}
+	xcopy := make([]float64, len(x))
+	if inc > 0 {
+		copy(xcopy, x)
+	} else {
+		for i := 0; i < len(x); i++ {
+			xcopy[i] = x[len(x)-i-1]
+		}
+	}
+
+	// don't use NaN because it makes comparison hard
+	// Do use a weird unique value for easier debugging
+	counter := 100.0
+	var xnew []float64
+	for i, v := range xcopy {
+		xnew = append(xnew, v)
+		if i != len(x)-1 {
+			for j := 0; j < absinc-1; j++ {
+				xnew = append(xnew, counter)
+				counter++
+			}
+		}
+	}
+	for i := 0; i < extra; i++ {
+		xnew = append(xnew, counter)
+		counter++
+	}
+	return xnew
+}