From 8c8cfcc32ebf731dabf4e19e57a7d64cfc615b5b Mon Sep 17 00:00:00 2001 From: btracey Date: Mon, 12 Jan 2015 10:46:47 -0800 Subject: [PATCH 1/2] Optimized Inner with the new assembly code --- mat64/bench_test.go | 10 ++++++++++ mat64/inner.go | 6 ++++-- mat64/inner_test.go | 31 +++++++++++++++++++++++++++++++ 3 files changed, 45 insertions(+), 2 deletions(-) create mode 100644 mat64/bench_test.go diff --git a/mat64/bench_test.go b/mat64/bench_test.go new file mode 100644 index 0000000..4a91a0e --- /dev/null +++ b/mat64/bench_test.go @@ -0,0 +1,10 @@ +package mat64 + +import "github.com/gonum/blas/testblas" + +const ( + Sm = testblas.SmallMat + Med = testblas.MediumMat + Lg = testblas.LargeMat + Huge = testblas.HugeMat +) diff --git a/mat64/inner.go b/mat64/inner.go index 154acad..63144b2 100644 --- a/mat64/inner.go +++ b/mat64/inner.go @@ -4,6 +4,8 @@ package mat64 +import "github.com/gonum/internal/asm" + // Inner computes the generalized inner product // x^T A y // between vectors x and y with matrix A. This is only a true inner product if @@ -28,8 +30,8 @@ func Inner(x []float64, A Matrix, y []float64) float64 { case RawMatrixer: bmat := b.RawMatrix() for i, xi := range x { - for j, yj := range y { - sum += xi * bmat.Data[i*bmat.Stride+j] * yj + if xi != 0 { + sum += xi * asm.DdotUnitary(bmat.Data[i*bmat.Stride:i*bmat.Stride+n], y) } } default: diff --git a/mat64/inner_test.go b/mat64/inner_test.go index 829f90a..af3b3d9 100644 --- a/mat64/inner_test.go +++ b/mat64/inner_test.go @@ -5,6 +5,9 @@ package mat64 import ( + "testing" + + "github.com/gonum/blas/blas64" "gopkg.in/check.v1" ) @@ -73,3 +76,31 @@ func (s *S) TestInner(c *check.C) { c.Check(want, check.Equals, got, check.Commentf("Test %v: want %v, got %v", i, want, got)) } } + +func benchmarkInner(b *testing.B, m, n int) { + x := make([]float64, m) + randomSlice(x) + y := make([]float64, n) + randomSlice(y) + data := make([]float64, m*n) + randomSlice(data) + for i := 0; i < b.N; i++ { + Inner(x, &Dense{blas64.General{Rows: m, Cols: n, Stride: n, Data: data}}, y) + } +} + +func BenchmarkInnerSmSm(b *testing.B) { + benchmarkInner(b, Sm, Sm) +} + +func BenchmarkInnerMedMed(b *testing.B) { + benchmarkInner(b, Med, Med) +} + +func BenchmarkInnerLgLg(b *testing.B) { + benchmarkInner(b, Lg, Lg) +} + +func BenchmarkInnerLgSm(b *testing.B) { + benchmarkInner(b, Lg, Sm) +} From 07a5cb547e252dc8e13ed8a6450d3a8a4ce72903 Mon Sep 17 00:00:00 2001 From: btracey Date: Mon, 12 Jan 2015 10:52:07 -0800 Subject: [PATCH 2/2] Allocate the matrix in advance and reset the timer for a fairer benchmark --- mat64/inner_test.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/mat64/inner_test.go b/mat64/inner_test.go index af3b3d9..a26d214 100644 --- a/mat64/inner_test.go +++ b/mat64/inner_test.go @@ -84,8 +84,10 @@ func benchmarkInner(b *testing.B, m, n int) { randomSlice(y) data := make([]float64, m*n) randomSlice(data) + mat := &Dense{blas64.General{Rows: m, Cols: n, Stride: n, Data: data}} + b.ResetTimer() for i := 0; i < b.N; i++ { - Inner(x, &Dense{blas64.General{Rows: m, Cols: n, Stride: n, Data: data}}, y) + Inner(x, mat, y) } }