diff --git a/mat64/dense_arithmetic.go b/mat64/dense_arithmetic.go index 725e89c..aefa7c1 100644 --- a/mat64/dense_arithmetic.go +++ b/mat64/dense_arithmetic.go @@ -351,7 +351,6 @@ func (m *Dense) Dot(b Matrix) float64 { } // Mul takes the matrix product of a and b, placing the result in the receiver. -// If a or b are also the receiver, temporary workspace memory will be allocated. // // See the Muler interface for more information. func (m *Dense) Mul(a, b Matrix) { @@ -362,22 +361,22 @@ func (m *Dense) Mul(a, b Matrix) { panic(ErrShape) } + m.reuseAs(ar, bc) var w *Dense if m != a && m != b { w = m } else { - m.reuseAs(ar, bc) - w = &Dense{} + w = getWorkspace(ar, bc, false) + defer func() { + m.Copy(w) + putWorkspace(w) + }() } - w.reuseAs(ar, bc) if a, ok := a.(RawMatrixer); ok { if b, ok := b.(RawMatrixer); ok { amat, bmat := a.RawMatrix(), b.RawMatrix() blas64.Gemm(blas.NoTrans, blas.NoTrans, 1, amat, bmat, 0, w.mat) - if w != m { - m.Copy(w) - } return } } @@ -395,9 +394,6 @@ func (m *Dense) Mul(a, b Matrix) { ) } } - if w != m { - m.Copy(w) - } return } } @@ -415,14 +411,10 @@ func (m *Dense) Mul(a, b Matrix) { w.mat.Data[r*w.mat.Stride+c] = v } } - if w != m { - m.Copy(w) - } } // MulTrans takes the matrix product of a and b, optionally transposing each, -// and placing the result in the receiver. If a or b are also the receiver, -// temporary workspace memory will be allocated. +// and placing the result in the receiver. // // See the MulTranser interface for more information. func (m *Dense) MulTrans(a Matrix, aTrans bool, b Matrix, bTrans bool) { @@ -440,14 +432,17 @@ func (m *Dense) MulTrans(a Matrix, aTrans bool, b Matrix, bTrans bool) { panic(ErrShape) } + m.reuseAs(ar, bc) var w *Dense if m != a && m != b { w = m } else { - m.reuseAs(ar, bc) - w = &Dense{} + w = getWorkspace(ar, bc, false) + defer func() { + m.Copy(w) + putWorkspace(w) + }() } - w.reuseAs(ar, bc) if a, ok := a.(RawMatrixer); ok { if b, ok := b.(RawMatrixer); ok { @@ -483,10 +478,6 @@ func (m *Dense) MulTrans(a Matrix, aTrans bool, b Matrix, bTrans bool) { bmat := b.RawMatrix() blas64.Gemm(aOp, bOp, 1, amat, bmat, 0, w.mat) } - - if w != m { - m.Copy(w) - } return } } @@ -506,9 +497,6 @@ func (m *Dense) MulTrans(a Matrix, aTrans bool, b Matrix, bTrans bool) { ) } } - if w != m { - m.Copy(w) - } return } // TODO(jonlawlor): determine if (b*a)' is more efficient @@ -521,9 +509,6 @@ func (m *Dense) MulTrans(a Matrix, aTrans bool, b Matrix, bTrans bool) { ) } } - if w != m { - m.Copy(w) - } return } if bTrans { @@ -536,9 +521,6 @@ func (m *Dense) MulTrans(a Matrix, aTrans bool, b Matrix, bTrans bool) { ) } } - if w != m { - m.Copy(w) - } return } for r := 0; r < ar; r++ { @@ -550,9 +532,6 @@ func (m *Dense) MulTrans(a Matrix, aTrans bool, b Matrix, bTrans bool) { ) } } - if w != m { - m.Copy(w) - } return } } @@ -573,9 +552,6 @@ func (m *Dense) MulTrans(a Matrix, aTrans bool, b Matrix, bTrans bool) { dataTmp[c] = v } } - if w != m { - m.Copy(w) - } return } @@ -592,9 +568,6 @@ func (m *Dense) MulTrans(a Matrix, aTrans bool, b Matrix, bTrans bool) { dataTmp[c] = v } } - if w != m { - m.Copy(w) - } return } if bTrans { @@ -611,9 +584,6 @@ func (m *Dense) MulTrans(a Matrix, aTrans bool, b Matrix, bTrans bool) { dataTmp[c] = v } } - if w != m { - m.Copy(w) - } return } for r := 0; r < ar; r++ { @@ -629,9 +599,6 @@ func (m *Dense) MulTrans(a Matrix, aTrans bool, b Matrix, bTrans bool) { dataTmp[c] = v } } - if w != m { - m.Copy(w) - } } // Exp calculates the exponential of the matrix a, e^a, placing the result @@ -734,20 +701,25 @@ func (m *Dense) Pow(a Matrix, n int) { } // Perform iterative exponentiation by squaring in work space. - var w, s, x Dense - w.Clone(a) - s.Clone(a) + w := getWorkspace(r, r, false) + w.Copy(a) + s := getWorkspace(r, r, false) + s.Copy(a) + x := getWorkspace(r, r, false) for n--; n > 0; n >>= 1 { if n&1 != 0 { - x.Mul(&w, &s) + x.Mul(w, s) w, x = x, w } if n != 1 { - x.Mul(&s, &s) + x.Mul(s, s) s, x = x, s } } - m.Copy(&w) + m.Copy(w) + putWorkspace(w) + putWorkspace(s) + putWorkspace(x) } // Scale multiplies the elements of a by f, placing the result in the receiver. diff --git a/mat64/pool.go b/mat64/pool.go new file mode 100644 index 0000000..62faf6a --- /dev/null +++ b/mat64/pool.go @@ -0,0 +1,80 @@ +// Copyright ©2014 The gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package mat64 + +import ( + "sync" + + "github.com/gonum/blas/blas64" +) + +var tab64 = [64]byte{ + 0x3f, 0x00, 0x3a, 0x01, 0x3b, 0x2f, 0x35, 0x02, + 0x3c, 0x27, 0x30, 0x1b, 0x36, 0x21, 0x2a, 0x03, + 0x3d, 0x33, 0x25, 0x28, 0x31, 0x12, 0x1c, 0x14, + 0x37, 0x1e, 0x22, 0x0b, 0x2b, 0x0e, 0x16, 0x04, + 0x3e, 0x39, 0x2e, 0x34, 0x26, 0x1a, 0x20, 0x29, + 0x32, 0x24, 0x11, 0x13, 0x1d, 0x0a, 0x0d, 0x15, + 0x38, 0x2d, 0x19, 0x1f, 0x23, 0x10, 0x09, 0x0c, + 0x2c, 0x18, 0x0f, 0x08, 0x17, 0x07, 0x06, 0x05, +} + +// bits returns the ceiling of base 2 log of v. +// Approach based on http://stackoverflow.com/a/11398748. +func bits(v uint64) byte { + if v == 0 { + return 0 + } + v <<= 2 + v-- + v |= v >> 1 + v |= v >> 2 + v |= v >> 4 + v |= v >> 8 + v |= v >> 16 + v |= v >> 32 + return tab64[((v-(v>>1))*0x07EDD5E59A4E28C2)>>58] - 1 +} + +// pool contains size stratified workspace Dense pools. +// Each pool element i returns sized matrices with a data +// slice capped at 1< len: %d cap: %d", i, j, len(w.mat.Data), cap(w.mat.Data))) + } + w.Set(0, 0, math.NaN()) + for k := 0; k < 5; k++ { + putWorkspace(w) + } + for k := 0; k < 5; k++ { + w = getWorkspace(i, j, true) + m = NewDense(i, j, nil) + c.Check(w.mat, check.DeepEquals, m.mat) + c.Check(w.capRows, check.DeepEquals, m.capRows) + c.Check(w.capCols, check.DeepEquals, m.capCols) + c.Check(cap(w.mat.Data) < 2*len(w.mat.Data), check.Equals, true, check.Commentf("r: %d c: %d -> len: %d cap: %d", i, j, len(w.mat.Data), cap(w.mat.Data))) + } + } + } +} + +var benchmat *Dense + +func poolBenchmark(n, r, c int, clear bool) { + for i := 0; i < n; i++ { + benchmat = getWorkspace(r, c, clear) + putWorkspace(benchmat) + } +} + +func newBenchmark(n, r, c int) { + for i := 0; i < n; i++ { + benchmat = NewDense(r, c, nil) + } +} + +func BenchmarkPool10by10Uncleared(b *testing.B) { poolBenchmark(b.N, 10, 10, false) } +func BenchmarkPool10by10Cleared(b *testing.B) { poolBenchmark(b.N, 10, 10, true) } +func BenchmarkNew10by10(b *testing.B) { newBenchmark(b.N, 10, 10) } +func BenchmarkPool100by100Uncleared(b *testing.B) { poolBenchmark(b.N, 100, 100, false) } +func BenchmarkPool100by100Cleared(b *testing.B) { poolBenchmark(b.N, 100, 100, true) } +func BenchmarkNew100by100(b *testing.B) { newBenchmark(b.N, 100, 100) } + +func BenchmarkMulWorkspaceDense100Half(b *testing.B) { denseMulWorkspaceBench(b, 100, 0.5) } +func BenchmarkMulWorkspaceDense100Tenth(b *testing.B) { denseMulWorkspaceBench(b, 100, 0.1) } +func BenchmarkMulWorkspaceDense1000Half(b *testing.B) { denseMulWorkspaceBench(b, 1000, 0.5) } +func BenchmarkMulWorkspaceDense1000Tenth(b *testing.B) { denseMulWorkspaceBench(b, 1000, 0.1) } +func BenchmarkMulWorkspaceDense1000Hundredth(b *testing.B) { denseMulWorkspaceBench(b, 1000, 0.01) } +func BenchmarkMulWorkspaceDense1000Thousandth(b *testing.B) { denseMulWorkspaceBench(b, 1000, 0.001) } +func denseMulWorkspaceBench(b *testing.B, size int, rho float64) { + b.StopTimer() + a, _ := randDense(size, rho, rand.NormFloat64) + d, _ := randDense(size, rho, rand.NormFloat64) + b.StartTimer() + for i := 0; i < b.N; i++ { + a.Mul(a, d) + } +}