Skip to content
This repository has been archived by the owner on Dec 10, 2018. It is now read-only.

Commit

Permalink
mat64: implement temporary matrix pools for workspaces
Browse files Browse the repository at this point in the history
  • Loading branch information
kortschak committed May 28, 2015
1 parent c2e8216 commit 4229766
Show file tree
Hide file tree
Showing 3 changed files with 183 additions and 52 deletions.
76 changes: 24 additions & 52 deletions mat64/dense_arithmetic.go
Expand Up @@ -351,7 +351,6 @@ func (m *Dense) Dot(b Matrix) float64 {
}

// Mul takes the matrix product of a and b, placing the result in the receiver.
// If a or b are also the receiver, temporary workspace memory will be allocated.
//
// See the Muler interface for more information.
func (m *Dense) Mul(a, b Matrix) {
Expand All @@ -362,22 +361,22 @@ func (m *Dense) Mul(a, b Matrix) {
panic(ErrShape)
}

m.reuseAs(ar, bc)
var w *Dense
if m != a && m != b {
w = m
} else {
m.reuseAs(ar, bc)
w = &Dense{}
w = getWorkspace(ar, bc, false)
defer func() {
m.Copy(w)
putWorkspace(w)
}()
}
w.reuseAs(ar, bc)

if a, ok := a.(RawMatrixer); ok {
if b, ok := b.(RawMatrixer); ok {
amat, bmat := a.RawMatrix(), b.RawMatrix()
blas64.Gemm(blas.NoTrans, blas.NoTrans, 1, amat, bmat, 0, w.mat)
if w != m {
m.Copy(w)
}
return
}
}
Expand All @@ -395,9 +394,6 @@ func (m *Dense) Mul(a, b Matrix) {
)
}
}
if w != m {
m.Copy(w)
}
return
}
}
Expand All @@ -415,14 +411,10 @@ func (m *Dense) Mul(a, b Matrix) {
w.mat.Data[r*w.mat.Stride+c] = v
}
}
if w != m {
m.Copy(w)
}
}

// MulTrans takes the matrix product of a and b, optionally transposing each,
// and placing the result in the receiver. If a or b are also the receiver,
// temporary workspace memory will be allocated.
// and placing the result in the receiver.
//
// See the MulTranser interface for more information.
func (m *Dense) MulTrans(a Matrix, aTrans bool, b Matrix, bTrans bool) {
Expand All @@ -440,14 +432,17 @@ func (m *Dense) MulTrans(a Matrix, aTrans bool, b Matrix, bTrans bool) {
panic(ErrShape)
}

m.reuseAs(ar, bc)
var w *Dense
if m != a && m != b {
w = m
} else {
m.reuseAs(ar, bc)
w = &Dense{}
w = getWorkspace(ar, bc, false)
defer func() {
m.Copy(w)
putWorkspace(w)
}()
}
w.reuseAs(ar, bc)

if a, ok := a.(RawMatrixer); ok {
if b, ok := b.(RawMatrixer); ok {
Expand Down Expand Up @@ -483,10 +478,6 @@ func (m *Dense) MulTrans(a Matrix, aTrans bool, b Matrix, bTrans bool) {
bmat := b.RawMatrix()
blas64.Gemm(aOp, bOp, 1, amat, bmat, 0, w.mat)
}

if w != m {
m.Copy(w)
}
return
}
}
Expand All @@ -506,9 +497,6 @@ func (m *Dense) MulTrans(a Matrix, aTrans bool, b Matrix, bTrans bool) {
)
}
}
if w != m {
m.Copy(w)
}
return
}
// TODO(jonlawlor): determine if (b*a)' is more efficient
Expand All @@ -521,9 +509,6 @@ func (m *Dense) MulTrans(a Matrix, aTrans bool, b Matrix, bTrans bool) {
)
}
}
if w != m {
m.Copy(w)
}
return
}
if bTrans {
Expand All @@ -536,9 +521,6 @@ func (m *Dense) MulTrans(a Matrix, aTrans bool, b Matrix, bTrans bool) {
)
}
}
if w != m {
m.Copy(w)
}
return
}
for r := 0; r < ar; r++ {
Expand All @@ -550,9 +532,6 @@ func (m *Dense) MulTrans(a Matrix, aTrans bool, b Matrix, bTrans bool) {
)
}
}
if w != m {
m.Copy(w)
}
return
}
}
Expand All @@ -573,9 +552,6 @@ func (m *Dense) MulTrans(a Matrix, aTrans bool, b Matrix, bTrans bool) {
dataTmp[c] = v
}
}
if w != m {
m.Copy(w)
}
return
}

Expand All @@ -592,9 +568,6 @@ func (m *Dense) MulTrans(a Matrix, aTrans bool, b Matrix, bTrans bool) {
dataTmp[c] = v
}
}
if w != m {
m.Copy(w)
}
return
}
if bTrans {
Expand All @@ -611,9 +584,6 @@ func (m *Dense) MulTrans(a Matrix, aTrans bool, b Matrix, bTrans bool) {
dataTmp[c] = v
}
}
if w != m {
m.Copy(w)
}
return
}
for r := 0; r < ar; r++ {
Expand All @@ -629,9 +599,6 @@ func (m *Dense) MulTrans(a Matrix, aTrans bool, b Matrix, bTrans bool) {
dataTmp[c] = v
}
}
if w != m {
m.Copy(w)
}
}

// Exp calculates the exponential of the matrix a, e^a, placing the result
Expand Down Expand Up @@ -734,20 +701,25 @@ func (m *Dense) Pow(a Matrix, n int) {
}

// Perform iterative exponentiation by squaring in work space.
var w, s, x Dense
w.Clone(a)
s.Clone(a)
w := getWorkspace(r, r, false)
w.Copy(a)
s := getWorkspace(r, r, false)
s.Copy(a)
x := getWorkspace(r, r, false)
for n--; n > 0; n >>= 1 {
if n&1 != 0 {
x.Mul(&w, &s)
x.Mul(w, s)
w, x = x, w
}
if n != 1 {
x.Mul(&s, &s)
x.Mul(s, s)
s, x = x, s
}
}
m.Copy(&w)
m.Copy(w)
putWorkspace(w)
putWorkspace(s)
putWorkspace(x)
}

// Scale multiplies the elements of a by f, placing the result in the receiver.
Expand Down
80 changes: 80 additions & 0 deletions mat64/pool.go
@@ -0,0 +1,80 @@
// Copyright ©2014 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package mat64

import (
"sync"

"github.com/gonum/blas/blas64"
)

var tab64 = [64]byte{
0x3f, 0x00, 0x3a, 0x01, 0x3b, 0x2f, 0x35, 0x02,
0x3c, 0x27, 0x30, 0x1b, 0x36, 0x21, 0x2a, 0x03,
0x3d, 0x33, 0x25, 0x28, 0x31, 0x12, 0x1c, 0x14,
0x37, 0x1e, 0x22, 0x0b, 0x2b, 0x0e, 0x16, 0x04,
0x3e, 0x39, 0x2e, 0x34, 0x26, 0x1a, 0x20, 0x29,
0x32, 0x24, 0x11, 0x13, 0x1d, 0x0a, 0x0d, 0x15,
0x38, 0x2d, 0x19, 0x1f, 0x23, 0x10, 0x09, 0x0c,
0x2c, 0x18, 0x0f, 0x08, 0x17, 0x07, 0x06, 0x05,
}

// bits returns the ceiling of base 2 log of v.
// Approach based on http://stackoverflow.com/a/11398748.
func bits(v uint64) byte {
if v == 0 {
return 0
}
v <<= 2
v--
v |= v >> 1
v |= v >> 2
v |= v >> 4
v |= v >> 8
v |= v >> 16
v |= v >> 32
return tab64[((v-(v>>1))*0x07EDD5E59A4E28C2)>>58] - 1
}

// pool contains size stratified workspace Dense pools.
// Each pool element i returns sized matrices with a data
// slice capped at 1<<i.
var pool [63]sync.Pool

func init() {
for i := range pool {
l := 1 << uint(i)
pool[i].New = func() interface{} {
return &Dense{mat: blas64.General{
Data: make([]float64, l),
}}
}
}
}

// getWorkspace returns a *Dense of size r×c and a data slice
// with a cap that is less than 2*r*c. If clear is true, the
// data slice visible through the Matrix interface is zeroed.
func getWorkspace(r, c int, clear bool) *Dense {
l := uint64(r * c)
w := pool[bits(l)].Get().(*Dense)
w.mat.Data = w.mat.Data[:l]
if clear {
zero(w.mat.Data)
}
w.mat.Rows = r
w.mat.Cols = c
w.mat.Stride = c
w.capRows = r
w.capCols = c
return w
}

// putWorkspace replaces a used *Dense into the appropriate size
// workspace pool. putWorkspace must not be called with a matrix
// where references to the underlying data slice has been kept.
func putWorkspace(w *Dense) {
pool[bits(uint64(cap(w.mat.Data)))].Put(w)
}
79 changes: 79 additions & 0 deletions mat64/pool_test.go
@@ -0,0 +1,79 @@
// Copyright ©2014 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package mat64

import (
"math"
"math/rand"
"testing"

"gopkg.in/check.v1"
)

func (s *S) TestPool(c *check.C) {
for i := 1; i < 10; i++ {
for j := 1; j < 10; j++ {
var w, m *Dense
for k := 0; k < 5; k++ {
w = getWorkspace(i, j, true)
m = NewDense(i, j, nil)
c.Check(w.mat, check.DeepEquals, m.mat)
c.Check(w.capRows, check.DeepEquals, m.capRows)
c.Check(w.capCols, check.DeepEquals, m.capCols)
c.Check(cap(w.mat.Data) < 2*len(w.mat.Data), check.Equals, true, check.Commentf("r: %d c: %d -> len: %d cap: %d", i, j, len(w.mat.Data), cap(w.mat.Data)))
}
w.Set(0, 0, math.NaN())
for k := 0; k < 5; k++ {
putWorkspace(w)
}
for k := 0; k < 5; k++ {
w = getWorkspace(i, j, true)
m = NewDense(i, j, nil)
c.Check(w.mat, check.DeepEquals, m.mat)
c.Check(w.capRows, check.DeepEquals, m.capRows)
c.Check(w.capCols, check.DeepEquals, m.capCols)
c.Check(cap(w.mat.Data) < 2*len(w.mat.Data), check.Equals, true, check.Commentf("r: %d c: %d -> len: %d cap: %d", i, j, len(w.mat.Data), cap(w.mat.Data)))
}
}
}
}

var benchmat *Dense

func poolBenchmark(n, r, c int, clear bool) {
for i := 0; i < n; i++ {
benchmat = getWorkspace(r, c, clear)
putWorkspace(benchmat)
}
}

func newBenchmark(n, r, c int) {
for i := 0; i < n; i++ {
benchmat = NewDense(r, c, nil)
}
}

func BenchmarkPool10by10Uncleared(b *testing.B) { poolBenchmark(b.N, 10, 10, false) }
func BenchmarkPool10by10Cleared(b *testing.B) { poolBenchmark(b.N, 10, 10, true) }
func BenchmarkNew10by10(b *testing.B) { newBenchmark(b.N, 10, 10) }
func BenchmarkPool100by100Uncleared(b *testing.B) { poolBenchmark(b.N, 100, 100, false) }
func BenchmarkPool100by100Cleared(b *testing.B) { poolBenchmark(b.N, 100, 100, true) }
func BenchmarkNew100by100(b *testing.B) { newBenchmark(b.N, 100, 100) }

func BenchmarkMulWorkspaceDense100Half(b *testing.B) { denseMulWorkspaceBench(b, 100, 0.5) }
func BenchmarkMulWorkspaceDense100Tenth(b *testing.B) { denseMulWorkspaceBench(b, 100, 0.1) }
func BenchmarkMulWorkspaceDense1000Half(b *testing.B) { denseMulWorkspaceBench(b, 1000, 0.5) }
func BenchmarkMulWorkspaceDense1000Tenth(b *testing.B) { denseMulWorkspaceBench(b, 1000, 0.1) }
func BenchmarkMulWorkspaceDense1000Hundredth(b *testing.B) { denseMulWorkspaceBench(b, 1000, 0.01) }
func BenchmarkMulWorkspaceDense1000Thousandth(b *testing.B) { denseMulWorkspaceBench(b, 1000, 0.001) }
func denseMulWorkspaceBench(b *testing.B, size int, rho float64) {
b.StopTimer()
a, _ := randDense(size, rho, rand.NormFloat64)
d, _ := randDense(size, rho, rand.NormFloat64)
b.StartTimer()
for i := 0; i < b.N; i++ {
a.Mul(a, d)
}
}

0 comments on commit 4229766

Please sign in to comment.