Skip to content

Commit

Permalink
Merge pull request Theano#7 from bouthilx/sparse_block_meta_opts
Browse files Browse the repository at this point in the history
WIP: Do not merge yet.
  • Loading branch information
adbrebs committed May 21, 2015
2 parents 58d45a9 + 3820721 commit 28c9a7e
Show file tree
Hide file tree
Showing 9 changed files with 429 additions and 349 deletions.
1 change: 1 addition & 0 deletions theano/sandbox/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from . import opt
81 changes: 48 additions & 33 deletions theano/sandbox/blocksparse.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,12 @@

import theano
from theano import Op, Apply
import theano.tensor as T
from theano import tensor
from theano.tensor import discrete_dtypes
from theano.gradient import grad_undefined


class SparseBlockGemv(Op):

register_opt = defaultdict(list)

"""
This op computes the dot product of specified pieces of vectors
and matrices, returning pieces of vectors.
Expand All @@ -20,23 +17,27 @@ class SparseBlockGemv(Op):
The i and j are taken from the inputIdx and outputIdx lists
respectively.
"""

registered_opts = []

def __init__(self, inplace=False):
    """
    Record whether this instance is the in-place variant of the op.

    Parameters
    ----------
    inplace : bool
        When true, the op is flagged as in-place and ``destroy_map`` is
        set accordingly. Defaults to False.
    """
    # Honor the argument: it used to be discarded and the attribute was
    # hard-coded to False, so SparseBlockGemv(True) was never in-place.
    self.inplace = inplace
    if self.inplace:
        # Same declaration SparseBlockOuter.__init__ makes for its
        # in-place form: output 0 reuses input 0.
        self.destroy_map = {0: [0]}

def make_node(self, o, W, h, inputIdx, outputIdx):
"""
Compute the dot product (plus bias) of the specified pieces of vectors
and matrices.
Compute the dot product of the specified pieces of vectors
and matrices.
Parameters
----------
var: shape, comment
W: (iBlocks, oBlocks, iSize, oSize), weight matrix
h: (batch, iWin, iSize), input from lower layer (sparse)
inputIdx: (batch, iWin), indexes of the input blocks
b: (oBlocks, oSize), bias vector
o: (batch, oWin, oSize) output vector
outputIdx: (batch, oWin), indexes of the output blocks
returns (batch, oWin, oSize), dot(W[i, j], h[i]) + b[j]
but b[j] is only added once
returns (batch, oWin, oSize), dot(W[i, j], h[i]) + o[j]
Notation
--------
- `batch` is the number of examples in a minibatch (batch size).
Expand All @@ -56,7 +57,7 @@ def make_node(self, o, W, h, inputIdx, outputIdx):
outputIdx = theano.tensor.as_tensor_variable(outputIdx)

if o.ndim != 3:
raise TypeError('The output o must be a 3D tensor')
raise TypeError('The output o must be a 2D tensor')
if W.ndim != 4:
raise TypeError('The weight matrix W must be a 4D tensor')
if h.ndim != 3:
Expand All @@ -69,7 +70,10 @@ def make_node(self, o, W, h, inputIdx, outputIdx):
assert inputIdx.type.dtype in discrete_dtypes
assert outputIdx.type.dtype in discrete_dtypes

return Apply(self, [o, W, h, inputIdx, outputIdx], [o.type()])
output = o.type.__class__(dtype=o.type.dtype,
broadcastable=(False,)*o.ndim)()

return Apply(self, [o, W, h, inputIdx, outputIdx], [output])

def perform(self, node, inp, out_):
    """
    Always fail: no Python implementation is provided for this op.

    Raises
    ------
    NotImplementedError
        Unconditionally; the arguments are not inspected.
    """
    message = 'Optimization of SparseBlockGemv failed.'
    raise NotImplementedError(message)
Expand All @@ -79,11 +83,11 @@ def grad(self, inputs, grads):
go = grads[0]

Wgrad = sparse_block_outer(W.zeros_like(),
h, go, inputIdx, outputIdx)
h, go, inputIdx, outputIdx)
hgrad = sparse_block_gemv(h.zeros_like(),
W.dimshuffle((1, 0, 3, 2)),
go,
outputIdx, inputIdx)
W.dimshuffle((1, 0, 3, 2)),
go,
outputIdx, inputIdx)
return [go, Wgrad, hgrad,
grad_undefined(self, 3, inputIdx,
"grad of inputIdx makes no sense"),
Expand All @@ -106,28 +110,39 @@ class SparseBlockOuter(Op):
subject to change without notice. It is involved in the gradient
of SparseBlockGemv.
"""

registered_opts = []

def __init__(self, inplace=False):
    """
    Store the ``inplace`` flag and, when it is truthy, set ``destroy_map``.

    Parameters
    ----------
    inplace : bool
        Whether this instance is the in-place variant. Defaults to False.
    """
    self.inplace = inplace
    if not self.inplace:
        # Not in-place: no destroy_map attribute is set on the instance.
        return
    self.destroy_map = {0: [0]}

def make_node(self, o, x, y, xIdx, yIdx, alpha=None):
"""
o: (iBlocks, oBlocks, iSize, oSize), weight matrix
TODO: WRITEME
"""
one = T.constant(numpy.asarray(1.0, dtype='float32'))
o = basic_ops.as_cuda_ndarray_variable(o)
x = basic_ops.as_cuda_ndarray_variable(x)
y = basic_ops.as_cuda_ndarray_variable(y)
one = tensor.constant(numpy.asarray(1.0, dtype='float32'))
o = theano.tensor.as_tensor_variable(o)
x = theano.tensor.as_tensor_variable(x)
y = theano.tensor.as_tensor_variable(y)

if alpha is None:
alpha = one

output = o.type.__class__(dtype=o.type.dtype,
broadcastable=(False,)*o.ndim)()

return Apply(self, [o, x, y, xIdx, yIdx, alpha],
[o.type()])
[output])

def perform(self, node, inp, out_):
    """
    Always fail: no Python implementation is provided for this op.

    Raises
    ------
    NotImplementedError
        Unconditionally; the arguments are not inspected.
    """
    message = 'Optimization of SparseBlockOuter failed.'
    raise NotImplementedError(message)

def grad(self, inputs, output_gradients):
    """
    Return a list with one entry per element of ``inputs``.

    A single ``MetaGradSparseBlockGemv`` object is built from
    ``output_gradients`` and then called on each input in order.
    """
    # TODO!
    # NOTE(review): MetaGradSparseBlockGemv is not defined in the visible
    # part of this file -- confirm where it comes from.
    grad_builder = MetaGradSparseBlockGemv(output_gradients)
    results = []
    for current in inputs:
        results.append(grad_builder(current))
    return results

Expand All @@ -136,7 +151,7 @@ def grad(self, inputs, output_gradients):
sparse_block_outer_inplace = SparseBlockOuter(True)


def sparse_block_gemv_cpu(W, h, inputIdx, bias, outputIdx):
def cpu_sparse_block_gemv(o, W, h, inputIdx, outputIdx):
"""
Creates a graph for the sparse block dot operation. Check SparseBlockGemv's
docstring for information about the arguments.
Expand All @@ -146,32 +161,32 @@ def _loop_over_batch(b, W, h, inputIdx, outputIdx):
def _loop_over_outputIdx(i, b, W, h, inputIdx, outputIdx):

def _loop_over_inputIdx(j, b, i, W, h, inputIdx, outputIdx):
return T.dot(h[b, j, :], W[inputIdx[b, j],
return tensor.dot(h[b, j, :], W[inputIdx[b, j],
outputIdx[b, i], :, :])

res3 = theano.scan(fn=_loop_over_inputIdx,
sequences=T.arange(0, inputIdx.shape[1]),
sequences=tensor.arange(0, inputIdx.shape[1]),
non_sequences=[b, i, W, h, inputIdx, outputIdx],
name='_loop_over_inputIdx')[0]

return res3.sum(axis=0)

res2 = theano.scan(fn=_loop_over_outputIdx,
sequences=T.arange(0, outputIdx.shape[1]),
sequences=tensor.arange(0, outputIdx.shape[1]),
non_sequences=[b, W, h, inputIdx, outputIdx],
name='_loop_over_outputIdx')[0]

return res2

res1 = theano.scan(fn=_loop_over_batch,
sequences=T.arange(0, inputIdx.shape[0]),
sequences=tensor.arange(0, inputIdx.shape[0]),
non_sequences=[W, h, inputIdx, outputIdx],
name='_loop_over_batch')[0]

return res1 + bias.take(outputIdx, axis=0)
return res1 + o


def sparse_block_outer_cpu(x, y, xIdx, yIdx, alpha=1.0):
def cpu_sparse_block_outer(o, x, y, xIdx, yIdx, alpha=1.0):
"""
Creates a graph for the sparse block outer operation. Check
SparseBlockOuter's docstring for information about the arguments.
Expand All @@ -182,28 +197,28 @@ def _loop_over_batch(b, x, y, xIdx, yIdx):
def _loop_over_outputIdx(i, b, x, y, xIdx, yIdx):

def _loop_over_inputIdx(j, i, b, x, y, xIdx, yIdx):
return T.outer(y[b, yIdx[b, i], :], x[b, xIdx[b, j], :])
return tensor.outer(y[b, yIdx[b, i], :], x[b, xIdx[b, j], :])

res3 = theano.scan(fn=_loop_over_inputIdx,
sequences=T.arange(0, xIdx.shape[1]),
sequences=tensor.arange(0, xIdx.shape[1]),
non_sequences=[i, b, x, y, xIdx, yIdx],
name='_loop_over_inputIdx')[0]

return res3

res2 = theano.scan(fn=_loop_over_outputIdx,
sequences=T.arange(0, yIdx.shape[1]),
sequences=tensor.arange(0, yIdx.shape[1]),
non_sequences=[b, x, y, xIdx, yIdx],
name='_loop_over_outputIdx')[0]

return res2

res1 = theano.scan(fn=_loop_over_batch,
sequences=T.arange(0, xIdx.shape[0]),
sequences=tensor.arange(0, xIdx.shape[0]),
non_sequences=[x, y, xIdx, yIdx],
name='_loop_over_batch')[0]

return alpha * res1.sum(axis=0)
return alpha * res1.sum(axis=0) + o


def sparse_block_dot(W, h, inputIdx, b, outputIdx, inplace=False):
Expand Down Expand Up @@ -239,4 +254,4 @@ def sparse_block_dot(W, h, inputIdx, b, outputIdx, inplace=False):
inputIdx = inputIdx.dimshuffle('x', 0)
outputIdx = outputIdx.dimshuffle('x', 0)
return SparseBlockGemv(inplace)(b.take(outputIdx, axis=0), W, h,
inputIdx, outputIdx)
inputIdx, outputIdx)
39 changes: 4 additions & 35 deletions theano/sandbox/cuda/blocksparse.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import logging

import numpy
import theano
from theano import Apply, tensor, scalar
Expand All @@ -10,10 +12,8 @@
_logger = logging.getLogger('theano.sandbox.cuda.blocksparse')

if cuda_available:
from theano.sandbox.cuda import (basic_ops,
opt, GpuFromHost,
HostFromGpu, host_from_gpu,
GpuDimShuffle)

from theano.sandbox.cuda import basic_ops
from theano.sandbox.cuda.opt_util import alpha_merge, output_merge


Expand Down Expand Up @@ -635,37 +635,6 @@ def c_code_cache_version(self):
gpu_sparse_block_outer_inplace = GpuSparseBlockOuter(True)


if cuda_available:
@opt.register_opt()
@opt.local_optimizer([gpu_sparse_block_gemv], inplace=True)
def local_inplace_blocksparse_gemv(node):
    """
    Replace a node whose op equals ``gpu_sparse_block_gemv`` with
    ``gpu_sparse_block_gemv_inplace`` applied to the same inputs.

    Returns a one-element list holding the replacement, or None when the
    node's op does not match.
    """
    is_target = node.op == gpu_sparse_block_gemv
    if not is_target:
        return None
    replacement = gpu_sparse_block_gemv_inplace(*node.inputs)
    return [replacement]

@opt.register_opt()
@opt.local_optimizer([gpu_sparse_block_outer], inplace=True)
def local_inplace_blocksparse_outer(node):
    """
    Replace a node whose op equals ``gpu_sparse_block_outer`` with
    ``gpu_sparse_block_outer_inplace`` applied to the same inputs.

    Returns a one-element list holding the replacement, or None when the
    node's op does not match.
    """
    is_target = node.op == gpu_sparse_block_outer
    if not is_target:
        return None
    replacement = gpu_sparse_block_outer_inplace(*node.inputs)
    return [replacement]

# XXX: these optimisations were badly broken and now require a working
# beta param (could only be a 0/1 thing for outer_merge, but
# alpha_merge needs the full range).

# @opt.register_opt()
# @alpha_merge(GpuSparseBlockOuter, alpha_in=5, beta_in=?, nd=4)
# def local_merge_blocksparse_alpha(node, *inputs):
# """
# GpuElemwise{mul}(lr, GpuSparseBlockOuter) -> GpuSparseBlockOuter(..., alpha=lr)
# """
# return [gpu_sparse_block_outer(*inputs)]

# @opt.register_opt()
# @output_merge(GpuSparseBlockOuter, alpha_in=5, beta_in=? out_in=0, nd=4)
# def local_merge_blocksparse_output(node, *inputs):
# return [gpu_sparse_block_outer(*inputs)]


def sparse_block_dot_SS(W, h, inputIdx, b, outputIdx):
"""
Compute the dot product (plus bias) of the specified pieces of vectors
Expand Down

0 comments on commit 28c9a7e

Please sign in to comment.