diff --git a/theano/tensor/signal/downsample.py b/theano/tensor/signal/downsample.py index 2081d4e7c64..87d2cd1eb7e 100644 --- a/theano/tensor/signal/downsample.py +++ b/theano/tensor/signal/downsample.py @@ -68,7 +68,7 @@ class DownsampleFactorMax(Op): """ @staticmethod - def out_shape(imgshape, ds, st, ignore_border=False): + def out_shape(imgshape, ds, ignore_border=False, st=None): """Return the shape of the output from this op, for input of given shape and flags. @@ -96,18 +96,43 @@ def out_shape(imgshape, ds, st, ignore_border=False): if len(imgshape) < 2: raise TypeError('imgshape must have at least two elements ' '(rows, cols)') + + if st == None: + st = ds r, c = imgshape[-2:] - rval = list(imgshape[:-2]) + [(r - ds[0]) // st[0] + 1, (c - ds[1]) // st[1] + 1] + if st[0] >= ds[0]: + nr = r // st[0] + else: + nr = (r - ds[0]) // st[0] + 1 + + if st[1] >= ds[1]: + nc = c // st[1] + else: + nc = (c - ds[1]) // st[1] + 1 + rval = list(imgshape[:-2]) + [nr, nc] if not ignore_border: - if isinstance(r, theano.Variable): - rval[-2] = tensor.switch((r - ds[0]) % st[0], rval[-2] + 1, rval[-2]) - elif (r - ds[0]) % st[0]: - rval[-2] += 1 - if isinstance(c, theano.Variable): - rval[-1] = tensor.switch((c - ds[1]) % st[1], rval[-1] + 1, rval[-1]) - elif (c - ds[1]) % st[1]: - rval[-1] += 1 + if st[0] >= ds[0]: + if isinstance(r, theano.Variable): + rval[-2] = tensor.switch(r % st[0], rval[-2] + 1, rval[-2]) + elif r % ds[0]: + rval[-2] += 1 + else: + if isinstance(r, theano.Variable): + rval[-2] = tensor.switch((r - ds[0]) % st[0], rval[-2] + 1, rval[-2]) + elif (r - ds[0]) % st[0]: + rval[-2] += 1 + + if st[1] >= ds[1]: + if isinstance(c, theano.Variable): + rval[-1] = tensor.switch(c % st[1], rval[-1] + 1, rval[-1]) + elif c % ds[1]: + rval[-1] += 1 + else: + if isinstance(c, theano.Variable): + rval[-1] = tensor.switch((c - ds[1]) % st[1], rval[-1] + 1, rval[-1]) + elif (c - ds[1]) % st[1]: + rval[-1] += 1 return rval def __init__(self, ds, ignore_border=False, st=None): @@ -148,7 +173,7 @@ def __hash__(self): return hash(type(self)) ^ hash(self.ds) ^ hash(self.st) ^ hash(self.ignore_border) def __str__(self): - return '%s{%s,%s}' % (self.__class__.__name__, + return '%s{%s,%s,%s}' % (self.__class__.__name__, self.ds, self.st, self.ignore_border) def make_node(self, x): @@ -165,10 +190,10 @@ def perform(self, node, inp, out): if len(x.shape) != 4: raise NotImplementedError( 'DownsampleFactorMax requires 4D input for now') - z_shape = self.out_shape(x.shape, self.ds, self.st, self.ignore_border) + z_shape = self.out_shape(x.shape, self.ds, self.ignore_border, self.st) if (z[0] is None) or (z[0].shape != z_shape): - z[0] = numpy.zeros(self.out_shape(x.shape, self.ds, self.st, - self.ignore_border)) + z[0] = numpy.zeros(self.out_shape(x.shape, self.ds, + self.ignore_border, self.st)) z[0] = theano._asarray(z[0], dtype=x.dtype) zz = z[0] @@ -182,32 +207,36 @@ def perform(self, node, inp, out): img_cols = x.shape[-1] if self.ignore_border: - x_usable2 = (x.shape[2] - ds0) // st0 * st0 + ds0 + if st0 >= ds0: + x_usable2 = (x.shape[2] // ds0 * ds0) + else: + x_usable2 = (x.shape[2] - ds0) // st0 * st0 + ds0 else: x_usable2 = x.shape[2] + if self.ignore_border: - x_usable3 = (x.shape[3] - ds1) // st1 * st1 + ds1 + if st1 >= ds1: + x_usable3 = (x.shape[3] // ds1 * ds1) + else: + x_usable3 = (x.shape[3] - ds1) // st1 * st1 + ds1 else: x_usable3 = x.shape[3] + for n in xrange(x.shape[0]): for k in xrange(x.shape[1]): for r in xrange(pr): row_st = r * st0 + row_end = __builtin__.min(row_st + ds0, img_rows) for c in xrange(pc): col_st = c * st1 - for i in xrange(ds0): - row_ind = row_st + i - if row_ind >= img_rows: - continue - for j in xrange(ds1): - col_ind = col_st + j - if col_ind >= img_cols: - continue + col_end = __builtin__.min(col_st + ds1, img_cols) + for row_ind in xrange(row_st, row_end): + for col_ind in xrange(col_st, col_end): zz[n, k, r, c] = __builtin__.max(zz[n, k, r, c], x[n, k, row_ind, col_ind]) def infer_shape(self, node, in_shapes): - shp = self.out_shape(in_shapes[0], self.ds, self.st, self.ignore_border) + shp = self.out_shape(in_shapes[0], self.ds, self.ignore_border, self.st) return [shp] def grad(self, inp, grads): @@ -290,7 +319,7 @@ def c_code_tmp(self, node, name, inp, out, sub): } """ % locals() - def c_code_cache_version_tmp(self): + def c_code_cache_version(self): return (0, 1) diff --git a/theano/tensor/signal/tests/test_downsample.py b/theano/tensor/signal/tests/test_downsample.py index 07a7b4ac96c..38eb2957542 100644 --- a/theano/tensor/signal/tests/test_downsample.py +++ b/theano/tensor/signal/tests/test_downsample.py @@ -1,4 +1,5 @@ import unittest +import __builtin__ import numpy import theano.tensor as tensor from theano.tests import unittest_tools as utt @@ -37,6 +38,60 @@ def numpy_max_pool_2d(input, ds, ignore_border=False): output_val[k][i, j] = numpy.max(patch) return output_val + @staticmethod + def numpy_max_pool_2d_stride(input, ds, ignore_border=False, st=None): + '''Helper function, implementing max_pool_2d in pure numpy + this function provides st input to indicate the stide size + for the pooling regions. if not indicated, st == sd.''' + if len(input.shape) < 2: + raise NotImplementedError('input should have at least 2 dim,' + ' shape is %s'\ + % str(input.shape)) + + if st == None: + st = ds + xi = 0 + yi = 0 + if not ignore_border: + if st[0] >= ds[0]: + if input.shape[-2] % st[0]: + xi += 1 + else: + if (input.shape[-2] - ds[0]) % st[0]: + xi += 1 + if st[1] >= ds[1]: + if input.shape[-1] % st[1]: + yi += 1 + else: + if (input.shape[-1] % - ds[1]) % st[1]: + yi += 1 + out_shp = list(input.shape[:-2]) + if st[0] >= ds[0]: + out_shp.append(input.shape[-2] / ds[0] + xi) + else: + out_shp.append((input.shape[-2] - ds[0]) / st[0] + 1 + xi) + + if st[1] >= ds[1]: + out_shp.append(input.shape[-1] / ds[1] + yi) + else: + out_shp.append((input.shape[-1] - ds[1]) / st[1] + 1 + yi) + + output_val = numpy.zeros(out_shp) + + img_rows = input.shape[-2] + img_cols = input.shape[-1] + + for k in numpy.ndindex(*input.shape[:-2]): + for i in range(output_val.shape[-2]): + ii_st = i * ds[0] + ii_end = __builtin__.min(ii_st + ds[0], img_rows) + for j in range(output_val.shape[-1]): + jj_st = j * ds[1] + jj_end = __builtin__.min(jj_st + ds[1], img_cols) + patch = input[k][ii_st:ii_end, jj_st:jj_end] + output_val[k][i, j] = numpy.max(patch) + return output_val + def test_DownsampleFactorMax(self): rng = numpy.random.RandomState(utt.fetch_seed()) # generate random images