Support SperableConv2D and one hot crossentropy (#1944)
* Support SperableConv2D and one hot crossentropy

* fix compile error

* add python wrapper

* add new ut

* add ut

* fix unit test

* fix ut

* refine depthwiseConv2D

* fix seperable conv2d unit tests

* support DepthWiseConv2dBackpropInput and DepthWiseConv2DBackpropFilter

* refine the code

* add more test

* fix failed test and style issue

* fix inconsistant API with keras1

* add serialization unit test

* fix failed unit test
yiheng committed Dec 12, 2017
1 parent 5335799 commit 36cd62b
Showing 25 changed files with 1,288 additions and 17 deletions.
9 changes: 9 additions & 0 deletions pyspark/bigdl/nn/criterion.py
@@ -283,6 +283,15 @@ def __init__(self,
        super(DistKLDivCriterion, self).__init__(None, bigdl_type,
                                                 size_average)

class CategoricalCrossEntropy(Criterion):
    """
    This criterion is the same as the cross entropy criterion, except that it takes a
    one-hot encoded target tensor.

    >>> cce = CategoricalCrossEntropy()
    creating: createCategoricalCrossEntropy
    """
    def __init__(self, bigdl_type="float"):
        super(CategoricalCrossEntropy, self).__init__(None, bigdl_type)
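
As a usage sketch of the wrapper above (the numbers are illustrative, not from the commit, and a SparkContext with the BigDL engine is assumed to be initialized already): each prediction row holds class probabilities, e.g. softmax output, and each target row is the one-hot encoding of the true class.

import numpy as np
from bigdl.nn.criterion import CategoricalCrossEntropy

pred = np.array([[0.7, 0.2, 0.1],
                 [0.1, 0.8, 0.1]], dtype="float32")
one_hot = np.array([[1.0, 0.0, 0.0],
                    [0.0, 1.0, 0.0]], dtype="float32")

cce = CategoricalCrossEntropy()
loss = cce.forward(pred, one_hot)   # scalar loss, averaged over the batch
grad = cce.backward(pred, one_hot)  # gradient w.r.t. the predictions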

class HingeEmbeddingCriterion(Criterion):

70 changes: 70 additions & 0 deletions pyspark/bigdl/nn/layer.py
@@ -3221,6 +3221,76 @@ def __init__(self,
                                    upper,
                                    inplace)

class SpatialSeperableConvolution(Layer):

    '''
    Separable convolutions consist of first performing a depthwise spatial convolution (which
    acts on each input channel separately), followed by a pointwise convolution which mixes
    together the resulting output channels. The depth_multiplier argument controls how many
    output channels are generated per input channel in the depthwise step.

    :param n_input_channel: The number of expected input planes in the image given into forward()
    :param n_output_channel: The number of output planes the convolution layer will produce
    :param depth_multiplier: how many internal channels are generated per input channel
    :param kernel_w: The kernel width of the convolution
    :param kernel_h: The kernel height of the convolution
    :param stride_w: The step of the convolution in the width dimension
    :param stride_h: The step of the convolution in the height dimension
    :param pad_w: The additional zeros added per width to the input planes
    :param pad_h: The additional zeros added per height to the input planes
    :param with_bias: whether to add a bias to the output
    :param data_format: a string value of "NHWC" or "NCHW" to specify the input data format of
     this layer. In "NHWC" format data is stored in the order of
     [batch_size, height, width, channels]; in "NCHW" format data is stored in the order of
     [batch_size, channels, height, width]
    :param w_regularizer: instance of [[Regularizer]] (e.g. L1 or L2 regularization), applied
     to the depthwise weight matrices
    :param b_regularizer: instance of [[Regularizer]], applied to the pointwise bias
    :param p_regularizer: instance of [[Regularizer]], applied to the pointwise weights

    >>> conv = SpatialSeperableConvolution(6, 12, 1, 5, 5)
    creating: createSpatialSeperableConvolution
    >>> conv.setWRegularizer(L1Regularizer(0.5))
    creating: createL1Regularizer
    >>> conv.setBRegularizer(L1Regularizer(0.5))
    creating: createL1Regularizer
    >>> conv = SpatialSeperableConvolution(6, 12, 1, 5, 5, 1, 1, 0, 0, True, "NCHW", L1Regularizer(0.5), L1Regularizer(0.5), L1Regularizer(0.5))
    creating: createL1Regularizer
    creating: createL1Regularizer
    creating: createL1Regularizer
    creating: createSpatialSeperableConvolution
    '''

    def __init__(self,
                 n_input_channel,
                 n_output_channel,
                 depth_multiplier,
                 kernel_w,
                 kernel_h,
                 stride_w=1,
                 stride_h=1,
                 pad_w=0,
                 pad_h=0,
                 with_bias=True,
                 data_format="NCHW",
                 w_regularizer=None,
                 b_regularizer=None,
                 p_regularizer=None,
                 bigdl_type="float"):
        super(SpatialSeperableConvolution, self).__init__(None, bigdl_type,
                                                          n_input_channel,
                                                          n_output_channel,
                                                          depth_multiplier,
                                                          kernel_w,
                                                          kernel_h,
                                                          stride_w,
                                                          stride_h,
                                                          pad_w,
                                                          pad_h,
                                                          with_bias,
                                                          data_format,
                                                          w_regularizer,
                                                          b_regularizer,
                                                          p_regularizer)
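
To make the shape contract concrete, here is a minimal usage sketch against the wrapper above. The batch and image sizes are illustrative, not from the commit, and a SparkContext with the BigDL engine is assumed to be initialized already.

import numpy as np
from bigdl.nn.layer import SpatialSeperableConvolution

# Depthwise step: 6 input channels x depth_multiplier 1 = 6 internal channels;
# the 1x1 pointwise step then mixes them into 12 output channels.
conv = SpatialSeperableConvolution(6, 12, 1, 5, 5)
x = np.random.rand(4, 6, 32, 32).astype("float32")  # NCHW input batch
y = conv.forward(x)  # shape (4, 12, 28, 28): 32 - 5 + 1 = 28 with no padding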

class ReLU6(Layer):

60 changes: 60 additions & 0 deletions .../com/intel/analytics/bigdl/nn/CategoricalCrossEntropy.scala
@@ -0,0 +1,60 @@
/*
* Copyright 2016 The BigDL Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.intel.analytics.bigdl.nn

import com.intel.analytics.bigdl.nn.abstractnn.AbstractCriterion
import com.intel.analytics.bigdl.tensor.Tensor
import com.intel.analytics.bigdl.tensor.TensorNumericMath.TensorNumeric

import scala.reflect.ClassTag

/**
 * This is the same as the cross entropy criterion, except that the target tensor is a
 * one-hot tensor.
 *
 * @tparam T The numeric type in the criterion, usually [[Float]] or [[Double]]
 */
class CategoricalCrossEntropy[T: ClassTag]()(implicit ev: TensorNumeric[T])
  extends AbstractCriterion[Tensor[T], Tensor[T], T] {

  private val crxEntropy = CrossEntropyCriterion[T]()

  import CategoricalCrossEntropy._

  private val buffer = Tensor[T]()

  override def updateOutput(input: Tensor[T], target: Tensor[T]): T = {
    buffer.resizeAs(input)
    // Take the element-wise log of the input probabilities and reduce the
    // one-hot target to class indices before delegating to CrossEntropyCriterion
    crxEntropy.forward(buffer.log(input), convertTensor(target))
  }

  override def backward(input: Tensor[T], target: Tensor[T]): Tensor[T] = {
    gradInput = crxEntropy.backward(buffer, convertTensor(target))
    // Chain rule through the element-wise log: d(log x)/dx = 1 / x
    gradInput.div(input)
  }

  override def updateGradInput(input: Tensor[T], target: Tensor[T]): Tensor[T] = {
    gradInput = crxEntropy.updateGradInput(buffer, convertTensor(target))
    gradInput.div(input)
  }
}

object CategoricalCrossEntropy {
  def apply[T: ClassTag]()(implicit ev: TensorNumeric[T]): CategoricalCrossEntropy[T] =
    new CategoricalCrossEntropy()

  // Reduce a one-hot target to (1-based) class indices: argmax along dimension 2
  private def convertTensor[T](tensor: Tensor[T]): Tensor[T] = {
    tensor.max(2)._2
  }
}
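
To make the one-hot convention concrete, here is a small hand computation in Python with illustrative numbers, not from the commit. The convertTensor step above is a row-wise argmax, and the loss reduces to the negative log of the probability assigned to the true class, averaged over the batch.

import numpy as np

pred = np.array([[0.7, 0.2, 0.1],
                 [0.1, 0.8, 0.1]])
one_hot = np.array([[1.0, 0.0, 0.0],
                    [0.0, 1.0, 0.0]])

# convertTensor: one-hot -> class indices (BigDL tensors are 1-based)
class_idx = one_hot.argmax(axis=1) + 1  # -> array([1, 2])

# categorical cross entropy: -mean(log p_true_class)
loss = -np.mean(np.log(pred[np.arange(2), class_idx - 1]))  # ~0.2899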
2 changes: 1 addition & 1 deletion .../com/intel/analytics/bigdl/nn/ClassNLLCriterion.scala
@@ -113,7 +113,7 @@ class ClassNLLCriterion[@specialized(Float, Double) T: ClassTag]
      target.squeeze()
      require(target.dim() == 1,
        "ClassNLLCriterion: illegal target! Target should be 1D tensor after squeeze," +
-         s"but target's size is: ${ target.size() }, please check your data.")
+         s"but target's dimension is: ${ target.dim() }, please check your data.")

      total_weight = ev.fromType[Int](0)
      output = ev.fromType[Int](0)
