apache · piiswrong · Apr 27, 2017 · Apr 27, 2017 · Apr 27, 2017
diff --git a/src/operator/instance_norm-inl.h b/src/operator/instance_norm-inl.h
@@ -30,7 +30,7 @@ struct InstanceNormParam : public dmlc::Parameter<InstanceNormParam> {
   float eps;
   DMLC_DECLARE_PARAMETER(InstanceNormParam) {
     DMLC_DECLARE_FIELD(eps).set_default(1e-3f).describe(
-        "Epsilon to prevent division by 0.");
+        "An `epsilon` parameter to prevent division by 0.");
   }
 };  // struct InstanceNormParam
 

diff --git a/src/operator/instance_norm.cc b/src/operator/instance_norm.cc
@@ -28,27 +28,55 @@ Operator* InstanceNormProp::CreateOperatorEx(Context ctx,
 DMLC_REGISTER_PARAMETER(InstanceNormParam);
 
 MXNET_REGISTER_OP_PROPERTY(InstanceNorm, InstanceNormProp)
-    .add_argument("data", "NDArray-or-Symbol",
-                  "A n-dimensional tensor (n > 2) of the form [batch, "
-                  "channel, spatial_dim1, spatial_dim2, ...].")
-    .add_argument("gamma", "NDArray-or-Symbol",
-                  "A vector of length \'channel\', which multiplies the "
-                  "normalized input.")
-    .add_argument("beta", "NDArray-or-Symbol",
-                  "A vector of length \'channel\', which is added to the "
-                  "product of the normalized input and the weight.")
-    .add_arguments(InstanceNormParam::__FIELDS__())
-    .describe(
-        "An operator taking in a n-dimensional input tensor (n > 2), and "
-        "normalizing the input by subtracting the mean and variance calculated "
-        "over the spatial dimensions. This is an implemention of "
-        "the operator described in \"Instance Normalization: The "
-        "Missing Ingredient for Fast Stylization\", D. Ulyanov, A. Vedaldi, V. "
-        "Lempitsky, 2016 (arXiv:1607.08022v2). This layer is similar to batch "
-        "normalization, with two differences: first, the normalization is "
-        "carried out per example (\'instance\'), not over a batch. Second, the "
-        "same normalization is applied both at test and train time. This "
-        "operation is also known as \'contrast normalization\'.");
+.add_argument("data", "NDArray-or-Symbol",
+              "An n-dimensional input array (n > 2) of the form [batch, "
+              "channel, spatial_dim1, spatial_dim2, ...].")
+.add_argument("gamma", "NDArray-or-Symbol",
+              "A vector of length \'channel\', which multiplies the "
+              "normalized input.")
+.add_argument("beta", "NDArray-or-Symbol",
+              "A vector of length \'channel\', which is added to the "
+              "product of the normalized input and the weight.")
+.add_arguments(InstanceNormParam::__FIELDS__())
+.describe(R"code(Applies instance normalization to the n-dimensional input array.
 
+This operator takes an n-dimensional input array (where n > 2) and normalizes
+the input using the following formula:
+
+.. math::
+
+  out = \frac{x - mean[data]}{ \sqrt{Var[data] + \epsilon}} * gamma + beta
+
+This layer is similar to the batch normalization layer (`BatchNorm`)
+with two differences: first, the normalization is
+carried out per example (instance), not over a batch. Second, the
+same normalization is applied both at test and train time. This
+operation is also known as `contrast normalization`.
+
+If the input data is of shape [batch, channel, spatial_dim1, spatial_dim2, ...],
+`gamma` and `beta` parameters must be vectors of shape [channel].
+
+This implementation is based on the paper:
+
+.. [1] Instance Normalization: The Missing Ingredient for Fast Stylization,
+   D. Ulyanov, A. Vedaldi, V. Lempitsky, 2016 (arXiv:1607.08022v2).
+
+Examples::
+
+  // Input of shape (2,1,2)
+  x = [[[ 1.1,  2.2]],
+       [[ 3.3,  4.4]]]
+
+  // gamma parameter of length 1
+  gamma = [1.5]
+
+  // beta parameter of length 1
+  beta = [0.5]
+
+  // Instance normalization is calculated with the above formula
+  InstanceNorm(x,gamma,beta) = [[[-0.997527  ,  1.99752665]],
+                                [[-0.99752653,  1.99752724]]]
+
+)code" ADD_FILELINE);
 }  // namespace op
 }  // namespace mxnet