// Copyright (c) 2017-2019, Apple Inc. All rights reserved.
//
// Use of this source code is governed by a BSD-3-clause license that can be
// found in LICENSE.txt or at https://opensource.org/licenses/BSD-3-Clause
/**
* A neural network is defined through a collection of layers
* and represents a directed acyclic graph (DAG).
* Each layer has a name, a layer type,
* a list of input names, a list of output names,
* and a collection of parameters specific to the layer type.
*
* The graph structure and connectivity of the neural network
* is inferred from the input and output names.
* A neural network starts with the layer
* whose input name is equal to the value specified in
* ``Model.description.input.name``,
* and ends with the layer
* whose output name is equal to the value specified in
* ``Model.description.output.name``.
* Layers must have unique input and output names,
* and a layer may not have input or output names that
* refer to layers that are not yet defined.
*
* For CoreML specification version <=3,
* all inputs are mapped to static rank 5 tensors, with axis notations
* [Sequence, Batch, Channel, Height, Width].
*
* From specification version 4 onwards (iOS >= 13, macOS >= 10.15), more options are available
* (see enums ``NeuralNetworkMultiArrayShapeMapping``, ``NeuralNetworkImageShapeMapping``)
* to map inputs to generic N-Dimensional (or N rank) tensors, where N >= 1.
*
* Each layer type may have specific constraints on the ranks of its inputs and outputs.
*
* Some layers (such as softmax, reduce, etc.) have parameters that are described in
* terms of the notational axes "Channel", "Height", "Width", or "Sequence". They can easily be
* re-interpreted in the general ND setting by using the following rule:
* "width" is the same as axis = -1 (i.e. the last axis)
* "height" is the same as axis = -2 (i.e. the second-to-last axis)
* "channel" is the same as axis = -3 (i.e. the third-to-last axis)
* "sequence" is the same as axis = -5 (i.e. the fifth-to-last axis)
*
* Several layers are available in 3 different variations, with the names ending
* in identifiers: ``like``, ``static`` and ``dynamic``. For instance, ``FillLike``,
* ``FillStatic`` and ``FillDynamic``. The ``static`` variation generally will have
* a property corresponding to the shape of the output. For instance, if the
* output of the ``FillStatic`` layer is desired to be of shape (10, 4), the
* property ``targetShape`` will have to be set to [10, 4]. In the ``dynamic`` case,
* the shape is an input, hence it can be changed at runtime. For instance, for
* a ``FillDynamic`` layer, the input would have to be an array containing the
* values 10 and 4, if the desired output is of shape (10, 4). In the
* ``like`` case, the shape of an additional input is used as the output shape,
* and its values are ignored. For instance, for a ``FillLike`` layer with an
* input of shape (10, 4), the generated output will also be of shape (10, 4);
* the values of the input are ignored.
*/
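/*
 * A minimal numpy sketch (illustrative only, not part of the specification) of the
 * ``static`` / ``dynamic`` / ``like`` fill variants described above. The helper names
 * ``fill_static``, ``fill_dynamic``, and ``fill_like`` are hypothetical.
 *
 * .. code::
 *
 *     import numpy as np
 *
 *     def fill_static(value, target_shape):    # shape fixed in the layer's targetShape property
 *         return np.full(target_shape, value)
 *
 *     def fill_dynamic(value, shape_tensor):   # shape supplied as an input at runtime
 *         return np.full(tuple(int(d) for d in shape_tensor), value)
 *
 *     def fill_like(value, reference):         # only the reference input's shape is used
 *         return np.full(reference.shape, value)
 *
 *     # All three produce a (10, 4) tensor filled with 0.5:
 *     print(fill_static(0.5, (10, 4)).shape)
 *     print(fill_dynamic(0.5, np.array([10, 4])).shape)
 *     print(fill_like(0.5, np.zeros((10, 4))).shape)
 */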
syntax = "proto3";
option optimize_for = LITE_RUNTIME;
import public "DataStructures.proto";
import public "Parameters.proto";
package CoreML.Specification;
enum NeuralNetworkMultiArrayShapeMapping {
/*
* Describes how the MultiArray shape for the inputs,
* provided in the FeatureTypes proto via the model description,
* is mapped to construct tensors that are fed into the Neural Network layers.
*/
/*
* Default legacy value. Only supported for CoreML Specification version <= 3.
*
* The default legacy shape mapping resolves all input shapes to a rank 5 equivalent
* with axis notation of [Seq, Batch, Channel, Height, Width].
*
* When this enum value is selected,
* the repeated shape field in the message "ArrayFeatureType" in feature types proto,
* must be either length 1 or length 3.
*
* The following rule is used to map the values in the shape field to the actual tensor shape:
* rank 1 shape is mapped to shape [1,1,C,1,1]
* rank 3 shape is mapped to shape [1,1,C,H,W]
* At runtime, the first two dimensions (Seq or Batch) may also be present, with non-1 values.
*
* It is invalid to use this enum value if any of the layers added in
* Specification version 4 (iOS >= 13, macOS >= 10.15) or later are used in the network.
* The validator will raise an error in that case.
*/
RANK5_ARRAY_MAPPING = 0;
/*
* The exact shape and rank (i.e. number of dimensions in the shape) of the input,
* as specified in the message "ArrayFeatureType", is passed through to the layers.
* Supported only for Specification version >= 4 (iOS >= 13, macOS >= 10.15).
*/
EXACT_ARRAY_MAPPING = 1;
}
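/*
 * A minimal Python sketch (illustrative only) of the legacy RANK5_ARRAY_MAPPING rule
 * described above. The helper name ``to_rank5`` is hypothetical.
 *
 * .. code::
 *
 *     def to_rank5(shape):
 *         # Map a rank-1 [C] or rank-3 [C, H, W] input shape to the legacy
 *         # rank-5 layout [Seq, Batch, C, H, W].
 *         if len(shape) == 1:
 *             return (1, 1, shape[0], 1, 1)
 *         if len(shape) == 3:
 *             c, h, w = shape
 *             return (1, 1, c, h, w)
 *         raise ValueError("legacy mapping expects a shape of length 1 or 3")
 *
 *     print(to_rank5((3,)))           # (1, 1, 3, 1, 1)
 *     print(to_rank5((3, 224, 224)))  # (1, 1, 3, 224, 224)
 */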
enum NeuralNetworkImageShapeMapping {
/*
* Describes how the shape of the input tensors is constructed from image inputs.
*/
/*
* In this case, image input is mapped to a rank 5 tensor.
* For Color images, input tensor is shaped as [1,1,3,H,W].
* For Gray images, input tensor is shaped as [1,1,1,H,W].
*/
RANK5_IMAGE_MAPPING = 0;
/*
* For Color images, input tensor is shaped as [1,3,H,W].
* For Gray images, input tensor is shaped as [1,1,H,W].
* Supported only for Specification version >= 4 (iOS >= 13, macOS >= 10.15).
*/
RANK4_IMAGE_MAPPING = 1;
}
/**
A neural network.
*/
message NeuralNetwork {
repeated NeuralNetworkLayer layers = 1;
repeated NeuralNetworkPreprocessing preprocessing = 2;
// use this enum value to determine the input tensor shapes to the neural network, for multiarray inputs
NeuralNetworkMultiArrayShapeMapping arrayInputShapeMapping = 5;
// use this enum value to determine the input tensor shapes to the neural network, for image inputs
NeuralNetworkImageShapeMapping imageInputShapeMapping = 6;
NetworkUpdateParameters updateParams = 10;
}
/// Preprocessing
/// -------------
/**
* A neural network preprocessor that
* performs a scalar multiplication of an image
* followed by addition of scalar biases to the channels.
*
* Input: X
* An image in BGR or RGB format with shape ``[3, H, W]``
* or in grayscale format with shape ``[1, H, W]``.
* Output: Y
* An image with format and shape corresponding to the input.
*
* If the input image is in BGR format:
*
* .. code::
*
* Y[0, :, :] = channelScale * X[0, :, :] + blueBias
* Y[1, :, :] = channelScale * X[1, :, :] + greenBias
* Y[2, :, :] = channelScale * X[2, :, :] + redBias
*
* If the input image is in RGB format:
*
* .. code::
*
* Y[0, :, :] = channelScale * X[0, :, :] + redBias
* Y[1, :, :] = channelScale * X[1, :, :] + greenBias
* Y[2, :, :] = channelScale * X[2, :, :] + blueBias
*
* If the input image is in grayscale format:
*
* .. code::
*
* Y[0, :, :] = channelScale * X[0, :, :] + grayBias
*/
message NeuralNetworkImageScaler {
float channelScale = 10; /// Scalar to be multiplied.
float blueBias = 20; /// Scalar blue bias to be added.
float greenBias = 21; /// Scalar green bias to be added.
float redBias = 22; /// Scalar red bias to be added.
float grayBias = 30; /// Scalar bias to be added for grayscale images.
}
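/*
 * A minimal numpy sketch (illustrative only) of the scaler preprocessing above for an
 * RGB image. The helper name ``scale_rgb_image`` is hypothetical.
 *
 * .. code::
 *
 *     import numpy as np
 *
 *     def scale_rgb_image(x, channel_scale, red_bias, green_bias, blue_bias):
 *         # x: float array of shape [3, H, W] in RGB channel order
 *         y = np.empty_like(x)
 *         y[0] = channel_scale * x[0] + red_bias
 *         y[1] = channel_scale * x[1] + green_bias
 *         y[2] = channel_scale * x[2] + blue_bias
 *         return y
 *
 *     x = np.random.rand(3, 4, 4).astype(np.float32) * 255.0
 *     y = scale_rgb_image(x, channel_scale=1.0 / 255.0,
 *                         red_bias=-0.5, green_bias=-0.5, blue_bias=-0.5)
 */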
/**
* A neural network preprocessor that
* subtracts the provided mean image from the input image.
* The mean image is subtracted from the input named
* ``NeuralNetworkPreprocessing.featureName``.
*/
message NeuralNetworkMeanImage {
/**
* Mean image stored as a flattened array of floats,
* representing shape [Channel,Height,Width].
*/
repeated float meanImage = 1;
}
/// Preprocessing parameters for image inputs.
message NeuralNetworkPreprocessing {
string featureName = 1; /// must be equal to the input name to which the preprocessing is applied
oneof preprocessor {
NeuralNetworkImageScaler scaler = 10;
NeuralNetworkMeanImage meanImage = 11;
}
}
/// Activation Functions
/// --------------------
/**
* A rectified linear unit (ReLU) activation function.
*
* This function has the following formula:
*
* .. math::
* f(x) = \text{max}(0, x)
*/
message ActivationReLU {
}
/**
* A leaky rectified linear unit (ReLU) activation function.
*
* This function has the following formula:
*
* .. math::
* f(x) = \begin{cases}
* x & \text{if } x \geq 0 \\
* \alpha x & \text{if } x < 0
* \end{cases}
*/
message ActivationLeakyReLU {
float alpha = 1; //negative slope value for leakyReLU
}
/**
* A hyperbolic tangent activation function.
*
* This function has the following formula:
*
* .. math::
* f(x) = \dfrac{1 - e^{-2x}}{1 + e^{-2x}}
*/
message ActivationTanh {
}
/**
* A scaled hyperbolic tangent activation function.
*
* This function has the following formula:
*
* .. math::
* f(x) = \alpha \tanh(\beta x)
*/
message ActivationScaledTanh {
float alpha = 1;
float beta = 2;
}
/**
* A sigmoid activation function.
*
* This function has the following formula:
*
* .. math::
* f(x) = \dfrac{1}{1 + e^{-x}}
*/
message ActivationSigmoid {
}
/**
* A linear activation function.
*
* This function has the following formula:
*
* .. math::
* f(x) = \alpha x + \beta
*/
message ActivationLinear {
float alpha = 1;
float beta = 2;
}
/**
* A hard sigmoid activation function.
*
* This function has the following formula:
*
* .. math::
* f(x) = \text{min}(\text{max}(\alpha x + \beta, 0), 1)
*/
message ActivationSigmoidHard {
float alpha = 1;
float beta = 2;
}
/**
* A parameterized rectified linear unit (PReLU) activation function.
* Input must be at least rank 3. Axis = -3 is denoted by "C", or channels.
* "alpha" parameter can be a vector of length C.
*
* This function has the following formula:
*
* .. math::
* f(x_i) = \begin{cases}
* x_i & \text{if } x_i \geq 0 \\
* \alpha_i x_i & \text{if } x_i < 0
* \end{cases} \;,\;i=1,...,C
*/
message ActivationPReLU {
// parameter of length C or 1.
// If length is 1, same value is used for all channels
WeightParams alpha = 1;
}
/**
* An exponential linear unit (ELU) activation function.
*
* This function has the following formula:
*
* .. math::
* f(x) = \begin{cases}
* x & \text{if } x \geq 0 \\
* \alpha (e^x - 1) & \text{if } x < 0
* \end{cases}
*/
message ActivationELU {
float alpha = 1;
}
/**
* A thresholded rectified linear unit (ReLU) activation function.
*
* This function has the following formula:
*
* .. math::
* f(x) = \begin{cases}
* x & \text{if } x \geq \alpha \\
* 0 & \text{if } x < \alpha
* \end{cases}
*/
message ActivationThresholdedReLU {
float alpha = 1;
}
/**
* A softsign activation function.
*
* This function has the following formula:
*
* .. math::
* f(x) = \dfrac{x}{1 + |x|}
*/
message ActivationSoftsign {
}
/**
* A softplus activation function.
*
* This function has the following formula:
*
* .. math::
* f(x) = \text{log}(1 + e^x)
*/
message ActivationSoftplus {
}
/**
* A parametric softplus activation function.
* Input must be at least rank 3. axis = -3 is denoted by "C", or channels.
* "alpha"/"beta" parameter can be a vector of length C.
*
* This function has the following formula:
*
* .. math::
* f(x_i) = \alpha_i \text{log}(1 + e^{\beta_i x_i}) \;,\;i=1,...,C
*/
message ActivationParametricSoftplus {
// If length is 1, same value is used for all channels
WeightParams alpha = 1; //parameter of length C or 1
WeightParams beta = 2; //parameter of length C or 1
}
message ActivationParams {
oneof NonlinearityType {
ActivationLinear linear = 5;
ActivationReLU ReLU = 10;
ActivationLeakyReLU leakyReLU = 15;
ActivationThresholdedReLU thresholdedReLU = 20;
ActivationPReLU PReLU = 25;
ActivationTanh tanh = 30;
ActivationScaledTanh scaledTanh = 31;
ActivationSigmoid sigmoid = 40;
ActivationSigmoidHard sigmoidHard = 41;
ActivationELU ELU = 50;
ActivationSoftsign softsign = 60;
ActivationSoftplus softplus = 70;
ActivationParametricSoftplus parametricSoftplus = 71;
}
}
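/*
 * A minimal numpy sketch (illustrative only) of a few of the activation formulas above.
 * The helper names are hypothetical; parameters follow the messages they mirror.
 *
 * .. code::
 *
 *     import numpy as np
 *
 *     def leaky_relu(x, alpha):            # ActivationLeakyReLU
 *         return np.where(x >= 0, x, alpha * x)
 *
 *     def sigmoid_hard(x, alpha, beta):    # ActivationSigmoidHard
 *         return np.minimum(np.maximum(alpha * x + beta, 0.0), 1.0)
 *
 *     def elu(x, alpha):                   # ActivationELU
 *         return np.where(x >= 0, x, alpha * (np.exp(x) - 1.0))
 *
 *     def softsign(x):                     # ActivationSoftsign
 *         return x / (1.0 + np.abs(x))
 *
 *     x = np.linspace(-2.0, 2.0, 5)
 *     print(leaky_relu(x, 0.1))
 *     print(sigmoid_hard(x, 0.2, 0.5))
 *     print(elu(x, 1.0))
 *     print(softsign(x))
 */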
/**
* Representation of the intermediate tensors
*/
message Tensor {
// Number of dimensions in the tensor shape
uint32 rank = 1;
// actual value of the tensor shape.
// must be of length "rank". Can contain -1s for unknown dimensions.
repeated int64 dimValue = 2;
}
/**
* A single neural network layer.
*/
message NeuralNetworkLayer {
string name = 1; //descriptive name of the layer
repeated string input = 2;
repeated string output = 3;
repeated Tensor inputTensor = 4; // must be the same length as the "input" field
repeated Tensor outputTensor = 5; // must be the same length as the "output" field
// Must be set to true to mark the layer as updatable.
// If true, the weightParams in the layer's properties must also be set to updatable
// If false, the value of the isUpdatable parameter within the layer's weights are ignored
bool isUpdatable = 10;
oneof layer {
// Start at 100 here
ConvolutionLayerParams convolution = 100;
PoolingLayerParams pooling = 120;
ActivationParams activation = 130;
InnerProductLayerParams innerProduct = 140;
EmbeddingLayerParams embedding = 150;
// Normalization related layers
BatchnormLayerParams batchnorm = 160;
MeanVarianceNormalizeLayerParams mvn = 165;
L2NormalizeLayerParams l2normalize = 170;
SoftmaxLayerParams softmax = 175;
LRNLayerParams lrn = 180;
CropLayerParams crop = 190;
PaddingLayerParams padding = 200;
UpsampleLayerParams upsample = 210;
ResizeBilinearLayerParams resizeBilinear = 211;
CropResizeLayerParams cropResize = 212;
UnaryFunctionLayerParams unary = 220;
// Elementwise operations
AddLayerParams add = 230;
MultiplyLayerParams multiply = 231;
AverageLayerParams average = 240;
ScaleLayerParams scale = 245;
BiasLayerParams bias = 250;
MaxLayerParams max = 260;
MinLayerParams min = 261;
DotProductLayerParams dot = 270;
ReduceLayerParams reduce = 280;
LoadConstantLayerParams loadConstant = 290;
// Data reorganization
ReshapeLayerParams reshape = 300;
FlattenLayerParams flatten = 301;
PermuteLayerParams permute = 310;
ConcatLayerParams concat = 320;
SplitLayerParams split = 330;
SequenceRepeatLayerParams sequenceRepeat = 340;
ReorganizeDataLayerParams reorganizeData = 345;
SliceLayerParams slice = 350;
// Recurrent Layers
SimpleRecurrentLayerParams simpleRecurrent = 400;
GRULayerParams gru = 410;
UniDirectionalLSTMLayerParams uniDirectionalLSTM = 420;
BiDirectionalLSTMLayerParams biDirectionalLSTM = 430;
// Custom (user-implemented) Layer
CustomLayerParams custom = 500;
// Following layers are available only after CoreML Specification
// version >= 4 (iOS >= 13, macOS >= 10.15)
// Control Flow related Layers
CopyLayerParams copy = 600;
BranchLayerParams branch = 605;
LoopLayerParams loop = 615;
LoopBreakLayerParams loopBreak = 620;
LoopContinueLayerParams loopContinue = 625;
RangeStaticLayerParams rangeStatic = 635;
RangeDynamicLayerParams rangeDynamic = 640;
// Elementwise Unary Layers
ClipLayerParams clip = 660;
CeilLayerParams ceil = 665;
FloorLayerParams floor = 670;
SignLayerParams sign = 680;
RoundLayerParams round = 685;
Exp2LayerParams exp2 = 700;
SinLayerParams sin = 710;
CosLayerParams cos = 715;
TanLayerParams tan = 720;
AsinLayerParams asin = 730;
AcosLayerParams acos = 735;
AtanLayerParams atan = 740;
SinhLayerParams sinh = 750;
CoshLayerParams cosh = 755;
TanhLayerParams tanh = 760;
AsinhLayerParams asinh = 770;
AcoshLayerParams acosh = 775;
AtanhLayerParams atanh = 780;
ErfLayerParams erf = 790;
GeluLayerParams gelu = 795;
// Elementwise Binary with Broadcasting Support
EqualLayerParams equal = 815;
NotEqualLayerParams notEqual = 820;
LessThanLayerParams lessThan = 825;
LessEqualLayerParams lessEqual = 827;
GreaterThanLayerParams greaterThan = 830;
GreaterEqualLayerParams greaterEqual = 832;
LogicalOrLayerParams logicalOr = 840;
LogicalXorLayerParams logicalXor = 845;
LogicalNotLayerParams logicalNot = 850;
LogicalAndLayerParams logicalAnd = 855;
ModBroadcastableLayerParams modBroadcastable = 865;
MinBroadcastableLayerParams minBroadcastable = 870;
MaxBroadcastableLayerParams maxBroadcastable = 875;
AddBroadcastableLayerParams addBroadcastable = 880;
PowBroadcastableLayerParams powBroadcastable = 885;
DivideBroadcastableLayerParams divideBroadcastable = 890;
FloorDivBroadcastableLayerParams floorDivBroadcastable = 895;
MultiplyBroadcastableLayerParams multiplyBroadcastable = 900;
SubtractBroadcastableLayerParams subtractBroadcastable = 905;
// Tensor Manipulations
TileLayerParams tile = 920;
StackLayerParams stack = 925;
GatherLayerParams gather = 930;
ScatterLayerParams scatter = 935;
GatherNDLayerParams gatherND = 940;
ScatterNDLayerParams scatterND = 945;
SoftmaxNDLayerParams softmaxND = 950;
GatherAlongAxisLayerParams gatherAlongAxis = 952;
ScatterAlongAxisLayerParams scatterAlongAxis = 954;
ReverseLayerParams reverse = 960;
ReverseSeqLayerParams reverseSeq = 965;
SplitNDLayerParams splitND = 975;
ConcatNDLayerParams concatND = 980;
TransposeLayerParams transpose = 985;
SliceStaticLayerParams sliceStatic = 995;
SliceDynamicLayerParams sliceDynamic = 1000;
SlidingWindowsLayerParams slidingWindows = 1005;
TopKLayerParams topK = 1015;
ArgMinLayerParams argMin = 1020;
ArgMaxLayerParams argMax = 1025;
EmbeddingNDLayerParams embeddingND = 1040;
BatchedMatMulLayerParams batchedMatmul = 1045;
// Tensor Allocation / Reshape sort of operations
GetShapeLayerParams getShape = 1065;
LoadConstantNDLayerParams loadConstantND = 1070;
FillLikeLayerParams fillLike = 1080;
FillStaticLayerParams fillStatic = 1085;
FillDynamicLayerParams fillDynamic = 1090;
BroadcastToLikeLayerParams broadcastToLike = 1100;
BroadcastToStaticLayerParams broadcastToStatic = 1105;
BroadcastToDynamicLayerParams broadcastToDynamic = 1110;
SqueezeLayerParams squeeze = 1120;
ExpandDimsLayerParams expandDims = 1125;
FlattenTo2DLayerParams flattenTo2D = 1130;
ReshapeLikeLayerParams reshapeLike = 1135;
ReshapeStaticLayerParams reshapeStatic = 1140;
ReshapeDynamicLayerParams reshapeDynamic = 1145;
RankPreservingReshapeLayerParams rankPreservingReshape = 1150;
// Random Distributions
RandomNormalLikeLayerParams randomNormalLike = 1170;
RandomNormalStaticLayerParams randomNormalStatic = 1175;
RandomNormalDynamicLayerParams randomNormalDynamic = 1180;
RandomUniformLikeLayerParams randomUniformLike = 1190;
RandomUniformStaticLayerParams randomUniformStatic = 1195;
RandomUniformDynamicLayerParams randomUniformDynamic = 1200;
RandomBernoulliLikeLayerParams randomBernoulliLike = 1210;
RandomBernoulliStaticLayerParams randomBernoulliStatic = 1215;
RandomBernoulliDynamicLayerParams randomBernoulliDynamic = 1220;
CategoricalDistributionLayerParams categoricalDistribution = 1230;
// Reduction related Layers:
ReduceL1LayerParams reduceL1 = 1250;
ReduceL2LayerParams reduceL2 = 1255;
ReduceMaxLayerParams reduceMax = 1260;
ReduceMinLayerParams reduceMin = 1265;
ReduceSumLayerParams reduceSum = 1270;
ReduceProdLayerParams reduceProd = 1275;
ReduceMeanLayerParams reduceMean = 1280;
ReduceLogSumLayerParams reduceLogSum = 1285;
ReduceSumSquareLayerParams reduceSumSquare = 1290;
ReduceLogSumExpLayerParams reduceLogSumExp = 1295;
// Masking / Selection Layers
WhereNonZeroLayerParams whereNonZero = 1313;
MatrixBandPartLayerParams matrixBandPart = 1315;
LowerTriangularLayerParams lowerTriangular = 1320;
UpperTriangularLayerParams upperTriangular = 1325;
WhereBroadcastableLayerParams whereBroadcastable = 1330;
// Normalization Layers
LayerNormalizationLayerParams layerNormalization = 1350;
}
}
/**
* Branching Layer
*
* A layer that provides the functionality of branching or an if-else block.
*
* Must have 1 input. There are no outputs as the execution is transferred to either the
* if or the else branch based on the value of the input.
*
* Input is the condition predicate. Must be a scalar (length 1 tensor).
*
*/
message BranchLayerParams {
/**
* execute this graph if the absolute value of the input Tensor is greater than 1e-6
* This must be present.
*/
NeuralNetwork ifBranch = 1;
/**
* execute this graph if the absolute value of the input Tensor is less than 1e-6
* This is optional.
*/
NeuralNetwork elseBranch = 2;
}
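/*
 * A minimal Python sketch (illustrative only) of the branch semantics above, with
 * callables standing in for the two sub-networks. The helper name ``run_branch`` is hypothetical.
 *
 * .. code::
 *
 *     import numpy as np
 *
 *     def run_branch(condition, if_branch, else_branch=None):
 *         # condition: scalar tensor; |condition| > 1e-6 is treated as True
 *         if abs(float(condition)) > 1e-6:
 *             if_branch()
 *         elif else_branch is not None:    # elseBranch is optional
 *             else_branch()
 *
 *     run_branch(np.array(1.0), lambda: print("if branch"), lambda: print("else branch"))
 */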
/**
* Loop Layer
*
* A layer that provides the functionality of a "for" loop or a "while" loop.
*
* There are either no inputs or 1 input. When an input is present, it corresponds to the maximum loop count,
* in which case the value of the "maxLoopIterations" field is ignored. The input must be a scalar.
* (For the description below, maxLoopIterations is assumed to be the value of the input, when it is present.)
*
* No outputs are produced. Blobs produced by the condition or the body network are visible in the scope of the overall network.
*
* "conditionNetwork" must produce a tensor with the name specified in the "conditionVar" field.
*
* There are 3 possible cases for determining the termination condition:
*
* Case 1:
*
* If there is no "conditionNetwork", in this case the layer corresponds to a pure for loop, which is run "maxLoopIterations" number of times.
* Equivalent pseudo-code:
*
* for loopIterator = 0 : maxLoopIterations
* bodyNetwork()
*
*
* Case 2:
*
* "conditionNetwork" is present, and "maxLoopIterations" is 0 and there is no input,
* in this case the layer corresponds to a while loop. Equivalent pseudo-code:
*
* conditionVar = conditionNetwork()
* while conditionVar:
* bodyNetwork()
* conditionVar = conditionNetwork()
*
*
* Case 3:
*
* "conditionNetwork" is provided, and "maxLoopIterations" is positive or there is an input,
* in this case the layer corresponds to a while loop with a joint condition. Equivalent pseudo-code:
*
* loopIterator = 0
* conditionVar = conditionNetwork()
* while (conditionVar and loopIterator < maxLoopIterations):
* bodyNetwork()
* loopIterator = loopIterator + 1
* conditionVar = conditionNetwork()
*
*/
message LoopLayerParams {
/**
* maximum number of iterations. Ignored if input is present.
*/
uint64 maxLoopIterations = 1;
/**
* This field provides the name of the tensor which is produced by the conditionNetwork
* and whose value is checked to start/continue/terminate the loop. A value close to 0.0f is treated as False.
* This field is optional.
* Must be a non-empty string if and only if "conditionNetwork" is present.
*/
string conditionVar = 2;
/**
* Must generate a tensor with the name provided in the "conditionVar" field.
* This field is optional.
* Must be present if and only if the "conditionVar" field is a non-empty string.
*/
NeuralNetwork conditionNetwork = 3;
/**
* Body of the loop.
* This field must be present.
*/
NeuralNetwork bodyNetwork = 4;
}
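/*
 * A minimal Python sketch (illustrative only) of the three termination cases above.
 * ``condition`` stands in for the conditionNetwork, ``body`` for the bodyNetwork;
 * the helper name ``run_loop`` is hypothetical.
 *
 * .. code::
 *
 *     def run_loop(body, condition=None, max_iterations=0, input_value=None):
 *         # If an input is present it overrides maxLoopIterations.
 *         max_iters = input_value if input_value is not None else max_iterations
 *         if condition is None:                      # Case 1: pure for loop
 *             for _ in range(max_iters):
 *                 body()
 *         elif max_iters == 0:                       # Case 2: pure while loop
 *             while condition():
 *                 body()
 *         else:                                      # Case 3: joint condition
 *             i = 0
 *             while condition() and i < max_iters:
 *                 body()
 *                 i += 1
 *
 *     counter = {"i": 0}
 *     run_loop(body=lambda: counter.update(i=counter["i"] + 1), max_iterations=3)
 *     print(counter["i"])   # 3
 */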
/**
* Loop break Layer
*
* Terminate the loop that has this layer.
* If present, it should always reside in the "bodyNetwork" of the loop layer
*
* No inputs/outputs
*
*/
message LoopBreakLayerParams {
}
/**
* Loop Continue Layer
*
* Stop the current loop iteration and continue on the next iteration.
* If present, it should always reside in the "bodyNetwork" of the loop layer
*
* No inputs/outputs
*
*/
message LoopContinueLayerParams {
}
/**
* Copy Layer
*
* A layer that copies its input tensor to the output tensor.
* Must have 1 input and 1 output, with distinct names.
* This is the only layer that is allowed to re-generate an output that is already present in the neural network prior to this layer,
* in which case it will overwrite the output tensor.
*
*/
message CopyLayerParams {
}
/**
* GreaterThan Layer
*
* Either 1 or 2 inputs.
* Produces 1 output.
* Perform elementwise greater than operation.
*
* Output is 1.0f if the condition is true otherwise 0.0f.
*
* .. code::
*
* y = x1 > x2
* or
* y = x1 > alpha, if only one input is provided
*
* Broadcasting is supported.
*
*/
message GreaterThanLayerParams {
/**
* Compare to the scalar value provided here if there is 1 input
*/
float alpha = 2;
}
/**
* GreaterEqual Layer
*
* Either 1 or 2 inputs.
* Produces 1 output.
* Perform elementwise greater equal operation.
*
* Output is 1.0f if the condition is true otherwise 0.0f.
*
* .. code::
*
* y = x1 >= x2
* or
* y = x1 >= alpha, if only one input is provided
*
* Broadcasting is supported.
*
*/
message GreaterEqualLayerParams {
/**
* Compare to the scalar value provided here if there is 1 input
*/
float alpha = 2;
}
/**
* LessThan Layer
*
* Either 1 or 2 inputs.
* Produces 1 output.
* Perform elementwise less than operation.
*
* Output is 1.0f if the condition is true otherwise 0.0f.
*
* .. code::
*
* y = x1 < x2
* or
* y = x1 < alpha, if only one input is provided
*
* Broadcasting is supported.
*
*/
message LessThanLayerParams {
/**
* Compare to the scalar value provided here if there is 1 input
*/
float alpha = 2;
}
/**
* LessEqual Layer
*
* Either 1 or 2 inputs.
* Produces 1 output.
* Perform elementwise less equal operation.
*
* Output is 1.0f if the condition is true otherwise 0.0f.
*
* .. code::
*
* y = x1 <= x2
* or
* y = x1 <= alpha, if only one input is provided
*
* Broadcasting is supported.
*
*/
message LessEqualLayerParams {
/**
* Compare to the scalar value provided here if there is 1 input
*/
float alpha = 2;
}
/**
* Equal Layer
*
* Either 1 or 2 inputs.
* Produces 1 output.
* Perform elementwise equal operation.
*
* Output is 1.0f if the condition is true otherwise 0.0f.
*
* .. code::
*
* y = x1 == x2
* or
* y = x1 == alpha, if only one input is provided
*
* Broadcasting is supported.
*
*/
message EqualLayerParams {
/**
* Compare to the scalar value provided here if there is 1 input
*/
float alpha = 1;
}
/**
* NotEqual Layer
*
* Either 1 or 2 inputs.
* Produces 1 output.
* Perform elementwise not equal operation.
*
* Output is 1.0f if the condition is true otherwise 0.0f.
*
* .. code::
*
* y = x1 != x2
* or
* y = x1 != alpha, if only one input is provided
*
* Broadcasting is supported.
*
*/
message NotEqualLayerParams {
/**
* Compare to the scalar value provided here if there is 1 input
*/
float alpha = 1;
}
/**
* LogicalAnd Layer
*
* Must have 2 inputs, produces 1 output.
* Perform elementwise logical AND operation.
*
* Input is considered False if equal to 0.0f otherwise True.
* Output is 1.0f if the condition is true otherwise 0.0f.
*
* .. code::
*
* y = AND(x1, x2)
*
* Broadcasting is supported.
*
*/
message LogicalAndLayerParams {
}
/**
* LogicalOr Layer
*
* Must have 2 inputs, produces 1 output.
* Perform elementwise logical OR operation.
*
* Input is considered False if equal to 0.0f otherwise True.
* Output is 1.0f if the condition is true otherwise 0.0f.
*
* .. code::
*
* y = OR(x1, x2)
*
* Broadcasting is supported.
*
*/
message LogicalOrLayerParams {
}
/**
* LogicalXor Layer
*
* Must have 2 inputs, produces 1 output.
* Perform elementwise logical XOR operation.
*
* Input is considered False if equal to 0.0f otherwise True.
* Output is 1.0f if the condition is true otherwise 0.0f.
*
* .. code::
*
* y = XOR(x1, x2)
*
* Broadcasting is supported.
*
*/
message LogicalXorLayerParams {
}
/**
* LogicalNot Layer
*
* Must have 1 input, produces 1 output.
* Perform elementwise logical NOT operation.
*
* Input is considered False if equal to 0.0f otherwise True.
* Output is 1.0f if the condition is true otherwise 0.0f.
*
* .. code::
*
* y = NOT(x)
*
*
*/
message LogicalNotLayerParams {
}
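/*
 * A minimal numpy sketch (illustrative only) of the comparison and logical layers above,
 * including the single-input scalar form and broadcasting. The helper names are hypothetical.
 *
 * .. code::
 *
 *     import numpy as np
 *
 *     def greater_than(x1, x2=None, alpha=0.0):
 *         # 1 input: compare against the scalar alpha; 2 inputs: elementwise with broadcasting
 *         other = alpha if x2 is None else x2
 *         return (x1 > other).astype(np.float32)     # 1.0f for True, 0.0f for False
 *
 *     def logical_and(x1, x2):
 *         return np.logical_and(x1 != 0.0, x2 != 0.0).astype(np.float32)
 *
 *     a = np.array([[1.0, -2.0], [0.0, 3.0]])
 *     print(greater_than(a, alpha=0.5))
 *     print(logical_and(a, np.array([1.0, 0.0])))    # second operand broadcasts across rows
 */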
/// Border Amounts
/// --------------
/**
* Specifies the amount of spatial border to be either padded or cropped.
*
* For padding:
*
* .. code::
*
* H_out = borderAmounts[0].startEdgeSize + H_in + borderAmounts[0].endEdgeSize
* W_out = borderAmounts[1].startEdgeSize + W_in + borderAmounts[1].endEdgeSize
*
* topPaddingAmount == Height startEdgeSize
* bottomPaddingAmount == Height endEdgeSize
* leftPaddingAmount == Width startEdgeSize
* rightPaddingAmount == Width endEdgeSize
*
* For cropping:
*
* .. code::
*
* H_out = (-borderAmounts[0].startEdgeSize) + H_in + (-borderAmounts[0].endEdgeSize)
* W_out = (-borderAmounts[1].startEdgeSize) + W_in + (-borderAmounts[1].endEdgeSize)
*
* topCropAmount == Height startEdgeSize
* bottomCropAmount == Height endEdgeSize
* leftCropAmount == Width startEdgeSize
* rightCropAmount == Width endEdgeSize
*/
message BorderAmounts {
message EdgeSizes {
/**
* The amount to be padded or cropped from the beginning.
*/
uint64 startEdgeSize = 1;
/**
* The amount to be padded or cropped from the end.
*/
uint64 endEdgeSize = 2;
}
/**
* The border amounts.
* This must be length 2 in the order ``[H, W]``.
*/
repeated EdgeSizes borderAmounts = 10;
}
/**
* Specifies the type of padding to be used with Convolution/Deconvolution and Pooling layers.
* After padding, input spatial shape: ``[H_in, W_in]``, gets modified to the
* output spatial shape ``[H_out, W_out]``.
*
* .. code::
*
* topPaddingAmount == Height startEdgeSize == borderAmounts[0].startEdgeSize
* bottomPaddingAmount == Height endEdgeSize == borderAmounts[0].endEdgeSize
* leftPaddingAmount == Width startEdgeSize == borderAmounts[1].startEdgeSize
* rightPaddingAmount == Width endEdgeSize == borderAmounts[1].endEdgeSize
*
* With Convolution or Pooling:
*
* .. code::
*
* H_out = int_division_round_down((H_in + topPaddingAmount + bottomPaddingAmount - KernelSize[0]),stride[0]) + 1
*
* which is same as:
*
* .. code::
*
* H_out = int_division_round_up((H_in + topPaddingAmount + bottomPaddingAmount - KernelSize[0] + 1),stride[0])
*
* With Deconvolution:
*
* .. code::
*
* H_out = (H_in-1) * stride[0] + kernelSize[0] - (topPaddingAmount + bottomPaddingAmount)
*
*
* The equivalent expressions hold true for ``W_out`` as well.
*
*
* By default, the values of ``paddingAmounts`` are set to ``0``,
* which results in a "true" valid padding.
* If non-zero values are provided for ``paddingAmounts``,
* "valid" convolution/pooling is performed within the spatially expanded input.
*
*/
message ValidPadding {
BorderAmounts paddingAmounts = 1;
}
/**
* Specifies the type of padding to be used with Convolution/Deconvolution and pooling layers.
* After padding, input spatial shape: ``[H_in, W_in]``, gets modified to the
* output spatial shape ``[H_out, W_out]``.
* With Convolution or pooling:
*
* .. code::
*
* H_out = int_division_round_up(H_in,stride[0])
* W_out = int_division_round_up(W_in,stride[1])
*
* This is achieved by using the following padding amounts:
*
* .. code::
*
* totalPaddingHeight = max(0,(H_out-1) * stride[0] + KernelSize[0] - Hin)
* totalPaddingWidth = max(0,(W_out-1) * stride[1] + KernelSize[1] - Win)
*
* There are two modes of asymmetry:
* ``BOTTOM_RIGHT_HEAVY``, and ``TOP_LEFT_HEAVY``.
*
* If the mode is ``BOTTOM_RIGHT_HEAVY``:
*
* .. code::
*
* topPaddingAmount = floor(totalPaddingHeight / 2)
* bottomPaddingAmount = totalPaddingHeight - topPaddingAmount
* leftPaddingAmount = floor(totalPaddingWidth / 2)
* rightPaddingAmount = totalPaddingWidth - leftPaddingAmount
*
* If the mode is ``TOP_LEFT_HEAVY``:
*
* .. code::
*
* bottomPaddingAmount = floor(totalPaddingHeight / 2)
* topPaddingAmount = totalPaddingHeight - bottomPaddingAmount
* rightPaddingAmount = floor(totalPaddingWidth / 2)
* leftPaddingAmount = totalPaddingWidth - rightPaddingAmount
*
*
* With Deconvolution:
*
* .. code::
*
* H_out = H_in * stride[0]
* W_out = W_in * stride[1]
*/
message SamePadding {
enum SamePaddingMode {
BOTTOM_RIGHT_HEAVY = 0;
TOP_LEFT_HEAVY = 1;
}
SamePaddingMode asymmetryMode = 1;
}
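/*
 * A minimal Python sketch (illustrative only) of how the per-side SAME padding amounts
 * follow from the formulas above. The helper name ``same_padding`` is hypothetical.
 *
 * .. code::
 *
 *     import math
 *
 *     def same_padding(h_in, w_in, kernel, stride, mode="BOTTOM_RIGHT_HEAVY"):
 *         h_out = math.ceil(h_in / stride[0])
 *         w_out = math.ceil(w_in / stride[1])
 *         total_h = max(0, (h_out - 1) * stride[0] + kernel[0] - h_in)
 *         total_w = max(0, (w_out - 1) * stride[1] + kernel[1] - w_in)
 *         if mode == "BOTTOM_RIGHT_HEAVY":
 *             top, left = total_h // 2, total_w // 2
 *         else:   # TOP_LEFT_HEAVY
 *             top, left = total_h - total_h // 2, total_w - total_w // 2
 *         return top, total_h - top, left, total_w - left   # (top, bottom, left, right)
 *
 *     print(same_padding(5, 5, kernel=(3, 3), stride=(2, 2)))   # (1, 1, 1, 1)
 */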
/**
* Specifies how grid points are sampled from an interval.
* Without loss of generality, assume the interval to be [0, X-1], from which N points are to be sampled.
* Here X may correspond to an input image's height or width.
* All the methods can be expressed in terms of numpy's linspace function, along with the constraint that grid points have to lie in the interval [0, X-1].
* Note: numpy.linspace(start = start, end = end, num = N, endpoint = True) corresponds to sampling
* N points uniformly from the interval [start, end], endpoints included.
* The methods vary in how the ``start`` and ``end`` values are computed.
*/
message SamplingMode {
enum Method {
/**
* start = 0, end = X-1
* grid points = numpy.linspace(start, end)
*/
STRICT_ALIGN_ENDPOINTS_MODE = 0;
/**
* if N == 1: start = end = (X-1)/2
* otherwise, start = 0, end = X-1
* grid points = numpy.linspace(start, end)
*/
ALIGN_ENDPOINTS_MODE = 1;
/**
* start = 0, end = X - X/N
* grid points = min(X-1, numpy.linspace(start, end))
* This is the same as the mode used in the upsample layer in this specification, when used with bilinear interpolation. In that case N/X = upsample ratio.
*/
UPSAMPLE_MODE = 2;
/**
* spacing = max(1, X-1)/N
* start = 0.5 * spacing
* end = start + (N-1) * spacing
* grid points = min(X-1, numpy.linspace(start, end))
*/
ROI_ALIGN_MODE = 3;
}
Method samplingMethod = 1;
}
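/*
 * A minimal numpy sketch (illustrative only) of the four sampling methods above.
 * The helper name ``grid_points`` is hypothetical.
 *
 * .. code::
 *
 *     import numpy as np
 *
 *     def grid_points(X, N, method):
 *         # Sample N grid points from the interval [0, X-1].
 *         if method == "STRICT_ALIGN_ENDPOINTS_MODE":
 *             return np.linspace(0, X - 1, N)
 *         if method == "ALIGN_ENDPOINTS_MODE":
 *             return np.full(N, (X - 1) / 2.0) if N == 1 else np.linspace(0, X - 1, N)
 *         if method == "UPSAMPLE_MODE":
 *             return np.minimum(X - 1, np.linspace(0, X - X / N, N))
 *         if method == "ROI_ALIGN_MODE":
 *             spacing = max(1, X - 1) / N
 *             start = 0.5 * spacing
 *             return np.minimum(X - 1, np.linspace(start, start + (N - 1) * spacing, N))
 *         raise ValueError(method)
 *
 *     print(grid_points(X=4, N=8, method="UPSAMPLE_MODE"))
 */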
/**
* Specifies the convention used to specify four bounding box coordinates for an image of size (Height, Width).
* The (0,0) coordinate corresponds to the top-left corner of the image.
*/
message BoxCoordinatesMode {
enum Coordinates {
/**
* [h_start, w_start, h_end, w_end]
*/
CORNERS_HEIGHT_FIRST = 0;
/**
* [w_start, h_start, w_end, h_end]
*/
CORNERS_WIDTH_FIRST = 1;
/**
* [h_center, w_center, box_height, box_width]
*/
CENTER_SIZE_HEIGHT_FIRST = 2;
/**
* [w_center, h_center, box_width, box_height]
*/
CENTER_SIZE_WIDTH_FIRST = 3;
}
Coordinates boxMode = 1;
}
/**
* Weights for layer parameters.
* Weights are stored as repeated floating point numbers
* using row-major ordering
* and can represent 1-, 2-, 3-, or 4-dimensional data.
*/
message WeightParams {
/**
* Values specified in single / float / FP32 precision.
*/
repeated float floatValue = 1;
/**
* Values in 16-bit half precision floating point.
*/
bytes float16Value = 2;
/**
* Raw value specification for custom layers and quantized lower precisions.
*/
bytes rawValue = 30;
/**
* Quantization related parameters.
*/
QuantizationParams quantization = 40;
bool isUpdatable = 50;
}
/**
* Quantization parameters.
*/
message QuantizationParams {
uint64 numberOfBits = 1;
oneof QuantizationType {
LinearQuantizationParams linearQuantization = 101;
LookUpTableQuantizationParams lookupTableQuantization = 102;
}
}
message LinearQuantizationParams {
/**
* Stores scale and bias values corresponding to the quantized weights.
* Must be an array of 1 element, or an array of C elements, where C
* is number of output channels. For recurrent layers it is equal to
* the output vector size.
*/
repeated float scale = 1;
repeated float bias = 2;
}
message LookUpTableQuantizationParams {
/* Stores look-up table quantization values. Must be an array of
(2^numberOfBits) elements.
*/
repeated float floatValue = 1;
}
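/*
 * A minimal numpy sketch (illustrative only) of recovering weights stored with the two
 * quantization schemes above, assuming a linear de-quantization of the form
 * ``w = scale * q + bias`` applied per output channel. The helper names are hypothetical.
 *
 * .. code::
 *
 *     import numpy as np
 *
 *     def dequantize_linear(q, scale, bias):
 *         # q: integer codes arranged as [C, weights per channel]; scale/bias: length 1 or C
 *         scale = np.asarray(scale).reshape(-1, 1)   # broadcast across each channel's weights
 *         bias = np.asarray(bias).reshape(-1, 1)
 *         return scale * q + bias
 *
 *     def dequantize_lut(q, lut):
 *         # lut: 2**numberOfBits float values; q: integer codes indexing into it
 *         return np.asarray(lut)[q]
 *
 *     codes = np.array([[0, 1, 2, 3], [3, 2, 1, 0]])   # 2 output channels, 2-bit codes
 *     print(dequantize_linear(codes, scale=[0.1, 0.2], bias=[0.0, -0.3]))
 *     print(dequantize_lut(codes, lut=[-1.0, -0.5, 0.5, 1.0]))
 */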
/// Layers
/// ------
/**
* A layer that performs spatial convolution or deconvolution.
*
* .. code::
*
* y = ConvolutionLayer(x)
*
* Requires 1 input and produces 1 output.
*
* Input
* A blob with rank greater than or equal to 4.
* A rank 4 blob represents [Batch, channels, height, width].
* For ranks greater than 4, the leading dimensions, starting from 0 to -4 (inclusive), are all treated as batch.
*
* Output
* Rank is same as the input. e.g.: for rank 4 input, output shape is [B, C_out, H_out, W_out]
*
*
* If ``dilationFactor`` is not 1, effective kernel size is
* modified as follows:
*
* .. code::
*
* KernelSize[0] <-- (kernelSize[0]-1) * dilationFactor[0] + 1
* KernelSize[1] <-- (kernelSize[1]-1) * dilationFactor[1] + 1
*
* Type of padding can be ``valid`` or ``same``. Output spatial dimensions depend on
* the type of padding. For details, refer to the descriptions of the messages "ValidPadding"
* and "SamePadding". Padded values are all zeros.
*
* For Deconvolution, ``ConvolutionPaddingType`` (``valid`` or ``same``) is ignored when ``outputShape`` is set.
*
*
*/
message ConvolutionLayerParams {
/**
* The number of kernels.
* Same as ``C_out`` used in the layer description.
*/
uint64 outputChannels = 1;
/**
* Channel dimension of the kernels.
* Must be equal to ``inputChannels / nGroups``, if isDeconvolution == False
* Must be equal to ``inputChannels``, if isDeconvolution == True
*/
uint64 kernelChannels = 2;
/**
* Group convolution, i.e. weight reuse along channel axis.
* Input and kernels are divided into g groups
* and convolution / deconvolution is applied within the groups independently.
* If not set or 0, it is set to the default value 1.
*/
uint64 nGroups = 10;
/**
* Must be length 2 in the order ``[H, W]``.
* If not set, default value ``[3, 3]`` is used.
*/
repeated uint64 kernelSize = 20;
/**
* Must be length 2 in the order ``[H, W]``.
* If not set, default value ``[1, 1]`` is used.
*/
repeated uint64 stride = 30;
/**
* Must be length 2 in order ``[H, W]``.
* If not set, default value ``[1, 1]`` is used.
* It is ignored if ``isDeconvolution == true``.
*/
repeated uint64 dilationFactor = 40;
/**
* The type of padding.
*/
oneof ConvolutionPaddingType {
ValidPadding valid = 50;
SamePadding same = 51;
}
/**
* Flag to specify whether it is a deconvolution layer.
*/
bool isDeconvolution = 60;
/**
* Flag to specify whether a bias is to be added or not.
*/
bool hasBias = 70;
/**
* Weights associated with this layer.
* If convolution (``isDeconvolution == false``), weights have the shape
* ``[outputChannels, kernelChannels, kernelHeight, kernelWidth]``, where kernelChannels == inputChannels / nGroups
* If deconvolution (``isDeconvolution == true``) weights have the shape
* ``[kernelChannels, outputChannels / nGroups, kernelHeight, kernelWidth]``, where kernelChannels == inputChannels
*/
WeightParams weights = 90;
WeightParams bias = 91; /// Must be of size [outputChannels].
/**
* The output shape, which has length 2 ``[H_out, W_out]``.
* This is used only for deconvolution (``isDeconvolution == true``).
* If not set, the deconvolution output shape is calculated
* based on ``ConvolutionPaddingType``.
*/
repeated uint64 outputShape = 100;
}
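/*
 * A minimal Python sketch (illustrative only) of the valid-padding output size computation,
 * using the effective (dilated) kernel size described above. The helper name
 * ``conv_output_size`` is hypothetical.
 *
 * .. code::
 *
 *     def conv_output_size(h_in, w_in, kernel, stride, dilation, pad):
 *         # pad = (top, bottom, left, right)
 *         kh = (kernel[0] - 1) * dilation[0] + 1
 *         kw = (kernel[1] - 1) * dilation[1] + 1
 *         h_out = (h_in + pad[0] + pad[1] - kh) // stride[0] + 1
 *         w_out = (w_in + pad[2] + pad[3] - kw) // stride[1] + 1
 *         return h_out, w_out
 *
 *     # A 3x3 kernel with dilation 2 behaves like a 5x5 kernel:
 *     print(conv_output_size(32, 32, kernel=(3, 3), stride=(1, 1),
 *                            dilation=(2, 2), pad=(0, 0, 0, 0)))   # (28, 28)
 */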
/**
* A layer that performs a matrix-vector or matrix-matrix product.
* This is equivalent to a fully-connected, or dense layer.
* The weight parameters correspond to a matrix of dimensions (outputChannels, inputChannels), i.e. (C_out, C_in)
*
* .. code::
*
* y = InnerProductLayer(x)
*
* Requires 1 input and produces 1 output.
*
* Input
* Input can have rank 1 to rank 5. This is how it is reshaped in to the matrix (for rank > 1):
* rank 1 (x1) : in this case, the layer corresponds to a matrix-vector product. x1 must be equal to C_in
* rank 2 (x1, x2): x2 must be equal to C_in
* rank 3 (x1, x2, x3) --> (x1 * x2, x3). x3 must be equal to C_in
* rank 4 (x1, x2, x3, x4) ---> (x1, x2 * x3 * x4). x2 * x3 * x4 must be equal to C_in
* rank 5 (x1, x2, x3, x4, x5) ---> (x1 * x2, x3 * x4 * x5). x3 * x4 * x5 must be equal to C_in
*
* Output
* Output rank is same as the input rank
* rank 1: (C_out)
* rank 2: (x1, C_out)
* rank 3: (x1, x2, C_out)
* rank 4: (x1, C_out, 1, 1)
* rank 5: (x1, x2, C_out, 1, 1)
*
*/
message InnerProductLayerParams {
uint64 inputChannels = 1; /// Input size: C_in.
uint64 outputChannels = 2; /// Output size: C_out.
bool hasBias = 10; /// Whether a bias is added or not.
WeightParams weights = 20; /// Weight matrix [C_out, C_in].
WeightParams bias = 21; /// Bias vector [C_out].
}
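/*
 * A minimal numpy sketch (illustrative only) of the reshape rules and the matrix product
 * described above, with the weight matrix stored as [C_out, C_in]. The helper name
 * ``inner_product`` is hypothetical.
 *
 * .. code::
 *
 *     import numpy as np
 *
 *     def inner_product(x, W, b=None):
 *         rank, s = x.ndim, x.shape
 *         if rank == 1:   mat = x.reshape(1, s[0])
 *         elif rank == 2: mat = x
 *         elif rank == 3: mat = x.reshape(s[0] * s[1], s[2])
 *         elif rank == 4: mat = x.reshape(s[0], s[1] * s[2] * s[3])
 *         elif rank == 5: mat = x.reshape(s[0] * s[1], s[2] * s[3] * s[4])
 *         else: raise ValueError("rank must be between 1 and 5")
 *         y = mat @ W.T + (b if b is not None else 0.0)
 *         c_out = W.shape[0]
 *         out_shape = {1: (c_out,), 2: (s[0], c_out), 3: (s[0], s[1], c_out),
 *                      4: (s[0], c_out, 1, 1), 5: (s[0], s[1], c_out, 1, 1)}[rank]
 *         return y.reshape(out_shape)
 *
 *     x = np.random.rand(2, 3, 8)          # rank 3, C_in = 8
 *     W = np.random.rand(4, 8)             # C_out = 4
 *     print(inner_product(x, W).shape)     # (2, 3, 4)
 */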
/**
* A layer that performs a matrix lookup and optionally adds a bias.
* The weights matrix is stored with dimensions [outputChannels, inputDim].
*
* .. code::
*
* y = EmbeddingLayer(x)
*
* Requires 1 input and produces 1 output.
*
* Input
* Input values must be in the range ``[0, inputDim - 1]``.
*
* Input must have rank equal to 4 or 5, such that the last 3 dimensions are all 1.
* rank 4: shape (x1, 1, 1, 1). x1 is effectively the batch/sequence length.
* rank 5: shape (x1, x2 , 1, 1, 1). x1 * x2 is effectively the combined batch/sequence length.
*
* Output
* Output rank is same as the input rank. Please see input description above.
* rank 4: shape (x1, outputChannels, 1, 1)
* rank 5: shape (x1, x2, outputChannels, 1, 1)
*
*/
message EmbeddingLayerParams {
uint64 inputDim = 1; /// Size of the input dictionary.
uint64 outputChannels = 2; /// Size of the output vectors.
bool hasBias = 10; /// Whether a bias is added or not.
WeightParams weights = 20; /// 2-D weights of dimensions [outputChannels, inputDim].
WeightParams bias = 21; /// Bias of size [outputChannels].
}
/**
* A layer that performs a matrix lookup and optionally adds a bias.
* The weights matrix is stored with dimensions [embeddingSize, vocabSize].
*
* .. code::
*
* y = EmbeddingNDLayer(x)
*
* Requires 1 input and produces 1 output.
*
* Input
* Input values must be in the range ``[0, vocabSize - 1]``.
* Input must have rank at least 2. The last dimension must always be 1.
* rank 2: shape (x1, 1). x1 is the batch/sequence length.
* rank 3: shape (x1, x2, 1). x1 * x2 is effectively the combined batch/sequence length.
* rank 4: shape (x1, x2, x3, 1). x1 * x2 * x3 is effectively the combined batch/sequence length.
* rank 5: shape (x1, x2 , x3, x4, 1). x1 * x2 * x3 * x4 is effectively the combined batch/sequence length.
*
* Output
* Output rank is same as the input rank. Please see input description above.
* rank 2: shape (x1, embeddingSize)
* rank 3: shape (x1, x2, embeddingSize)
* rank 4: shape (x1, x2, x3, embeddingSize)
* rank 5: shape (x1, x2, x3, x4, embeddingSize)
*
*/
message EmbeddingNDLayerParams {
uint64 vocabSize = 1; /// Size of the input dictionary.
uint64 embeddingSize = 2; /// Size of the output vectors.
bool hasBias = 3; /// Whether a bias is added or not.
WeightParams weights = 20; /// 2-D weights of dimensions [embeddingSize, vocabSize].
WeightParams bias = 21; /// Bias of size [embeddingSize].
}
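/*
 * A minimal numpy sketch (illustrative only) of the EmbeddingND lookup above, where the
 * weight matrix is [embeddingSize, vocabSize]. The helper name ``embedding_nd`` is hypothetical.
 *
 * .. code::
 *
 *     import numpy as np
 *
 *     def embedding_nd(indices, W, b=None):
 *         # indices: integer array whose last dimension is 1; output ends in embeddingSize
 *         idx = indices[..., 0].astype(np.int64)   # drop the trailing 1
 *         y = W.T[idx]                             # gather columns of W
 *         return y if b is None else y + b
 *
 *     vocab_size, embedding_size = 10, 4
 *     W = np.random.rand(embedding_size, vocab_size)
 *     tokens = np.array([[1], [7], [3]])           # rank 2: (x1, 1)
 *     print(embedding_nd(tokens, W).shape)         # (3, 4)
 */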
/**
* A layer that performs batch normalization,
* which is performed along axis = -3,
* and repeated along the other axes, if present.
*
* .. code::
*
* y = BatchnormLayer(x)
*
* Requires 1 input and produces 1 output.
*
* This operation is described by the following formula:
*
* .. math::
* y_i = \gamma_i \dfrac{ (x_i - \mu_i)}{\sqrt{\sigma_i^2 + \epsilon}} + \beta_i \;,\;i=1,....,C
*
* Input
* A blob with rank greater than or equal to 3.
* Example: Rank 4 blob represents [Batch, channels, height, width]
* For ranks greater than 3, the leading dimensions, starting from 0 to -4 (inclusive), are all treated as batch.
*
* Output
* A blob with the same shape as the input.
*/
message BatchnormLayerParams {
uint64 channels = 1; /// Size of the channel dimension in the input.
/**
* If ``computeMeanVar == true``,
* the mean and variance are calculated from either
* the single input instance, if ``instanceNormalization == true``,
* or the whole batch, if ``instanceNormalization == false``,
* and the values provided in the parameters "mean" and "variance" are ignored.
*/
bool computeMeanVar = 5;
bool instanceNormalization = 6;
/**
* A small constant to avoid division by 0 while normalizing by variance.
* Defaults to ``1e-5`` if not set or set to ``0``.
*/
float epsilon = 10;
WeightParams gamma = 15; /// Parameter of length [channels]
WeightParams beta = 16; /// Parameter of length [channels]
WeightParams mean = 17; /// Parameter of length [channels]
WeightParams variance = 18; /// Parameter of length [channels]
}
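/*
 * A minimal numpy sketch (illustrative only) of the batch normalization formula above,
 * applied along axis = -3 with per-channel parameters. The helper name ``batchnorm`` is hypothetical.
 *
 * .. code::
 *
 *     import numpy as np
 *
 *     def batchnorm(x, gamma, beta, mean, variance, epsilon=1e-5):
 *         # x: [..., C, H, W]; per-channel parameters of length C
 *         shape = (-1, 1, 1)                       # broadcast across H and W
 *         g, b = gamma.reshape(shape), beta.reshape(shape)
 *         mu, var = mean.reshape(shape), variance.reshape(shape)
 *         return g * (x - mu) / np.sqrt(var + epsilon) + b
 *
 *     x = np.random.rand(2, 3, 4, 4)               # [Batch, C, H, W]
 *     y = batchnorm(x, np.ones(3), np.zeros(3), np.zeros(3), np.ones(3))
 *     print(y.shape)                               # (2, 3, 4, 4)
 */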
/**
* A spatial pooling layer.
*
* .. code::
*
* y = PoolingLayer(x)
*
* Requires 1 input and produces 1 output.
*
* Input
* A blob with rank greater than or equal to 4.
* A rank 4 blob represents [Batch, channels, height, width].
* For ranks greater than 4, the leading dimensions, starting from 0 to -4 (inclusive), are all treated as batch.
*
* Output
* Rank is same as the input. e.g.: for rank 4 input, output shape is [B, C, H_out, W_out]
*
* Padding options are similar to ``ConvolutionLayerParams``
* with the additional option of ``ValidCompletePadding`` (``includeLastPixel``),
* which ensures that the last application of the kernel
* always includes the last pixel of the input image, if there is padding.
*
* .. code::
*
* H_out = int_division_round_up((H_in + 2 * paddingAmounts[0] - kernelSize[0]), Stride[0]) + 1
* if (paddingAmounts[0] > 0 or paddingAmounts[1] > 0) {
*     if ((H_out - 1) * Stride[0] >= H_in + paddingAmounts[0]) {
*         H_out = H_out - 1
*     }
* }
*
* The equivalent expressions hold true for ``W_out`` as well.
* Only symmetric padding is supported with this option.
*/
message PoolingLayerParams {
enum PoolingType {
MAX = 0;
AVERAGE = 1;
L2 = 2;
}
PoolingType type = 1; /// Type of pooling operation.
/**
* Must be length 2 in the order ``[H, W]``.
* If not set, default value ``[3, 3]`` is used.
*/
repeated uint64 kernelSize = 10;
/**
* Must be length 2 in the order ``[H, W]``.
* If not set, default value ``[1, 1]`` is used.
*/
repeated uint64 stride = 20;
message ValidCompletePadding {
/**
* Must be length 2 in order ``[H, W]``.
* If not set, value ``[0, 0]`` is used.
*/
repeated uint64 paddingAmounts = 10;
}
oneof PoolingPaddingType {
ValidPadding valid = 30;
SamePadding same = 31;
ValidCompletePadding includeLastPixel = 32;
}
/**
* If true, padded values are excluded from the count (denominator)
* when computing average pooling.
*/
bool avgPoolExcludePadding = 50;
/**
* If true, global pooling is performed.
* Kernel size is inferred from the input data spatial dimensions.
*/
bool globalPooling = 60;
}
/**
* A layer that performs padding along spatial dimensions.
*
* .. code::
*
* y = PaddingLayer(x)
*
* Requires 1 input and produces 1 output.
*
* Input
* A blob with rank at least 3.
* e.g.: blob with shape ``[C, H_in, W_in]``.
* For ranks greater than 3, the leading dimensions, starting from 0 to -4 (inclusive), are all treated as batch.
*
* Output
* Same rank as the input.
* e.g.: blob with shape ``[C, H_out, W_out]``.
*
* Output dimensions are calculated as follows:
*
* .. code::
*
* H_out = H_in + topPaddingAmount + bottomPaddingAmount
* W_out = W_in + leftPaddingAmount + rightPaddingAmount
*
* topPaddingAmount == Height startEdgeSize == borderAmounts[0].startEdgeSize
* bottomPaddingAmount == Height endEdgeSize == borderAmounts[0].endEdgeSize
* leftPaddingAmount == Width startEdgeSize == borderAmounts[1].startEdgeSize
* rightPaddingAmount == Width endEdgeSize == borderAmounts[1].endEdgeSize
*
* There are three types of padding:
*
* - ``PaddingConstant``, which fills a constant value at the border.
* - ``PaddingReflection``, which reflects the values at the border.
* - ``PaddingReplication``, which replicates the values at the border.
*
* Given the following input:
*
* .. code::
*
* [1, 3, 4] : 1 2 3 4
* 5 6 7 8
* 9 10 11 12
*
* Here is the output of applying the padding
* ``(top=2, left=2, bottom=0, right=0)``
* with each of the supported types:
*
* - ``PaddingConstant`` (``value = 0``):
* .. code::
*
* [1, 5, 6] : 0 0 0 0 0 0
* 0 0 0 0 0 0
* 0 0 1 2 3 4
* 0 0 5 6 7 8
* 0 0 9 10 11 12
*
* - ``PaddingReflection``:
* .. code::
*
* [1, 5, 6] : 11 10 9 10 11 12
* 7 6 5 6 7 8
* 3 2 1 2 3 4
* 7 6 5 6 7 8
* 11 10 9 10 11 12
*
* - ``PaddingReplication``:
* .. code::
*
* [1, 5, 6] : 1 1 1 2 3 4
* 1 1 1 2 3 4
* 1 1 1 2 3 4
* 5 5 5 6 7 8
* 9 9 9 10 11 12
*/
message PaddingLayerParams {
/**
* Fill a constant value in the padded region.
*/
message PaddingConstant {
float value = 1;
}
/**
* Reflect the values at the border for padding.
*/
message PaddingReflection {
}
/**
* Replicate the values at the border for padding.
*/
message PaddingReplication {
}
oneof PaddingType {
PaddingConstant constant = 1;
PaddingReflection reflection = 2;
PaddingReplication replication = 3;
}
BorderAmounts paddingAmounts = 10; /// Amounts to be padded to the input.
}
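/*
 * A minimal numpy sketch (illustrative only) of the three padding types above, mapped onto
 * numpy.pad modes ('constant', 'reflect', 'edge' for replication). The helper name
 * ``pad_spatial`` is hypothetical.
 *
 * .. code::
 *
 *     import numpy as np
 *
 *     def pad_spatial(x, top, bottom, left, right, mode, value=0.0):
 *         # x: [..., H, W]
 *         pad_width = [(0, 0)] * (x.ndim - 2) + [(top, bottom), (left, right)]
 *         if mode == "constant":
 *             return np.pad(x, pad_width, mode="constant", constant_values=value)
 *         return np.pad(x, pad_width, mode=mode)   # 'reflect' or 'edge'
 *
 *     x = np.arange(1, 13, dtype=np.float32).reshape(1, 3, 4)
 *     print(pad_spatial(x, 2, 0, 2, 0, "reflect")[0])   # matches the reflection example above
 */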
/**
* A layer that concatenates along the axis = -3 or -5.
* For general concatenation along any axis, see ConcatNDLayer.
*
* .. code::
*
* y = ConcatLayer(x1,x2,....)
*
* Requires more than 1 input and produces 1 output.
*
* Input
* All input blobs must have same rank.
* If "sequenceConcat" = False, rank must be greater than equal to 3. In this case concatenation is along axis = -3
* If "sequenceConcat" = True, rank must be greater than equal to 5. In this case concatenation is along axis = -5
*
* Output
* Same rank as the input.
*
*/
message ConcatLayerParams {
/**
* If true, concatenate along the axis = -5 instead of axis = -3.
*/
bool sequenceConcat = 100;
}
/**
* A layer that performs local response normalization (LRN).
*
* .. code::
*
* y = LRNLayer(x)
*
* Requires 1 input and produces 1 output.
*
* Input
* A blob with rank greater than or equal to 3.
* Example: Rank 4 blob represents [Batch, channels, height, width]
* For ranks greater than 3, the leading dimensions, starting from 0 to -4 (inclusive), are all treated as batch.
* Output
* A blob with the same shape as the input.
*
* This layer is described by the following formula:
*
* .. math::
* x_i \leftarrow \dfrac{x_i}{\left ( k + \dfrac{\alpha}{C} \sum_j x_j^2 \right )^\beta}
*
* where the summation is done over a ``(localSize, 1, 1)`` neighborhood ---
* that is, over a window "across" channels in 1x1 spatial neighborhoods.
*/
message LRNLayerParams {
float alpha = 1;
float beta = 2;
uint64 localSize = 3; /// Number of channels in the normalization window.
float k = 4; /// Defaults to 1 if not set or 0. Must be strictly positive.
}
/**
* Softmax Normalization Layer
*
* A layer that performs softmax normalization.
* Normalization is applied along axis = -3 or N-3 (where N is the rank of the input)
* For softmax layer that can operate on any axis, see SoftmaxNDLayer.
*
*
* .. code::
*
* y = SoftmaxLayer(x)
*
* Requires 1 input and produces 1 output.
*
* Input
* Must be a blob with rank >= 3.
* Output
* A blob with the same shape as the input.
*
* This layer is described by the following formula:
*
* .. math::
* x_i \leftarrow \dfrac{e^{x_i}}{\sum_j{e^{x_j}}}
*/
message SoftmaxLayerParams {
}
/**
* A layer that uniformly splits across axis = -3 to produce a specified number of outputs.
* For general split operation along any axis, see SplitNDLayer.
*
* .. code::
*
* (y1,y2,...yN) = SplitLayer(x), where N = nOutputs
*
* Requires 1 input and produces multiple outputs.
*
* Input
* A blob with rank at least 3.
* e.g.: blob with shape ``[C, H, W]``
* Output
* ``nOutputs`` blobs each with same rank as the input.
* e.g.: For input that is of shape ``[C, H, W]``, output shapes will be ``[C/nOutputs, H, W]``
*/
message SplitLayerParams {
uint64 nOutputs = 1; /// The number of outputs.
}
/**
* A layer that performs elementwise addition.
* This layer has limited broadcasting support. For general broadcasting see AddBroadcastableLayer.
*
* .. code::
*
* y = AddLayer(x1,x2,...)
*
* Requires 1 or more than 1 input and produces 1 output.
*
* Input
* In general, there are no rank constraints.
* However, only certain sets of shapes are broadcastable. For example:
* [B, 1, 1, 1], [B, C, 1, 1], [B, 1, H, W], [B, C, H, W]
* Output
* A blob with shape equal to the input blob.
*
* If only one input is provided, scalar addition is performed:
*
* .. math::
* y = x + \alpha
*
*/
message AddLayerParams {
/**
* Scalar to be added to the input.
* Only used if there is a single input.
*/
float alpha = 1;
}
/**
* A layer that performs elementwise multiplication.
* This layer has limited broadcasting support. For general broadcasting see MultiplyBroadcastableLayer.
*
* .. code::
*
* y = MultiplyLayer(x1,x2,...)
*
* Requires 1 or more than 1 input and produces 1 output.
*
* Input
* In general, there are no rank constraints.
* However, only certain sets of shapes are broadcastable. For example:
* [B, 1, 1, 1], [B, C, 1, 1], [B, 1, H, W], [B, C, H, W]
* Output
* A blob with shape equal to the first input blob.
*
* If only one input is provided, scalar multiplication is performed:
*
* .. math::
* y = \alpha x
*
*/
message MultiplyLayerParams {
/**
* Scalar to be multiplied with the input.
* Only used if there is a single input.
*/
float alpha = 1;
}
/**
* A layer that applies a unary function.
*
* .. code::
*
* y = UnaryFunctionLayer(x)
*
* Requires 1 input and produces 1 output.
*
* Input
* A blob with no rank constraints.
* Output
* A blob with the same shape as the input.
*
* The input is first modified by shifting and scaling:
*
* .. math::
* x \leftarrow \text{scale} \cdot x + \text{shift}
*/
message UnaryFunctionLayerParams {
/**
* A unary operator.
*
* The following functions are supported:
*
* ``SQRT``
* .. math:: f(x) = \sqrt{x}
*
* ``RSQRT``
* .. math:: f(x) = \dfrac{1}{\sqrt{x + \epsilon}}
*
* ``INVERSE``
* .. math:: f(x) = \dfrac{1}{x + \epsilon}
*
* ``POWER``
* .. math:: f(x) = x^\alpha
*
* ``EXP``
* .. math:: f(x) = e^x
*
* ``LOG``
* .. math:: f(x) = \log x
*
* ``ABS``
* .. math:: f(x) = |x|
*
* ``THRESHOLD``
* .. math:: f(x) = \text{max}(\alpha, x)
*/
enum Operation {
SQRT = 0;
RSQRT = 1;
INVERSE = 2;
POWER = 3;
EXP = 4;
LOG = 5;
ABS = 6;
THRESHOLD = 7;
}
Operation type = 1; /// The type of unary function.
/**
* A constant used in ``POWER`` and ``THRESHOLD`` functions.
*/
float alpha = 2;
/**
* A small constant to avoid division by 0 while normalizing variance.
* Defaults to ``1e-6`` if not set or set to ``0``.
*/
float epsilon = 3;
/**
* Input is shifted by this amount
* before the unary function is applied.
* Defaults to ``0.0`` if not set.
*/
float shift = 4;
/**
* Input is scaled by this amount
* before the unary function is applied.
* Defaults to ``1.0`` if not set or set to ``0``.
*/
float scale = 5;
}
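/*
 * A minimal numpy sketch (illustrative only) of the unary function layer above: the input
 * is first scaled and shifted, then the chosen operation is applied. The helper name
 * ``unary`` is hypothetical.
 *
 * .. code::
 *
 *     import numpy as np
 *
 *     def unary(x, op, alpha=1.0, epsilon=1e-6, shift=0.0, scale=1.0):
 *         x = scale * x + shift
 *         ops = {
 *             "SQRT": np.sqrt,
 *             "RSQRT": lambda v: 1.0 / np.sqrt(v + epsilon),
 *             "INVERSE": lambda v: 1.0 / (v + epsilon),
 *             "POWER": lambda v: v ** alpha,
 *             "EXP": np.exp,
 *             "LOG": np.log,
 *             "ABS": np.abs,
 *             "THRESHOLD": lambda v: np.maximum(alpha, v),
 *         }
 *         return ops[op](x)
 *
 *     print(unary(np.array([1.0, 4.0, 9.0]), "SQRT"))
 *     print(unary(np.array([-1.0, 0.5, 2.0]), "THRESHOLD", alpha=0.0))
 */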
/**
* A layer that scales up spatial dimensions.
* It supports two modes: nearest neighbour (default) and bilinear.
*
* .. code::
*
* y = UpsampleLayer(x)
*
* Requires 1 input and produces 1 output.
*
* Input
* A blob with rank at least 3.
* e.g.: blob with shape ``[C, H, W]``.
* For ranks greater than 3, the leading dimensions, starting from 0 to -4 (inclusive), are all treated as batch.
*
* Output
* Same rank as the input.
* e.g.: blob with shape ``[C, scalingFactor[0] * H, scalingFactor[1] * W]``
*/
message UpsampleLayerParams {
/**
* Scaling Factor.
* Must be length 2 in order ``[H, W]``.
* If not set, default value ``[1, 1]`` is used.
*/
repeated uint64 scalingFactor = 1;
enum InterpolationMode {
NN = 0; /// Nearest Neighbour
BILINEAR = 1; /// Bilinear
}
InterpolationMode mode = 5;
}
/**
* A layer that resizes the input to a pre-specified spatial size using bilinear interpolation.
*
* .. code::
*
* y = ResizeBilinearLayer(x)
*
* Requires 1 input and produces 1 output.
*
* Input
* A blob with rank at least 3.
* e.g.: blob with shape ``[C, H_in, W_in]``.
* For ranks greater than 3, the leading dimensions, starting from 0 to -4 (inclusive), are all treated as batch.
*
* Output
* Same rank as the input.
* e.g.: blob with shape ``[C, H_out, W_out]``.
*
*/
message ResizeBilinearLayerParams {
/**
* Target Spatial Size.
* Must be length 2 in order ``[Height, Width]``, i.e. ``[H_out, W_out]``.
* If not set, default value ``[1, 1]`` is used.
*/
repeated uint64 targetSize = 1;
/**
* Mode used to compute the grid on which the spatial output values are evaluated.
* Same mode is applied to both the height and width axes.
*/
SamplingMode mode = 2;
}
/**
* A layer that extracts cropped spatial patches or RoIs (regions of interest) from the input and resizes them to a pre-specified size using
* bilinear interpolation.
* Note that RoI Align layer can be implemented with this layer followed by a pooling layer.
*
* .. code::
*
* y = CropResizeLayer(x)
*
* Requires 2 inputs and produces 1 output.
*
* Input
* There are two inputs.
* First input represents an image feature map.
* Second input represents the bounding box coordinates for N patches or RoIs (region of interest).
*
* First input is rank 5: [1, Batch, C, H_in, W_in].
* Second input is rank 5. Its shape can be either [N, 1, 4, 1, 1] or [N, 1, 5, 1, 1].
*
* N: number of patches/RoIs to be extracted
*
* If RoI shape = ``[N, 1, 4, 1, 1]``
* The axis=-3 corresponds to the four coordinates specifying the bounding box.
* All the N RoIs are extracted from all the batches of the input.
*
* If RoI shape = ``[N, 1, 5, 1, 1]``
* The first element of the axis=-3 specifies the input batch id from which to extract the RoI and
* must be in the interval ``[0, Batch - 1]``. That is, n-th RoI is extracted from the RoI[n,0,0,0,0]-th
* input batch id. The last four elements of the axis=-3 specify the bounding box coordinates.
*
* Output
* A blob with rank 5.
* - Shape is [N, Batch, C, H_out, W_out] if input RoI shape is [N, 1, 4, 1, 1]
* - Shape is [N, 1, C, H_out, W_out] if input RoI shape is [N, 1, 5, 1, 1]
*
*/
message CropResizeLayerParams {
/**
* Target Spatial Size.
* Must be length 2 in order ``[Height, Width]``, i.e. ``[H_out, W_out]``.
* If not set, default value ``[1, 1]`` is used.
*/
repeated uint64 targetSize = 1;
/**
* If true the bounding box coordinates must be in the interval [0, 1].
* They are scaled by (H_in - 1), (W_in - 1), i.e. based on the input spatial dimensions.
* If false the bounding box coordinates must be in the interval
* [0, H_in -1] and [0, W_in - 1], respectively for height and width dimensions.
*/
bool normalizedCoordinates = 2;
/**
* Mode used to compute the grid on which the spatial output values are evaluated.
* Same mode is applied to both the height and width axes.
*/
SamplingMode mode = 3;
/**
* Representation used to express the bounding box coordinates.
* It determines how the values of the second input are interpreted.
*/
BoxCoordinatesMode boxIndicesMode = 4;
/**
* Additional spatial scale that multiplies the bounding box coordinates.
* Generally used while implementing the RoI Align layer,
* which uses unnormalized RoI coordinates along with a spatial scale less than or equal to 1.
*/
float spatialScale = 5;
}
/**
* A layer that performs elementwise addition of a bias,
* which is broadcasted to match the input shape.
*
* .. code::
*
* y = BiasLayer(x)
*
* Requires 1 input and produces 1 output.
*
* Input
* A blob with rank at least 3.
* e.g.: blob with shape ``[C, H, W]``.
* For ranks greater than 3, the leading dimensions, starting from 0 to -4 (inclusive), are all treated as batch.
* Output
* A blob with the same shape as the input.
*/
message BiasLayerParams {
/**
* The shape of the bias.
* Must be one of the following:
* ``[1]``, ``[C]``, ``[1, H, W]`` or ``[C, H, W]``.
*/
repeated uint64 shape = 1;
/**
* The bias values.
* The size must be equal to the product of the ``shape`` dimensions.
*/
WeightParams bias = 2;
}
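/*
 * Illustration (not part of the specification): a minimal NumPy sketch of the bias
 * broadcast for a single ``[C, H, W]`` input; a bias of shape ``[1]`` or ``[C]``
 * aligns with the channel axis.
 *
 * .. code::
 *
 *     import numpy as np
 *
 *     def bias_layer(x, bias_values, bias_shape):
 *         # x: [C, H, W]; bias_shape is one of [1], [C], [1, H, W] or [C, H, W].
 *         b = np.asarray(bias_values, dtype=x.dtype).reshape(bias_shape)
 *         if b.ndim == 1:
 *             b = b.reshape(-1, 1, 1)   # align [1] or [C] with the channel axis
 *         return x + b
 *
 *     y = bias_layer(np.zeros((3, 4, 5)), [1.0, 2.0, 3.0], [3])
 */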
/**
* A layer that performs elementwise multiplication by a scale factor
* and optionally adds a bias;
* both the scale and bias are broadcasted to match the input shape.
*
* .. code::
*
* y = ScaleLayer(x)
*
* Requires 1 input and produces 1 output.
*
* Input
* A blob with rank at least 3.
* e.g.: blob with shape ``[C, H, W]``.
* For ranks greater than 3, the leading dimensions, starting from 0 to -4 (inclusive), are all treated as batch.
* Output
* A blob with the same shape as the input.
*/
message ScaleLayerParams {
/**
* The shape of the scale.
* Must be one of the following:
* ``[1]``, ``[C]``, ``[1, H, W]`` or ``[C, H, W]``.
*/
repeated uint64 shapeScale = 1;
/**
* The scale values.
* The size must be equal to the product of the ``shapeScale`` dimensions.
*/
WeightParams scale = 2; /// Scale values. Size must be equal to the product of dimensions specified in shapeScale.
bool hasBias = 3; /// If true, a bias is added after scaling.
/**
* The shape of the bias.
* Must be one of the following:
* ``[1]``, ``[C]``, ``[1, H, W]`` or ``[C, H, W]``.
*/
repeated uint64 shapeBias = 4;
/**
* The bias values.
* The size must be equal to the product of the ``shapeBias`` dimensions.
*/
WeightParams bias = 5;
}
/**
* A layer that loads data as a parameter and provides it as an output.
* The output is rank 5. For general rank, see LoadConstantNDLayer.
*
* .. code::
*
* y = LoadConstantLayer()
*
* Requires no input and produces 1 output.
*
* Output:
* A blob with rank 5 and shape ``[1, 1, C, H, W]``
*/
message LoadConstantLayerParams {
/**
* The shape of the constant to be loaded,
* which must be ``[C, H, W]``, i.e. of length 3.
*/
repeated uint64 shape = 1;
/**
* The data values,
* of size ``C * H * W``.
*/
WeightParams data = 2;
}
/**
* A layer that performs L2 normalization, i.e. divides by the
* square root of the sum of squares of all elements of the input.
*
* .. code::
*
* y = L2NormalizeLayer(x)
*
* Requires 1 input and produces 1 output.
*
* Input
*     A blob with rank greater than or equal to 3.
* For ranks greater than 3, the leading dimensions, starting from 0 to -4 (inclusive), are all treated as batch.
* Output
* A blob with the same shape as the input.
*
* This layer is described by the following formula:
*
* .. math::
* x_i \leftarrow \dfrac{x_i}{\sqrt{\sum{x_i^2} + \epsilon}}
*/
message L2NormalizeLayerParams {
/**
* A small constant to avoid division by 0 during normalization.
* Defaults to ``1e-6`` if not set or set to ``0``.
*/
float epsilon = 1;
}
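/*
 * Illustration (not part of the specification): a minimal NumPy sketch of the
 * formula above, normalizing over the trailing ``[C, H, W]`` axes and treating
 * any leading axes as batch.
 *
 * .. code::
 *
 *     import numpy as np
 *
 *     def l2_normalize(x, epsilon=1e-6):
 *         # Divide every element by the L2 norm taken over the last three axes.
 *         norm = np.sqrt(np.sum(np.square(x), axis=(-3, -2, -1), keepdims=True) + epsilon)
 *         return x / norm
 */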
/// Data Reorganization Layers
/// --------------------------
/**
* A layer that flattens the input.
*
* .. code::
*
* y = FlattenLayer(x)
*
* Requires 1 input and produces 1 output.
*
* Input
*     A blob with rank greater than or equal to 3.
* e.g.: Rank 4 blob represents [Batch, C, H, W]
* For ranks greater than 3, the leading dimensions, starting from 0 to -4 (inclusive), are all treated as batch.
* Output
* Same rank as the input, such that last two dimensions are both 1.
* e.g.: For rank 4 input, output shape is ``[Batch, C * H * W, 1, 1]``
*
* There are two flatten orders: ``CHANNEL_FIRST`` and ``CHANNEL_LAST``.
* ``CHANNEL_FIRST`` does not require data to be rearranged,
* because row major ordering is used by internal storage.
* ``CHANNEL_LAST`` requires data to be rearranged.
*/
message FlattenLayerParams {
enum FlattenOrder {
CHANNEL_FIRST = 0;
CHANNEL_LAST = 1;
}
FlattenOrder mode = 1;
}
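/*
 * Illustration (not part of the specification): a minimal NumPy sketch of both
 * flatten orders for a rank 4 ``[Batch, C, H, W]`` input.
 *
 * .. code::
 *
 *     import numpy as np
 *
 *     def flatten(x, order="CHANNEL_FIRST"):
 *         # x: [Batch, C, H, W] -> [Batch, C*H*W, 1, 1].
 *         if order == "CHANNEL_LAST":
 *             x = np.transpose(x, (0, 2, 3, 1))   # flatten in H, W, C order instead
 *         return x.reshape(x.shape[0], -1, 1, 1)
 */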
/**
* A layer that recasts the input into a new shape.
*
* .. code::
*
* y = ReshapeLayer(x)
*
* Requires 1 input and produces 1 output.
*
* Input
* A blob with rank 5.
* e.g.: ``[1, 1, C, H, W]`` or ``[Seq, 1, C, H, W]``.
* Output
* A blob with rank 5.
* e.g.: ``[1, 1, C_out, H_out, W_out]`` or ``[Seq_out, 1, C_out, H_out, W_out]``.
*
* There are two reshape orders: ``CHANNEL_FIRST`` and ``CHANNEL_LAST``.
* ``CHANNEL_FIRST`` is equivalent to
* flattening the input to ``[Seq, 1, C * H * W, 1, 1]`` in channel first order
* and then reshaping it to the target shape;
* no data rearrangement is required.
* ``CHANNEL_LAST`` is equivalent to
* flattening the input to ``[Seq, 1, H * W * C, 1, 1]`` in channel last order,
* reshaping it to ``[Seq_out, 1, H_out, W_out, C_out]`` (it is now in "H_out-major" order),
* and then permuting it to ``[C_out, H_out, W_out]``;
* both the flattening and permuting require the data to be rearranged.
*/
message ReshapeLayerParams {
/**
* The shape of the output.
* Must be of length 3 or 4.
* If set to 3, ``targetShape`` is interpreted as
* ``[1, 1, C_out, H_out, W_out]``, and sequence length of the input is preserved.
* If set to 4, ``targetShape`` is interpreted as
* ``[Seq_out, 1, C_out, H_out, W_out]``,
* where ``Seq_out`` is the new sequence length.
*/
repeated int64 targetShape = 1;
enum ReshapeOrder {
CHANNEL_FIRST = 0;
CHANNEL_LAST = 1;
}
ReshapeOrder mode = 2;
}
/**
* A layer that rearranges the dimensions and data of an input.
* For a generic transpose/permute operation, see TransposeLayer.
*
* .. code::
*
* y = PermuteLayer(x)
*
* Requires 1 input and produces 1 output.
*
* Input
* Must be a rank 5 blob.
* e.g.: shape ``[Seq, B, C, H, W]``.
* Output
*     Rank 5 blob. Transposed version of the input, such that the dimension at axis=1 (i.e. axis=-4) is unchanged.
*
*
* Examples:
*
* Assume input shape is [Seq, B, C, H, W]
*
* - If ``axis`` is set to ``[0, 3, 1, 2]``,
* then the output has shape ``[Seq, B, W, C, H]``
*
* - If ``axis`` is set to ``[3, 1, 2, 0]``,
* then the output has shape ``[W, B, C, H, Seq]``
*
* - If ``axis`` is set to ``[0, 3, 2, 1]``,
* then the output has shape ``[Seq, B, W, H, C]``
*
* - If ``axis`` is not set, or is set to ``[0, 1, 2, 3]``,
* the output is the same as the input.
*/
message PermuteLayerParams {
/**
* The order in which to permute the dimensions.
* Must have length 4 and be a permutation of ``[0, 1, 2, 3]``.
*/
repeated uint64 axis = 1;
}
/**
* A layer that reorganizes data in the input in specific ways.
*
* .. code::
*
* y = ReorganizeDataLayer(x)
*
* Requires 1 input and produces 1 output.
*
* Input
* A blob with rank at least 3.
* e.g.: blob with shape ``[C, H, W]``.
* For ranks greater than 3, the leading dimensions, starting from 0 to -4 (inclusive), are all treated as batch.
* Output
* Same rank as the input.
* e.g.: blob with shape ``[C_out, H_out, W_out]``.
*
* mode == SPACE_TO_DEPTH
* ``[C_out, H_out, W_out]`` : ``[C * blockSize * blockSize, H/blockSize, W/blockSize]``.
* blockSize must divide H and W.
* Data is moved from the spatial dimensions to the channel dimension. Input is spatially divided into
* non-overlapping blocks of size blockSize X blockSize and data from each block is moved into the
* channel dimension.
*
* mode == DEPTH_TO_SPACE
* ``[C_out, H_out, W_out]`` : ``[C/(blockSize * blockSize), H * blockSize, W * blockSize]``.
* Square of blockSize must divide C.
* Reverse of SPACE_TO_DEPTH. Data is moved from the channel dimension to the spatial dimensions.
*
*/
message ReorganizeDataLayerParams {
enum ReorganizationType {
SPACE_TO_DEPTH = 0;
DEPTH_TO_SPACE = 1;
}
ReorganizationType mode = 1;
uint64 blockSize = 2; /// must be greater than 1
}
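/*
 * Illustration (not part of the specification): a minimal NumPy sketch of
 * ``SPACE_TO_DEPTH`` for a single ``[C, H, W]`` input; the exact ordering of the
 * output channels shown here is illustrative only.
 *
 * .. code::
 *
 *     import numpy as np
 *
 *     def space_to_depth(x, block_size):
 *         # x: [C, H, W]; block_size must divide both H and W.
 *         c, h, w = x.shape
 *         x = x.reshape(c, h // block_size, block_size, w // block_size, block_size)
 *         x = np.transpose(x, (2, 4, 0, 1, 3))   # gather each block's pixels into channels
 *         return x.reshape(c * block_size * block_size, h // block_size, w // block_size)
 */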
/**
* A layer that slices the input data along axis = -1 or -2 or -3.
* For general slice along any axis, please see SliceStaticLayer/SliceDynamicLayer.
*
* .. code::
*
* y = SliceLayer(x)
*
* Requires 1 input and produces 1 output.
*
* Input
*     A blob that can, in general, have any rank. However, depending on the value of "axis",
* there may be additional rank constraints.
* Output
* A blob with the same rank as the input.
*
* Sliced section is taken from the interval ``[startIndex, endIndex)``, i.e.
* startIndex is inclusive while endIndex is exclusive.
* stride must be positive and represents the step size for slicing.
* Negative indexing is supported for startIndex and endIndex.
* -1 denotes N-1, -2 denotes N-2 and so on, where N is the length of the dimension to be sliced.
*
*/
message SliceLayerParams {
int64 startIndex = 1; /// start of the sliced section. Inclusive.
int64 endIndex = 2; /// end of sliced section. Exclusive.
uint64 stride = 3; /// The step size. Must be positive.
enum SliceAxis {
CHANNEL_AXIS = 0;
HEIGHT_AXIS = 1;
WIDTH_AXIS = 2;
}
// The following mapping is used for interpreting this parameter:
// CHANNEL_AXIS => axis = -3, input must have rank at least 3.
// HEIGHT_AXIS => axis = -2, input must have rank at least 2.
// WIDTH_AXIS => axis = -1
SliceAxis axis = 4;
}
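/*
 * Illustration (not part of the specification): a minimal NumPy sketch of slicing
 * along one of the three supported axes.
 *
 * .. code::
 *
 *     import numpy as np
 *
 *     def slice_layer(x, start_index, end_index, stride, axis):
 *         # axis is -3 (CHANNEL_AXIS), -2 (HEIGHT_AXIS) or -1 (WIDTH_AXIS);
 *         # start is inclusive, end is exclusive, negative indices count from the end.
 *         index = [slice(None)] * x.ndim
 *         index[axis] = slice(start_index, end_index, stride)
 *         return x[tuple(index)]
 *
 *     y = slice_layer(np.arange(24).reshape(2, 3, 4), 0, 2, 1, axis=-2)  # -> shape (2, 2, 4)
 */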
/**
* A layer that reduces the input using a specified operation.
*
* .. code::
*
* y = ReduceLayer(x)
*
* Requires 1 input and produces 1 output.
*
* Input
*     A blob that can, in general, have any rank. However, depending on the value of "axis",
* there may be additional rank constraints.
* Output
*     A blob with the same rank as the input, which has 1s on the dimensions specified in the parameter "axis"
*
* Values supported for axis are [-1], [-2], [-3], [-2,-1], [-3,-2,-1]
* and the equivalent positive values (depending on the rank of the input)
* For mode == 'ArgMax', axis must be [-1] or [-2] or [-3].
*/
message ReduceLayerParams {
/*
* The following reduction operations are supported
* and are applied on the specified axis of the input array:
*
* ``SUM``
* Sum of all elements
*
* .. math:: \sum{x_i}
*
* ``AVG``
* Sum of all elements divided by the number of elements
*
* .. math:: \dfrac{\sum^n{x_i}}{n}
*
* ``PROD``
* Product of all elements
*
* .. math:: \prod{x_i}
*
* ``LOGSUM``
* Sum of the natural logarithm of all elements
*
* .. math:: \sum{\ln{(x_i + \epsilon)}}
*
* ``SUMSQUARE``
* Sum of squares of all elements
*
* .. math:: \sum{x^2}
*
* ``L1``
* L1 normalization of all elements
*
* .. math:: ||x||_1 = \sum{|x_i|}
*
* ``L2``
* L2 normalization of all elements
*
* .. math:: ||x||_2 = \sqrt{\sum{x_i^2}}
*
* ``MAX``
* Maximum of all elements
*
* .. math:: \text{max}(x_i)
*
* ``MIN``
*    Minimum of all elements
*
* .. math:: \text{min}(x_i)
*
* ``ARGMAX``
* Argument of the maximum of all elements
*
* .. math:: \text{argmax}(x_i)
*
*/
enum ReduceOperation {
SUM = 0;
AVG = 1;
PROD = 2;
LOGSUM = 3;
SUMSQUARE = 4;
L1 = 5;
L2 = 6;
MAX = 7;
MIN = 8;
ARGMAX = 9; /// only supported with axis = C, H or W.
}
ReduceOperation mode = 1; /// Specifies function used to reduce.
/**
* Used if mode is ``LOGSUM``.
* Defaults to ``1e-6`` if not set or is set to ``0``.
*/
float epsilon = 2;
enum ReduceAxis {
CHW = 0;
HW = 1;
C = 2;
H = 3;
W = 4;
}
// The following mapping is used for interpreting this parameter:
// CHW = axis [-3, -2, -1], input must have rank at least 3.
// HW = axis [-2, -1], input must have rank at least 2.
// C = axis [-3]
// H = axis [-2]
// W = axis [-1]
ReduceAxis axis = 3;
}
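/*
 * Illustration (not part of the specification): a minimal NumPy sketch of the
 * reduction modes above (``ARGMAX`` is omitted, since it returns indices along a
 * single axis). Reduced dimensions are kept with size 1, matching the output description.
 *
 * .. code::
 *
 *     import numpy as np
 *
 *     def reduce_layer(x, mode="SUM", axis=(-3, -2, -1), epsilon=1e-6):
 *         ops = {
 *             "SUM":       lambda t: np.sum(t, axis=axis, keepdims=True),
 *             "AVG":       lambda t: np.mean(t, axis=axis, keepdims=True),
 *             "PROD":      lambda t: np.prod(t, axis=axis, keepdims=True),
 *             "LOGSUM":    lambda t: np.sum(np.log(t + epsilon), axis=axis, keepdims=True),
 *             "SUMSQUARE": lambda t: np.sum(np.square(t), axis=axis, keepdims=True),
 *             "L1":        lambda t: np.sum(np.abs(t), axis=axis, keepdims=True),
 *             "L2":        lambda t: np.sqrt(np.sum(np.square(t), axis=axis, keepdims=True)),
 *             "MAX":       lambda t: np.max(t, axis=axis, keepdims=True),
 *             "MIN":       lambda t: np.min(t, axis=axis, keepdims=True),
 *         }
 *         return ops[mode](x)
 */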
/**
* A layer that crops the spatial dimensions of an input.
* If two inputs are provided, the shape of the second input is used as the reference shape.
*
* .. code::
*
* y = CropLayer(x1) or y = CropLayer(x1,x2)
*
* Requires 1 or 2 inputs and produces 1 output.
*
* Input
*   1 or 2 tensors, each with rank at least 3; when two inputs are provided, they must have equal rank.
* Example:
* - 1 input case: A blob with shape ``[C, H_in, W_in]``.
* - 2 input case: 1st blob with shape ``[C, H_in, W_in]``, 2nd blob with shape ``[C, H_out, W_out]``.
*
* For ranks greater than 3, the leading dimensions, starting from 0 to -4 (inclusive), are all treated as batch.
*
* Output
* Same rank as the inputs.
* e.g.: A blob with shape ``[C, H_out, W_out]``.
*
* If one input is used, output is computed as follows:
*
* .. code::
*
* y = x1[:, topCropAmount:H_in - bottomCropAmount, leftCropAmount:W_in - rightCropAmount]
*
* topCropAmount == Height startEdgeSize == borderAmounts[0].startEdgeSize
* bottomCropAmount == Height endEdgeSize == borderAmounts[0].endEdgeSize
* leftCropAmount == Width startEdgeSize == borderAmounts[1].startEdgeSize
* rightCropAmount == Width endEdgeSize == borderAmounts[1].endEdgeSize
*
* H_out = H_in - topCropAmount - bottomCropAmount
* W_out = W_in - leftCropAmount - rightCropAmount
*
* If two inputs are used, output is computed as follows:
*
* .. code::
*
* y = x1[:, offset[0]:offset[0] + H_out, offset[1]:offset[1] + W_out]
*/
message CropLayerParams {
/**
* The amounts to be cropped from the input.
* Used only if a single input is provided.
*/
BorderAmounts cropAmounts = 1;
/**
* The offset amounts.
* Used only if two inputs are provided.
* Must be of length 2, in order ``[H, W]``.
*/
repeated uint64 offset = 5;
}
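/*
 * Illustration (not part of the specification): a minimal NumPy sketch of the
 * single-input case for a ``[C, H_in, W_in]`` blob.
 *
 * .. code::
 *
 *     import numpy as np
 *
 *     def crop_layer(x, top, bottom, left, right):
 *         # Crop the last two (spatial) axes of the input.
 *         h, w = x.shape[-2], x.shape[-1]
 *         return x[..., top:h - bottom, left:w - right]
 *
 *     y = crop_layer(np.zeros((3, 8, 8)), top=1, bottom=1, left=2, right=2)  # -> (3, 6, 4)
 */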
/**
* A layer that computes the elementwise average of the inputs.
* This layer has limited broadcasting support. For general broadcasting see AddBroadcastableLayer.
*
* .. code::
*
* y = AverageLayer(x1,x2,...)
*
* Requires multiple inputs and produces 1 output.
*
* Input
* In general, there are no rank constraints.
*      However, only certain sets of shapes are broadcastable. For example:
* [B, 1, 1, 1], [B, C, 1, 1], [B, 1, H, W], [B, C, H, W]
* Output
* A blob with the same shape as each input.
*/
message AverageLayerParams {
}
/**
* A layer that computes the elementwise maximum over the inputs.
*
* .. code::
*
* y = MaxLayer(x1,x2,...)
*
* Requires multiple inputs and produces 1 output.
*
* Input
* In general, there are no rank constraints.
*      However, only certain sets of shapes are broadcastable. For example:
* [B, C, 1, 1], [B, C, H, W]
* Output
* A blob with the same shape as each input.
*/
message MaxLayerParams {
}
/**
* A layer that computes the elementwise minimum over the inputs.
*
* .. code::
*
* y = MinLayer(x1,x2,...)
*
* Requires multiple inputs and produces 1 output.
*
* Input
* In general, there are no rank constraints.
*      However, only certain sets of shapes are broadcastable. For example:
* [B, C, 1, 1], [B, C, H, W]
* Output
* A blob with the same shape as each input.
*/
message MinLayerParams {
}
/**
* A layer that computes the dot product of two vectors.
*
* .. code::
*
* y = DotProductLayer(x1,x2)
*
* Requires 2 inputs and produces 1 output.
*
* Input
*    Two blobs with rank at least 3, whose last two dimensions must both be 1.
* e.g.: blobs with shape ``[B, C, 1, 1]``.
* For ranks greater than 3, the leading dimensions, starting from 0 to -4 (inclusive), are all treated as batch.
*
* Output
* Same rank as the input.
* e.g. for rank 4 inputs, output shape: [B, 1, 1, 1]
*/
message DotProductLayerParams {
/**
* If true, inputs are normalized first,
* thereby computing the cosine similarity.
*/
bool cosineSimilarity = 1;
}
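/*
 * Illustration (not part of the specification): a minimal NumPy sketch for rank 4
 * ``[B, C, 1, 1]`` inputs; the small ``eps`` guard in the cosine-similarity branch
 * is an assumption, not part of the specification.
 *
 * .. code::
 *
 *     import numpy as np
 *
 *     def dot_product(x1, x2, cosine_similarity=False, eps=1e-6):
 *         # x1, x2: [B, C, 1, 1]; the product is reduced over the channel axis.
 *         if cosine_similarity:
 *             x1 = x1 / (np.linalg.norm(x1, axis=1, keepdims=True) + eps)
 *             x2 = x2 / (np.linalg.norm(x2, axis=1, keepdims=True) + eps)
 *         return np.sum(x1 * x2, axis=1, keepdims=True)   # [B, 1, 1, 1]
 */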
/**
* A layer that performs mean variance normalization, along axis = -3.
*
* .. code::
*
* y = MeanVarianceNormalizeLayer(x)
*
* Requires 1 input and produces 1 output.
*
* Input
*     A blob with rank greater than or equal to 3.
* Example: Rank 4 blob represents [Batch, channels, height, width]
* For ranks greater than 3, the leading dimensions, starting from 0 to -4 (inclusive), are all treated as batch.
*
* Output
* A blob with the same shape as the input.
*
* If ``acrossChannels == true``
*      normalization is performed on the flattened input, i.e. the input is reshaped to (Batch, C), where "Batch" contains
*      all dimensions from 0 to -4 (inclusive), and C contains dimensions -1, -2, -3.
*
* If ``acrossChannels == false``
* normalization is performed within a channel,
* across spatial dimensions (i.e. last two dimensions).
*/
message MeanVarianceNormalizeLayerParams {
/**
* If true, mean and variance are computed across channels.
*/
bool acrossChannels = 1;
/**
* If false, only mean is subtracted.
*/
bool normalizeVariance = 2;
/**
* A small constant to avoid division by 0 while normalizing variance.
* Defaults to ``1e-6`` if not set or set to ``0``.
*/
float epsilon = 3;
}
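/*
 * Illustration (not part of the specification): a minimal NumPy sketch for a single
 * ``[C, H, W]`` input.
 *
 * .. code::
 *
 *     import numpy as np
 *
 *     def mean_variance_normalize(x, across_channels, normalize_variance, epsilon=1e-6):
 *         # acrossChannels selects whether statistics are taken over all of C, H, W
 *         # or per channel over the spatial (H, W) axes only.
 *         axes = (-3, -2, -1) if across_channels else (-2, -1)
 *         y = x - np.mean(x, axis=axes, keepdims=True)
 *         if normalize_variance:
 *             var = np.mean(np.square(y), axis=axes, keepdims=True)
 *             y = y / np.sqrt(var + epsilon)
 *         return y
 */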
/**
* A layer that repeats a sequence or the dimension sitting at axis = -5
*
* .. code::
*
* y = SequenceRepeatLayer(x)
*
* Requires 1 input and produces 1 output.
*
* Input
* A blob with rank at least 5.
* e.g: shape ``[Seq, B, C, H, W]``
* Output
* A blob with the same rank as the input.
* e.g.: for input shape ``[Seq, B, C, H, W]``, output shape is ``[nRepetitions * Seq, B, C, H, W]``.
*/
message SequenceRepeatLayerParams {
/**
* Number of repetitions.
* Defaults to ``1`` if not set or set to ``0``.
*/
uint64 nRepetitions = 1;
}
/// Recurrent Layers
/// ----------------
/*
* The following activations are supported with recurrent layers:
* - Linear
* - Sigmoid
* - Tanh
* - ReLU
* - Scaled Hyperbolic Tangent: alpha * tanh(beta * x), currently only supported for alpha = 1.7159, beta = 2/3
* - Hard Sigmoid: min(max(alpha * x + beta, 0), 1), currently only supported for alpha = 0.2, beta = 0.5
*/
/**
* A simple recurrent layer.
*
* .. code::
*
* y_t = SimpleRecurrentLayer(x_t, y_{t-1})
*
* Input
*     A blob of rank 5, with shape ``[Seq, Batch, inputVectorSize, 1, 1]``.
* This represents a sequence of vectors of size ``inputVectorSize``.
* Output
* Same rank as the input.
* Represents a vector of size ``outputVectorSize``. It is either the final output or a sequence of outputs at all time steps.
*
* - Output Shape: ``[1, Batch, outputVectorSize, 1, 1]`` , if ``sequenceOutput == false``
* - Output Shape: ``[Seq, Batch, outputVectorSize, 1, 1]`` , if ``sequenceOutput == true``
*
* This layer is described by the following equation:
*
* .. math::
* \boldsymbol{y_t} = f(\mathrm{clip}(W \boldsymbol{x_t} + \
* R \boldsymbol{y_{t-1}} + b))
*
* - ``W`` is a 2-dimensional weight matrix
* (``[outputVectorSize, inputVectorSize]``, row-major)
* - ``R`` is a 2-dimensional recursion matrix
* (``[outputVectorSize, outputVectorSize]``, row-major)
* - ``b`` is a 1-dimensional bias vector (``[outputVectorSize]``)
* - ``f()`` is an activation
* - ``clip()`` is a function that constrains values between ``[-50.0, 50.0]``
*/
message SimpleRecurrentLayerParams {
uint64 inputVectorSize = 1; /// The size of the input vectors.
uint64 outputVectorSize = 2; /// The size of the output vectors.
/**
* Activations supported are Linear, Sigmoid, Tanh, ReLU, Scaled Tanh (alpha = 1.71, beta = 2/3), Hard sigmoid (alpha = 0.2, beta = 0.5)
*/
ActivationParams activation = 10; /// The activation function.
/**
If false, output is just the result after the final state update.
If true, output is a sequence, containing outputs at all time steps.
*/
bool sequenceOutput = 15;
bool hasBiasVector = 20; /// If false, no bias is added.
WeightParams weightMatrix = 30; /// Weight matrix W.
WeightParams recursionMatrix = 31; /// Recursion Weight matrix R.
WeightParams biasVector = 32; /// Bias vector b.
bool reverseInput = 100;
// If true, then the node processes the input sequence from right to left
}
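/*
 * Illustration (not part of the specification): a minimal NumPy sketch of the
 * recurrence above, assuming a zero initial state and dropping the batch dimension
 * for brevity; the clip range follows the ``[-50.0, 50.0]`` bound described above.
 *
 * .. code::
 *
 *     import numpy as np
 *
 *     def simple_recurrent(x_seq, W, R, b, activation=np.tanh, sequence_output=True):
 *         # x_seq: [Seq, inputVectorSize]; W: [out, in]; R: [out, out]; b: [out].
 *         clip = lambda v: np.clip(v, -50.0, 50.0)
 *         y = np.zeros(W.shape[0])
 *         outputs = []
 *         for x_t in x_seq:
 *             y = activation(clip(W @ x_t + R @ y + b))
 *             outputs.append(y)
 *         return np.stack(outputs) if sequence_output else y
 */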
/**
* Gated-Recurrent Unit (GRU) Layer
*
* .. code::
*
* y_t = GRULayer(x_t, y_{t-1})
*
* Input
*     A blob of rank 5, with shape ``[Seq, Batch, inputVectorSize, 1, 1]``.
* This represents a sequence of vectors of size ``inputVectorSize``.
* Output
* Same rank as the input.
* Represents a vector of size ``outputVectorSize``. It is either the final output or a sequence of outputs at all time steps.
*
* - Output Shape: ``[1, Batch, outputVectorSize, 1, 1]`` , if ``sequenceOutput == false``
* - Output Shape: ``[Seq, Batch, outputVectorSize, 1, 1]`` , if ``sequenceOutput == true``
*
* This layer is described by the following equations:
*
* Update Gate
* .. math::
* \boldsymbol{z_t} = \
* f(\mathrm{clip}(W_z \boldsymbol{x_t} + \
*         R_z \boldsymbol{y_{t-1}} + b_z))
*
* Reset Gate
* .. math::
* \boldsymbol{r_t} = \
* f(\mathrm{clip}(W_r \boldsymbol{x_t} + \
* R_r \boldsymbol{y_{t-1}} + b_r))
*
* Cell Memory State
* .. math::
* \boldsymbol{c_t} = \
* \boldsymbol{y_{t-1}} \odot \boldsymbol{r_t}
*
* Output Gate
* .. math::
* \boldsymbol{o_t} = \
* g(\mathrm{clip}(W_o \boldsymbol{x_t} + \
* R_o \boldsymbol{c_t} + b_o))
*
* Output
* .. math::
* \boldsymbol{y_t} = \
* (1 - \boldsymbol{z_t}) \odot \boldsymbol{o_t} + \
* \boldsymbol{z_t} \odot \boldsymbol{y_{t-1}}
*
* - ``W_z``, ``W_r``, ``W_o`` are 2-dimensional input weight matrices
* (``[outputVectorSize, inputVectorSize]``, row-major)
* - ``R_z``, ``R_r``, ``R_o`` are 2-dimensional recursion matrices
* (``[outputVectorSize, outputVectorSize]``, row-major)
* - ``b_z``, ``b_r``, ``b_o`` are 1-dimensional bias vectors
* (``[outputVectorSize]``)
* - ``f()``, ``g()`` are activations
* - ``clip()`` is a function that constrains values between ``[-50.0, 50.0]``
* - ``⊙`` denotes the elementwise product of matrices
*/
message GRULayerParams {
uint64 inputVectorSize = 1; /// Size of the input vectors.
uint64 outputVectorSize = 2; /// Size of the output vectors.
/**
* 2 element array representing activations [f(), g()] in that order.
* Typical values used = [sigmoid, tanh].
* Activations supported are Linear, Sigmoid, Tanh, ReLU, Scaled Tanh (alpha = 1.71, beta = 2/3), Hard sigmoid (alpha = 0.2, beta = 0.5)
*/
repeated ActivationParams activations = 10;
/**
* If false, output is just the result after the final state update.
* If true, output is a sequence, containing outputs at all time steps.
*/
bool sequenceOutput = 15;
/**
* If false, no biases (``b_z``, ``b_r``, ``b_o``) are added.
*/
bool hasBiasVectors = 20;
WeightParams updateGateWeightMatrix = 30; /// Weight Matrix W_z.
WeightParams resetGateWeightMatrix = 31; /// Weight Matrix W_r.
WeightParams outputGateWeightMatrix = 32; /// Weight Matrix W_o.
WeightParams updateGateRecursionMatrix = 50; /// Recursion Weight Matrix R_z.
WeightParams resetGateRecursionMatrix = 51; /// Recursion Weight Matrix R_r.
WeightParams outputGateRecursionMatrix = 52; /// Recursion Weight Matrix R_o.
WeightParams updateGateBiasVector = 70; /// Bias vector b_z.
WeightParams resetGateBiasVector = 71; /// Bias vector b_r.
WeightParams outputGateBiasVector = 72; /// Bias vector b_o.
/// If true, then the node processes the input sequence from right to left
bool reverseInput = 100;
}
/**
* Long short-term memory (LSTM) parameters.
*
* This is described by the following equations:
*
* Input Gate
* .. math::
* \boldsymbol{i_t} = \
* f(\mathrm{clip}(W_i \boldsymbol{x_t} + \
* R_i \boldsymbol{y_{t-1}} + \
* p_i \odot c_{t-1} + b_i))
*
* Forget Gate
* .. math::
* \boldsymbol{f_t} = \
* f(\mathrm{clip}(W_f \boldsymbol{x_t} + \
* R_f \boldsymbol{y_{t-1}} + \
* p_f \odot c_{t-1} + b_f))
*
* Block Input
* .. math::
* \boldsymbol{z_t} = \
* g(\mathrm{clip}(W_z \boldsymbol{x_t} + \
* R_z \boldsymbol{y_{t-1}} + b_z))
*
* Cell Memory State
* .. math::
* \boldsymbol{c_t} = \
* \boldsymbol{c_{t-1}} \odot \boldsymbol{f_t} + \
* \boldsymbol{i_t} \odot \boldsymbol{z_t}
*
* Output Gate
* .. math::
* \boldsymbol{o_t} = \
* f(\mathrm{clip}(W_o \boldsymbol{x_t} + \
* R_o \boldsymbol{y_{t-1}} + \
* p_o \odot c_t + b_o))
*
* Output
* .. math::
* \boldsymbol{y_t} = \
* h(\boldsymbol{c_t}) \odot \boldsymbol{o_t}
*
* - ``W_i``, ``W_f``, ``W_z``, ``W_o`` are 2-dimensional input weight matrices
* (``[outputVectorSize, inputVectorSize]``, row-major)
* - ``R_i``, ``R_f``, ``R_z``, ``R_o`` are 2-dimensional recursion matrices
* (``[outputVectorSize, outputVectorSize]``, row-major)
* - ``b_i``, ``b_f``, ``b_z``, ``b_o`` are 1-dimensional bias vectors
* (``[outputVectorSize]``)
* - ``p_i``, ``p_f``, ``p_o`` are 1-dimensional peephole vectors
* (``[outputVectorSize]``)
* - ``f()``, ``g()``, ``h()`` are activations
* - ``clip()`` is a function that constrains values between ``[-50.0, 50.0]``
* - ``⊙`` denotes the elementwise product of matrices
*/
message LSTMParams {
/**
* If true, output is a sequence, containing outputs at all time steps.
* If false, output is just the result after final state update.
*/
bool sequenceOutput = 10;
/**
* If false, no biases (``b_i``, ``b_f``, ``b_z``, ``b_o``) are added.
*/
bool hasBiasVectors = 20;
/**
* If true, a vector of ``1`` values is added to ``b_f``.
*/
bool forgetBias = 30;
/**
* If true, peephole vectors are included.
*/
bool hasPeepholeVectors = 40;
/**
* If the coupled Input and Forget flag is on, the behaviour of
* ``c_t`` is changed to the following (i.e. forget gate is not used):
*
* .. math::
* \boldsymbol{c_t} = \
* \boldsymbol{c_{t-1}} \odot (1 - \boldsymbol{i_t}) + \
* \boldsymbol{i_t} \odot \boldsymbol{z_t}
*
*/
bool coupledInputAndForgetGate = 50;
/**
* Places a limit on the maximum and minimum values of ``c_t``.
* c_t = min(c_t, cellClipThreshold)
* c_t = max(c_t, -cellClipThreshold)
* If 0, it is set to its default value = 50.0.
*/
float cellClipThreshold = 60;
}
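/*
 * Illustration (not part of the specification): a minimal NumPy sketch of one time
 * step of the equations above, with f = sigmoid, g = h = tanh and peephole vectors
 * included; the batch dimension is dropped for brevity.
 *
 * .. code::
 *
 *     import numpy as np
 *
 *     def lstm_step(x_t, y_prev, c_prev, W, R, b, p):
 *         # W, R, b, p are dicts keyed by gate: "i", "f", "z", "o".
 *         sig = lambda v: 1.0 / (1.0 + np.exp(-v))
 *         clip = lambda v: np.clip(v, -50.0, 50.0)
 *         i_t = sig(clip(W["i"] @ x_t + R["i"] @ y_prev + p["i"] * c_prev + b["i"]))
 *         f_t = sig(clip(W["f"] @ x_t + R["f"] @ y_prev + p["f"] * c_prev + b["f"]))
 *         z_t = np.tanh(clip(W["z"] @ x_t + R["z"] @ y_prev + b["z"]))
 *         c_t = c_prev * f_t + i_t * z_t
 *         o_t = sig(clip(W["o"] @ x_t + R["o"] @ y_prev + p["o"] * c_t + b["o"]))
 *         y_t = np.tanh(c_t) * o_t
 *         return y_t, c_t
 */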
/**
* Weights for long short-term memory (LSTM) layers
*/
message LSTMWeightParams {
WeightParams inputGateWeightMatrix = 1; /// Weight Matrix W_i.
WeightParams forgetGateWeightMatrix = 2; /// Weight Matrix W_f.
WeightParams blockInputWeightMatrix = 3; /// Weight Matrix W_z.
WeightParams outputGateWeightMatrix = 4; /// Weight Matrix W_o.
WeightParams inputGateRecursionMatrix = 20; /// Recursion Weight Matrix R_i.
WeightParams forgetGateRecursionMatrix = 21; /// Recursion Weight Matrix R_f.
WeightParams blockInputRecursionMatrix = 22; /// Recursion Weight Matrix R_z.
WeightParams outputGateRecursionMatrix = 23; /// Recursion Weight Matrix R_o.
//biases:
WeightParams inputGateBiasVector = 40; /// Bias vector b_i.
WeightParams forgetGateBiasVector = 41; /// Bias vector b_f.
WeightParams blockInputBiasVector = 42; /// Bias vector b_z.
WeightParams outputGateBiasVector = 43; /// Bias vector b_o.
//peepholes:
WeightParams inputGatePeepholeVector = 60; /// Peephole vector p_i.
WeightParams forgetGatePeepholeVector = 61; /// Peephole vector p_f.
WeightParams outputGatePeepholeVector = 62; /// Peephole vector p_o.
}
/**
* A unidirectional long short-term memory (LSTM) layer.
*
* .. code::
*
* (y_t, c_t) = UniDirectionalLSTMLayer(x_t, y_{t-1}, c_{t-1})
*
* Input
*     A blob of rank 5, with shape ``[Seq, Batch, inputVectorSize, 1, 1]``.
* This represents a sequence of vectors of size ``inputVectorSize``.
* Output
* Same rank as the input.
* Represents a vector of size ``outputVectorSize``. It is either the final output or a sequence of outputs at all time steps.
*
* - Output Shape: ``[1, Batch, outputVectorSize, 1, 1]`` , if ``sequenceOutput == false``
* - Output Shape: ``[Seq, Batch, outputVectorSize, 1, 1]`` , if ``sequenceOutput == true``
*
*/
message UniDirectionalLSTMLayerParams {
uint64 inputVectorSize = 1; /// Size of the input vectors.
uint64 outputVectorSize = 2; /// Size of the output vectors.
/**
* 3 element array representing activations [f(),g(),h()] in that order.
* Typical values used = [sigmoid, tanh, tanh].
* Activations supported are Linear, Sigmoid, Tanh, ReLU, Scaled Tanh (alpha = 1.71, beta = 2/3), Hard sigmoid (alpha = 0.2, beta = 0.5)
*/
repeated ActivationParams activations = 10;
LSTMParams params = 15;
LSTMWeightParams weightParams = 20; /// Weights, biases and peepholes.
/// If true, then the node processes the input sequence from right to left
bool reverseInput = 100;
}
/**
* Bidirectional long short-term memory (LSTM) layer
*
* .. code::
*
* (y_t, c_t, y_t_reverse, c_t_reverse) = BiDirectionalLSTMLayer(x_t, y_{t-1}, c_{t-1}, y_{t-1}_reverse, c_{t-1}_reverse)
*
* Input
*     A blob of rank 5, with shape ``[Seq, Batch, inputVectorSize, 1, 1]``.
* This represents a sequence of vectors of size ``inputVectorSize``.
* Output
* Same rank as the input.
* Represents a vector of size ``2 * outputVectorSize``. It is either the final output or a sequence of outputs at all time steps.
*
* - Output Shape: ``[1, Batch, 2 * outputVectorSize, 1, 1]`` , if ``sequenceOutput == false``
* - Output Shape: ``[Seq, Batch, 2 * outputVectorSize, 1, 1]`` , if ``sequenceOutput == true``
*
*
* The first LSTM operates on the input sequence in the forward direction.
* The second LSTM operates on the input sequence in the reverse direction.
*
* Example: given the input sequence ``[x_1, x_2, x_3]``,
* where ``x_i`` are vectors at time index ``i``:
*
* The forward LSTM output is ``[yf_1, yf_2, yf_3]``,
*
* where ``yf_i`` are vectors of size ``outputVectorSize``:
*
* - ``yf_1`` is the output at the end of sequence {``x_1``}
* - ``yf_2`` is the output at the end of sequence {``x_1``, ``x_2``}
* - ``yf_3`` is the output at the end of sequence {``x_1``, ``x_2``, ``x_3``}
*
* The backward LSTM output: ``[yb_1, yb_2, yb_3]``,
*
* where ``yb_i`` are vectors of size ``outputVectorSize``:
*
* - ``yb_1`` is the output at the end of sequence {``x_3``}
* - ``yb_2`` is the output at the end of sequence {``x_3``, ``x_2``}
* - ``yb_3`` is the output at the end of sequence {``x_3``, ``x_2``, ``x_1``}
*
* Output of the bi-dir layer:
*
* - if ``sequenceOutput = True`` : { ``[yf_1, yb_3]``, ``[yf_2, yb_2]``, ``[yf_3, yb_1]`` }
* - if ``sequenceOutput = False`` : { ``[yf_3, yb_3]`` }
*/
message BiDirectionalLSTMLayerParams {
/**
* Size of the input vectors.
*/
uint64 inputVectorSize = 1;
/**
* Size of the output vectors.
* It is the same for both forward and backward LSTMs.
*/
uint64 outputVectorSize = 2;
/**
* 3 element array representing activations [f(),g(),h()] in that order.
* Typical values used = [sigmoid, tanh, tanh].
* Activations supported are Linear, Sigmoid, Tanh, ReLU, Scaled Tanh (alpha = 1.71, beta = 2/3), Hard sigmoid (alpha = 0.2, beta = 0.5)
*/
repeated ActivationParams activationsForwardLSTM = 10;
/**
* Currently, backward LSTM activations
* must be same as the ones for the forward LSTM.
*/
repeated ActivationParams activationsBackwardLSTM = 11;
/**
* Common parameters shared by the forward and backward LSTMs.
*/
LSTMParams params = 15;
/**
* Weights and biases.
* Must contain exactly 2 entries,
* for the forward and backward LSTM respectively.
*/
repeated LSTMWeightParams weightParams = 20;
}
message CustomLayerParams {
message CustomLayerParamValue {
oneof value {
double doubleValue = 10;
string stringValue = 20;
int32 intValue = 30;
int64 longValue = 40;
bool boolValue = 50;
}
}
string className = 10; // The name of the class (conforming to MLCustomLayer) corresponding to this layer
repeated WeightParams weights = 20; // Any weights -- these are serialized in binary format and memmapped at runtime
map<string, CustomLayerParamValue> parameters = 30; // these may be handled as strings, so this should not be large
string description = 40; // An (optional) description of the layer provided by the model creator. This information is displayed when viewing the model, but does not affect the model's execution on device.
}
/**
* A layer that rearranges the dimensions and data of an input.
*
* .. code::
*
* y = TransposeLayer(x)
*
* Requires 1 input and produces 1 output.
*
* Input
* A N-Dimensional tensor.
* Output
* A N-Dimensional tensor of the same rank but with dimensions and data permuted according to axes.
* Shape: ``[InputShape[axis[0]], InputShape[axis[1]], ... , InputShape[axis[N-1]]]``
*
* Examples:
*
* - If ``axes`` is set to ``[3, 1, 2, 0]`` and the input shape is ``[6,7,8,9]``,
* then the output has shape ``[9,7,8,6]``
*/
message TransposeLayerParams {
/**
* Length of "axes" should match the rank of input & output tensor
* "axes" should be a permutation of "[0,1,2,...,N-1]" where N is the rank.
*/
repeated uint64 axes = 1; //
}
/**
* A layer that computes the matrix multiplication of two tensors with numpy-like broadcasting
* where the matrices reside in the last two indices of the tensor.
*
* .. code::
*
* y = BatchedMatMul(a,b)
*
* Requires 1 or 2 inputs and produces 1 output.
*
* The first tensor, "a", must be provided as an input. The second tensor can either be an input or provided as a weight matrix parameter.
*
* Input
* - a: First N-Dimensional tensor
* - b: Second N-Dimensional tensor (either a rank-N input or a matrix, i.e. N=2, provided as a layer parameter)
*
* Output
* A tensor containing the matrix product of two tensors.
* When there are two inputs: rank is max(2, rank(a), rank(b))
* When there is one input: rank is same as that of the input.
*
* This operation behaves as follows:
*
* When there are two inputs:
* - If N >= 2 for both tensors, it is treated as a batch of matrices residing in the last two indices.
* All the indices, except for the last two, are broadcasted using conventional rules.
* - If the first tensor is 1-D, it is converted to a 2-D tensor by prepending a 1 to its shape, e.g. (D) -> (1,D)
* - If the second tensor is 1-D, it is converted to a 2-D tensor by appending a 1 to its shape, e.g. (D) -> (D,1)
*
* When there is one input:
* - The weight matrix corresponds to a matrix of shape (X1, X2). Values of X1, X2 must be provided as layer parameters.
* - The input, "a", is reshaped into a matrix by combining all the leading dimensions, except the last, into a batch dimension. eg:
* - if "a" is rank 1 (X1,) --> (1, X1). Output shape will be (X2,)
* - if "a" is rank 2 (B1, X1) --> no need to reshape. Output shape will be (B1, X2)
* - if "a" is rank 3 (B1, B2, X1) --> (B1 * B2, X1). Output shape will be (B1, B2, X2)
* - etc
*/
message BatchedMatMulLayerParams {
/**
* If transposeA is true, it transposes the left matrix on the fly before matrix multiplication.
* (is ignored when there is one input)
*/
bool transposeA = 1;
/**
* If transposeB is true, it transposes the right matrix on the fly before matrix multiplication.
* (is ignored when there is one input)
*/
bool transposeB = 2;
/*
* Following parameters are ignored when there are two inputs.
*/
uint64 weightMatrixFirstDimension = 5; /// X1: same as the last dimension of the input tensor
uint64 weightMatrixSecondDimension = 6; /// X2: same as the last dimension of the output tensor
bool hasBias = 7; /// Whether a bias is added or not. Supported only when there is one input.
/*
* Weight matrix representing shape [X1, X2].
* Values are however stored in column major order,
* in the "repeated float" or "bytes" fields of the message "WeightParams"
*/
WeightParams weights = 8;
WeightParams bias = 9; /// Bias vector [X2]. Supported only when there is one input.
}
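/*
 * Illustration (not part of the specification): a minimal NumPy sketch of both the
 * two-input and one-input behaviors; ``np.matmul`` already follows the broadcasting
 * rules described above (matrices in the last two axes, leading axes broadcast).
 *
 * .. code::
 *
 *     import numpy as np
 *
 *     # Two-input case: leading axes broadcast, matrices live in the last two axes.
 *     a = np.random.rand(2, 3, 4, 5)
 *     b = np.random.rand(1, 3, 5, 6)
 *     y = np.matmul(a, b)                    # shape (2, 3, 4, 6)
 *
 *     # One-input case: weights of shape (X1, X2) applied to the last axis of "a".
 *     def batched_matmul_with_weights(a, weights, bias=None):
 *         y = a @ weights                    # leading axes act as batch
 *         return y if bias is None else y + bias
 */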
/**
* A layer that concatenates a list of tensors along a specified axis.
*
* .. code::
*
* y = ConcatNDLayer(x1,x2,....)
*
* Requires at least 1 input and produces 1 output.
*
* Input
* A Sequence of N-dimensional tensors. The rank of the input tensors must match and all dimensions except 'axis' must be equal.
* Output
*     A N-Dimensional tensor with the same rank as the inputs; its dimension along 'axis' is the sum of the inputs' dimensions along 'axis'.
*
*
*/
message ConcatNDLayerParams {
/**
* Dimension along which to concatenate. Supports negative values of the parameter 'axis'.
*/
int64 axis = 1;
}
/**
* A layer that performs softmax normalization along a specified axis.
*
* .. code::
*
* y = SoftmaxNDLayer(x)
*
* Requires 1 input and produces 1 output.
*
* Input
* A N-Dimensional tensor.
*
* Output
* A N-Dimensional tensor with the same shape as input tensor.
*
*
*/
message SoftmaxNDLayerParams {
/**
* Dimension along which the softmax is performed. Supports negative values of the parameter 'axis'.
*/
int64 axis = 1;
}
/**
* A layer that reverses specific dimensions of the input tensor.
*
* Requires 1 input and produces 1 output.
*/
message ReverseLayerParams {
/**
* Reverses each dimension of the input tensor for which the corresponding entry of reverseDim is set to True.
* Requires len(reverseDim) == rank(inputTensor)
*/
repeated bool reverseDim = 1;
}
/**
* A layer that reverses variable length slices.
*
* Requires 2 inputs and produces 1 output.
*
*/
message ReverseSeqLayerParams {
int64 batchAxis = 1; // batchAxis must be strictly less than sequenceAxis
int64 sequenceAxis = 2;
}
/**
* A layer that loads data as a parameter and provides it as an output.
*
* .. code::
*
* y = LoadConstantNDLayer()
*
* Requires no input and produces 1 output.
*
* Output:
*    A blob whose rank is between 1 and 4.
*/
message LoadConstantNDLayerParams {
/**
* The shape of the constant to be loaded.
*/
repeated uint64 shape = 1;
WeightParams data = 2;
}
/**
* A layer that outputs a tensor filled with a scalar value, with the same shape as the input tensor (whose values are ignored).
*
* Requires 1 input and produces 1 output.
*
* .. code::
*
* y = FillLikeLayer(x)
*
* Input
* A N-Dimensional tensor.
*
* Output
* A N-Dimensional tensor with the same shape as input tensor.
*
*/
message FillLikeLayerParams {
float value = 1;
}
/**
* A layer that outputs a tensor of shape ``targetShape``, filled with a scalar value.
*
* Requires no input and produces 1 output.
*
* .. code::
*
*      y = FillStaticLayer()
*
* Output
* A N-Dimensional tensor of shape targetShape.
*
*/
message FillStaticLayerParams {
float value = 1;
repeated uint64 targetShape = 2;
}
/**
* A layer that outputs a tensor filled with a scalar value, whose shape is given by the values of the input tensor.
*
* Requires 1 input and produces 1 output.
*
* .. code::
*
* y = FillDynamicLayer(x)
*
* Input
*     A rank 1 tensor specifying the target shape.
*
* Output
*     A N-Dimensional tensor with the shape specified by the values of the input tensor.
*
*/
message FillDynamicLayerParams {
float value = 1;
}
/**
* A layer that returns, elementwise, values taken from one of two input tensors,
* depending on the value in the condition tensor.
*
* .. code::
*
*      y = WhereBroadcastableLayer(condition, a, b)
*
* Requires 3 inputs and produces 1 output.
*
*
*
*/
message WhereBroadcastableLayerParams {
}
/**
* A layer that computes elementwise trigonometric sine function.
*
*
* .. code::
*
* y = SinLayer(x)
*
* Requires 1 input and produces 1 output.
*
* Input
* A N-Dimensional tensor
*
* Output
* A N-Dimensional tensor with the same shape as input tensor.
*
*
*/
message SinLayerParams {
}
/**
* A layer that computes elementwise trigonometric cosine function.
*
*
* .. code::
*
* y = CosLayer(x)
*
* Requires 1 input and produces 1 output.
*
* Input
* A N-Dimensional tensor
*
* Output
* A N-Dimensional tensor with the same shape as input tensor.
*
*
*/
message CosLayerParams {
}
/**
* A layer that computes the elementwise trigonometric tangent function.
*
*
* .. code::
*
* y = TanLayer(x)
*
* Requires 1 input and produces 1 output.
*
* Input
* A N-Dimensional tensor
*
* Output
* A N-Dimensional tensor with the same shape as input tensor.
*
*
*/
message TanLayerParams {
}
/**
* A layer that computes the elementwise inverse trigonometric sine (arcsine) function.
*
*
* .. code::
*
* y = AsinLayer(x)
*
* Requires 1 input and produces 1 output.
*
* Input
* A N-Dimensional tensor
*
* Output
* A N-Dimensional tensor with the same shape as input tensor.
*
*
*/
message AsinLayerParams {
}
/**
* A layer that computes the elementwise inverse trigonometric cosine (arccosine) function.
*
*
* .. code::
*
* y = AcosLayer(x)
*
* Requires 1 input and produces 1 output.
*
* Input
* A N-Dimensional tensor
*
* Output
* A N-Dimensional tensor with the same shape as input tensor.
*
*
*/
message AcosLayerParams {
}
/**
* A layer that computes the elementwise inverse trigonometric tangent (arctangent) function.
*
*
* .. code::
*
* y = AtanLayer(x)
*
* Requires 1 input and produces 1 output.
*
* Input
* A N-Dimensional tensor
*
* Output
* A N-Dimensional tensor with the same shape as input tensor.
*
*
*/
message AtanLayerParams {
}
/**
* A layer that computes the elementwise hyperbolic sine function.
*
*
* .. code::
*
* y = SinhLayer(x)
*
* Requires 1 input and produces 1 output.
*
* Input
* A N-Dimensional tensor
*
* Output
* A N-Dimensional tensor with the same shape as input tensor.
*
*
*/
message SinhLayerParams {
}
/**
* A layer that computes the elementwise hyperbolic cosine function.
*
*
* .. code::
*
* y = CoshLayer(x)
*
* Requires 1 input and produces 1 output.
*
* Input
* A N-Dimensional tensor
*
* Output
* A N-Dimensional tensor with the same shape as input tensor.
*
*
*/
message CoshLayerParams {
}
/**
* A layer that computes the elementwise hyperbolic tangent function.
*
*
* .. code::
*
* y = TanhLayer(x)
*
* Requires 1 input and produces 1 output.
*
* Input
* A N-Dimensional tensor
*
* Output
* A N-Dimensional tensor with the same shape as input tensor.
*
*
*/
message TanhLayerParams {
}
/**
* A layer that computes the elementwise inverse hyperbolic sine function.
*
*
* .. code::
*
* y = AsinhLayer(x)
*
* Requires 1 input and produces 1 output.
*
* Input
* A N-Dimensional tensor
*
* Output
* A N-Dimensional tensor with the same shape as input tensor.
*
*
*/
message AsinhLayerParams {
}
/**
* A layer that computes the elementwise inverse hyperbolic cosine function.
*
*
* .. code::
*
* y = AcoshLayer(x)
*
* Requires 1 input and produces 1 output.
*
* Input
* A N-Dimensional tensor
*
* Output
* A N-Dimensional tensor with the same shape as input tensor.
*
*
*/
message AcoshLayerParams {
}
/**
* A layer that computes the elementwise inverse hyperbolic tangent function.
*
*
* .. code::
*
* y = AtanhLayer(x)
*
* Requires 1 input and produces 1 output.
*
* Input
* A N-Dimensional tensor
*
* Output
* A N-Dimensional tensor with the same shape as input tensor.
*
*
*/
message AtanhLayerParams {
}
/**
* A layer that raises each element in first tensor to the power of
* corresponding element in the second tensor.
* Supports conventional numpy-like broadcasting.
*
* .. code::
*
*      y = PowBroadcastableLayer(x1, x2)
*
* Requires 2 inputs and produces 1 output.
*
* Input
* - First N-Dimensional tensor
* - Second N-Dimensional tensor
*
* Output
* A N-Dimensional tensor with the broadcast shape.
*
*/
message PowBroadcastableLayerParams {
}
/**
* A layer that computes, elementwise, 2 raised to the power of each element of the input tensor (base-2 exponential).
*
*
* .. code::
*
* y = Exp2Layer(x)
*
* Requires 1 input and produces 1 output.
*
* Input
* A N-Dimensional tensor
*
* Output
* A N-Dimensional tensor with the same shape as input tensor.
*
*
*/
message Exp2LayerParams {
}
/**
* A layer that returns a tensor containing the indices of all non-zero
* elements of input tensor.
*
* Requires 1 input and produces 1 output.
*
*/
message WhereNonZeroLayerParams {
}
/**
* A layer that copies a tensor setting everything outside a central band in
* each inner-most matrix to zero.
*
* Requires 1 input and produces 1 output.
*
* Parameters for matrix_band_part layer
* band(m, n) = (num_lower < 0 || (m-n) <= num_lower) && (num_upper < 0 || (n-m) <= num_upper).
* output[i, j, k, ..., m, n] = band(m, n) * input[i, j, k, ..., m, n]
*/
message MatrixBandPartLayerParams {
int64 numLower = 1;
int64 numUpper = 2;
}
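/*
 * Illustration (not part of the specification): a minimal NumPy sketch of the
 * band(m, n) rule above, applied to the inner-most matrices of the input.
 *
 * .. code::
 *
 *     import numpy as np
 *
 *     def matrix_band_part(x, num_lower, num_upper):
 *         # Zero everything outside a central band of each inner-most matrix.
 *         m, n = x.shape[-2], x.shape[-1]
 *         rows = np.arange(m)[:, None]
 *         cols = np.arange(n)[None, :]
 *         keep_lower = (num_lower < 0) | ((rows - cols) <= num_lower)
 *         keep_upper = (num_upper < 0) | ((cols - rows) <= num_upper)
 *         return x * (keep_lower & keep_upper)
 */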
/**
* A layer that copies a tensor, setting everything outside its upper triangular part to zero.
*
* Requires 1 input and produces 1 output.
*/
message UpperTriangularLayerParams {
int64 k = 1; // Diagonal below which to zero elements. k = 0 (the default) is the main diagonal, k < 0 is below it and k > 0 is above
}
/**
* A layer that copies a tensor, setting everything outside its lower triangular part to zero.
*
* Requires 1 input and produces 1 output.
*/
message LowerTriangularLayerParams {
int64 k = 1; // Diagonal above which to zero elements. k = 0 (the default) is the main diagonal, k < 0 is below it and k > 0 is above
}
/**
*
* A layer that broadcasts a tensor to a new shape.
*
*
* .. code::
*
*      y = BroadcastToLikeLayer(x1, x2)
*
* Requires 2 inputs and produces 1 output.
*
* Input
*     A N-Dimensional tensor to be broadcast.
*     A second N-Dimensional tensor whose shape is used as the target shape; its values are ignored.
*
* Output
*     A N-Dimensional tensor with the same shape as the second input tensor.
*
*/
message BroadcastToLikeLayerParams {
}
/**
*
* A layer that broadcasts a tensor to a new shape.
*
*
* .. code::
*
* y = BroadcastToStaticLayer(x)
*
* Requires 1 input and produces 1 output.
*
* Input
*     A N-Dimensional tensor.
*
* Output
*     A N-Dimensional tensor of shape ``targetShape``.
*
*/
message BroadcastToStaticLayerParams {
repeated uint64 targetShape = 1;
}
/**
*
* A layer that broadcasts a tensor to a new shape.
*
* .. code::
*
*      y = BroadcastToDynamicLayer(x1, x2)
*
* Requires 2 inputs and produces 1 output.
*
* Input
*     A N-Dimensional tensor to be broadcast.
*     A rank 1 tensor specifying the target shape.
*
* Output
*     A N-Dimensional tensor with the target shape.
*
*/
message BroadcastToDynamicLayerParams {
}
/**
* A layer that performs elementwise addition with broadcast support.
*
* Requires 2 inputs and produces 1 output.