forked from pytorch/pytorch
-
Notifications
You must be signed in to change notification settings - Fork 0
/
lengths_tile_op.cc
97 lines (85 loc) · 2.75 KB
/
lengths_tile_op.cc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
#include "caffe2/operators/lengths_tile_op.h"
namespace caffe2 {
// CPU implementation of LengthsTile.
//
// Reads DATA (rank >= 1) and LENGTHS (rank 1, int32, one entry per row of
// DATA) and writes an output in which row i of DATA is repeated LENGTHS[i]
// times, rows being concatenated in order along the first dimension.
//
// Fails via CAFFE_ENFORCE when LENGTHS is not 1-D, DATA has rank 0, the
// number of lengths differs from DATA's first dimension, or any length is
// negative. Returns true on success.
template <>
bool LengthsTileOp<CPUContext>::RunOnDevice() {
  auto& data = Input(DATA);
  auto& lengths = Input(LENGTHS);
  auto* output = Output(0);

  CAFFE_ENFORCE_EQ(lengths.dim(), 1, "LENGTHS must be 1-D");
  CAFFE_ENFORCE_GE(data.dim(), 1, "DATA should be at least 1-D");
  CAFFE_ENFORCE_EQ(lengths.numel(), data.size(0));

  lengths_host_.CopyFrom(lengths); // sync copy
  const auto lengths_size = lengths_host_.numel();
  const auto* lengths_data = lengths_host_.data<int32_t>();

  // Validate every length and compute the output row count BEFORE the output
  // is allocated or written. Checking the sign lazily inside the copy loop
  // is unsafe: a negative entry shrinks the sum, so earlier rows would be
  // copied past the end of an under-sized output buffer before the bad entry
  // is ever reached. The sum is accumulated in 64 bits so that many large
  // int32 lengths cannot wrap around and under-allocate in the same way.
  int64_t total_length = 0;
  for (int64_t i = 0; i < lengths_size; ++i) {
    CAFFE_ENFORCE_GE(lengths_data[i], 0, "LENGTHS must be non-negative");
    total_length += lengths_data[i];
  }

  // Output has the same shape as DATA except for the first dimension.
  auto shape = data.sizes().vec();
  shape[0] = total_length;
  output->Resize(shape);

  // One "block" is a single outer-most entry of DATA, measured in bytes.
  const auto block_bytesize = data.size_from_dim(1) * data.dtype().itemsize();
  auto src = static_cast<const char*>(data.raw_data());
  auto out = static_cast<char*>(output->raw_mutable_data(data.dtype()));

  for (int64_t i = 0; i < lengths_size; ++i) {
    const int32_t length = lengths_data[i];
    // Emit `length` copies of the current source block, then advance to the
    // next source block (a length of 0 simply skips the block).
    for (int32_t j = 0; j < length; ++j) {
      context_.CopyBytesSameDevice(block_bytesize, src, out);
      out += block_bytesize;
    }
    src += block_bytesize;
  }
  return true;
}
// Register LengthsTileOp<CPUContext> as the CPU operator named "LengthsTile".
REGISTER_CPU_OPERATOR(LengthsTile, LengthsTileOp<CPUContext>);
// Schema for LengthsTile: exactly two inputs (DATA, LENGTHS) and one output.
// The SetDoc text below is a runtime string; the example in it shows a row
// with length 0 being dropped and the others repeated in order.
OPERATOR_SCHEMA(LengthsTile)
.NumInputs(2)
.NumOutputs(1)
.SetDoc(R"DOC(
Given DATA tensor of rank r >= 1, and LENGTHS tensor of rank 1, duplicate each
entry of the outer-most dimension of DATA according to LENGTHS, and concatenate
them in an output tensor of rank r.
Example:
DATA = [
[1.0, 1.2],
[2.3, 3.4],
[4.5, 5.7],
[6.8, 7.9],
]
LENGTHS = [0, 1, 3, 2]
OUTPUT = [
[2.3, 3.4],
[4.5, 5.7],
[4.5, 5.7],
[4.5, 5.7],
[6.8, 7.9],
[6.8, 7.9],
]
)DOC")
// Input 0: the tensor whose outer-most entries are repeated.
.Input(
0,
"DATA",
"Tensor of rank r >= 1. First dimension must be equal to the size of "
"lengths")
// Input 1: per-entry repeat counts, one for each outer-most entry of DATA.
.Input(1, "LENGTHS", "Tensor of int32 lengths of rank 1")
// Output 0: the tiled tensor; same rank as DATA.
.Output(0, "OUTPUT", "Tensor of rank r");
// Gradient maker for LengthsTile.
//
// Emits a single "LengthsSum" operator whose inputs are the gradient of the
// forward output and the forward LENGTHS input, and whose output is the
// gradient of the forward DATA input. No gradient is produced for LENGTHS.
class GetLengthsTileGradient : public GradientMakerBase {
  using GradientMakerBase::GradientMakerBase;

  vector<OperatorDef> GetGradientDefs() override {
    // The forward op must have been defined with exactly (DATA, LENGTHS).
    CAFFE_ENFORCE_EQ(def_.input_size(), 2);

    // Output gradient, followed by the same lengths that drove the
    // repetition of DATA in the forward pass.
    const vector<string> grad_op_inputs{GO(0), I(1)};
    // Only the gradient with respect to DATA is of interest.
    const vector<string> grad_op_outputs{GI(0)};

    return SingleGradientDef("LengthsSum", "", grad_op_inputs, grad_op_outputs);
  }
};
// Associate the gradient maker above with the LengthsTile operator.
REGISTER_GRADIENT(LengthsTile, GetLengthsTileGradient);
} // namespace caffe2