// partition_ops.cc — caffe2/operators (registration and schemas for the
// Partition / LengthsPartition / GatherByKey operators).
#include "caffe2/operators/partition_ops.h"
namespace caffe2 {
namespace {
// Register the CPU implementations of the partition family of operators.
// The operator classes themselves are declared in partition_ops.h.
REGISTER_CPU_OPERATOR(Partition, PartitionOp);
REGISTER_CPU_OPERATOR(LengthsPartition, LengthsPartitionOp);
REGISTER_CPU_OPERATOR(GatherByKey, GatherByKeyOp);
// Schema for GatherByKey: the inverse of Partition. Takes the original full
// keys tensor plus the sharded value tensors and reconstructs the full values
// tensor. Accepts 2..INT_MAX inputs (keys + at least one shard), one output.
// Fix: ".Input(1, ...)" doc string read "Subsequented" — corrected to
// "Subsequent".
OPERATOR_SCHEMA(GatherByKey)
    .NumInputs(2, INT_MAX)
    .NumOutputs(1)
    .SetDoc(R"DOC(
Inverse operation of Partition.
Takes the original, full 'keys' tensor followed by sharded value tensors,
and returns the full value tensor, combined using the same hash used in
Partition.
)DOC")
    .Input(
        0,
        "keys",
        "The first input is the full keys tensor"
        " (same as the first input of Partition).")
    .Input(
        1,
        "sharded_values",
        "Subsequent inputs are sharded values tensors.")
    .Output(0, "values", "Reconstructed values tensor.");
// Schema for Partition: shards each input tensor into
// (num_outputs / num_inputs) partitions, keyed by the first (integral)
// input's values modulo the number of partitions.
OPERATOR_SCHEMA(Partition)
// Any positive number of inputs; outputs must be a positive multiple of the
// input count (one output shard per input tensor per partition).
.NumInputsOutputs([](int in, int out) {
return in > 0 && out > 0 && out % in == 0;
})
.SetDoc(R"DOC(
Splits the input int tensor into multiple ones according to the first tensor.
Takes the first input and partitions it to shards according to the remainder of
values modulo the number of partitions. It requires that the first tensor is of
integral type. The number of partitions is derived as (num_output / num_input).
If additional inputs are present they must have the same shape as the first
input, optionally with extra trailing dimensions. They will be partitioned
accordingly to the first input.
Optional arg 'pack_first_input' transforms the first tensor values as
X_ij / num_partitions.
Outputs are ordered as
X_0_part_0, X_1_part_0, ..., X_N-1_part_0, X_0_part_1, ..., X_N-1_part_K-1
)DOC")
.Arg(
"pack_first_input",
"(int, default 0) If set, the operator transforms "
"the first tensor values as floor(X_ij / num_partitions)")
.Input(
0,
"input",
"Input tensor containing data to be partitioned. The "
"number of input tensors might be greater than 1 but must have the "
"same shape as the previous tensors.")
.Output(
0,
"partitions",
"Output Partitions. The number of output tensors has to be a "
"multiple of the number of input tensors.");
// Schema for LengthsPartition: like Partition, but the first input is a 1D
// int32 lengths tensor describing element groupings, and the *second* input
// supplies the integral keys used for sharding.
OPERATOR_SCHEMA(LengthsPartition)
// At least 2 inputs (lengths + keys); outputs must be a positive multiple of
// the input count.
.NumInputsOutputs([](int in, int out) {
return in >= 2 && out > 0 && out % in == 0;
})
.SetDoc(R"DOC(
LengthsPartition splits the input int tensor into multiple ones according to the
second tensor. The first dimension is expected to be the tensor that describes
lengths of the elements.
Takes the second input and partitions it to shards according to the remainder of
values modulo the number of partitions. It requires the second tensor to be
a 1D-tensor of the integral type. The first tensor should be 1D-tensor of int32
that would represent the lengths of the elements in the input. The number of
partitions is derived as (num_output / num_input).
If additional inputs are present they must have the same shape as the first
input, optionally with extra trailing dimensions. They will be partitioned
accordingly to the first input.
Optional arg 'pack_first_input' transforms the first tensor values as
X_ij / num_partitions.
Outputs are ordered as
X_0_part_0, X_1_part_0, ..., X_N-1_part_0, X_0_part_1, ..., X_N-1_part_K-1
)DOC")
.Arg(
"pack_first_input",
"(int, default 0) If set, the operator transforms "
"the first tensor values as floor(X_ij / num_partitions)")
.Input(
0,
"input",
"Input tensor containing data to be partitioned. The "
"number of input tensors might be greater than 1 but must have the "
"same shape as the previous tensors.")
.Output(
0,
"partitions",
"Output Partitions. The number of output tensors has to be a "
"multiple of the number of input tensors.");
namespace {
class GetGatherByKeyGradient : public GradientMakerBase {
using GradientMakerBase::GradientMakerBase;
std::vector<OperatorDef> GetGradientDefs() override {
ArgumentHelper argsHelper(def_);
auto pack_first_input =
argsHelper.GetSingleArgument<int>("pack_first_input", 0);
Argument packArg = MakeArgument<int>("pack_first_input", pack_first_input);
if (g_output_[0].IsDense()) {
std::vector<std::string> inputs;
for (int i = 1; i < g_input_.size(); ++i) {
inputs.push_back("_" + GI(i) + "_keys");
inputs.push_back(GI(i));
}
return SingleGradientDef(
"Partition",
"",
std::vector<std::string>{I(0), GO(0)},
inputs,
std::vector<Argument>{packArg});
} else {
std::vector<std::string> inputs;
for (int i = 1; i < g_input_.size(); ++i) {
inputs.push_back("_" + GI_I(i) + "_keys");
inputs.push_back(GI_I(i));
inputs.push_back(GI_V(i));
}
return SingleGradientDef(
"Partition",
"",
std::vector<std::string>{I(0), GO_I(0), GO_V(0)},
inputs,
std::vector<Argument>{packArg});
}
}
};
} // namespace
// Partition and LengthsPartition should actually have gradients, but nothing
// uses them yet, and gradient computation is currently not input/output
// aware, so they cannot be expressed here. This is effectively "gradient not
// implemented yet" rather than "no gradient exists".
NO_GRADIENT(Partition);
NO_GRADIENT(LengthsPartition);
// GatherByKey's gradient is the Partition op produced by
// GetGatherByKeyGradient.
REGISTER_GRADIENT(GatherByKey, GetGatherByKeyGradient);
} // namespace
} // namespace caffe2