
Commit

[Large Tensor] Fixed RNN op (#17632)
* Changed relevant function args to index_t (see the sketch below)

* Added nightly test for RNN

* Added fix for LSTM, GRU, RNN-ReLU, RNN-tanh

* Using const instead of literals

* Added nightly test for RNN ReLU & tanh, LSTM, GRU

* Type assertion to force evaluation of output NDArray

* Incorporated latest round of comments
connorgoggins committed Mar 4, 2020
1 parent 21b7fc0 commit 5cffa74
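
For context on the fix: when MXNet is built with large-tensor support (MSHADOW_INT64_TENSOR_SIZE=1), mshadow's index_t is a 64-bit integer while int stays 32-bit, so size arithmetic carried out in int silently wraps once a tensor crosses 2^31 - 1 elements. The sketch below is illustrative only, not code from the commit: it mirrors the first-layer term of GetRnnParamSize for a hypothetical unidirectional LSTM (gate multiplier 4) with made-up dimensions.

    #include <cstdint>
    #include <iostream>

    // Stand-in for mshadow::index_t on a large-tensor build (assumption).
    using index_t = int64_t;

    int main() {
      // Illustrative large-tensor dimensions, not taken from the commit.
      const index_t input_size = 2000000;
      const index_t state_size = 512;
      const index_t size = 4 * state_size;  // LSTM packs 4 gates per cell

      // First-layer term from GetRnnParamSize, computed in 64 bits.
      const index_t size1 = (input_size + state_size + 2) * size;  // 4097052672

      // Before this commit the same value was held in a 32-bit int;
      // the truncation wraps it into a negative "size".
      const int32_t wrapped = static_cast<int32_t>(size1);  // -197914624

      std::cout << size1 << " vs " << wrapped << "\n";
      return 0;
    }

With the index_t signatures in the diff below, the same product stays in 64-bit arithmetic end to end.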
Showing 3 changed files with 259 additions and 225 deletions.
40 changes: 20 additions & 20 deletions src/operator/rnn-inl.h
@@ -63,7 +63,7 @@ struct RNNParam : public dmlc::Parameter<RNNParam> {
   bool bidirectional, state_outputs;
   int mode;
   float p;
-  int seq_length_, batch_size_, input_size_;
+  index_t seq_length_, batch_size_, input_size_;
 
   bool use_sequence_length;
   dmlc::optional<int> projection_size;
@@ -122,8 +122,8 @@ struct RNNParam : public dmlc::Parameter<RNNParam> {
   }
 };
 
-inline int GetRnnParamSize(int num_layer,
-                           int input_size,
+inline index_t GetRnnParamSize(int num_layer,
+                               index_t input_size,
                            int state_size,
                            int direction,
                            int mode,
@@ -140,14 +140,14 @@ inline int GetRnnParamSize(int num_layer,
       size *= 3;
       break;
   }
-  int size1 = (input_size + state_size + 2) * size;  // first layer size
-  int size2 = (state_size * direction + state_size + 2) * size;  // other layers size
+  index_t size1 = (input_size + state_size + 2) * size;  // first layer size
+  index_t size2 = (state_size * direction + state_size + 2) * size;  // other layers size
   if (projection_size.has_value()) {
-    int proj_size = projection_size.value();
+    index_t proj_size = projection_size.value();
     size1 = (input_size + proj_size + 2) * size;
     size2 = (proj_size * direction + proj_size + 2) * size;
   }
-  int param_size = size1 + (num_layer - 1) * size2;
+  index_t param_size = size1 + (num_layer - 1) * size2;
   if (projection_size.has_value()) {
     param_size += projection_size.value() * state_size * num_layer * direction;
   }
@@ -182,8 +182,8 @@ inline int GetRnnBiasSize(int num_layer,
  * - output -> h[t](, c[t] additionally with Lstm) time by time(sz: NxH(x2))
  * - intermediate y[1...T] as next layer's inputs(sz: TxNxHxD)
  */
-inline size_t GetRNNWorkspaceSize(int seq_length,
-                                  int batch_size,
+inline size_t GetRNNWorkspaceSize(index_t seq_length,
+                                  index_t batch_size,
                                   int hidden_size,
                                   int projection_size,
                                   int direction,
@@ -214,8 +214,8 @@ inline size_t GetRNNWorkspaceSize(int seq_length,
 
 inline size_t GetRNNReserveSpaceSize(int num_layer,
                                      int direction,
-                                     int seq_length,
-                                     int batch_size,
+                                     index_t seq_length,
+                                     index_t batch_size,
                                      int hidden_size,
                                      int mode) {
   size_t size = 0;
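
Note that GetRNNWorkspaceSize and GetRNNReserveSpaceSize already returned size_t; the arguments still had to widen because C++ evaluates each multiplication in the operands' type before converting to the result type, so an int seq_length times an int batch_size wraps in 32 bits before it ever reaches size_t. A minimal sketch with made-up values, not from the commit:

    #include <cstdint>
    #include <iostream>

    int main() {
      const int seq_length = 1 << 20;  // 1048576 time steps
      const int batch_size = 1 << 12;  // 4096 sequences

      // Computed with a 64-bit operand, the product is exact.
      const int64_t product = static_cast<int64_t>(seq_length) * batch_size;  // 4294967296

      // A 32-bit intermediate (what plain int operands yield in practice,
      // and formally undefined behavior) loses the value entirely.
      const int32_t wrapped = static_cast<int32_t>(product);  // 0

      std::cout << product << " vs " << wrapped << "\n";
      return 0;
    }

Promoting the parameters themselves to index_t, as the hunks above do, makes every downstream product 64-bit without rewriting each expression.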
@@ -279,9 +279,9 @@ void RNNForwardTraining(DType* ws,
                         bool state_outputs,
                         const int num_layers,
                         const int direction,
-                        const int seq_length,
-                        const int batch_size,
-                        const int input_size,
+                        const index_t seq_length,
+                        const index_t batch_size,
+                        const index_t input_size,
                         const int state_size,
                         DType* x_ptr,
                         DType* hx_ptr,
@@ -321,9 +321,9 @@ void RNNForwardInference(DType* ws,
                          bool state_outputs,
                          const int num_layers,
                          const int direction,
-                         const int seq_length,
-                         const int batch_size,
-                         const int input_size,
+                         const index_t seq_length,
+                         const index_t batch_size,
+                         const index_t input_size,
                          const int state_size,
                          const int projection_size,
                          DType* x_ptr,
@@ -363,9 +363,9 @@ void RNNBackward(DType* ws,
                  DType* rs,
                  const int num_layers,
                  const int direction,
-                 const int seq_length,
-                 const int batch_size,
-                 const int input_size,
+                 const index_t seq_length,
+                 const index_t batch_size,
+                 const index_t input_size,
                  const int state_size,
                  DType* x_ptr,
                  DType* hx_ptr,
(Diffs for the remaining two changed files are not shown.)
