
Commit

Merged develop from [PaddlePaddle#40935] Bug fix for intermediate support in Yaml
jim19930609 committed Mar 28, 2022
2 parents 62179ba + 27996fd commit 2a83dd9
Showing 206 changed files with 11,376 additions and 1,338 deletions.
2 changes: 2 additions & 0 deletions .gitignore
@@ -9,10 +9,12 @@ paddle/phi/api/backward/backward_api.h
paddle/phi/api/backward/sparse_bw_api.h
paddle/phi/api/include/api.h
paddle/phi/api/include/sparse_api.h
paddle/phi/api/include/strings_api.h
paddle/phi/api/lib/api.cc
paddle/phi/api/lib/dygraph_api.*
paddle/phi/api/lib/backward_api.cc
paddle/phi/api/lib/sparse_api.cc
paddle/phi/api/lib/strings_api.cc
paddle/phi/api/lib/sparse_bw_api.cc
paddle/phi/extension.h
paddle/phi/include/*
2 changes: 1 addition & 1 deletion cmake/external/cinn.cmake
@@ -26,7 +26,7 @@ add_definitions(-w)
######################################
include(ExternalProject)
set(CINN_PREFIX_DIR ${THIRD_PARTY_PATH}/CINN)
set(CINN_GIT_TAG 56879b637e2c4db19091eedad03d7cc674e092a2)
set(CINN_GIT_TAG e11c5e672f9961e28cfa403d86f99808beb58817)
set(CINN_OPTIONAL_ARGS -DPY_VERSION=${PY_VERSION}
-DWITH_CUDA=${WITH_GPU}
-DWITH_CUDNN=${WITH_GPU}
97 changes: 97 additions & 0 deletions paddle/fluid/eager/amp_auto_cast.h
@@ -0,0 +1,97 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include "paddle/fluid/eager/api/generated/fluid_generated/dygraph_forward_api.h"
#include "paddle/fluid/framework/convert_utils.h"

namespace egr {

static inline bool NeedCast(const paddle::experimental::Tensor& tensor,
const paddle::experimental::DataType& dst_dtype) {
auto place = tensor.inner_place();
auto data_type = tensor.dtype();
if (paddle::platform::is_gpu_place(place) ||
paddle::platform::is_cuda_pinned_place(place) ||
paddle::platform::is_xpu_place(place) ||
paddle::platform::is_mlu_place(place) ||
paddle::platform::is_npu_place(place) ||
paddle::platform::is_npu_pinned_place(place)) {
// CudaPinnedPlace is added for varbase created by dataloader
if ((data_type == paddle::experimental::DataType::FLOAT32 ||
data_type == paddle::experimental::DataType::FLOAT16 ||
data_type == paddle::experimental::DataType::BFLOAT16) &&
(data_type != dst_dtype)) {
return true;
}
}
return false;
}

inline std::vector<paddle::experimental::Tensor> AmpAutoCasts(
const std::string& inputs_name,
const std::vector<paddle::experimental::Tensor>& inputs,
const paddle::experimental::DataType& dst_dtype, std::string op_name) {
VLOG(6) << "AMP AmpAutoCasts:"
<< " inputs(" << inputs_name << ") dst_dtype("
<< paddle::framework::DataType2String(dst_dtype) << ").";
std::vector<paddle::experimental::Tensor> inputs_casted;
for (auto& input : inputs) {
if (NeedCast(input, dst_dtype)) {
paddle::framework::AttributeMap cast_attrs = {
{"in_dtype", paddle::framework::TransToProtoVarType(input.dtype())},
{"out_dtype", paddle::framework::TransToProtoVarType(dst_dtype)}};
inputs_casted.emplace_back(
std::move(cast_dygraph_function(input, cast_attrs)));
} else {
inputs_casted.emplace_back(input);
}
}
return inputs_casted;
}

inline paddle::experimental::Tensor AmpAutoCast(
const std::string& input_name, const paddle::experimental::Tensor& input,
const paddle::experimental::DataType& dst_dtype, std::string op_name) {
VLOG(6) << "AMP AmpAutoCasts:"
<< " input(" << input_name << ") dst_dtype("
<< paddle::framework::DataType2String(dst_dtype) << ").";
if (dst_dtype == paddle::experimental::DataType::FLOAT16) {
if (op_name == "run_program") {
return input;
}
if ((op_name == "batch_norm" || op_name == "layer_norm" ||
op_name == "sync_batch_norm") &&
input_name != "X") {
return input;
}
if ((op_name == "fused_attention" || op_name == "fused_feedforward")) {
if (input_name == "LnScale" || input_name == "LnBias" ||
input_name == "Ln2Scale" || input_name == "Ln2Bias" ||
input_name == "Ln1Scale" || input_name == "Ln1Bias") {
return input;
}
}
}
if (NeedCast(input, dst_dtype)) {
paddle::framework::AttributeMap cast_attrs = {
{"in_dtype", paddle::framework::TransToProtoVarType(input.dtype())},
{"out_dtype", paddle::framework::TransToProtoVarType(dst_dtype)}};
return cast_dygraph_function(input, cast_attrs);
}
return input;
}

} // namespace egr
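
The cast decision above depends only on the tensor's placement and dtype. Below is a minimal, self-contained sketch of that decision logic, using placeholder enums instead of the real paddle::experimental::DataType and paddle::platform place predicates; the names NeedCastSketch, Place, and DataType are illustrative assumptions, not part of this commit.

#include <iostream>

// Illustrative stand-ins for paddle::experimental::DataType and the
// paddle::platform place predicates consulted by egr::NeedCast.
enum class DataType { FLOAT32, FLOAT16, BFLOAT16, INT64 };
enum class Place { CPU, GPU, CUDAPinned, XPU, MLU, NPU, NPUPinned };

// Mirrors the structure of egr::NeedCast: only floating-point tensors that
// live on an accelerator (or pinned) place and whose dtype differs from the
// AMP destination dtype are candidates for a cast.
bool NeedCastSketch(Place place, DataType data_type, DataType dst_dtype) {
  const bool on_supported_place =
      place == Place::GPU || place == Place::CUDAPinned ||
      place == Place::XPU || place == Place::MLU || place == Place::NPU ||
      place == Place::NPUPinned;
  const bool is_floating =
      data_type == DataType::FLOAT32 || data_type == DataType::FLOAT16 ||
      data_type == DataType::BFLOAT16;
  return on_supported_place && is_floating && data_type != dst_dtype;
}

int main() {
  // A float32 tensor on the GPU is cast when the destination dtype is fp16.
  std::cout << NeedCastSketch(Place::GPU, DataType::FLOAT32,
                              DataType::FLOAT16) << "\n";  // prints 1
  // Integer tensors are never cast, regardless of place.
  std::cout << NeedCastSketch(Place::GPU, DataType::INT64,
                              DataType::FLOAT16) << "\n";  // prints 0
  return 0;
}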
119 changes: 21 additions & 98 deletions paddle/fluid/eager/amp_utils.h
Original file line number Diff line number Diff line change
@@ -13,30 +13,27 @@
// limitations under the License.

#pragma once
#include <map>
#include <string>
#include "paddle/fluid/eager/api/generated/fluid_generated/dygraph_forward_api.h"
#include "paddle/fluid/eager/api/utils/global_utils.h"
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/imperative/amp_auto_cast.h"

namespace egr {

static inline paddle::experimental::DataType GetPromoteType(
const std::string& api_name,
const std::string& op_name,
const std::vector<std::vector<paddle::experimental::Tensor>>&
amp_tensors_vector,
const paddle::experimental::DataType& amp_dtype) {
auto dst_type = amp_dtype;
if (egr::Controller::Instance().GetCurrentTracer()->GetAmpDtype() ==
"float16") {
if (api_name == "batch_norm" || api_name == "layer_norm" ||
api_name == "sync_batch_norm") {
if (op_name == "batch_norm" || op_name == "layer_norm" ||
op_name == "sync_batch_norm") {
if (amp_tensors_vector[0][0].dtype() ==
paddle::experimental::DataType::FLOAT32) {
dst_type = paddle::experimental::DataType::FLOAT32;
}
} else if (api_name == "fused_attention") {
} else if (op_name == "fused_attention") {
for (size_t i = 0; i < amp_tensors_vector.size(); i++) {
if (i != 3 && i != 4 && i != 9 && i != 10) {
if (amp_tensors_vector[i][0].dtype() ==
@@ -46,7 +43,7 @@ static inline paddle::experimental::DataType GetPromoteType(
}
}
}
} else if (api_name == "fused_feedforward") {
} else if (op_name == "fused_feedforward") {
for (size_t i = 0; i < amp_tensors_vector.size(); i++) {
if (i != 7 && i != 8 && i != 9 && i != 10) {
if (amp_tensors_vector[i][0].dtype() ==
@@ -78,7 +75,7 @@ static inline paddle::experimental::DataType GetPromoteType(
}
// NOTE(juncai): moving_average_abs_max_scale only considers the dtype of
// input(X)
if (api_name == "moving_average_abs_max_scale") {
if (op_name == "moving_average_abs_max_scale") {
if (amp_tensors_vector[0][0].dtype() ==
paddle::experimental::DataType::FLOAT16) {
dst_type = paddle::experimental::DataType::FLOAT16;
@@ -87,33 +84,33 @@
return dst_type;
}

paddle::experimental::DataType GetAmpDestDtype(
const std::string& api_name,
inline paddle::experimental::DataType GetAmpDestDtype(
const std::string& op_name,
const std::vector<std::vector<paddle::experimental::Tensor>>&
amp_tensors_vector) {
auto amp_dtype =
egr::Controller::Instance().GetCurrentTracer()->GetAmpDtype();
auto amp_level = egr::Controller::Instance().GetAMPLevel();
VLOG(6) << "AMP GetAmpDestDtype:"
<< " op(" << api_name << ") amp_dtype(" << amp_dtype << ") amp_level("
<< " op(" << op_name << ") amp_dtype(" << amp_dtype << ") amp_level("
<< static_cast<int>(amp_level) << ").";
if (amp_dtype == "float16") {
if (amp_level == paddle::imperative::AmpLevel::O1) {
if (paddle::imperative::AmpOperators::Instance()
.GetMutableAllowOps()
->count(api_name)) {
->count(op_name)) {
return paddle::experimental::DataType::FLOAT16;
} else if (paddle::imperative::AmpOperators::Instance()
.GetMutableBlockOps()
->count(api_name)) {
->count(op_name)) {
return paddle::experimental::DataType::FLOAT32;
} else {
auto dst_type = GetPromoteType(api_name, amp_tensors_vector,
auto dst_type = GetPromoteType(op_name, amp_tensors_vector,
paddle::experimental::DataType::FLOAT16);
if (dst_type == paddle::experimental::DataType::FLOAT16 &&
paddle::imperative::AmpOperators::Instance()
.GetMutableUnsupportedFp16Ops()
->count(api_name)) {
->count(op_name)) {
dst_type = paddle::experimental::DataType::FLOAT32;
}
return dst_type;
@@ -122,10 +119,10 @@ paddle::experimental::DataType GetAmpDestDtype(
auto dst_type = paddle::experimental::DataType::FLOAT16;
if (paddle::imperative::AmpOperators::Instance()
.GetMutableUnsupportedFp16Ops()
->count(api_name) ||
->count(op_name) ||
paddle::imperative::AmpOperators::Instance()
.GetMutableBlockOps()
->count(api_name)) {
->count(op_name)) {
dst_type = paddle::experimental::DataType::FLOAT32;
}
return dst_type;
@@ -134,20 +131,20 @@
if (amp_level == paddle::imperative::AmpLevel::O1) {
if (paddle::imperative::AmpOperators::Instance()
.GetMutableAllowOps()
->count(api_name)) {
->count(op_name)) {
return paddle::experimental::DataType::BFLOAT16;
} else if (paddle::imperative::AmpOperators::Instance()
.GetMutableBlockOps()
->count(api_name)) {
->count(op_name)) {
return paddle::experimental::DataType::FLOAT32;
} else {
auto dst_type =
GetPromoteType(api_name, amp_tensors_vector,
GetPromoteType(op_name, amp_tensors_vector,
paddle::experimental::DataType::BFLOAT16);
if (dst_type == paddle::experimental::DataType::BFLOAT16 &&
paddle::imperative::AmpOperators::Instance()
.GetMutableUnsupportedBf16Ops()
->count(api_name)) {
->count(op_name)) {
dst_type = paddle::experimental::DataType::FLOAT32;
}
return dst_type;
@@ -156,10 +153,10 @@ paddle::experimental::DataType GetAmpDestDtype(
auto dst_type = paddle::experimental::DataType::BFLOAT16;
if (paddle::imperative::AmpOperators::Instance()
.GetMutableUnsupportedBf16Ops()
->count(api_name) ||
->count(op_name) ||
paddle::imperative::AmpOperators::Instance()
.GetMutableBlockOps()
->count(api_name)) {
->count(op_name)) {
dst_type = paddle::experimental::DataType::FLOAT32;
}
return dst_type;
@@ -168,78 +165,4 @@ paddle::experimental::DataType GetAmpDestDtype(
return paddle::experimental::DataType::FLOAT32;
}

static inline bool NeedCast(const paddle::experimental::Tensor& tensor,
const paddle::experimental::DataType& dst_dtype) {
auto place = tensor.inner_place();
auto data_type = tensor.dtype();
if (paddle::platform::is_gpu_place(place) ||
paddle::platform::is_cuda_pinned_place(place) ||
paddle::platform::is_xpu_place(place) ||
paddle::platform::is_mlu_place(place) ||
paddle::platform::is_npu_place(place) ||
paddle::platform::is_npu_pinned_place(place)) {
// CudaPinnedPlace is added for varbase created by dataloader
if ((data_type == paddle::experimental::DataType::FLOAT32 ||
data_type == paddle::experimental::DataType::FLOAT16 ||
data_type == paddle::experimental::DataType::BFLOAT16) &&
(data_type != dst_dtype)) {
return true;
}
}
return false;
}

std::vector<paddle::experimental::Tensor> AmpAutoCasts(
const std::string& inputs_name,
const std::vector<paddle::experimental::Tensor>& inputs,
const paddle::experimental::DataType& dst_dtype, std::string api_name) {
VLOG(6) << "AMP AmpAutoCasts:"
<< " inputs(" << inputs_name << ") dst_dtype("
<< paddle::framework::DataType2String(dst_dtype) << ").";
std::vector<paddle::experimental::Tensor> inputs_casted;
for (auto& input : inputs) {
if (NeedCast(input, dst_dtype)) {
paddle::framework::AttributeMap cast_attrs = {
{"in_dtype", paddle::framework::TransToProtoVarType(input.dtype())},
{"out_dtype", paddle::framework::TransToProtoVarType(dst_dtype)}};
inputs_casted.emplace_back(
std::move(cast_dygraph_function(input, cast_attrs)));
} else {
inputs_casted.emplace_back(input);
}
}
return inputs_casted;
}

paddle::experimental::Tensor AmpAutoCast(
const std::string& input_name, const paddle::experimental::Tensor& input,
const paddle::experimental::DataType& dst_dtype, std::string api_name) {
VLOG(6) << "AMP AmpAutoCasts:"
<< " input(" << input_name << ") dst_dtype("
<< paddle::framework::DataType2String(dst_dtype) << ").";
if (dst_dtype == paddle::experimental::DataType::FLOAT16) {
if (api_name == "run_program") {
return input;
}
if ((api_name == "batch_norm" || api_name == "layer_norm" ||
api_name == "sync_batch_norm") &&
input_name != "X") {
return input;
}
if ((api_name == "fused_attention" || api_name == "fused_feedforward")) {
if (input_name == "LnScale" || input_name == "LnBias" ||
input_name == "Ln2Scale" || input_name == "Ln2Bias" ||
input_name == "Ln1Scale" || input_name == "Ln1Bias") {
return input;
}
}
}
if (NeedCast(input, dst_dtype)) {
paddle::framework::AttributeMap cast_attrs = {
{"in_dtype", paddle::framework::TransToProtoVarType(input.dtype())},
{"out_dtype", paddle::framework::TransToProtoVarType(dst_dtype)}};
return cast_dygraph_function(input, cast_attrs);
}
return input;
}
} // namespace egr
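
The O1 branch of GetAmpDestDtype above reduces to a three-way lookup: ops on the allow list run in FP16 (or BF16), ops on the block list run in FP32, and everything else goes through promotion with a fallback for unsupported ops. Below is a minimal standalone sketch of that selection, using plain std::unordered_set stand-ins for paddle::imperative::AmpOperators; the op names in the example lists are chosen for illustration, and the promoted dtype is assumed to be FLOAT16 rather than computed from the input tensors.

#include <iostream>
#include <string>
#include <unordered_set>

enum class DataType { FLOAT32, FLOAT16 };

// Sketch of the float16 / AmpLevel::O1 branch of egr::GetAmpDestDtype.
DataType GetAmpDestDtypeSketch(
    const std::string& op_name,
    const std::unordered_set<std::string>& allow_ops,
    const std::unordered_set<std::string>& block_ops,
    const std::unordered_set<std::string>& unsupported_fp16_ops) {
  if (allow_ops.count(op_name)) return DataType::FLOAT16;
  if (block_ops.count(op_name)) return DataType::FLOAT32;
  // Promotion path: the real code derives the dtype from the input tensors
  // via GetPromoteType; here it is assumed to be FLOAT16, and only the
  // unsupported-op fallback to FLOAT32 is kept.
  DataType dst_type = DataType::FLOAT16;
  if (unsupported_fp16_ops.count(op_name)) {
    dst_type = DataType::FLOAT32;
  }
  return dst_type;
}

int main() {
  const std::unordered_set<std::string> allow = {"conv2d", "matmul_v2"};
  const std::unordered_set<std::string> block = {"softmax_with_cross_entropy"};
  const std::unordered_set<std::string> unsupported = {"lookup_table"};
  std::cout << (GetAmpDestDtypeSketch("conv2d", allow, block, unsupported) ==
                DataType::FLOAT16) << "\n";  // prints 1
  std::cout << (GetAmpDestDtypeSketch("lookup_table", allow, block,
                                      unsupported) == DataType::FLOAT32)
            << "\n";  // prints 1
  return 0;
}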
1 change: 1 addition & 0 deletions paddle/fluid/eager/auto_code_generator/eager_generator.cc
@@ -2587,6 +2587,7 @@ static void GenerateForwardDygraphFile(const std::string& forward_cc_path,
"\"paddle/fluid/eager/api/generated/fluid_generated/nodes/nodes.h\"\n"
"#include \"paddle/fluid/eager/api/utils/global_utils.h\"\n"
"#include \"paddle/fluid/eager/amp_utils.h\"\n"
"#include \"paddle/fluid/eager/amp_auto_cast.h\"\n"
"#include \"paddle/fluid/platform/profiler/event_tracing.h\"\n\n";
std::string forward_cc_include_str =
paddle::string::Sprintf(FORWARD_INCLUDE_TEMPLATE);
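
This hunk only appends amp_auto_cast.h to the include block that the code generator emits at the top of every generated forward file, so the generated functions can reach the AmpAutoCast helpers that moved out of amp_utils.h. A minimal sketch of that kind of string-template assembly follows; the variable name and the shortened include list are assumptions for illustration, while the real generator uses FORWARD_INCLUDE_TEMPLATE and paddle::string::Sprintf.

#include <cstdio>
#include <string>

int main() {
  // The generator builds the header block of the generated forward .cc file
  // as a single string; this commit adds one more include line to it.
  const std::string forward_include_block =
      "#include \"paddle/fluid/eager/amp_utils.h\"\n"
      "#include \"paddle/fluid/eager/amp_auto_cast.h\"\n"
      "#include \"paddle/fluid/platform/profiler/event_tracing.h\"\n\n";
  std::printf("%s", forward_include_block.c_str());
  return 0;
}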