Skip to content

Commit

Permalink
Implement horizontal fusion.
Browse files Browse the repository at this point in the history
 - It reduces kernel launch overhead and increases launch dims by horizontally
   fusing independent computations.
  • Loading branch information
trentlo committed Dec 5, 2019
1 parent b06d620 commit b33c788
Show file tree
Hide file tree
Showing 5 changed files with 859 additions and 0 deletions.
35 changes: 35 additions & 0 deletions tensorflow/compiler/xla/service/gpu/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -1097,6 +1097,7 @@ cc_library(
":gpu_layout_assignment",
":gpu_sanitize_constant_names",
":gpu_scatter_expander",
":horizontal_fusion",
":instruction_fusion",
":ir_emission_utils",
":ir_emitter",
Expand Down Expand Up @@ -1686,3 +1687,37 @@ cc_library(
"@com_google_absl//absl/types:optional",
],
)

# Library target for the GPU horizontal fusion pass: compiles
# horizontal_fusion.cc and exposes horizontal_fusion.h to dependents.
# It is listed as ":horizontal_fusion" in the deps of another cc_library in
# this package so that gpu_compiler.cc can include the header.
cc_library(
name = "horizontal_fusion",
srcs = ["horizontal_fusion.cc"],
hdrs = ["horizontal_fusion.h"],
deps = [
# XLA HLO / pass infrastructure.
"//tensorflow/compiler/xla/service:hlo",
"//tensorflow/compiler/xla/service:hlo_creation_utils",
"//tensorflow/compiler/xla/service:hlo_pass",
"//tensorflow/compiler/xla/service:hlo_reachability",
# General XLA utilities.
"//tensorflow/compiler/xla:shape_util",
"//tensorflow/compiler/xla:util",
"//tensorflow/core:lib",
# Abseil helpers.
"@com_google_absl//absl/algorithm:container",
"@com_google_absl//absl/strings",
],
)

# Test target for the horizontal fusion library, built from
# horizontal_fusion_test.cc.
# NOTE(review): the sibling fusion targets (:fusion_merger,
# :instruction_fusion, :multi_output_fusion) are presumably needed because the
# test runs those passes alongside horizontal fusion -- confirm against the
# test source, which is not visible here.
tf_cc_test(
name = "horizontal_fusion_test",
srcs = ["horizontal_fusion_test.cc"],
deps = [
# Fusion targets from this package, including the library under test.
":fusion_merger",
":horizontal_fusion",
":instruction_fusion",
":multi_output_fusion",
"//tensorflow/compiler/jit:xla_gpu_jit",
# XLA test scaffolding and test main.
"//tensorflow/compiler/xla/tests:hlo_test_base",
"//tensorflow/compiler/xla/tests:test_utils",
"//tensorflow/compiler/xla/tests:xla_internal_test_main",
"//tensorflow/core:lib",
"//tensorflow/core:test",
],
)
8 changes: 8 additions & 0 deletions tensorflow/compiler/xla/service/gpu/gpu_compiler.cc
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ limitations under the License.
#include "tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.h"
#include "tensorflow/compiler/xla/service/gpu/gpu_sanitize_constant_names.h"
#include "tensorflow/compiler/xla/service/gpu/gpu_scatter_expander.h"
#include "tensorflow/compiler/xla/service/gpu/horizontal_fusion.h"
#include "tensorflow/compiler/xla/service/gpu/instruction_fusion.h"
#include "tensorflow/compiler/xla/service/gpu/ir_emission_utils.h"
#include "tensorflow/compiler/xla/service/gpu/ir_emitter_context.h"
Expand Down Expand Up @@ -265,6 +266,13 @@ Status GpuCompiler::OptimizeHloModule(
/*only_fusion_computations=*/true);
fusion.AddPass<HloDCE>();
TF_RETURN_IF_ERROR(fusion.Run(hlo_module).status());

HloPassFix<HloPassPipeline> horizontal_fusion("horizontal_fusion");
horizontal_fusion.AddPass<GpuHorizontalFusion>();
horizontal_fusion.AddPass<HloCSE>(/*is_layout_sensitive=*/true,
/*only_fusion_computations=*/true);
horizontal_fusion.AddPass<HloDCE>();
TF_RETURN_IF_ERROR(horizontal_fusion.Run(hlo_module).status());
}

return Status::OK();
Expand Down
Loading

0 comments on commit b33c788

Please sign in to comment.