Adds video support to color based augmentations (NVIDIA#3580)

Signed-off-by: Janusz Lisiecki <jlisiecki@nvidia.com>
Co-authored-by: Joaquin Anton <janton@nvidia.com>
cyyever · Jan 23, 2022 · 18973f4 · 18973f4
1 parent d55d7fc
commit 18973f4
Show file tree

Hide file tree

Showing 13 changed files with 380 additions and 697 deletions.
diff --git a/dali/benchmark/color_twist_bench.cc b/dali/benchmark/color_twist_bench.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
+// Copyright (c) 2020-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -47,34 +47,9 @@ BENCHMARK_DEFINE_F(OperatorBench, ColorTwistGPU)(benchmark::State& st) {
     batch_size, H, W, C);
 }
 
-BENCHMARK_DEFINE_F(OperatorBench, OldColorTwistGPU)(benchmark::State& st) {
-  int batch_size = st.range(0);
-  int H = st.range(1);
-  int W = st.range(1);
-  int C = 3;
-
-  this->RunGPU<uint8_t>(
-    st,
-    OpSpec("OldColorTwist")
-      .AddArg("max_batch_size", batch_size)
-      .AddArg("num_threads", 1)
-      .AddArg("device", "gpu")
-      .AddArg("output_type", DALI_RGB)
-      .AddArg("brightness", kArgs.bri)
-      .AddArg("contrast", kArgs.con)
-      .AddArg("hue", kArgs.hue)
-      .AddArg("saturation", kArgs.sat),
-    batch_size, H, W, C);
-}
-
 BENCHMARK_REGISTER_F(OperatorBench, ColorTwistGPU)->Iterations(1000)
 ->Unit(benchmark::kMicrosecond)
 ->UseRealTime()
 ->Ranges({{1, 128}, {128, 2048}});
 
-BENCHMARK_REGISTER_F(OperatorBench, OldColorTwistGPU)->Iterations(1000)
-->Unit(benchmark::kMicrosecond)
-->UseRealTime()
-->Ranges({{1, 128}, {128, 2048}});
-
 }  // namespace dali
diff --git a/dali/operators/image/color/brightness_contrast.cc b/dali/operators/image/color/brightness_contrast.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2019-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright (c) 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -16,13 +16,6 @@
 #include "dali/kernels/imgproc/pointwise/multiply_add.h"
 
 namespace dali {
-namespace {
-
-template <typename Out, typename In>
-using TheKernel = kernels::MultiplyAddCpu<Out, In, 3>;
-
-}  // namespace
-
 
 DALI_SCHEMA(Brightness)
     .DocStr(R"code(Adjusts the brightness of the images.
@@ -47,7 +40,10 @@ the type.)code",
     .AddOptionalArg("dtype",
                     R"code(Output data type.
 
-If not set, the input type is used.)code", DALI_NO_TYPE);
+If not set, the input type is used.)code", DALI_NO_TYPE)
+    .AllowSequences()
+    .SupportVolumetric()
+    .InputLayout({"FHWC", "DHWC", "HWC"});
 
 DALI_SCHEMA(Contrast)
     .DocStr(R"code(Adjusts the contrast of the images.
@@ -70,7 +66,10 @@ the half of the input type's positive range (or 0.5 for ``float``) is used.)code
     .AddOptionalArg("dtype",
                     R"code(Output data type.
 
-If not set, the input type is used.)code", DALI_NO_TYPE);
+If not set, the input type is used.)code", DALI_NO_TYPE)
+    .AllowSequences()
+    .SupportVolumetric()
+    .InputLayout({"FHWC", "DHWC", "HWC"});
 
 DALI_SCHEMA(BrightnessContrast)
     .AddParent("Brightness")
@@ -85,60 +84,69 @@ Where the output_range is 1 for float outputs or the maximum positive value for
 
 This operator can also change the type of data.)code")
     .NumInput(1)
-    .NumOutput(1);
+    .NumOutput(1)
+    .AllowSequences()
+    .SupportVolumetric()
+    .InputLayout({"FHWC", "DHWC", "HWC"});
 
 DALI_REGISTER_OPERATOR(BrightnessContrast, BrightnessContrastCpu, CPU)
 DALI_REGISTER_OPERATOR(Brightness, BrightnessContrastCpu, CPU);
 DALI_REGISTER_OPERATOR(Contrast, BrightnessContrastCpu, CPU);
 
 
-bool BrightnessContrastCpu::SetupImpl(std::vector<OutputDesc> &output_desc,
-                                      const workspace_t<CPUBackend> &ws) {
-  KMgrResize(num_threads_, max_batch_size_);
-  const auto &input = ws.template Input<CPUBackend>(0);
-  const auto &output = ws.template Output<CPUBackend>(0);
-  output_desc.resize(1);
-  AcquireArguments(ws);
-  TYPE_SWITCH(input.type(), type2id, InputType, (uint8_t, int16_t, int32_t, float), (
-      TYPE_SWITCH(output_type_, type2id, OutputType, (uint8_t, int16_t, int32_t, float), (
-          {
-              using Kernel = TheKernel<OutputType, InputType>;
-              kernel_manager_.Initialize<Kernel>();
-              auto shapes = CallSetup<Kernel, InputType>(input);
-              output_desc[0] = {shapes, output_type_};
-          }
-      ), DALI_FAIL(make_string("Unsupported output type: ", output_type_)))  // NOLINT
-  ), DALI_FAIL(make_string("Unsupported input type: ", input.type())))  // NOLINT
-  return true;
-}
-
-
-void BrightnessContrastCpu::RunImpl(workspace_t<CPUBackend> &ws) {
+template <typename OutputType, typename InputType>
+void BrightnessContrastCpu::RunImplHelper(workspace_t<CPUBackend> &ws) {  // typed CPU path
   const auto &input = ws.template Input<CPUBackend>(0);
   auto &output = ws.template Output<CPUBackend>(0);
   output.SetLayout(input.GetLayout());
   auto out_shape = output.shape();
   auto& tp = ws.GetThreadPool();
-  TYPE_SWITCH(input.type(), type2id, InputType, (uint8_t, int16_t, int32_t, float), (
-      TYPE_SWITCH(output_type_, type2id, OutputType, (uint8_t, int16_t, int32_t, float), (
-          {
-              using Kernel = TheKernel<OutputType, InputType>;
-              for (int sample_id = 0; sample_id < input.shape().num_samples(); sample_id++) {
-                tp.AddWork([&, sample_id](int thread_id) {
-                    kernels::KernelContext ctx;
-                    auto tvin = view<const InputType, 3>(input[sample_id]);
-                    auto tvout = view<OutputType, 3>(output[sample_id]);
-                    float add, mul;
-                    OpArgsToKernelArgs<OutputType, InputType>(add, mul,
-                      brightness_[sample_id], brightness_shift_[sample_id], contrast_[sample_id]);
-                    kernel_manager_.Run<Kernel>(thread_id, sample_id, ctx, tvout, tvin,
-                                                add, mul);
-                }, out_shape.tensor_size(sample_id));
-              }
-          }
-      ), DALI_FAIL(make_string("Unsupported output type: ", output_type_)))  // NOLINT
-  ), DALI_FAIL(make_string("Unsupported input type: ", input.type())))  // NOLINT
+  TensorListShape<> sh = input.shape();
+  auto num_dims = sh.sample_dim();
+  int num_samples = sh.num_samples();
+  // The kernel is 3D: it runs on a whole image or on one outermost slice of a 4-D sample.
+  using Kernel = kernels::MultiplyAddCpu<OutputType, InputType, 3>;
+  kernel_manager_.Initialize<Kernel>();
+  // Brightness/contrast arguments are per sample, so all slices of a sample share add/mul.
+  for (int sample_id = 0; sample_id < num_samples; sample_id++) {
+    auto sample_shape = out_shape.tensor_shape_span(sample_id);
+    auto vol = volume(sample_shape.begin() + num_dims - 3, sample_shape.end());  // 3D slice
+    float add, mul;
+    OpArgsToKernelArgs<OutputType, InputType>(add, mul,
+                                              brightness_[sample_id],
+                                              brightness_shift_[sample_id],
+                                              contrast_[sample_id]);
+    if (num_dims == 4) {  // one task per outermost slice (index 0 of the sample shape)
+      int num_frames = sample_shape[0];
+      for (int frame_id = 0; frame_id < num_frames; frame_id++) {
+        tp.AddWork([&, sample_id, frame_id, add, mul](int thread_id) {
+          kernels::KernelContext ctx;
+          auto tvin = subtensor(view<const InputType, 4>(input[sample_id]), frame_id);
+          auto tvout = subtensor(view<OutputType, 4>(output[sample_id]), frame_id);
+          kernel_manager_.Run<Kernel>(thread_id, 0, ctx, tvout, tvin, add, mul);
+        }, vol);  // work-size hint for the thread pool
+      }
+    } else {
+      tp.AddWork([&, sample_id, add, mul](int thread_id) {
+        kernels::KernelContext ctx;
+        auto tvin = view<const InputType, 3>(input[sample_id]);
+        auto tvout = view<OutputType, 3>(output[sample_id]);
+        kernel_manager_.Run<Kernel>(thread_id, 0, ctx, tvout, tvin, add, mul);
+      }, vol);  // same work-size hint as the per-slice branch (was omitted)
+    }
+  }
   tp.RunAll();
 }
 
+void BrightnessContrastCpu::RunImpl(workspace_t<CPUBackend> &ws) {  // type dispatch only
+  const auto &input = ws.template Input<CPUBackend>(0);  // only its type is read here
+  TYPE_SWITCH(input.type(), type2id, InputType, BRIGHTNESS_CONTRAST_SUPPORTED_TYPES, (
+    TYPE_SWITCH(output_type_, type2id, OutputType, BRIGHTNESS_CONTRAST_SUPPORTED_TYPES, (
+      {  // both types resolved: run the templated implementation
+        RunImplHelper<OutputType, InputType>(ws);
+      }
+    ), DALI_FAIL(make_string("Unsupported output type: ", output_type_)))  // NOLINT
+  ), DALI_FAIL(make_string("Unsupported input type: ", input.type())))  // NOLINT
+}
+
 }  // namespace dali
diff --git a/dali/operators/image/color/brightness_contrast.cu b/dali/operators/image/color/brightness_contrast.cu
@@ -1,4 +1,4 @@
-// Copyright (c) 2019-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright (c) 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -17,62 +17,56 @@
 #include "dali/kernels/imgproc/pointwise/multiply_add_gpu.h"
 
 namespace dali {
-namespace {
-
-template <typename Out, typename In>
-using TheKernel = kernels::MultiplyAddGpu<Out, In, 3>;
-
-}  // namespace
 
 DALI_REGISTER_OPERATOR(BrightnessContrast, BrightnessContrastGpu, GPU)
 DALI_REGISTER_OPERATOR(Brightness, BrightnessContrastGpu, GPU);
 DALI_REGISTER_OPERATOR(Contrast, BrightnessContrastGpu, GPU);
 
-
-bool BrightnessContrastGpu::SetupImpl(std::vector<OutputDesc> &output_desc,
-                                      const workspace_t<GPUBackend> &ws) {
-  KMgrResize(num_threads_, max_batch_size_);
+template <typename OutputType, typename InputType>
+void BrightnessContrastGpu::RunImplHelper(workspace_t<GPUBackend> &ws) {  // typed GPU path
   const auto &input = ws.template Input<GPUBackend>(0);
-  const auto &output = ws.template Output<GPUBackend>(0);
-  output_desc.resize(1);
-  AcquireArguments(ws);
-  int N = input.num_samples();
-  addends_.resize(N);
-  multipliers_.resize(N);
-  TYPE_SWITCH(input.type(), type2id, InputType, (uint8_t, int16_t, int32_t, float), (
-      TYPE_SWITCH(output_type_, type2id, OutputType, (uint8_t, int16_t, int32_t, float), (
-          {
-              using Kernel = TheKernel<OutputType, InputType>;
-              kernel_manager_.Initialize<Kernel>();
-              auto &shapes = CallSetup<Kernel, InputType>(ws, input);
-              output_desc[0] = {shapes, output_type_};
-          }
-      ), DALI_FAIL(make_string("Unsupported output type: ", output_type_)))  // NOLINT
-  ), DALI_FAIL(make_string("Unsupported input type: ", input.type())))  // NOLINT
-  return true;
-}
+  auto &output = ws.template Output<GPUBackend>(0);
+  output.SetLayout(input.GetLayout());
+  auto sh = input.shape();
+  int num_samples = input.num_samples();
+  auto num_dims = sh.sample_dim();
 
+  addends_.resize(num_samples);
+  multipliers_.resize(num_samples);
+  for (int i = 0; i < num_samples; i++) {  // op arguments -> per-sample addend/multiplier
+    OpArgsToKernelArgs<OutputType, InputType>(addends_[i], multipliers_[i],
+                                              brightness_[i], brightness_shift_[i],
+                                              contrast_[i]);
+  }
+  // Kernel views are 3D; 4-D input is reinterpreted with the shape returned by collapse_dim().
+  TensorListView<StorageGPU, const InputType, 3> tvin;
+  TensorListView<StorageGPU, OutputType, 3> tvout;
+  if (num_dims == 4) {  // presumably merges dims 0 and 1 of each sample - TODO confirm
+    auto collapsed_sh = collapse_dim(view<const InputType, 4>(input).shape, 0);
+    tvin = reinterpret<const InputType, 3>(view<const InputType, 4>(input), collapsed_sh, true);
+    tvout = reinterpret<OutputType, 3>(view<OutputType, 4>(output), collapsed_sh, true);
+  } else {
+    tvin = view<const InputType, 3>(input);
+    tvout = view<OutputType, 3>(output);
+  }
+  // A single Setup/Run pair handles the whole batch on the workspace stream.
+  using Kernel = kernels::MultiplyAddGpu<OutputType, InputType, 3>;
+  kernels::KernelContext ctx;
+  ctx.gpu.stream = ws.stream();
+  kernel_manager_.Initialize<Kernel>();
+  // NOTE(review): Setup takes brightness_/contrast_, Run takes addends_/multipliers_; confirm.
+  kernel_manager_.Setup<Kernel>(0, ctx, tvin, brightness_, contrast_);
+  kernel_manager_.Run<Kernel>(0, 0, ctx, tvout, tvin, addends_, multipliers_);
+}
 
 void BrightnessContrastGpu::RunImpl(workspace_t<GPUBackend> &ws) {
   const auto &input = ws.template Input<GPUBackend>(0);
-  auto &output = ws.template Output<GPUBackend>(0);
-  output.SetLayout(input.GetLayout());
-  TYPE_SWITCH(input.type(), type2id, InputType, (uint8_t, int16_t, int32_t, float), (
-      TYPE_SWITCH(output_type_, type2id, OutputType, (uint8_t, int16_t, int32_t, float), (
-          {
-              using Kernel = TheKernel<OutputType, InputType>;
-              kernels::KernelContext ctx;
-              ctx.gpu.stream = ws.stream();
-              auto tvin = view<const InputType, 3>(input);
-              auto tvout = view<OutputType, 3>(output);
-              for (int i = 0; i < tvin.num_samples(); i++) {
-                OpArgsToKernelArgs<OutputType, InputType>(addends_[i], multipliers_[i],
-                      brightness_[i], brightness_shift_[i], contrast_[i]);
-              }
-              kernel_manager_.Run<Kernel>(ws.thread_idx(), 0, ctx, tvout, tvin,
-                                          addends_, multipliers_);
-          }
-      ), DALI_FAIL(make_string("Unsupported output type: ", output_type_)))  // NOLINT
+  TYPE_SWITCH(input.type(), type2id, InputType, BRIGHTNESS_CONTRAST_SUPPORTED_TYPES, (
+    TYPE_SWITCH(output_type_, type2id, OutputType, BRIGHTNESS_CONTRAST_SUPPORTED_TYPES, (
+      {  // both types resolved: run the templated implementation
+        RunImplHelper<OutputType, InputType>(ws);
+      }
+    ), DALI_FAIL(make_string("Unsupported output type: ", output_type_)))  // NOLINT
   ), DALI_FAIL(make_string("Unsupported input type: ", input.type())))  // NOLINT
 }