Skip to content

Commit

Permalink
Merge pull request #5365 from halide/pdb_remove_hvx_v64
Browse files Browse the repository at this point in the history
Issue #3925 : Remove hvx_64
  • Loading branch information
pranavb-ca committed Oct 21, 2020
2 parents fc959e7 + d94e7a7 commit 31f1937
Show file tree
Hide file tree
Showing 71 changed files with 341 additions and 931 deletions.
1 change: 0 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -826,7 +826,6 @@ RUNTIME_LL_COMPONENTS = \
aarch64 \
arm \
arm_no_neon \
hvx_64 \
hvx_128 \
mips \
posix_math \
Expand Down
4 changes: 2 additions & 2 deletions apps/blur/halide_blur_generator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -80,9 +80,9 @@ class HalideBlur : public Halide::Generator<HalideBlur> {
default:
break;
}
} else if (get_target().features_any_of({Target::HVX_64, Target::HVX_128})) {
} else if (get_target().has_feature(Target::HVX)) {
// Hexagon schedule.
const int vector_size = get_target().has_feature(Target::HVX_128) ? 128 : 64;
const int vector_size = 128;

blur_y.compute_root()
.hexagon()
Expand Down
29 changes: 11 additions & 18 deletions apps/camera_pipe/camera_pipe_generator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -165,12 +165,8 @@ class Demosaic : public Halide::Generator<Demosaic> {
.unroll(c);
} else {
int vec = get_target().natural_vector_size(UInt(16));
bool use_hexagon = get_target().features_any_of({Target::HVX_64, Target::HVX_128});
if (get_target().has_feature(Target::HVX_64)) {
vec = 32;
} else if (get_target().has_feature(Target::HVX_128)) {
vec = 64;
}
bool use_hexagon = get_target().has_feature(Target::HVX);

for (Func f : intermediates) {
f.compute_at(intermed_compute_at)
.store_at(intermed_store_at)
Expand Down Expand Up @@ -305,7 +301,7 @@ Func CameraPipe::apply_curve(Func input) {

// How much to upsample the LUT by when sampling it.
int lutResample = 1;
if (get_target().features_any_of({Target::HVX_64, Target::HVX_128})) {
if (get_target().has_feature(Target::HVX)) {
// On HVX, LUT lookups are much faster if they are to LUTs not
// greater than 256 elements, so we reduce the tonemap to 256
// elements and use linear interpolation to upsample it.
Expand Down Expand Up @@ -504,26 +500,23 @@ void CameraPipe::generate() {
Expr out_width = processed.width();
Expr out_height = processed.height();

// In HVX 128, we need 2 threads to saturate HVX with work,
//and in HVX 64 we need 4 threads, and on other devices,
// we might need many threads.
// Depending on the HVX generation, we need 2 or 4 threads
// to saturate HVX with work. For simplicity, we'll just
// stick to 4 threads. On balance, the overhead should
// not be much for the 2 extra threads that we create
// on cores that have only two HVX contexts.
Expr strip_size;
if (get_target().has_feature(Target::HVX_128)) {
strip_size = processed.dim(1).extent() / 2;
} else if (get_target().has_feature(Target::HVX_64)) {
if (get_target().has_feature(Target::HVX)) {
strip_size = processed.dim(1).extent() / 4;
} else {
strip_size = 32;
}
strip_size = (strip_size / 2) * 2;

int vec = get_target().natural_vector_size(UInt(16));
if (get_target().has_feature(Target::HVX_64)) {
vec = 32;
} else if (get_target().has_feature(Target::HVX_128)) {
if (get_target().has_feature(Target::HVX)) {
vec = 64;
}

processed
.compute_root()
.reorder(c, x, y)
Expand Down Expand Up @@ -569,7 +562,7 @@ void CameraPipe::generate() {
demosaiced->intermed_store_at.set({processed, yo});
demosaiced->output_compute_at.set({curved, x});

if (get_target().features_any_of({Target::HVX_64, Target::HVX_128})) {
if (get_target().has_feature(Target::HVX)) {
processed.hexagon();
denoised.align_storage(x, vec);
deinterleaved.align_storage(x, vec);
Expand Down
4 changes: 2 additions & 2 deletions apps/hexagon_benchmarks/conv3x3_generator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,8 @@ class Conv3x3 : public Generator<Conv3x3> {
output.dim(0).set_min(0);
output.dim(1).set_min(0);

if (get_target().features_any_of({Target::HVX_64, Target::HVX_128})) {
const int vector_size = get_target().has_feature(Target::HVX_128) ? 128 : 64;
if (get_target().has_feature(Target::HVX)) {
const int vector_size = 128;
Expr input_stride = input.dim(1).stride();
input.dim(1).set_stride((input_stride / vector_size) * vector_size);

Expand Down
4 changes: 2 additions & 2 deletions apps/hexagon_benchmarks/dilate3x3_generator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@ class Dilate3x3 : public Generator<Dilate3x3> {
output.dim(0).set_min(0);
output.dim(1).set_min(0);

if (get_target().features_any_of({Target::HVX_64, Target::HVX_128})) {
const int vector_size = get_target().has_feature(Target::HVX_128) ? 128 : 64;
if (get_target().has_feature(Target::HVX)) {
const int vector_size = 128;
Expr input_stride = input.dim(1).stride();
input.dim(1).set_stride((input_stride / vector_size) * vector_size);

Expand Down
4 changes: 2 additions & 2 deletions apps/hexagon_benchmarks/gaussian5x5_generator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,8 @@ class Gaussian5x5 : public Generator<Gaussian5x5> {
output.dim(0).set_min(0);
output.dim(1).set_min(0);

if (get_target().features_any_of({Target::HVX_64, Target::HVX_128})) {
const int vector_size = get_target().has_feature(Target::HVX_128) ? 128 : 64;
if (get_target().has_feature(Target::HVX)) {
const int vector_size = 128;
Expr input_stride = input.dim(1).stride();
input.dim(1).set_stride((input_stride / vector_size) * vector_size);

Expand Down
4 changes: 2 additions & 2 deletions apps/hexagon_benchmarks/median3x3_generator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,8 @@ class Median3x3 : public Generator<Median3x3> {
output.dim(0).set_min(0);
output.dim(1).set_min(0);

if (get_target().features_any_of({Target::HVX_64, Target::HVX_128})) {
const int vector_size = get_target().has_feature(Target::HVX_128) ? 128 : 64;
if (get_target().has_feature(Target::HVX)) {
const int vector_size = 128;
Expr input_stride = input.dim(1).stride();
input.dim(1).set_stride((input_stride / vector_size) * vector_size);

Expand Down
4 changes: 2 additions & 2 deletions apps/hexagon_benchmarks/sobel_generator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,8 @@ class Sobel : public Generator<Sobel> {
input.dim(0).set_min(0);
input.dim(1).set_min(0);

if (get_target().features_any_of({Target::HVX_64, Target::HVX_128})) {
const int vector_size = get_target().has_feature(Target::HVX_128) ? 128 : 64;
if (get_target().has_feature(Target::HVX)) {
const int vector_size = 128;
Expr input_stride = input.dim(1).stride();
input.dim(1).set_stride((input_stride / vector_size) * vector_size);

Expand Down
6 changes: 2 additions & 4 deletions apps/nn_ops/AveragePool_generator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ class AveragePool : public Generator<AveragePool> {
min(output_max_, max(output_min_, u8_sat(average(depth, x, y, batch))));

bool use_hexagon =
get_target().features_any_of({Target::HVX_64, Target::HVX_128});
get_target().has_feature(Target::HVX);
// Specifying .hexagon() on a Func will generate an RPC to run this stage
// on Hexagon. If Hexagon is the host (that is, the architecture is
// Hexagon), we have to omit the .hexagon() directive as we are already
Expand All @@ -90,9 +90,7 @@ class AveragePool : public Generator<AveragePool> {
}

int vector_size_u8 = get_target().natural_vector_size<uint8_t>();
if (get_target().has_feature(Target::HVX_64)) {
vector_size_u8 = 64;
} else if (get_target().has_feature(Target::HVX_128)) {
if (get_target().has_feature(Target::HVX)) {
vector_size_u8 = 128;
}

Expand Down
6 changes: 2 additions & 4 deletions apps/nn_ops/Convolution_generator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ class Convolution : public Generator<Convolution> {
u8_sat(u16_sat(scaled_plus_offset(depth, x, y, batch)))));

const bool use_hexagon =
get_target().features_any_of({Target::HVX_64, Target::HVX_128});
get_target().has_feature(Target::HVX);

// Specifying .hexagon() on a Func will generate an RPC to run this stage
// on Hexagon. If Hexagon is the host (that is, the architecture is
Expand All @@ -136,9 +136,7 @@ class Convolution : public Generator<Convolution> {

// Schedule for CPU and HVX.
int vector_size_u8 = get_target().natural_vector_size<uint8_t>();
if (get_target().has_feature(Target::HVX_64)) {
vector_size_u8 = 64;
} else if (get_target().has_feature(Target::HVX_128)) {
if (get_target().has_feature(Target::HVX)) {
vector_size_u8 = 128;
}
// We only perform vectorization when the depth >= vector size.
Expand Down
6 changes: 2 additions & 4 deletions apps/nn_ops/DepthwiseConvolution_generator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -167,13 +167,11 @@ class DepthwiseConvolution : public Generator<DepthwiseConvolution> {

// The schedule.
int vector_size_u8 = get_target().natural_vector_size<uint8_t>();
if (get_target().has_feature(Target::HVX_64)) {
vector_size_u8 = 64;
} else if (get_target().has_feature(Target::HVX_128)) {
if (get_target().has_feature(Target::HVX)) {
vector_size_u8 = 128;
}
const bool use_hexagon =
get_target().features_any_of({Target::HVX_64, Target::HVX_128});
get_target().has_feature(Target::HVX);

// Specifying .hexagon() on a Func will generate an RPC to run this stage
// on Hexagon. If Hexagon is the host (that is, the architecture is
Expand Down
6 changes: 2 additions & 4 deletions apps/nn_ops/Im2col_generator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,14 +52,12 @@ class Im2col : public Generator<Im2col> {

// The schedule.
int vector_size_u8 = get_target().natural_vector_size<uint8_t>();
if (get_target().has_feature(Target::HVX_64)) {
vector_size_u8 = 64;
} else if (get_target().has_feature(Target::HVX_128)) {
if (get_target().has_feature(Target::HVX)) {
vector_size_u8 = 128;
}

const bool use_hexagon =
get_target().features_any_of({Target::HVX_64, Target::HVX_128});
get_target().has_feature(Target::HVX);
if (use_hexagon) {
output_.hexagon();
}
Expand Down
6 changes: 1 addition & 5 deletions apps/nn_ops/MatrixMultiply_generator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,11 +62,7 @@ class MatrixMultiply : public Generator<MatrixMultiply> {
int vector_size_u8 = natural_vector_size<uint8_t>();
int vector_size_u32 = natural_vector_size<uint32_t>();
bool use_hexagon = false;
if (get_target().has_feature(Halide::Target::HVX_64)) {
vector_size_u8 = 64;
vector_size_u32 = 16;
use_hexagon = true;
} else if (get_target().has_feature(Halide::Target::HVX_128)) {
if (get_target().has_feature(Halide::Target::HVX)) {
vector_size_u8 = 128;
vector_size_u32 = 32;
use_hexagon = true;
Expand Down
6 changes: 2 additions & 4 deletions apps/nn_ops/MaxPool_generator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -72,16 +72,14 @@ class MaxPool : public Generator<MaxPool> {
// The schedule.

const bool use_hexagon =
get_target().features_any_of({Target::HVX_64, Target::HVX_128});
get_target().has_feature(Target::HVX);

if (use_hexagon) {
output_.hexagon();
}

int vector_size_u8 = get_target().natural_vector_size<uint8_t>();
if (get_target().has_feature(Target::HVX_64)) {
vector_size_u8 = 64;
} else if (get_target().has_feature(Target::HVX_128)) {
if (get_target().has_feature(Target::HVX)) {
vector_size_u8 = 128;
}

Expand Down
1 change: 1 addition & 0 deletions apps/support/Makefile.inc
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ CXX-host-cuda ?= $(CXX)
CXX-host-metal ?= $(CXX)
CXX-host-hvx_128 ?= $(CXX)
CXX-host-hvx_64 ?= $(CXX)
CXX-host-hvx ?= $(CXX)
CXX-$(HL_TARGET) ?= $(CXX)
CXX-arm-64-android ?= $(ANDROID_ARM64_TOOLCHAIN)/bin/aarch64-linux-android-c++
CXX-arm-32-android ?= $(ANDROID_ARM_TOOLCHAIN)/bin/arm-linux-androideabi-c++
Expand Down
2 changes: 1 addition & 1 deletion python_bindings/src/PyEnums.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ void define_enums(py::module &m) {
.value("Metal", Target::Feature::Metal)
.value("CPlusPlusMangling", Target::Feature::CPlusPlusMangling)
.value("LargeBuffers", Target::Feature::LargeBuffers)
.value("HVX_64", Target::Feature::HVX_64)
.value("HVX", Target::Feature::HVX)
.value("HVX_128", Target::Feature::HVX_128)
.value("HVX_v62", Target::Feature::HVX_v62)
.value("HVX_v65", Target::Feature::HVX_v65)
Expand Down
3 changes: 1 addition & 2 deletions src/CodeGen_C.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -405,8 +405,7 @@ CodeGen_C::~CodeGen_C() {
if (target.has_feature(Target::CUDA)) {
stream << halide_internal_runtime_header_HalideRuntimeCuda_h << "\n";
}
if (target.has_feature(Target::HVX_128) ||
target.has_feature(Target::HVX_64)) {
if (target.has_feature(Target::HVX)) {
stream << halide_internal_runtime_header_HalideRuntimeHexagonHost_h << "\n";
}
if (target.has_feature(Target::Metal)) {
Expand Down

0 comments on commit 31f1937

Please sign in to comment.