Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Issue #3925 : Remove hvx_64 #5365

Merged
merged 24 commits into from
Oct 21, 2020
Merged
Show file tree
Hide file tree
Changes from 16 commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
048999c
Remove MAKE_ID_PAIR and IdPair
pranavb-ca Oct 14, 2020
6371ef9
remove the uses of hvx_64 from Halide/src
pranavb-ca Oct 14, 2020
b121015
Remove use of hvx_64 from Halide/test
pranavb-ca Oct 14, 2020
18b704b
Remove use of hvx_64 from apps
pranavb-ca Oct 14, 2020
a33b3fc
remove use of hvx_64 from Target.cpp
pranavb-ca Oct 14, 2020
2554e60
fix intrinsic ids
pranavb-ca Oct 15, 2020
3316566
Remove all definitions of hvx_64
pranavb-ca Oct 15, 2020
609840c
Look for Target::HVX too, everywhere that we look for Target::HVX_128
pranavb-ca Oct 15, 2020
9a96a46
Clean up some nonsensical code related to hvx in apps
pranavb-ca Oct 15, 2020
09f9eda
[camera_pipe] - In hvx_128 we need 4 threads to saturate hvx with work
pranavb-ca Oct 15, 2020
13a4eb1
Merge branch 'master' into pdb_remove_hvx_v64
pranavb-ca Oct 15, 2020
4d1a4bb
Fix bad merge of test/correctness/mul_div_mod.cpp
pranavb-ca Oct 15, 2020
04cd8dd
Remove hvx_64 from, and add hvx to, the python bindings
pranavb-ca Oct 15, 2020
8151b77
Check only for Target::HVX
pranavb-ca Oct 16, 2020
16604ae
Set vector_size to 128. rule out vector sizes that made sense on HVX_…
pranavb-ca Oct 16, 2020
2cde234
prefer using HVX over HVX_128
pranavb-ca Oct 16, 2020
85f143c
Address review comments
pranavb-ca Oct 19, 2020
00ae979
Merge branch 'master' into pdb_remove_hvx_v64
steven-johnson Oct 20, 2020
7fdd42c
clang-format
steven-johnson Oct 20, 2020
5e91d6f
clang-format
steven-johnson Oct 20, 2020
c2c35b3
remove hvx_64 from Halide/Makefile
pranavb-ca Oct 20, 2020
b2c9769
Merge branch 'master' into pdb_remove_hvx_v64
pranavb-ca Oct 20, 2020
e520503
Merge branch 'master' into pdb_remove_hvx_v64
steven-johnson Oct 21, 2020
d94e7a7
Update CodeGen_Hexagon.cpp
steven-johnson Oct 21, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
4 changes: 2 additions & 2 deletions apps/blur/halide_blur_generator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -80,9 +80,9 @@ class HalideBlur : public Halide::Generator<HalideBlur> {
default:
break;
}
} else if (get_target().features_any_of({Target::HVX_64, Target::HVX_128})) {
} else if (get_target().has_feature(Target::HVX)) {
// Hexagon schedule.
const int vector_size = get_target().has_feature(Target::HVX_128) ? 128 : 64;
const int vector_size = 128;

blur_y.compute_root()
.hexagon()
Expand Down
32 changes: 13 additions & 19 deletions apps/camera_pipe/camera_pipe_generator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -165,12 +165,8 @@ class Demosaic : public Halide::Generator<Demosaic> {
.unroll(c);
} else {
int vec = get_target().natural_vector_size(UInt(16));
bool use_hexagon = get_target().features_any_of({Target::HVX_64, Target::HVX_128});
if (get_target().has_feature(Target::HVX_64)) {
vec = 32;
} else if (get_target().has_feature(Target::HVX_128)) {
vec = 64;
}
bool use_hexagon = get_target().has_feature(Target::HVX);

for (Func f : intermediates) {
f.compute_at(intermed_compute_at)
.store_at(intermed_store_at)
Expand Down Expand Up @@ -305,7 +301,7 @@ Func CameraPipe::apply_curve(Func input) {

// How much to upsample the LUT by when sampling it.
int lutResample = 1;
if (get_target().features_any_of({Target::HVX_64, Target::HVX_128})) {
if (get_target().has_feature(Target::HVX)) {
// On HVX, LUT lookups are much faster when the LUT has no more
// than 256 elements, so we reduce the tonemap to 256
// elements and use linear interpolation to upsample it.
Expand Down Expand Up @@ -504,26 +500,24 @@ void CameraPipe::generate() {
Expr out_width = processed.width();
Expr out_height = processed.height();

// In HVX 128, we need 2 threads to saturate HVX with work,
// and in HVX 64 we need 4 threads, and on other devices,
// we might need many threads.
// When targeting HVX_v65 or HVX_v66 we need 4 threads to saturate
// HVX with work; other HVX targets need 2. Non-HVX targets use a
// fixed strip size of 32 rows instead.
Expr strip_size;
if (get_target().has_feature(Target::HVX_128)) {
strip_size = processed.dim(1).extent() / 2;
} else if (get_target().has_feature(Target::HVX_64)) {
strip_size = processed.dim(1).extent() / 4;
if (get_target().has_feature(Target::HVX)) {
if (get_target().features_any_of({Target::HVX_v65, Target::HVX_v66})) {
pranavb-ca marked this conversation as resolved.
Show resolved Hide resolved
strip_size = processed.dim(1).extent() / 4;
} else {
strip_size = processed.dim(1).extent() / 2;
}
} else {
strip_size = 32;
}
strip_size = (strip_size / 2) * 2;

int vec = get_target().natural_vector_size(UInt(16));
if (get_target().has_feature(Target::HVX_64)) {
vec = 32;
} else if (get_target().has_feature(Target::HVX_128)) {
if (get_target().has_feature(Target::HVX)) {
vec = 64;
}

processed
.compute_root()
.reorder(c, x, y)
Expand Down Expand Up @@ -569,7 +563,7 @@ void CameraPipe::generate() {
demosaiced->intermed_store_at.set({processed, yo});
demosaiced->output_compute_at.set({curved, x});

if (get_target().features_any_of({Target::HVX_64, Target::HVX_128})) {
if (get_target().has_feature(Target::HVX)) {
processed.hexagon();
denoised.align_storage(x, vec);
deinterleaved.align_storage(x, vec);
Expand Down
4 changes: 2 additions & 2 deletions apps/hexagon_benchmarks/conv3x3_generator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,8 @@ class Conv3x3 : public Generator<Conv3x3> {
output.dim(0).set_min(0);
output.dim(1).set_min(0);

if (get_target().features_any_of({Target::HVX_64, Target::HVX_128})) {
const int vector_size = get_target().has_feature(Target::HVX_128) ? 128 : 64;
if (get_target().has_feature(Target::HVX)) {
const int vector_size = 128;
Expr input_stride = input.dim(1).stride();
input.dim(1).set_stride((input_stride / vector_size) * vector_size);

Expand Down
4 changes: 2 additions & 2 deletions apps/hexagon_benchmarks/dilate3x3_generator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@ class Dilate3x3 : public Generator<Dilate3x3> {
output.dim(0).set_min(0);
output.dim(1).set_min(0);

if (get_target().features_any_of({Target::HVX_64, Target::HVX_128})) {
const int vector_size = get_target().has_feature(Target::HVX_128) ? 128 : 64;
if (get_target().has_feature(Target::HVX)) {
const int vector_size = 128;
Expr input_stride = input.dim(1).stride();
input.dim(1).set_stride((input_stride / vector_size) * vector_size);

Expand Down
4 changes: 2 additions & 2 deletions apps/hexagon_benchmarks/gaussian5x5_generator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,8 @@ class Gaussian5x5 : public Generator<Gaussian5x5> {
output.dim(0).set_min(0);
output.dim(1).set_min(0);

if (get_target().features_any_of({Target::HVX_64, Target::HVX_128})) {
const int vector_size = get_target().has_feature(Target::HVX_128) ? 128 : 64;
if (get_target().has_feature(Target::HVX)) {
const int vector_size = 128;
Expr input_stride = input.dim(1).stride();
input.dim(1).set_stride((input_stride / vector_size) * vector_size);

Expand Down
4 changes: 2 additions & 2 deletions apps/hexagon_benchmarks/median3x3_generator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,8 @@ class Median3x3 : public Generator<Median3x3> {
output.dim(0).set_min(0);
output.dim(1).set_min(0);

if (get_target().features_any_of({Target::HVX_64, Target::HVX_128})) {
const int vector_size = get_target().has_feature(Target::HVX_128) ? 128 : 64;
if (get_target().has_feature(Target::HVX)) {
const int vector_size = 128;
Expr input_stride = input.dim(1).stride();
input.dim(1).set_stride((input_stride / vector_size) * vector_size);

Expand Down
4 changes: 2 additions & 2 deletions apps/hexagon_benchmarks/sobel_generator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,8 @@ class Sobel : public Generator<Sobel> {
input.dim(0).set_min(0);
input.dim(1).set_min(0);

if (get_target().features_any_of({Target::HVX_64, Target::HVX_128})) {
const int vector_size = get_target().has_feature(Target::HVX_128) ? 128 : 64;
if (get_target().has_feature(Target::HVX)) {
const int vector_size = 128;
Expr input_stride = input.dim(1).stride();
input.dim(1).set_stride((input_stride / vector_size) * vector_size);

Expand Down
6 changes: 2 additions & 4 deletions apps/nn_ops/AveragePool_generator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ class AveragePool : public Generator<AveragePool> {
min(output_max_, max(output_min_, u8_sat(average(depth, x, y, batch))));

bool use_hexagon =
get_target().features_any_of({Target::HVX_64, Target::HVX_128});
get_target().has_feature(Target::HVX);
// Specifying .hexagon() on a Func will generate an RPC to run this stage
// on Hexagon. If Hexagon is the host (that is, the architecture is
// Hexagon), we have to omit the .hexagon() directive as we are already
Expand All @@ -90,9 +90,7 @@ class AveragePool : public Generator<AveragePool> {
}

int vector_size_u8 = get_target().natural_vector_size<uint8_t>();
if (get_target().has_feature(Target::HVX_64)) {
vector_size_u8 = 64;
} else if (get_target().has_feature(Target::HVX_128)) {
if (get_target().has_feature(Target::HVX)) {
vector_size_u8 = 128;
}

Expand Down
6 changes: 2 additions & 4 deletions apps/nn_ops/Convolution_generator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ class Convolution : public Generator<Convolution> {
u8_sat(u16_sat(scaled_plus_offset(depth, x, y, batch)))));

const bool use_hexagon =
get_target().features_any_of({Target::HVX_64, Target::HVX_128});
get_target().has_feature(Target::HVX);

// Specifying .hexagon() on a Func will generate an RPC to run this stage
// on Hexagon. If Hexagon is the host (that is, the architecture is
Expand All @@ -136,9 +136,7 @@ class Convolution : public Generator<Convolution> {

// Schedule for CPU and HVX.
int vector_size_u8 = get_target().natural_vector_size<uint8_t>();
if (get_target().has_feature(Target::HVX_64)) {
vector_size_u8 = 64;
} else if (get_target().has_feature(Target::HVX_128)) {
if (get_target().has_feature(Target::HVX)) {
vector_size_u8 = 128;
}
// We only perform vectorization when the depth >= vector size.
Expand Down
6 changes: 2 additions & 4 deletions apps/nn_ops/DepthwiseConvolution_generator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -167,13 +167,11 @@ class DepthwiseConvolution : public Generator<DepthwiseConvolution> {

// The schedule.
int vector_size_u8 = get_target().natural_vector_size<uint8_t>();
if (get_target().has_feature(Target::HVX_64)) {
vector_size_u8 = 64;
} else if (get_target().has_feature(Target::HVX_128)) {
if (get_target().has_feature(Target::HVX)) {
vector_size_u8 = 128;
}
const bool use_hexagon =
get_target().features_any_of({Target::HVX_64, Target::HVX_128});
get_target().has_feature(Target::HVX);

// Specifying .hexagon() on a Func will generate an RPC to run this stage
// on Hexagon. If Hexagon is the host (that is, the architecture is
Expand Down
6 changes: 2 additions & 4 deletions apps/nn_ops/Im2col_generator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,14 +52,12 @@ class Im2col : public Generator<Im2col> {

// The schedule.
int vector_size_u8 = get_target().natural_vector_size<uint8_t>();
if (get_target().has_feature(Target::HVX_64)) {
vector_size_u8 = 64;
} else if (get_target().has_feature(Target::HVX_128)) {
if (get_target().has_feature(Target::HVX)) {
vector_size_u8 = 128;
}

const bool use_hexagon =
get_target().features_any_of({Target::HVX_64, Target::HVX_128});
get_target().has_feature(Target::HVX);
if (use_hexagon) {
output_.hexagon();
}
Expand Down
6 changes: 1 addition & 5 deletions apps/nn_ops/MatrixMultiply_generator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,11 +62,7 @@ class MatrixMultiply : public Generator<MatrixMultiply> {
int vector_size_u8 = natural_vector_size<uint8_t>();
int vector_size_u32 = natural_vector_size<uint32_t>();
bool use_hexagon = false;
if (get_target().has_feature(Halide::Target::HVX_64)) {
vector_size_u8 = 64;
vector_size_u32 = 16;
use_hexagon = true;
} else if (get_target().has_feature(Halide::Target::HVX_128)) {
if (get_target().has_feature(Halide::Target::HVX)) {
vector_size_u8 = 128;
vector_size_u32 = 32;
use_hexagon = true;
Expand Down
6 changes: 2 additions & 4 deletions apps/nn_ops/MaxPool_generator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -72,16 +72,14 @@ class MaxPool : public Generator<MaxPool> {
// The schedule.

const bool use_hexagon =
get_target().features_any_of({Target::HVX_64, Target::HVX_128});
get_target().has_feature(Target::HVX);

if (use_hexagon) {
output_.hexagon();
}

int vector_size_u8 = get_target().natural_vector_size<uint8_t>();
if (get_target().has_feature(Target::HVX_64)) {
vector_size_u8 = 64;
} else if (get_target().has_feature(Target::HVX_128)) {
if (get_target().has_feature(Target::HVX)) {
vector_size_u8 = 128;
}

Expand Down
1 change: 1 addition & 0 deletions apps/support/Makefile.inc
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ CXX-host-cuda ?= $(CXX)
CXX-host-metal ?= $(CXX)
CXX-host-hvx_128 ?= $(CXX)
CXX-host-hvx_64 ?= $(CXX)
CXX-host-hvx ?= $(CXX)
CXX-$(HL_TARGET) ?= $(CXX)
CXX-arm-64-android ?= $(ANDROID_ARM64_TOOLCHAIN)/bin/aarch64-linux-android-c++
CXX-arm-32-android ?= $(ANDROID_ARM_TOOLCHAIN)/bin/arm-linux-androideabi-c++
Expand Down
2 changes: 1 addition & 1 deletion python_bindings/src/PyEnums.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ void define_enums(py::module &m) {
.value("Metal", Target::Feature::Metal)
.value("CPlusPlusMangling", Target::Feature::CPlusPlusMangling)
.value("LargeBuffers", Target::Feature::LargeBuffers)
.value("HVX_64", Target::Feature::HVX_64)
.value("HVX", Target::Feature::HVX)
.value("HVX_128", Target::Feature::HVX_128)
.value("HVX_v62", Target::Feature::HVX_v62)
.value("HVX_v65", Target::Feature::HVX_v65)
Expand Down
3 changes: 1 addition & 2 deletions src/CodeGen_C.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -405,8 +405,7 @@ CodeGen_C::~CodeGen_C() {
if (target.has_feature(Target::CUDA)) {
stream << halide_internal_runtime_header_HalideRuntimeCuda_h << "\n";
}
if (target.has_feature(Target::HVX_128) ||
target.has_feature(Target::HVX_64)) {
if (target.has_feature(Target::HVX)) {
stream << halide_internal_runtime_header_HalideRuntimeHexagonHost_h << "\n";
}
if (target.has_feature(Target::Metal)) {
Expand Down