diff --git a/Makefile b/Makefile index 7b082a0a1a10..32092e9bd8f6 100644 --- a/Makefile +++ b/Makefile @@ -321,7 +321,6 @@ SOURCE_FILES = \ Generator.cpp \ HexagonOffload.cpp \ HexagonOptimize.cpp \ - Image.cpp \ ImageParam.cpp \ Interval.cpp \ InjectHostDevBufferCopies.cpp \ @@ -458,7 +457,7 @@ HEADER_FILES = \ HexagonOffload.h \ HexagonOptimize.h \ runtime/HalideRuntime.h \ - Image.h \ + runtime/HalideImage.h \ ImageParam.h \ Interval.h \ InjectHostDevBufferCopies.h \ @@ -1068,7 +1067,7 @@ performance_%: $(BIN_DIR)/performance_% error_%: $(BIN_DIR)/error_% @-mkdir -p $(TMP_DIR) - cd $(TMP_DIR) ; $(CURDIR)/$< 2>&1 | egrep --q "terminating with uncaught exception|^terminate called|^Error" + cd $(TMP_DIR) ; $(CURDIR)/$< 2>&1 | egrep --q "terminating with uncaught exception|^terminate called|^Error|Assertion.*failed" @-echo warning_%: $(BIN_DIR)/warning_% @@ -1251,6 +1250,7 @@ install: $(LIB_DIR)/libHalide.a $(BIN_DIR)/libHalide.$(SHARED_EXT) $(INCLUDE_DIR mkdir -p $(PREFIX)/include $(PREFIX)/bin $(PREFIX)/lib $(PREFIX)/share/halide/tutorial/images $(PREFIX)/share/halide/tools $(PREFIX)/share/halide/tutorial/figures cp $(LIB_DIR)/libHalide.a $(BIN_DIR)/libHalide.$(SHARED_EXT) $(PREFIX)/lib cp $(INCLUDE_DIR)/Halide.h $(PREFIX)/include + cp $(INCLUDE_DIR)/HalideImage.h $(PREFIX)/include cp $(INCLUDE_DIR)/HalideRuntim*.h $(PREFIX)/include cp $(ROOT_DIR)/tutorial/images/*.png $(PREFIX)/share/halide/tutorial/images cp $(ROOT_DIR)/tutorial/figures/*.gif $(PREFIX)/share/halide/tutorial/figures @@ -1261,7 +1261,6 @@ install: $(LIB_DIR)/libHalide.a $(BIN_DIR)/libHalide.$(SHARED_EXT) $(INCLUDE_DIR cp $(ROOT_DIR)/tutorial/*.sh $(PREFIX)/share/halide/tutorial cp $(ROOT_DIR)/tools/mex_halide.m $(PREFIX)/share/halide/tools cp $(ROOT_DIR)/tools/GenGen.cpp $(PREFIX)/share/halide/tools - cp $(ROOT_DIR)/tools/halide_image.h $(PREFIX)/share/halide/tools cp $(ROOT_DIR)/tools/halide_image_io.h $(PREFIX)/share/halide/tools cp $(ROOT_DIR)/tools/halide_image_info.h 
$(PREFIX)/share/halide/tools @@ -1270,6 +1269,7 @@ $(DISTRIB_DIR)/halide.tgz: $(LIB_DIR)/libHalide.a $(BIN_DIR)/libHalide.$(SHARED_ cp $(BIN_DIR)/libHalide.$(SHARED_EXT) $(DISTRIB_DIR)/bin cp $(LIB_DIR)/libHalide.a $(DISTRIB_DIR)/lib cp $(INCLUDE_DIR)/Halide.h $(DISTRIB_DIR)/include + cp $(INCLUDE_DIR)/HalideImage.h $(DISTRIB_DIR)/include cp $(INCLUDE_DIR)/HalideRuntim*.h $(DISTRIB_DIR)/include cp $(ROOT_DIR)/tutorial/images/*.png $(DISTRIB_DIR)/tutorial/images cp $(ROOT_DIR)/tutorial/figures/*.gif $(DISTRIB_DIR)/tutorial/figures @@ -1280,12 +1280,11 @@ $(DISTRIB_DIR)/halide.tgz: $(LIB_DIR)/libHalide.a $(BIN_DIR)/libHalide.$(SHARED_ cp $(ROOT_DIR)/tutorial/*.sh $(DISTRIB_DIR)/tutorial cp $(ROOT_DIR)/tools/mex_halide.m $(DISTRIB_DIR)/tools cp $(ROOT_DIR)/tools/GenGen.cpp $(DISTRIB_DIR)/tools - cp $(ROOT_DIR)/tools/halide_image.h $(DISTRIB_DIR)/tools cp $(ROOT_DIR)/tools/halide_image_io.h $(DISTRIB_DIR)/tools cp $(ROOT_DIR)/tools/halide_image_info.h $(DISTRIB_DIR)/tools cp $(ROOT_DIR)/README.md $(DISTRIB_DIR) ln -sf $(DISTRIB_DIR) halide - tar -czf $(DISTRIB_DIR)/halide.tgz halide/bin halide/lib halide/include halide/tutorial halide/README.md halide/tools/mex_halide.m halide/tools/GenGen.cpp halide/tools/halide_image.h halide/tools/halide_image_io.h halide/tools/halide_image_info.h + tar -czf $(DISTRIB_DIR)/halide.tgz halide/bin halide/lib halide/include halide/tutorial halide/README.md halide/tools/mex_halide.m halide/tools/GenGen.cpp halide/tools/halide_image_io.h halide/tools/halide_image_info.h rm -rf halide .PHONY: distrib diff --git a/apps/bilateral_grid/filter.cpp b/apps/bilateral_grid/filter.cpp index 44d38af9aa84..88d3e9386f8e 100644 --- a/apps/bilateral_grid/filter.cpp +++ b/apps/bilateral_grid/filter.cpp @@ -5,10 +5,10 @@ #include "bilateral_grid.h" #include "benchmark.h" -#include "halide_image.h" +#include "HalideImage.h" #include "halide_image_io.h" -using namespace Halide::Tools; +using namespace Halide; int main(int argc, char **argv) { diff --git 
a/apps/blur/test.cpp b/apps/blur/test.cpp index 982b16612b25..2f561bee360f 100644 --- a/apps/blur/test.cpp +++ b/apps/blur/test.cpp @@ -4,9 +4,9 @@ #include #include "benchmark.h" -#include "halide_image.h" +#include "HalideImage.h" -using namespace Halide::Tools; +using namespace Halide; //#define cimg_display 0 //#include "CImg.h" diff --git a/apps/c_backend/run.cpp b/apps/c_backend/run.cpp index c3dead9c22fa..d042d54acc1b 100644 --- a/apps/c_backend/run.cpp +++ b/apps/c_backend/run.cpp @@ -2,11 +2,11 @@ #include #include -#include "halide_image.h" +#include "HalideImage.h" #include "pipeline_c.h" #include "pipeline_native.h" -using namespace Halide::Tools; +using namespace Halide; extern "C" int an_extern_func(int x, int y) { return x + y; diff --git a/apps/c_backend/run_cpp.cpp b/apps/c_backend/run_cpp.cpp index bd38e827d90d..85fd38ea5a25 100644 --- a/apps/c_backend/run_cpp.cpp +++ b/apps/c_backend/run_cpp.cpp @@ -2,11 +2,11 @@ #include #include -#include "halide_image.h" +#include "HalideImage.h" #include "pipeline_cpp_native.h" #include "pipeline_cpp_cpp.h" -using namespace Halide::Tools; +using namespace Halide; extern "C" int an_extern_c_func(int a1, float a2) { return (int)(a1 + a2); diff --git a/apps/camera_pipe/fcam/Demosaic.h b/apps/camera_pipe/fcam/Demosaic.h index 06068b4af98a..fd5ce1dac6fc 100644 --- a/apps/camera_pipe/fcam/Demosaic.h +++ b/apps/camera_pipe/fcam/Demosaic.h @@ -3,7 +3,7 @@ /** \file * Converting RAW data to RGB24 by demosiacking and gamma correcting. 
*/ -#include "halide_image.h" +#include "HalideImage.h" namespace FCam { diff --git a/apps/camera_pipe/fcam/Demosaic_ARM.h b/apps/camera_pipe/fcam/Demosaic_ARM.h index e51e9093984b..8b901bc1e6e3 100644 --- a/apps/camera_pipe/fcam/Demosaic_ARM.h +++ b/apps/camera_pipe/fcam/Demosaic_ARM.h @@ -2,7 +2,7 @@ #define FCAM_DEMOSAIC_ARM_H //#ifdef FCAM_ARCH_ARM -#include "halide_image.h" +#include "HalideImage.h" // Arm-specific optimized post-processing routines diff --git a/apps/camera_pipe/process.cpp b/apps/camera_pipe/process.cpp index efae43b3cb9a..f7fb68ba5b16 100644 --- a/apps/camera_pipe/process.cpp +++ b/apps/camera_pipe/process.cpp @@ -3,7 +3,7 @@ #include "benchmark.h" #include "curved.h" -#include "halide_image.h" +#include "HalideImage.h" #include "halide_image_io.h" #include "halide_malloc_trace.h" @@ -12,7 +12,7 @@ #include #include -using namespace Halide::Tools; +using namespace Halide; int main(int argc, char **argv) { if (argc < 7) { diff --git a/apps/interpolate/interpolate.cpp b/apps/interpolate/interpolate.cpp index c855e2997845..349c22668fa1 100644 --- a/apps/interpolate/interpolate.cpp +++ b/apps/interpolate/interpolate.cpp @@ -8,7 +8,7 @@ using namespace Halide; #include "benchmark.h" #include "halide_image_io.h" -using namespace Halide::Tools; +using namespace Halide; using std::vector; diff --git a/apps/local_laplacian/process.cpp b/apps/local_laplacian/process.cpp index 982bd28d9cb4..f1b2d2ec6e1a 100644 --- a/apps/local_laplacian/process.cpp +++ b/apps/local_laplacian/process.cpp @@ -4,10 +4,10 @@ #include "local_laplacian.h" #include "benchmark.h" -#include "halide_image.h" +#include "HalideImage.h" #include "halide_image_io.h" -using namespace Halide::Tools; +using namespace Halide; int main(int argc, char **argv) { if (argc < 7) { diff --git a/apps/modules/run_pipeline.cpp b/apps/modules/run_pipeline.cpp index 17bdbeb98153..6c164f8b501a 100644 --- a/apps/modules/run_pipeline.cpp +++ b/apps/modules/run_pipeline.cpp @@ -1,9 +1,9 @@ #include 
"pipeline.h" -#include "halide_image.h" +#include "HalideImage.h" #include "halide_image_io.h" -using namespace Halide::Tools; +using namespace Halide; int main(int argc, char **argv) { diff --git a/apps/templates/tests/example_test.cpp b/apps/templates/tests/example_test.cpp index 0b302c0801c9..d5351b83e815 100644 --- a/apps/templates/tests/example_test.cpp +++ b/apps/templates/tests/example_test.cpp @@ -5,7 +5,7 @@ #include "HalideRuntimeOpenGL.h" #include "SimpleAppAPI.h" -#include "halide_image.h" +#include "HalideImage.h" #include "example4.h" #include "example4_glsl.h" diff --git a/apps/wavelet/wavelet.cpp b/apps/wavelet/wavelet.cpp index 7ef331c967cd..442b2391ee60 100644 --- a/apps/wavelet/wavelet.cpp +++ b/apps/wavelet/wavelet.cpp @@ -5,10 +5,10 @@ #include "daubechies_x.h" #include "inverse_daubechies_x.h" -#include "halide_image.h" +#include "HalideImage.h" #include "halide_image_io.h" -using namespace Halide::Tools; +using namespace Halide; namespace { diff --git a/src/AddImageChecks.cpp b/src/AddImageChecks.cpp index 01ddc88ceef9..9a3e2e0e37a2 100644 --- a/src/AddImageChecks.cpp +++ b/src/AddImageChecks.cpp @@ -387,7 +387,7 @@ Stmt add_image_checks(Stmt s, stride_constrained = param.stride_constraint(i); } else if (image.defined() && (int)i < image.dimensions()) { - stride_constrained = image.stride(i); + stride_constrained = image.dim(i).stride(); } std::string min0_name = buffer_name + ".0.min." 
+ dim; @@ -404,9 +404,9 @@ Stmt add_image_checks(Stmt s, extent_constrained = Variable::make(Int(32), extent0_name); } } else if (image.defined() && (int)i < image.dimensions()) { - stride_constrained = image.stride(i); - extent_constrained = image.extent(i); - min_constrained = image.min(i); + stride_constrained = image.dim(i).stride(); + extent_constrained = image.dim(i).extent(); + min_constrained = image.dim(i).min(); } else if (param.defined()) { stride_constrained = param.stride_constraint(i); extent_constrained = param.extent_constraint(i); diff --git a/src/Argument.h b/src/Argument.h index 79ae0f657ced..eb81b701af69 100644 --- a/src/Argument.h +++ b/src/Argument.h @@ -72,6 +72,9 @@ struct Argument { << "Scalar max must not be defined for Buffer Arguments"; } + template + Argument(const Image &im) : kind(InputBuffer), dimensions(im.dimensions()), type(im.type()) {} + bool is_buffer() const { return kind == InputBuffer || kind == OutputBuffer; } bool is_scalar() const { return kind == InputScalar; } diff --git a/src/BoundaryConditions.h b/src/BoundaryConditions.h index 6e262e02283c..00114224f32a 100644 --- a/src/BoundaryConditions.h +++ b/src/BoundaryConditions.h @@ -99,7 +99,7 @@ template inline NO_INLINE Func constant_exterior(T func_like, Tuple value) { std::vector> object_bounds; for (int i = 0; i < func_like.dimensions(); i++) { - object_bounds.push_back(std::make_pair(Expr(func_like.min(i)), Expr(func_like.extent(i)))); + object_bounds.push_back(std::make_pair(Expr(func_like.dim(i).min()), Expr(func_like.dim(i).extent()))); } return constant_exterior(Internal::func_like_to_func(func_like), value, object_bounds); @@ -143,7 +143,7 @@ template inline NO_INLINE Func repeat_edge(T func_like) { std::vector> object_bounds; for (int i = 0; i < func_like.dimensions(); i++) { - object_bounds.push_back(std::make_pair(Expr(func_like.min(i)), Expr(func_like.extent(i)))); + object_bounds.push_back(std::make_pair(Expr(func_like.dim(i).min()), 
Expr(func_like.dim(i).extent()))); } return repeat_edge(Internal::func_like_to_func(func_like), object_bounds); @@ -178,7 +178,7 @@ template inline NO_INLINE Func repeat_image(T func_like) { std::vector> object_bounds; for (int i = 0; i < func_like.dimensions(); i++) { - object_bounds.push_back(std::make_pair(Expr(func_like.min(i)), Expr(func_like.extent(i)))); + object_bounds.push_back(std::make_pair(Expr(func_like.dim(i).min()), Expr(func_like.dim(i).extent()))); } return repeat_image(Internal::func_like_to_func(func_like), object_bounds); @@ -212,7 +212,7 @@ template inline NO_INLINE Func mirror_image(T func_like) { std::vector> object_bounds; for (int i = 0; i < func_like.dimensions(); i++) { - object_bounds.push_back(std::make_pair(Expr(func_like.min(i)), Expr(func_like.extent(i)))); + object_bounds.push_back(std::make_pair(Expr(func_like.dim(i).min()), Expr(func_like.dim(i).extent()))); } return mirror_image(Internal::func_like_to_func(func_like), object_bounds); @@ -249,7 +249,7 @@ template inline NO_INLINE Func mirror_interior(T func_like) { std::vector> object_bounds; for (int i = 0; i < func_like.dimensions(); i++) { - object_bounds.push_back(std::make_pair(Expr(func_like.min(i)), Expr(func_like.extent(i)))); + object_bounds.push_back(std::make_pair(Expr(func_like.dim(i).min()), Expr(func_like.dim(i).extent()))); } return mirror_interior(Internal::func_like_to_func(func_like), object_bounds); diff --git a/src/Bounds.cpp b/src/Bounds.cpp index 7fd88275de23..4edc01a284fd 100644 --- a/src/Bounds.cpp +++ b/src/Bounds.cpp @@ -1527,12 +1527,13 @@ void bounds_test() { vector input_site_2 = {2*x+1}; vector output_site = {x+1}; - Buffer in(Int(32), {10}, nullptr, "input"); + Image in(10); + Buffer in_buf(in, "input"); Stmt loop = For::make("x", 3, 10, ForType::Serial, DeviceAPI::Host, Provide::make("output", - {Add::make(Call::make(in, input_site_1), - Call::make(in, input_site_2))}, + {Add::make(Call::make(in_buf, input_site_1), + Call::make(in_buf, 
input_site_2))}, output_site)); map r; diff --git a/src/Buffer.cpp b/src/Buffer.cpp index 06ea304a3c15..0230e2f9ba86 100644 --- a/src/Buffer.cpp +++ b/src/Buffer.cpp @@ -4,101 +4,17 @@ #include "JITModule.h" #include "runtime/HalideRuntime.h" #include "Target.h" +#include "Var.h" +#include "IREquality.h" +#include "IROperator.h" namespace Halide { namespace Internal { -namespace { - -uint64_t multiply_buffer_size_check_overflow(uint64_t size, uint64_t factor, const std::string &name) { - // Ignore the dimensions for which the extent is zero. - if (!factor) return size; - - // Multiply and check for 64-bit overflow - uint64_t result = size * factor; - bool overflow = (result / factor) != size; - - // Check against the limits Halide internally assumes in its compiled code. - overflow |= (sizeof(size_t) == 4) && ((result >> 31) != 0); - - // In 64-bit with LargeBuffers *not* set, the limit above is the - // correct one, however at Buffer creation time we don't know what - // pipelines it will be used in, so we must be conservative and - // defer the error until the user actually passes the buffer into - // a pipeline they shouldn't have. - overflow |= (sizeof(size_t) == 8) && ((result >> 63) != 0); - - // Assert there was no overflow. - user_assert(!overflow) - << "Total size of buffer " << name << " exceeds 2^" << ((sizeof(size_t) * 8) - 1) << " - 1\n"; - return result; -} - -} - struct BufferContents { - /** The buffer_t object we're wrapping. */ - buffer_t buf; - - /** The type of the allocation. buffer_t's don't currently track this so we do it here. */ - Type type; - - /** If we made the allocation ourselves via a Buffer constructor, - * and hence should delete it when this buffer dies, then this - * pointer is set to the memory we need to free. Otherwise it's - * nullptr. */ - uint8_t *allocation; - - /** How many Buffer objects point to this BufferContents */ - mutable RefCount ref_count; - - /** What is the name of the buffer? Useful for debugging symbols. 
*/ + Image image; std::string name; - - BufferContents(Type t, int x_size, int y_size, int z_size, int w_size, - uint8_t* data, const std::string &n) : - type(t), allocation(nullptr), name(n.empty() ? unique_name('b') : n) { - user_assert(t.lanes() == 1) << "Can't create of a buffer of a vector type"; - buf.elem_size = t.bytes(); - uint64_t size = 1; - size = multiply_buffer_size_check_overflow(size, x_size, name); - size = multiply_buffer_size_check_overflow(size, y_size, name); - size = multiply_buffer_size_check_overflow(size, z_size, name); - size = multiply_buffer_size_check_overflow(size, w_size, name); - size = multiply_buffer_size_check_overflow(size, buf.elem_size, name); - - if (!data) { - // There's no way for this to overflow without the buffer already being > 2^63-1 - size += 32; - allocation = (uint8_t *)calloc(1, (size_t)size); - user_assert(allocation) << "Out of memory allocating buffer " << name << " of size " << size << "\n"; - buf.host = allocation; - while ((size_t)(buf.host) & 0x1f) buf.host++; - } else { - buf.host = data; - } - buf.dev = 0; - buf.host_dirty = false; - buf.dev_dirty = false; - buf.extent[0] = x_size; - buf.extent[1] = y_size; - buf.extent[2] = z_size; - buf.extent[3] = w_size; - buf.stride[0] = 1; - buf.stride[1] = x_size; - buf.stride[2] = x_size*y_size; - buf.stride[3] = x_size*y_size*z_size; - buf.min[0] = 0; - buf.min[1] = 0; - buf.min[2] = 0; - buf.min[3] = 0; - } - - BufferContents(Type t, const buffer_t *b, const std::string &n) : - type(t), allocation(nullptr), name(n.empty() ? unique_name('b') : n) { - buf = *b; - user_assert(t.lanes() == 1) << "Can't create of a buffer of a vector type"; - } + mutable RefCount ref_count; }; template<> @@ -108,156 +24,96 @@ EXPORT RefCount &ref_count(const BufferContents *p) { template<> EXPORT void destroy(const BufferContents *p) { - // Ignore errors. We may be cleaning up a buffer after an earlier - // error, and asserting would re-raise it. 
- halide_device_free(nullptr, const_cast(&p->buf)); - free(p->allocation); delete p; } - } namespace { -int32_t size_or_zero(const std::vector &sizes, size_t index) { - return (index < sizes.size()) ? sizes[index] : 0; -} - std::string make_buffer_name(const std::string &n, Buffer *b) { if (n.empty()) { - return Internal::make_entity_name(b, "Halide::Buffer", 'b'); + return Internal::make_entity_name(b, "Halide::Internal::Buffer", 'b'); } else { return n; } } } -Buffer::Buffer(Type t, int x_size, int y_size, int z_size, int w_size, - uint8_t* data, const std::string &name) : - contents(new Internal::BufferContents(t, x_size, y_size, z_size, w_size, data, - make_buffer_name(name, this))) { -} - -Buffer::Buffer(Type t, const std::vector &sizes, - uint8_t* data, const std::string &name) : - contents(new Internal::BufferContents(t, - size_or_zero(sizes, 0), - size_or_zero(sizes, 1), - size_or_zero(sizes, 2), - size_or_zero(sizes, 3), - data, - make_buffer_name(name, this))) { - user_assert(sizes.size() <= 4) << "Buffer dimensions greater than 4 are not supported."; -} - -Buffer::Buffer(Type t, const buffer_t *buf, const std::string &name) : - contents(new Internal::BufferContents(t, buf, - make_buffer_name(name, this))) { -} - -void *Buffer::host_ptr() const { - user_assert(defined()) << "Buffer is undefined\n"; - return (void *)contents->buf.host; -} - -buffer_t *Buffer::raw_buffer() const { - user_assert(defined()) << "Buffer is undefined\n"; - return &(contents->buf); -} - -uint64_t Buffer::device_handle() const { - user_assert(defined()) << "Buffer is undefined\n"; - return contents->buf.dev; -} +Buffer::Buffer(const Image &buf, std::string name) : + contents(new Internal::BufferContents {Image(buf), make_buffer_name(name, this)}) {} -bool Buffer::host_dirty() const { - user_assert(defined()) << "Buffer is undefined\n"; - return contents->buf.host_dirty; -} +Buffer::Buffer(Type t, const buffer_t &buf, std::string name) : + contents(new Internal::BufferContents 
{Image(t, buf), make_buffer_name(name, this)}) {} -void Buffer::set_host_dirty(bool dirty) { - user_assert(defined()) << "Buffer is undefined\n"; - contents->buf.host_dirty = dirty; -} - -bool Buffer::device_dirty() const { - user_assert(defined()) << "Buffer is undefined\n"; - return contents->buf.dev_dirty; -} +Buffer::Buffer(Type t, const std::vector &size, std::string name) : + contents(new Internal::BufferContents {Image(t, size), make_buffer_name(name, this)}) {} -void Buffer::set_device_dirty(bool dirty) { - user_assert(defined()) << "Buffer is undefined\n"; - contents->buf.dev_dirty = dirty; +bool Buffer::same_as(const Buffer &other) const { + return contents.same_as(other.contents); } -int Buffer::dimensions() const { - for (int i = 0; i < 4; i++) { - if (extent(i) == 0) return i; - } - return 4; +Image &Buffer::get() { + return contents->image; } -int Buffer::extent(int dim) const { - user_assert(defined()) << "Buffer is undefined\n"; - user_assert(dim >= 0 && dim < 4) << "We only support 4-dimensional buffers for now"; - return contents->buf.extent[dim]; +const Image &Buffer::get() const { + return contents->image; } -int Buffer::stride(int dim) const { - user_assert(defined()); - user_assert(dim >= 0 && dim < 4) << "We only support 4-dimensional buffers for now"; - return contents->buf.stride[dim]; +bool Buffer::defined() const { + return contents->image; } -int Buffer::min(int dim) const { - user_assert(defined()) << "Buffer is undefined\n"; - user_assert(dim >= 0 && dim < 4) << "We only support 4-dimensional buffers for now"; - return contents->buf.min[dim]; +const std::string &Buffer::name() const { + return contents->name; } -void Buffer::set_min(int m0, int m1, int m2, int m3) { - user_assert(defined()) << "Buffer is undefined\n"; - contents->buf.min[0] = m0; - contents->buf.min[1] = m1; - contents->buf.min[2] = m2; - contents->buf.min[3] = m3; +Buffer::operator Argument() const { + return Argument(name(), Argument::InputBuffer, type(), 
dimensions()); } Type Buffer::type() const { - user_assert(defined()) << "Buffer is undefined\n"; - return contents->type; + return contents->image.type(); } -bool Buffer::same_as(const Buffer &other) const { - return contents.same_as(other.contents); -} - -bool Buffer::defined() const { - return contents.defined(); -} - -const std::string &Buffer::name() const { - return contents->name; +int Buffer::dimensions() const { + return contents->image.dimensions(); } -Buffer::operator Argument() const { - return Argument(name(), Argument::InputBuffer, type(), dimensions()); +Image::Dimension Buffer::dim(int i) const { + return contents->image.dim(i); } -int Buffer::copy_to_host() { - return halide_copy_to_host(nullptr, raw_buffer()); +buffer_t *Buffer::raw_buffer() const { + return contents->image.raw_buffer(); } -int Buffer::device_sync() { - return halide_device_sync(nullptr, raw_buffer()); +size_t Buffer::size_in_bytes() const { + return contents->image.size_in_bytes(); } -int Buffer::copy_to_device() { - return halide_copy_to_device(nullptr, raw_buffer(), nullptr); +uint8_t *Buffer::host_ptr() const { + return raw_buffer()->host; } -int Buffer::free_dev_buffer() { - return halide_device_free(nullptr, raw_buffer()); +Expr Buffer::operator()(const std::vector &args) const { + // Cast the inputs to int32 + std::vector int_args; + for (Expr e : args) { + user_assert(Int(32).can_represent(e.type())) + << "Args to a call to an Image must be representable as 32-bit integers.\n"; + if (equal(e, _)) { + // Expand the _ into the appropriate number of implicit vars. 
+ int missing_dimensions = dimensions() - (int)args.size() + 1; + for (int i = 0; i < missing_dimensions; i++) { + int_args.push_back(Var::implicit(i)); + } + } else if (e.type() == Int(32)) { + int_args.push_back(e); + } else { + int_args.push_back(cast(e)); + } + } + return Internal::Call::make(*this, int_args); } - } diff --git a/src/Buffer.h b/src/Buffer.h index aadbb09bb1f7..683f1ad3bcad 100644 --- a/src/Buffer.h +++ b/src/Buffer.h @@ -5,103 +5,37 @@ * Defines Buffer - A c++ wrapper around a buffer_t. */ -#include - -#include "runtime/HalideRuntime.h" // For buffer_t -#include "IntrusivePtr.h" -#include "Error.h" -#include "Type.h" +#include "runtime/HalideImage.h" +#include "Expr.h" +#include "Util.h" #include "Argument.h" namespace Halide { namespace Internal { struct BufferContents; -struct JITModule; } -/** The internal representation of an image, or other dense array - * data. The Image type provides a typed view onto a buffer for the - * purposes of direct manipulation. A buffer may be stored in main - * memory, or some other memory space (e.g. a gpu). If you want to use - * this as an Image, see the Image class. Casting a Buffer to an Image - * will do any appropriate copy-back. This class is a fairly thin - * wrapper on a buffer_t, which is the C-style type Halide uses for - * passing buffers around. 
- */ +/** A named reference-counted handle on an Image of unknown type and dimensionality */ class Buffer { private: Internal::IntrusivePtr contents; public: Buffer() : contents(nullptr) {} + EXPORT Buffer(const Image &buf, std::string name = ""); + EXPORT Buffer(Type t, const buffer_t &buf, std::string name = ""); - EXPORT Buffer(Type t, int x_size = 0, int y_size = 0, int z_size = 0, int w_size = 0, - uint8_t* data = nullptr, const std::string &name = ""); - - EXPORT Buffer(Type t, const std::vector &sizes, - uint8_t* data = nullptr, const std::string &name = ""); - - EXPORT Buffer(Type t, const buffer_t *buf, const std::string &name = ""); + template Buffer(const Image &buf, std::string name = "") : + Buffer(Image(buf), name) {} - /** Get a pointer to the host-side memory. */ - EXPORT void *host_ptr() const; - - /** Get a pointer to the raw buffer_t struct that this class wraps. */ - EXPORT buffer_t *raw_buffer() const; - - /** Get the device-side pointer/handle for this buffer. Will be - * zero if no device was involved in the creation of this - * buffer. */ - EXPORT uint64_t device_handle() const; - - /** Has this buffer been modified on the cpu since last copied to a - * device. Not meaningful unless there's a device involved. */ - EXPORT bool host_dirty() const; - - /** Let Halide know that the host-side memory backing this buffer - * has been externally modified. You shouldn't normally need to - * call this, because it is done for you when you cast a Buffer to - * an Image in order to modify it. */ - EXPORT void set_host_dirty(bool dirty = true); - - /** Has this buffer been modified on device since last copied to - * the cpu. Not meaninful unless there's a device involved. */ - EXPORT bool device_dirty() const; - - /** Let Halide know that the device-side memory backing this - * buffer has been externally modified, and so the cpu-side memory - * is invalid. 
A copy-back will occur the next time you cast this - * Buffer to an Image, or the next time this buffer is accessed on - * the host in a halide pipeline. */ - EXPORT void set_device_dirty(bool dirty = true); - - /** Get the dimensionality of this buffer. Uses the convention - * that the extent field of a buffer_t should contain zero when - * the dimensions end. */ - EXPORT int dimensions() const; - - /** Get the extent of this buffer in the given dimension. */ - EXPORT int extent(int dim) const; - - /** Get the distance in memory (measured in the type of the buffer - * elements, not bytes) between adjacent elements of this buffer - * along the given dimension. For the innermost dimension, this - * will usually be one. */ - EXPORT int stride(int dim) const; - - /** Get the coordinate in the function that this buffer represents - * that corresponds to the base address of the buffer. */ - EXPORT int min(int dim) const; - - /** Set the coordinate in the function that this buffer represents - * that corresponds to the base address of the buffer. */ - EXPORT void set_min(int m0, int m1 = 0, int m2 = 0, int m3 = 0); - - /** Get the Halide type of the contents of this buffer. */ - EXPORT Type type() const; + EXPORT Buffer(Type t, const std::vector &size, std::string name = ""); /** Compare two buffers for identity (not equality of data). */ - EXPORT bool same_as(const Buffer &other) const; + bool same_as(const Buffer &other) const; + + /** Get the underlying Image */ + EXPORT Image &get(); + EXPORT const Image &get() const; /** Check if this buffer handle actually points to data. */ EXPORT bool defined() const; @@ -112,35 +46,58 @@ class Buffer { /** Convert this buffer to an argument to a halide pipeline. */ EXPORT operator Argument() const; - /** If this buffer was created *on-device* by a jit-compiled - * realization, then copy it back to the cpu-side memory. This is - * usually achieved by casting the Buffer to an Image. 
*/ - EXPORT int copy_to_host(); - - /** If this buffer was created by a jit-compiled realization on a - * device-aware target (e.g. PTX), then copy the cpu-side data to - * the device-side allocation. TODO: I believe this currently - * aborts messily if no device-side allocation exists. You might - * think you want to do this because you've modified the data - * manually on the host before calling another Halide pipeline, - * but what you actually want to do in that situation is set the - * host_dirty bit so that Halide can manage the copy lazily for - * you. Casting the Buffer to an Image sets the dirty bit for - * you. */ - EXPORT int copy_to_device(); - - /** If this buffer exists on a GPU, then finish any currently - * running computation on that GPU. Useful for benchmarking. */ - EXPORT int device_sync(); - - /** If this buffer was created by a jit-compiled realization on a - * device-aware target (e.g. PTX), then free the device-side - * allocation, if there is one. Done automatically when the last - * reference to this buffer dies. */ - EXPORT int free_dev_buffer(); + /** Get the Halide type of the underlying buffer */ + EXPORT Type type() const; + + /** Get the dimensionality of the underlying buffer */ + EXPORT int dimensions() const; + + /** Get a dimension from the underlying buffer. */ + EXPORT Image::Dimension dim(int i) const; + /** Access to the mins, strides, extents. Will be deprecated. Do not use. */ + // @{ + int min(int i) const { return dim(i).min(); } + int extent(int i) const { return dim(i).extent(); } + int stride(int i) const { return dim(i).stride(); } + // @} + + /** Get the size in bytes of the allocation */ + EXPORT size_t size_in_bytes() const; + + /** Get a pointer to the raw buffer */ + EXPORT buffer_t *raw_buffer() const; + + /** Get the host pointer */ + EXPORT uint8_t *host_ptr() const; + + /** Convert a buffer to a typed and dimensioned Image. Does + * runtime type checks. 
*/ + template + operator Image() const { + return Image(get()); + } + + /** Make a Call node to a specific site in this buffer. */ + // @{ + Expr operator()(const std::vector &loc) const; + + template::value)>> + Expr operator()(Expr first, Args... rest) const { + const std::vector vec = {first, rest...}; + return (*this)(vec); + } + // @} }; +/** An adaptor so that it's possible to access a Halide::Image using Exprs. */ +template::value)>> +NO_INLINE Expr image_accessor(const Image &im, Expr first, Args... rest) { + return Buffer(im)(first, rest...); +} + } #endif diff --git a/src/Closure.cpp b/src/Closure.cpp index 88ee42fc2b59..63e91c2f92d8 100644 --- a/src/Closure.cpp +++ b/src/Closure.cpp @@ -48,14 +48,8 @@ void Closure::visit(const Load *op) { // If reading an image/buffer, compute the size. if (op->image.defined()) { + ref.size = op->image.size_in_bytes(); ref.dimensions = op->image.dimensions(); - // The size is the offset of one beyond the last element. - // TODO(abadams): replace this with halide_buffer_t::size_in_bytes. 
- ref.size = 1; - for (int i = 0; i < op->image.dimensions(); i++) { - ref.size += (op->image.extent(i) - 1)*op->image.stride(i); - } - ref.size *= op->image.type().bytes(); } } else { debug(3) << "Not adding " << op->name << " to closure\n"; diff --git a/src/FastIntegerDivide.cpp b/src/FastIntegerDivide.cpp index a1ee55c90647..98a16b163118 100644 --- a/src/FastIntegerDivide.cpp +++ b/src/FastIntegerDivide.cpp @@ -2,6 +2,7 @@ #include "FastIntegerDivide.h" #include "IntegerDivisionTable.h" +#include "IROperator.h" namespace Halide { diff --git a/src/FastIntegerDivide.h b/src/FastIntegerDivide.h index 9398528a211d..761462056daf 100644 --- a/src/FastIntegerDivide.h +++ b/src/FastIntegerDivide.h @@ -2,7 +2,6 @@ #define HALIDE_FAST_INTEGER_DIVIDE_H #include "IR.h" -#include "Image.h" namespace Halide { diff --git a/src/Func.cpp b/src/Func.cpp index 1275e1f3f90d..c84d665376f8 100644 --- a/src/Func.cpp +++ b/src/Func.cpp @@ -16,7 +16,6 @@ #include "Function.h" #include "Argument.h" #include "Lower.h" -#include "Image.h" #include "Param.h" #include "PrintLoopNest.h" #include "Debug.h" @@ -2340,33 +2339,40 @@ Realization Func::realize(std::vector sizes, const Target &target) { } Realization Func::realize(int x_size, int y_size, int z_size, int w_size, const Target &target) { - user_assert(defined()) << "Can't realize undefined Func.\n"; - vector outputs(func.outputs()); - for (size_t i = 0; i < outputs.size(); i++) { - outputs[i] = Buffer(func.output_types()[i], x_size, y_size, z_size, w_size); - } - Realization r(outputs); - realize(r, target); - return r; + return realize({x_size, y_size, z_size, w_size}, target); } Realization Func::realize(int x_size, int y_size, int z_size, const Target &target) { - return realize(x_size, y_size, z_size, 0, target); + return realize({x_size, y_size, z_size}, target); } Realization Func::realize(int x_size, int y_size, const Target &target) { - return realize(x_size, y_size, 0, 0, target); + return realize({x_size, y_size}, target); } 
Realization Func::realize(int x_size, const Target &target) { - return realize(x_size, 0, 0, 0, target); + return realize(std::vector{x_size}, target); +} + +Realization Func::realize(const Target &target) { + return realize(std::vector{}, target); } void Func::infer_input_bounds(int x_size, int y_size, int z_size, int w_size) { user_assert(defined()) << "Can't infer input bounds on an undefined Func.\n"; vector outputs(func.outputs()); + int sizes[] = {x_size, y_size, z_size, w_size}; for (size_t i = 0; i < outputs.size(); i++) { - outputs[i] = Buffer(func.output_types()[i], x_size, y_size, z_size, w_size, (uint8_t *)1); + // We're not actually going to read from these outputs, so + // make the allocation tiny, then expand them with unsafe + // cropping. + Image im = Image::make_scalar(func.output_types()[i]); + for (int s : sizes) { + if (!s) break; + im.add_dimension(); + im.crop(im.dimensions()-1, 0, s); + } + outputs[i] = im; } Realization r(outputs); infer_input_bounds(r); @@ -2561,4 +2567,4 @@ EXPORT Var _("_"); EXPORT Var _0("_0"), _1("_1"), _2("_2"), _3("_3"), _4("_4"), _5("_5"), _6("_6"), _7("_7"), _8("_8"), _9("_9"); -} \ No newline at end of file +} diff --git a/src/Func.h b/src/Func.h index 40567e47d621..9b6da713dcd7 100644 --- a/src/Func.h +++ b/src/Func.h @@ -14,7 +14,6 @@ #include "Argument.h" #include "RDom.h" #include "JITModule.h" -#include "Image.h" #include "Target.h" #include "Tuple.h" #include "Module.h" @@ -469,6 +468,12 @@ class Func { * Function object. */ EXPORT explicit Func(Internal::Function f); + /** Construct a new Func to wrap an Image. */ + template + NO_INLINE explicit Func(const Image &im) : Func() { + (*this)(_) = im(_); + } + /** Evaluate this function over some rectangular domain and return * the resulting buffer or buffers. 
Performs compilation if the * Func has not previously been realized and jit_compile has not @@ -509,8 +514,9 @@ class Func { const Target &target = Target()); EXPORT Realization realize(int x_size, int y_size, const Target &target = Target()); - EXPORT Realization realize(int x_size = 0, + EXPORT Realization realize(int x_size, const Target &target = Target()); + EXPORT Realization realize(const Target &target = Target()); // @} /** Evaluate this function into an existing allocated buffer or @@ -522,11 +528,9 @@ class Func { EXPORT void realize(Realization dst, const Target &target = Target()); EXPORT void realize(Buffer dst, const Target &target = Target()); - template - NO_INLINE void realize(Image dst, const Target &target = Target()) { - // Images are expected to exist on-host. + template + NO_INLINE void realize(Image &dst, const Target &target = Target()) { realize(Buffer(dst), target); - dst.copy_to_host(); } // @} @@ -539,6 +543,17 @@ class Func { EXPORT void infer_input_bounds(int x_size = 0, int y_size = 0, int z_size = 0, int w_size = 0); EXPORT void infer_input_bounds(Realization dst); EXPORT void infer_input_bounds(Buffer dst); + + template + NO_INLINE void infer_input_bounds(Image &im) { + // It's possible for bounds inference to also manipulate + // output buffers if their host pointer is null, so we must + // take Images by reference and communicate the bounds query + // result by modifying the argument. + Buffer b(im); + infer_input_bounds(b); + im = b.get(); + } // @} /** Statically compile this function to llvm bitcode, with the @@ -1834,7 +1849,7 @@ NO_INLINE T evaluate(Expr e) { Func f; f() = e; Image im = f.realize(); - return im(0); + return im(); } /** JIT-compile and run enough code to evaluate a Halide Tuple. 
*/ @@ -1853,8 +1868,8 @@ NO_INLINE void evaluate(Tuple t, A *a, B *b) { Func f; f() = t; Realization r = f.realize(); - *a = Image(r[0])(0); - *b = Image(r[1])(0); + *a = Image(r[0])(); + *b = Image(r[1])(); } template @@ -1875,9 +1890,9 @@ NO_INLINE void evaluate(Tuple t, A *a, B *b, C *c) { Func f; f() = t; Realization r = f.realize(); - *a = Image(r[0])(0); - *b = Image(r[1])(0); - *c = Image(r[2])(0); + *a = Image(r[0])(); + *b = Image(r[1])(); + *c = Image(r[2])(); } template @@ -1902,10 +1917,10 @@ NO_INLINE void evaluate(Tuple t, A *a, B *b, C *c, D *d) { Func f; f() = t; Realization r = f.realize(); - *a = Image(r[0])(0); - *b = Image(r[1])(0); - *c = Image(r[2])(0); - *d = Image(r[3])(0); + *a = Image(r[0])(); + *b = Image(r[1])(); + *c = Image(r[2])(); + *d = Image(r[3])(); } // @} @@ -1938,7 +1953,7 @@ NO_INLINE T evaluate_may_gpu(Expr e) { f() = e; Internal::schedule_scalar(f); Image im = f.realize(); - return im(0); + return im(); } /** JIT-compile and run enough code to evaluate a Halide Tuple. 
Can @@ -1959,8 +1974,8 @@ NO_INLINE void evaluate_may_gpu(Tuple t, A *a, B *b) { f() = t; Internal::schedule_scalar(f); Realization r = f.realize(); - *a = Image(r[0])(0); - *b = Image(r[1])(0); + *a = Image(r[0])(); + *b = Image(r[1])(); } template @@ -1981,9 +1996,9 @@ NO_INLINE void evaluate_may_gpu(Tuple t, A *a, B *b, C *c) { f() = t; Internal::schedule_scalar(f); Realization r = f.realize(); - *a = Image(r[0])(0); - *b = Image(r[1])(0); - *c = Image(r[2])(0); + *a = Image(r[0])(); + *b = Image(r[1])(); + *c = Image(r[2])(); } template @@ -2009,10 +2024,10 @@ NO_INLINE void evaluate_may_gpu(Tuple t, A *a, B *b, C *c, D *d) { f() = t; Internal::schedule_scalar(f); Realization r = f.realize(); - *a = Image(r[0])(0); - *b = Image(r[1])(0); - *c = Image(r[2])(0); - *d = Image(r[3])(0); + *a = Image(r[0])(); + *b = Image(r[1])(); + *c = Image(r[2])(); + *d = Image(r[3])(); } // @} diff --git a/src/Function.h b/src/Function.h index 571e20bcb370..3033efe0a739 100644 --- a/src/Function.h +++ b/src/Function.h @@ -34,6 +34,9 @@ struct ExternFuncArgument { ExternFuncArgument(Buffer b): arg_type(BufferArg), buffer(b) {} + template + ExternFuncArgument(const Image &im) : arg_type(BufferArg), buffer(im) {} + ExternFuncArgument(Expr e): arg_type(ExprArg), expr(e) {} ExternFuncArgument(int e): arg_type(ExprArg), expr(e) {} ExternFuncArgument(float e): arg_type(ExprArg), expr(e) {} diff --git a/src/HexagonOffload.cpp b/src/HexagonOffload.cpp index cc239be410ef..17bf970d5e3c 100644 --- a/src/HexagonOffload.cpp +++ b/src/HexagonOffload.cpp @@ -3,15 +3,15 @@ #include #include "HexagonOffload.h" -#include "IRMutator.h" -#include "Substitute.h" #include "Closure.h" -#include "Param.h" -#include "Image.h" -#include "LLVM_Output.h" -#include "RemoveTrivialForLoops.h" #include "InjectHostDevBufferCopies.h" +#include "IRMutator.h" +#include "IROperator.h" +#include "LLVM_Output.h" #include "LLVM_Headers.h" +#include "Param.h" +#include "RemoveTrivialForLoops.h" +#include "Substitute.h" 
namespace Halide { namespace Internal { @@ -85,9 +85,10 @@ class InjectHexagonRpc : public IRMutator { Expr state_var(const std::string& name, Type type) { Expr& var = state_vars[name]; if (!var.defined()) { - Buffer storage(type, {}, nullptr, name + "_buf"); - *(void **)storage.host_ptr() = nullptr; - var = Load::make(type_of(), name + "_buf", 0, storage, Parameter()); + Image storage = Image::make_scalar(); + storage() = nullptr; + Buffer buf(storage, name + "_buf"); + var = Load::make(type_of(), name + "_buf", 0, buf, Parameter()); } return var; } @@ -108,10 +109,10 @@ class InjectHexagonRpc : public IRMutator { // Create a Buffer containing the given buffer/size, and return an // expression for a pointer to the first element. Expr buffer_ptr(const uint8_t* buffer, size_t size, const char* name) { - Buffer code(type_of(), {(int)size}, nullptr, name); - memcpy(code.host_ptr(), buffer, (int)size); - - Expr ptr_0 = Load::make(type_of(), name, 0, code, Parameter()); + Image code((int)size); + memcpy(code.data(), buffer, (int)size); + Buffer buf(code, name); + Expr ptr_0 = Load::make(type_of(), name, 0, buf, Parameter()); return Call::make(Handle(), Call::address_of, {ptr_0}, Call::Intrinsic); } diff --git a/src/Image.h b/src/Image.h deleted file mode 100644 index e23cd499f62d..000000000000 --- a/src/Image.h +++ /dev/null @@ -1,304 +0,0 @@ -#ifndef HALIDE_IMAGE_H -#define HALIDE_IMAGE_H - -/** \file - * Defines Halide's Image data type - */ - -#include "Var.h" -#include "Tuple.h" -#include "Target.h" - -namespace Halide { - -/** A base class for Images, which are typed accessors on - * Buffers. This exists to make the implementations of certain methods - * of Image private, so that they can safely throw errors without the - * risk of being inlined (which in turns messes up reporting of line - * numbers). */ -class ImageBase { -protected: - /** The underlying memory object */ - Buffer buffer; - - /** The address of the zero coordinate. 
The buffer_t stores the - * address of the min coordinate, but it's easier to index off the - * zero coordinate. */ - void *origin; - - /** The strides. These fields are also stored in the buffer, but - * they're cached here in the handle to make operator() fast. This - * is safe to do because the buffer is never modified. - */ - int stride_0, stride_1, stride_2, stride_3; - - /** The dimensionality. */ - int dims; - - /** The size of each element. */ - int elem_size; - - /** Prepare the buffer to be used as an image. Makes sure that the - * cached strides are correct, and that the image data is on the - * host. */ - void prepare_for_direct_pixel_access(); - - bool add_implicit_args_if_placeholder(std::vector &args, - Expr last_arg, - int total_args, - bool placeholder_seen) const; -public: - /** Construct an undefined image handle */ - ImageBase() : origin(nullptr), stride_0(0), stride_1(0), stride_2(0), stride_3(0), dims(0) {} - - /** Allocate an image with the given dimensions. */ - EXPORT ImageBase(Type t, int x, int y = 0, int z = 0, int w = 0, const std::string &name = ""); - - /** Wrap a buffer in an Image object, so that we can directly - * access its pixels in a type-safe way. */ - EXPORT ImageBase(Type t, const Buffer &buf); - - /** Wrap a single-element realization in an Image object. */ - EXPORT ImageBase(Type t, const Realization &r); - - /** Wrap a buffer_t in an Image object, so that we can access its - * pixels. */ - EXPORT ImageBase(Type t, const buffer_t *b, const std::string &name = ""); - - /** Get the name of this image. */ - EXPORT const std::string &name() const; - - /** Manually copy-back data to the host, if it's on a device. This - * is done for you if you construct an image from a buffer, but - * you might need to call this if you realize a gpu kernel into an - * existing image */ - EXPORT void copy_to_host(); - - /** Mark the buffer as dirty-on-host. 
is done for you if you - * construct an image from a buffer, but you might need to call - * this if you realize a gpu kernel into an existing image, or - * modify the data via some other back-door. */ - EXPORT void set_host_dirty(bool dirty = true); - - /** Check if this image handle points to actual data */ - EXPORT bool defined() const; - - /** Get the dimensionality of the data. Typically two for grayscale images, and three for color images. */ - EXPORT int dimensions() const; - - /** Get the size of a dimension */ - EXPORT int extent(int dim) const; - - /** Get the min coordinate of a dimension. The top left of the - * image represents this point in a function that was realized - * into this image. */ - EXPORT int min(int dim) const; - - /** Set the min coordinates of a dimension. */ - EXPORT void set_min(int m0, int m1 = 0, int m2 = 0, int m3 = 0); - - /** Get the number of elements in the buffer between two adjacent - * elements in the given dimension. For example, the stride in - * dimension 0 is usually 1, and the stride in dimension 1 is - * usually the extent of dimension 0. This is not necessarily true - * though. */ - EXPORT int stride(int dim) const; - - /** Get the extent of dimension 0, which by convention we use as - * the width of the image. Unlike extent(0), returns one if the - * buffer is zero-dimensional. */ - EXPORT int width() const; - - /** Get the extent of dimension 1, which by convention we use as - * the height of the image. Unlike extent(1), returns one if the - * buffer has fewer than two dimensions. */ - EXPORT int height() const; - - /** Get the extent of dimension 2, which by convention we use as - * the number of color channels (often 3). Unlike extent(2), - * returns one if the buffer has fewer than three dimensions. */ - EXPORT int channels() const; - - /** Get the minimum coordinate in dimension 0, which by convention - * is the coordinate of the left edge of the image. Returns zero - * for zero-dimensional images. 
*/ - EXPORT int left() const; - - /** Get the maximum coordinate in dimension 0, which by convention - * is the coordinate of the right edge of the image. Returns zero - * for zero-dimensional images. */ - EXPORT int right() const; - - /** Get the minimum coordinate in dimension 1, which by convention - * is the top of the image. Returns zero for zero- or - * one-dimensional images. */ - EXPORT int top() const; - - /** Get the maximum coordinate in dimension 1, which by convention - * is the bottom of the image. Returns zero for zero- or - * one-dimensional images. */ - EXPORT int bottom() const; - - /** Construct an expression which loads from this image. The - * location is extended with enough implicit variables to match - * the dimensionality of the image (see \ref Var::implicit) */ - // @{ - EXPORT Expr operator()() const; - EXPORT Expr operator()(Expr x) const; - EXPORT Expr operator()(Expr x, Expr y) const; - EXPORT Expr operator()(Expr x, Expr y, Expr z) const; - EXPORT Expr operator()(Expr x, Expr y, Expr z, Expr w) const; - EXPORT Expr operator()(std::vector) const; - EXPORT Expr operator()(std::vector) const; - // @} - - /** Get a pointer to the raw buffer_t that this image holds */ - EXPORT buffer_t *raw_buffer() const; - - /** Get the address of a particular pixel. */ - void *address_of(int x, int y = 0, int z = 0, int w = 0) const { - uint8_t *ptr = (uint8_t *)origin; - ptrdiff_t offset = ((ptrdiff_t)x*stride_0 + - (ptrdiff_t)y*stride_1 + - (ptrdiff_t)z*stride_2 + - (ptrdiff_t)w*stride_3); - return (void *)(ptr + offset * elem_size); - } -}; - -/** A reference-counted handle on a dense multidimensional array - * containing scalar values of type T. Can be directly accessed and - * modified. May have up to four dimensions. Color images are - * represented as three-dimensional, with the third dimension being - * the color channel. 
In general we store color images in - * color-planes, as opposed to packed RGB, because this tends to - * vectorize more cleanly. */ -template -class Image : public ImageBase { -public: - typedef T ElemType; - - /** Construct an undefined image handle */ - Image() : ImageBase() {} - - /** Allocate an image with the given dimensions. */ - // @{ - NO_INLINE Image(int x, int y = 0, int z = 0, int w = 0, const std::string &name = "") : - ImageBase(type_of(), x, y, z, w, name) {} - - NO_INLINE Image(int x, int y, int z, const std::string &name) : - ImageBase(type_of(), x, y, z, 0, name) {} - - NO_INLINE Image(int x, int y, const std::string &name) : - ImageBase(type_of(), x, y, 0, 0, name) {} - - NO_INLINE Image(int x, const std::string &name) : - ImageBase(type_of(), x, 0, 0, 0, name) {} - // @} - - /** Wrap a buffer in an Image object, so that we can directly - * access its pixels in a type-safe way. */ - NO_INLINE Image(const Buffer &buf) : ImageBase(type_of(), buf) {} - - /** Wrap a single-element realization in an Image object. */ - NO_INLINE Image(const Realization &r) : ImageBase(type_of(), r) {} - - /** Wrap a buffer_t in an Image object, so that we can access its - * pixels. */ - NO_INLINE Image(const buffer_t *b, const std::string &name = "") : - ImageBase(type_of(), b, name) {} - - /** Get a pointer to the element at the min location. 
*/ - NO_INLINE T *data() const { - user_assert(defined()) << "data of undefined Image\n"; - return (T *)buffer.host_ptr(); - } - - using ImageBase::operator(); - - /** Assuming this image is one-dimensional, get the value of the - * element at position x */ - const T &operator()(int x) const { - return *((T *)(address_of(x))); - } - - /** Assuming this image is two-dimensional, get the value of the - * element at position (x, y) */ - const T &operator()(int x, int y) const { - return *((T *)(address_of(x, y))); - } - - /** Assuming this image is three-dimensional, get the value of the - * element at position (x, y, z) */ - const T &operator()(int x, int y, int z) const { - return *((T *)(address_of(x, y, z))); - } - - /** Assuming this image is four-dimensional, get the value of the - * element at position (x, y, z, w) */ - const T &operator()(int x, int y, int z, int w) const { - return *((T *)(address_of(x, y, z, w))); - } - - /** Assuming this image is one-dimensional, get a reference to the - * element at position x */ - T &operator()(int x) { - return *((T *)(address_of(x))); - } - - /** Assuming this image is two-dimensional, get a reference to the - * element at position (x, y) */ - T &operator()(int x, int y) { - return *((T *)(address_of(x, y))); - } - - /** Assuming this image is three-dimensional, get a reference to the - * element at position (x, y, z) */ - T &operator()(int x, int y, int z) { - return *((T *)(address_of(x, y, z))); - } - - /** Assuming this image is four-dimensional, get a reference to the - * element at position (x, y, z, w) */ - T &operator()(int x, int y, int z, int w) { - return *((T *)(address_of(x, y, z, w))); - } - - /** Get a handle on the Buffer that this image holds */ - operator Buffer() const { - return buffer; - } - - /** Convert this image to an argument to a halide pipeline. */ - operator Argument() const { - return Argument(buffer); - } - - /** Convert this image to an argument to an extern stage. 
*/ - operator ExternFuncArgument() const { - return ExternFuncArgument(buffer); - } - - /** Treating the image as an Expr is equivalent to call it with no - * arguments. For example, you can say: - * - \code - Image im(10, 10); - Func f; - f = im*2; - \endcode - * - * This will define f as a two-dimensional function with value at - * position (x, y) equal to twice the value of the image at the - * same location. - */ - operator Expr() const { - return (*this)(_); - } - - -}; - -} - -#endif diff --git a/src/ImageParam.h b/src/ImageParam.h index 4c40d55c6c6e..832e1266c514 100644 --- a/src/ImageParam.h +++ b/src/ImageParam.h @@ -37,7 +37,13 @@ class ImageParam : public OutputImageParam { EXPORT ImageParam(Type t, int d, const std::string &n); /** Bind a buffer or image to this ImageParam. Only relevant for jitting */ + // @{ EXPORT void set(Buffer b); + template + NO_INLINE void set(const Image &im) { + set(Buffer(im)); + } + // @} /** Get the buffer bound to this ImageParam. Only relevant for jitting */ EXPORT Buffer get() const; @@ -49,7 +55,7 @@ class ImageParam : public OutputImageParam { */ // @{ template - Expr operator()(Args&&... args) const { + NO_INLINE Expr operator()(Args&&... 
args) const { return func(std::forward(args)...); } EXPORT Expr operator()(std::vector) const; diff --git a/src/Introspection.cpp b/src/Introspection.cpp index 63e15bbcfdaa..8ecca69d23a3 100644 --- a/src/Introspection.cpp +++ b/src/Introspection.cpp @@ -712,7 +712,7 @@ class DebugSections { // Look up n stack frames and get the source location as filename:line std::string get_source_location() { - + return ""; debug(5) << "Finding source location\n"; if (!source_lines.size()) { diff --git a/src/Module.cpp b/src/Module.cpp index 01ca74c5727a..871ed39cb2d8 100644 --- a/src/Module.cpp +++ b/src/Module.cpp @@ -23,16 +23,16 @@ namespace { class TemporaryObjectFileDir final { public: TemporaryObjectFileDir() : dir_path(dir_make_temp()) {} - ~TemporaryObjectFileDir() { + ~TemporaryObjectFileDir() { for (const auto &f : dir_files) { debug(1) << "file_unlink: " << f << "\n"; file_unlink(f); } debug(1) << "dir_rmdir: " << dir_path << "\n"; - dir_rmdir(dir_path); + dir_rmdir(dir_path); } - std::string add_temp_object_file(const std::string &base_path_name, - const std::string &suffix, + std::string add_temp_object_file(const std::string &base_path_name, + const std::string &suffix, const Target &target, bool in_front = false) { const char* ext = target.os == Target::Windows && !target.has_feature(Target::MinGW) ? ".obj" : ".o"; @@ -254,7 +254,7 @@ void Module::compile(const Outputs &output_files) const { Outputs compile_standalone_runtime(const Outputs &output_files, Target t) { Module empty("standalone_runtime", t.without_feature(Target::NoRuntime).without_feature(Target::JIT)); // For runtime, it only makes sense to output object files or static_library, so ignore - // everything else. + // everything else. 
Outputs actual_outputs = Outputs().object(output_files.object_name).static_library(output_files.static_library_name); empty.compile(actual_outputs); return actual_outputs; @@ -264,9 +264,9 @@ void compile_standalone_runtime(const std::string &object_filename, Target t) { compile_standalone_runtime(Outputs().object(object_filename), t); } -void compile_multitarget(const std::string &fn_name, +void compile_multitarget(const std::string &fn_name, const Outputs &output_files, - const std::vector &targets, + const std::vector &targets, ModuleProducer module_producer) { user_assert(!fn_name.empty()) << "Function name must be specified.\n"; user_assert(!targets.empty()) << "Must specify at least one target.\n"; @@ -356,7 +356,7 @@ void compile_multitarget(const std::string &fn_name, // and add that to the result. if (!base_target.has_feature(Target::NoRuntime)) { const Target runtime_target = base_target.without_feature(Target::NoRuntime); - compile_standalone_runtime(Outputs().object(temp_dir.add_temp_object_file(output_files.static_library_name, "_runtime", runtime_target)), + compile_standalone_runtime(Outputs().object(temp_dir.add_temp_object_file(output_files.static_library_name, "_runtime", runtime_target)), runtime_target); } @@ -386,7 +386,7 @@ void compile_multitarget(const std::string &fn_name, // may get optimized away at link time. 
wrapper_module.compile(Outputs().object(temp_dir.add_temp_object_file(output_files.static_library_name, "_wrapper", base_target, /* in_front*/ true))); - if (!output_files.c_header_name.empty()) { + if (!output_files.c_header_name.empty()) { debug(1) << "compile_multitarget: c_header_name " << output_files.c_header_name << "\n"; wrapper_module.compile(Outputs().c_header(output_files.c_header_name)); } diff --git a/src/Module.h b/src/Module.h index 144065ca33e4..989f36ddaa8b 100644 --- a/src/Module.h +++ b/src/Module.h @@ -7,7 +7,7 @@ */ #include - + #include "IR.h" #include "Buffer.h" #include "ModulusRemainder.h" @@ -110,9 +110,9 @@ EXPORT Outputs compile_standalone_runtime(const Outputs &output_files, Target t) typedef std::function ModuleProducer; -EXPORT void compile_multitarget(const std::string &fn_name, +EXPORT void compile_multitarget(const std::string &fn_name, const Outputs &output_files, - const std::vector &targets, + const std::vector &targets, ModuleProducer module_producer); } diff --git a/src/OutputImageParam.cpp b/src/OutputImageParam.cpp index d30944dd5269..a093ac71afee 100644 --- a/src/OutputImageParam.cpp +++ b/src/OutputImageParam.cpp @@ -1,5 +1,5 @@ #include "OutputImageParam.h" - +#include "IROperator.h" namespace Halide { @@ -19,88 +19,119 @@ bool OutputImageParam::defined() const { return param.defined(); } -Expr OutputImageParam::min(int x) const { +OutputImageParam::Dimension OutputImageParam::dim(int i) { + user_assert(defined()) + << "Can't access the dimensions of an undefined ImageParam\n"; + user_assert(i >= 0 && i < dimensions()) + << "Can't access dimension " << i + << " of a " << dimensions() << "-dimensional ImageParam\n"; + return OutputImageParam::Dimension(param, i); +} + +const OutputImageParam::Dimension OutputImageParam::dim(int i) const { + user_assert(defined()) + << "Can't access the dimensions of an undefined ImageParam\n"; + user_assert(i >= 0 && i < dimensions()) + << "Can't access dimension " << i + << " of a " << 
dimensions() << "-dimensional ImageParam\n"; + return OutputImageParam::Dimension(param, i); +} + +Expr OutputImageParam::Dimension::min() const { std::ostringstream s; - s << name() << ".min." << x; + s << param.name() << ".min." << d; return Internal::Variable::make(Int(32), s.str(), param); } -Expr OutputImageParam::extent(int x) const { +Expr OutputImageParam::Dimension::extent() const { std::ostringstream s; - s << name() << ".extent." << x; + s << param.name() << ".extent." << d; return Internal::Variable::make(Int(32), s.str(), param); } -Expr OutputImageParam::stride(int x) const { +Expr OutputImageParam::Dimension::max() const { + return min() + extent() - 1; +} + +Expr OutputImageParam::Dimension::stride() const { std::ostringstream s; - s << name() << ".stride." << x; + s << param.name() << ".stride." << d; return Internal::Variable::make(Int(32), s.str(), param); } int OutputImageParam::host_alignment() const { return param.host_alignment(); } -OutputImageParam &OutputImageParam::set_extent(int dim, Expr extent) { - param.set_extent_constraint(dim, extent); +OutputImageParam::Dimension OutputImageParam::Dimension::set_extent(Expr extent) { + param.set_extent_constraint(d, extent); return *this; } -OutputImageParam &OutputImageParam::set_min(int dim, Expr min) { - param.set_min_constraint(dim, min); +OutputImageParam::Dimension OutputImageParam::Dimension::set_min(Expr min) { + param.set_min_constraint(d, min); return *this; } -OutputImageParam &OutputImageParam::set_stride(int dim, Expr stride) { - param.set_stride_constraint(dim, stride); +OutputImageParam::Dimension OutputImageParam::Dimension::set_stride(Expr stride) { + param.set_stride_constraint(d, stride); return *this; } + +OutputImageParam::Dimension OutputImageParam::Dimension::set_bounds(Expr min, Expr extent) { + return set_min(min).set_extent(extent); +} + +OutputImageParam::Dimension OutputImageParam::Dimension::dim(int i) { + return OutputImageParam::Dimension(param, i); +} + +const 
OutputImageParam::Dimension OutputImageParam::Dimension::dim(int i) const { + return OutputImageParam::Dimension(param, i); +} + OutputImageParam &OutputImageParam::set_host_alignment(int bytes) { param.set_host_alignment(bytes); return *this; } -OutputImageParam &OutputImageParam::set_bounds(int dim, Expr min, Expr extent) { - return set_min(dim, min).set_extent(dim, extent); -} - int OutputImageParam::dimensions() const { return param.dimensions(); } Expr OutputImageParam::left() const { user_assert(dimensions() > 0) << "Can't ask for the left of a zero-dimensional image\n"; - return min(0); + return dim(0).min(); } Expr OutputImageParam::right() const { user_assert(dimensions() > 0) << "Can't ask for the right of a zero-dimensional image\n"; - return Internal::Add::make(min(0), Internal::Sub::make(extent(0), 1)); + return dim(0).max(); } Expr OutputImageParam::top() const { user_assert(dimensions() > 1) << "Can't ask for the top of a zero- or one-dimensional image\n"; - return min(1); + return dim(1).min(); } Expr OutputImageParam::bottom() const { user_assert(dimensions() > 1) << "Can't ask for the bottom of a zero- or one-dimensional image\n"; - return Internal::Add::make(min(1), Internal::Sub::make(extent(1), 1)); + return dim(1).max(); } Expr OutputImageParam::width() const { user_assert(dimensions() > 0) << "Can't ask for the width of a zero-dimensional image\n"; - return extent(0); + return dim(0).extent(); } Expr OutputImageParam::height() const { user_assert(dimensions() > 1) << "Can't ask for the height of a zero or one-dimensional image\n"; - return extent(1); + return dim(1).extent(); } Expr OutputImageParam::channels() const { user_assert(dimensions() > 2) << "Can't ask for the channels of an image with fewer than three dimensions\n"; - return extent(2); + return dim(2).extent(); } Internal::Parameter OutputImageParam::parameter() const { diff --git a/src/OutputImageParam.h b/src/OutputImageParam.h index 7956f278ac4d..960c444582c2 100644 --- 
a/src/OutputImageParam.h +++ b/src/OutputImageParam.h @@ -20,9 +20,89 @@ class OutputImageParam { /** Is this an input or an output? OutputImageParam is the base class for both. */ Argument::Kind kind; + void add_implicit_args_if_placeholder(std::vector &args, + Expr last_arg, + int total_args, + bool *placeholder_seen) const; public: - /** Construct a nullptr image parameter handle. */ + struct Dimension { + /** Get an expression representing the minimum coordinates of this image + * parameter in the given dimension. */ + EXPORT Expr min() const; + + /** Get an expression representing the extent of this image + * parameter in the given dimension */ + EXPORT Expr extent() const; + + /** Get an expression representing the maximum coordinates of + * this image parameter in the given dimension. */ + EXPORT Expr max() const; + + /** Get an expression representing the stride of this image in the + * given dimension */ + EXPORT Expr stride() const; + + /** Set the min in a given dimension to equal the given + * expression. Setting the mins to zero may simplify some + * addressing math. */ + EXPORT Dimension set_min(Expr e); + + /** Set the extent in a given dimension to equal the given + * expression. Images passed in that fail this check will generate + * a runtime error. Returns a Dimension handle for the same dimension so that + * these calls may be chained. + * + * This may help the compiler generate better + * code. E.g: + \code + im.dim(0).set_extent(100); + \endcode + * tells the compiler that dimension zero must be of extent 100, + * which may result in simplification of boundary checks. The + * value can be an arbitrary expression: + \code + im.dim(0).set_extent(im.dim(1).extent()); + \endcode + * declares that im is a square image (of unknown size), whereas: + \code + im.dim(0).set_extent((im.dim(0).extent()/32)*32); + \endcode + * tells the compiler that the extent is a multiple of 32. 
*/ + EXPORT Dimension set_extent(Expr e); + + /** Set the stride in a given dimension to equal the given + * value. This is particularly helpful to set when + * vectorizing. Known strides for the vectorized dimension + * generate better code. */ + EXPORT Dimension set_stride(Expr e); + + /** Set the min and extent in one call. */ + EXPORT Dimension set_bounds(Expr min, Expr extent); + + /** Get a different dimension of the same buffer */ + // @{ + EXPORT Dimension dim(int i); + EXPORT const Dimension dim(int i) const; + // @} + + private: + friend class OutputImageParam; + + /** Construct a Dimension representing dimension d of some + * Internal::Parameter p. Only OutputImageParam may construct + * these. */ + Dimension(const Internal::Parameter &p, int d) : param(p), d(d) {} + + /** Only OutputImageParam may copy these, too. This prevents + * users removing constness by making a non-const copy. */ + Dimension(const Dimension &) = default; + + Internal::Parameter param; + int d; + }; + + /** Construct a null image parameter handle. */ OutputImageParam() {} /** Construct an OutputImageParam that wraps an Internal Parameter object. */ @@ -37,66 +117,32 @@ class OutputImageParam { /** Is this parameter handle non-nullptr */ EXPORT bool defined() const; - /** Get an expression representing the minimum coordinates of this image - * parameter in the given dimension. */ - EXPORT Expr min(int x) const; - - /** Get an expression representing the extent of this image - * parameter in the given dimension */ - EXPORT Expr extent(int x) const; - - /** Get an expression representing the stride of this image in the - * given dimension */ - EXPORT Expr stride(int x) const; - - /** Get the ailgnment of the host pointer. Use set_host_alignment - * to change the default value of 1. */ + /** Get a handle on one of the dimensions for the purposes of + * inspecting or constraining its min, extent, or stride. 
*/ + EXPORT Dimension dim(int i); + + /** Get a handle on one of the dimensions for the purposes of + * inspecting its min, extent, or stride. */ + EXPORT const Dimension dim(int i) const; + + /** Get or constrain the shape of the dimensions. Soon to be + * deprecated. Do not use. */ + // @{ + OutputImageParam set_min(int i, Expr e) {dim(i).set_min(e); return *this;} + OutputImageParam set_extent(int i, Expr e) {dim(i).set_extent(e); return *this;} + OutputImageParam set_bounds(int i, Expr a, Expr b) {dim(i).set_bounds(a, b); return *this;} + OutputImageParam set_stride(int i, Expr e) {dim(i).set_stride(e); return *this;} + Expr min(int i) {return dim(i).min();} + Expr extent(int i) {return dim(i).extent();} + Expr stride(int i) {return dim(i).stride();} + // @} + + /** Get the alignment of the host pointer in bytes. Defaults to + * the size of type. */ EXPORT int host_alignment() const; - /** Set the extent in a given dimension to equal the given - * expression. Images passed in that fail this check will generate - * a runtime error. Returns a reference to the ImageParam so that - * these calls may be chained. - * - * This may help the compiler generate better - * code. E.g: - \code - im.set_extent(0, 100); - \endcode - * tells the compiler that dimension zero must be of extent 100, - * which may result in simplification of boundary checks. The - * value can be an arbitrary expression: - \code - im.set_extent(0, im.extent(1)); - \endcode - * declares that im is a square image (of unknown size), whereas: - \code - im.set_extent(0, (im.extent(0)/32)*32); - \endcode - * tells the compiler that the extent is a multiple of 32. */ - EXPORT OutputImageParam &set_extent(int dim, Expr extent); - - /** Set the min in a given dimension to equal the given - * expression. Setting the mins to zero may simplify some - * addressing math. */ - EXPORT OutputImageParam &set_min(int dim, Expr min); - - /** Set the stride in a given dimension to equal the given - * value. 
This is particularly helpful to set when - * vectorizing. Known strides for the vectorized dimension - * generate better code. */ - EXPORT OutputImageParam &set_stride(int dim, Expr stride); - - /** Set the alignment of the host pointer. On some architectures - * an unaligned load/store is significantly more expensive in - * terms of performance than an aligned load/store. This allows - * the user to align external buffers favorably so that halide - * can generate aligned loads/stores as appropriate. The alignment - * should be a power of 2. */ - EXPORT OutputImageParam &set_host_alignment(int bytes); - - /** Set the min and extent in one call. */ - EXPORT OutputImageParam &set_bounds(int dim, Expr min, Expr extent); + /** Set the expected alignment of the host pointer in bytes. */ + EXPORT OutputImageParam &set_host_alignment(int); /** Get the dimensionality of this image parameter */ EXPORT int dimensions() const; diff --git a/src/Pipeline.cpp b/src/Pipeline.cpp index bbc05fa89511..07f866542415 100644 --- a/src/Pipeline.cpp +++ b/src/Pipeline.cpp @@ -261,7 +261,7 @@ void Pipeline::compile_to_static_library(const string &filename_prefix, m.compile(outputs); } -void Pipeline::compile_to_multitarget_static_library(const std::string &filename_prefix, +void Pipeline::compile_to_multitarget_static_library(const std::string &filename_prefix, const std::vector &args, const std::vector &targets) { auto module_producer = [this, &args](const std::string &name, const Target &target) -> Module { @@ -940,16 +940,14 @@ vector Pipeline::prepare_jit_call_arguments(Realization dst, const Type type = output_buffer_types[i].type; user_assert(dst[i].dimensions() == dims) << "Can't realize Func \"" << func.name() - << "\" into Buffer \"" << dst[i].name() - << "\" because Buffer \"" << dst[i].name() - << "\" is " << dst[i].dimensions() << "-dimensional" - << ", but Func \"" << func.name() + << "\" into Buffer at " << (void *)dst[i].host_ptr() + << " because Buffer is " << 
dst[i].dimensions() + << "-dimensional, but Func \"" << func.name() << "\" is " << dims << "-dimensional.\n"; user_assert(dst[i].type() == type) << "Can't realize Func \"" << func.name() - << "\" into Buffer \"" << dst[i].name() - << "\" because Buffer \"" << dst[i].name() - << "\" has type " << dst[i].type() + << "\" into Buffer at " << (void *)dst[i].host_ptr() + << " because Buffer has type " << Type(dst[i].type()) << ", but Func \"" << func.name() << "\" has type " << type << ".\n"; } @@ -983,12 +981,10 @@ vector Pipeline::prepare_jit_call_arguments(Realization dst, const } // Then the outputs - for (Buffer buf : dst.as_vector()) { - internal_assert(buf.defined()) << "Can't realize into an undefined Buffer\n"; + for (const Buffer &buf : dst.as_vector()) { arg_values.push_back(buf.raw_buffer()); const void *ptr = arg_values.back(); - debug(1) << "JIT output buffer " << buf.name() - << " @ " << ptr << "\n"; + debug(1) << "JIT output buffer @ " << ptr << "\n"; } return arg_values; @@ -1237,42 +1233,24 @@ void Pipeline::infer_input_bounds(Realization dst) { << buf.min[2] + buf.extent[2] << "," << buf.min[3] + buf.extent[3] << ")\n"; - // Figure out how much memory to allocate for this buffer - size_t min_idx = 0, max_idx = 0; - for (int d = 0; d < 4; d++) { - if (buf.stride[d] > 0) { - min_idx += buf.min[d] * buf.stride[d]; - max_idx += (buf.min[d] + buf.extent[d] - 1) * buf.stride[d]; - } else { - max_idx += buf.min[d] * buf.stride[d]; - min_idx += (buf.min[d] + buf.extent[d] - 1) * buf.stride[d]; - } - } - size_t total_size = (max_idx - min_idx); - while (total_size & 0x1f) total_size++; - - // Allocate enough memory with the right dimensionality. - Buffer buffer(ia.param.type(), total_size, - buf.extent[1] > 0 ? 1 : 0, - buf.extent[2] > 0 ? 1 : 0, - buf.extent[3] > 0 ? 
1 : 0); - - // Rewrite the buffer fields to match the ones returned - for (int d = 0; d < 4; d++) { - buffer.raw_buffer()->min[d] = buf.min[d]; - buffer.raw_buffer()->stride[d] = buf.stride[d]; - buffer.raw_buffer()->extent[d] = buf.extent[d]; - } - ia.param.set_buffer(buffer); + Image im(ia.param.type(), buf); + im.allocate(); + ia.param.set_buffer(im); } } void Pipeline::infer_input_bounds(int x_size, int y_size, int z_size, int w_size) { user_assert(defined()) << "Can't infer input bounds on an undefined Pipeline.\n"; + vector size; + if (x_size) size.push_back(x_size); + if (y_size) size.push_back(y_size); + if (z_size) size.push_back(z_size); + if (w_size) size.push_back(w_size); + vector bufs; for (Type t : contents->outputs[0].output_types()) { - bufs.push_back(Buffer(t, x_size, y_size, z_size, w_size)); + bufs.push_back(Buffer(t, size)); } Realization r(bufs); infer_input_bounds(r); diff --git a/src/Pipeline.h b/src/Pipeline.h index 7be4b1879b73..89b25254b5ae 100644 --- a/src/Pipeline.h +++ b/src/Pipeline.h @@ -11,7 +11,6 @@ #include "Buffer.h" #include "IntrusivePtr.h" -#include "Image.h" #include "JITModule.h" #include "Module.h" #include "Tuple.h" @@ -363,8 +362,8 @@ class Pipeline { EXPORT void realize(Realization dst, const Target &target = Target()); EXPORT void realize(Buffer dst, const Target &target = Target()); - template - NO_INLINE void realize(Image dst, const Target &target = Target()) { + template + NO_INLINE void realize(Image dst, const Target &target = Target()) { // Images are expected to exist on-host. 
realize(Buffer(dst), target); dst.copy_to_host(); @@ -380,6 +379,17 @@ class Pipeline { EXPORT void infer_input_bounds(int x_size = 0, int y_size = 0, int z_size = 0, int w_size = 0); EXPORT void infer_input_bounds(Realization dst); EXPORT void infer_input_bounds(Buffer dst); + + template + NO_INLINE void infer_input_bounds(Image &im) { + // It's possible for bounds inference to also manipulate + // output buffers if their host pointer is null, so we must + // take Images by reference and communicate the bounds query + // result by modifying the argument. + Buffer b(im); + infer_input_bounds(b); + im = b.get(); + } // @} /** Infer the arguments to the Pipeline, sorted into a canonical order: diff --git a/src/RDom.cpp b/src/RDom.cpp index 408987ad82b1..f2c1417bb2fc 100644 --- a/src/RDom.cpp +++ b/src/RDom.cpp @@ -147,8 +147,8 @@ RDom::RDom(Buffer b) { for (int i = 0; i < b.dimensions(); i++) { ReductionVariable var = { b.name() + "$" + var_names[i], - b.min(i), - b.extent(i) + b.dim(i).min(), + b.dim(i).extent() }; vars.push_back(var); } diff --git a/src/RDom.h b/src/RDom.h index 08b7785a0974..dd2a4fa3b6f6 100644 --- a/src/RDom.h +++ b/src/RDom.h @@ -215,6 +215,8 @@ class RDom { // @{ EXPORT RDom(Buffer); EXPORT RDom(ImageParam); + template + NO_INLINE RDom(const Image &im) : RDom(Buffer(im)) {} // @} /** Construct a reduction domain that wraps an Internal ReductionDomain object. */ diff --git a/src/Tuple.h b/src/Tuple.h index 7af0adce062f..8749b4e9be27 100644 --- a/src/Tuple.h +++ b/src/Tuple.h @@ -89,9 +89,16 @@ class Realization { return buffers[0]; } + /** Single-element realizations are implicitly castable to Images. */ + template + operator Image() const { + return buffers[0]; + } + /** Construct a Realization from some Buffers. */ //@{ - template + template::value>> Realization(Buffer a, Buffer b, Args&&... 
args) { buffers = std::vector{a, b, std::forward(args)...}; } diff --git a/tools/halide_image.h b/src/runtime/HalideImage.h similarity index 66% rename from tools/halide_image.h rename to src/runtime/HalideImage.h index 11ca5190a891..35472709dc51 100644 --- a/tools/halide_image.h +++ b/src/runtime/HalideImage.h @@ -9,6 +9,7 @@ #include #include #include +#include #include #include @@ -21,9 +22,7 @@ struct halide_dimension_t { int min, extent, stride; }; - namespace Halide { -namespace Tools { template void for_each_element(const buffer_t &buf, Fn &&f); @@ -31,9 +30,12 @@ void for_each_element(const buffer_t &buf, Fn &&f); // Forward-declare our Image class template class Image; -// This template exists so that Image is extensible with custom -// operator()(Args...) methods. -template struct ImageAccessor; +// This declaration exists so that Image is extensible with custom +// operator()(Args...) methods. Add implementations of it for whatever +// types you like. Use enable_if if necessary to stop the overloads +// being ambiguous. +template +Ret image_accessor(const Image &, Args...); // A helper to check if a parameter pack is entirely implicitly // int-convertible to use with std::enable_if @@ -65,16 +67,50 @@ struct AllInts { static const bool value = false; }; -/** A class that wraps buffer_t and adds functionality. Acts as a base - * class for the typed version below. Templated on the maximum - * dimensionality it supports. Use it only when the the element type - * is unknown, or generic. See the comments on the Image class below - * for more details. */ -template -class Buffer { - static_assert(D <= 4, "buffer_t supports a maximum of four dimensions"); +/** A struct acting as a header for allocations owned by the Image + * class itself. */ +struct AllocationHeader { + void (*deallocate_fn)(void *); + std::atomic ref_count; +}; + +/** A templated Image class that wraps buffer_t and adds + * functionality. 
When using Halide from C++, this is the preferred + * way to create input and output buffers. The overhead of using this + * class relative to a naked buffer_t is minimal - it uses another + * ~100 bytes on the stack, and does no dynamic allocations when using + * it to represent existing memory. This overhead will shrink further + * in the future once buffer_t is deprecated. + * + * The template parameter T is the element type, and D is the maximum + * number of dimensions. It must be less than or equal to 4 for now. + * + * The class optionally allocates and owns memory for the image using + * a shared pointer allocated with the provided allocator. If they are + * null, malloc and free are used. Any device-side allocation is + * considered as owned if and only if the host-side allocation is + * owned. + * + * For accessing the shape and type, this class provides both the + * buffer_t interface (extent[i], min[i], and stride[i] arrays, the + * elem_size field), and also the interface of the yet-to-come + * halide_buffer_t, which will replace buffer_t. This is intended to + * allow a gradual transition to halide_buffer_t. New code should + * access the shape via dim[i].extent, dim[i].min, dim[i].stride, and + * the type via the 'type' field. */ +template +class Image { + // Some helpers for checking properties of T + static const bool T_is_void = std::is_same::value; + using not_void_T = typename std::conditional::type; -protected: + // Get the Halide type of T. Callers should not use the result if + // T is void. + static halide_type_t static_halide_type() { + return halide_type_of(); + } + + static_assert(D <= 4, "buffer_t supports a maximum of four dimensions"); buffer_t buf = {0}; @@ -88,7 +124,30 @@ class Buffer { /** The allocation owned by this Image. NULL if the Image does not * own the memory. 
*/ - std::shared_ptr alloc; + AllocationHeader *alloc = nullptr; + + /** Increment the reference count of any allocation */ + void incref() { + if (alloc) { + alloc->ref_count++; + } + } + + /** Decrement the reference count of any allocation and free host + * and device memory if it hits zero. Sets alloc to nullptr. */ + void decref() { + if (alloc) { + int result = --(alloc->ref_count); + if (result == 0) { + if (buf.dev) { + device_free(); + } + void (*fn)(void *) = alloc->deallocate_fn; + fn(alloc); + } + alloc = nullptr; + } + } /** A temporary helper function to get the number of dimensions in * a buffer_t. Will disappear when halide_buffer_t is merged. */ @@ -125,6 +184,19 @@ class Buffer { void initialize_shape(int) { } + /** Initialize the shape from a vector of extents */ + void initialize_shape(const std::vector &sizes) { + for (size_t i = 0; i < sizes.size(); i++) { + buf.min[i] = 0; + buf.extent[i] = sizes[i]; + if (i == 0) { + buf.stride[i] = 1; + } else { + buf.stride[i] = buf.stride[i-1] * buf.extent[i-1]; + } + } + } + /** Initialize the shape from the static shape of an array */ template void initialize_shape_from_array_shape(int next, Array (&vals)[N]) { @@ -139,30 +211,30 @@ class Buffer { } /** Base case for the template recursion above. */ - template - void initialize_shape_from_array_shape(int, const T &) { + template + void initialize_shape_from_array_shape(int, const T2 &) { } /** Get the dimensionality of a multi-dimensional C array */ template static int dimensionality_of_array(Array (&vals)[N]) { - return Buffer::dimensionality_of_array(vals[0]) + 1; + return dimensionality_of_array(vals[0]) + 1; } - template - static int dimensionality_of_array(const T &) { + template + static int dimensionality_of_array(const T2 &) { return 0; } /** Get the underlying halide_type_t of an array's element type. 
*/ template static halide_type_t scalar_type_of_array(Array (&vals)[N]) { - return Buffer::scalar_type_of_array(vals[0]); + return scalar_type_of_array(vals[0]); } - template - static halide_type_t scalar_type_of_array(const T &) { - return halide_type_of::type>(); + template + static halide_type_t scalar_type_of_array(const T2 &) { + return halide_type_of::type>(); } /** Check if any args in a parameter pack are zero */ @@ -176,8 +248,17 @@ class Buffer { return false; } + static bool any_zero(const std::vector &v) { + for (int i : v) { + if (i == 0) return true; + } + return false; + } + public: + typedef T ElemType; + /** Read-only access to the shape */ class Dimension { const buffer_t &buf; @@ -218,6 +299,13 @@ class Buffer { return Dimension(buf, i); } + /** Access to the mins, strides, extents. Will be deprecated. Do not use. */ + // @{ + int min(int i) const { return dim(i).min(); } + int extent(int i) const { return dim(i).extent(); } + int stride(int i) const { return dim(i).stride(); } + // @} + /** The total number of elements this buffer represents. Equal to * the product of the extents */ size_t number_of_elements() const { @@ -240,18 +328,18 @@ class Buffer { /** A pointer to the element with the lowest address. If all * strides are positive, equal to the host pointer. */ - uint8_t *begin() const { + T *begin() const { ptrdiff_t index = 0; for (int i = 0; i < dimensions(); i++) { if (dim(i).stride() < 0) { index += dim(i).stride() * (dim(i).extent() - 1); } } - return buf.host + index * buf.elem_size; + return (T *)(buf.host + index * buf.elem_size); } /** A pointer to one beyond the element with the highest address. 
*/ - uint8_t *end() const { + T *end() const { ptrdiff_t index = 0; for (int i = 0; i < dimensions(); i++) { if (dim(i).stride() > 0) { @@ -259,87 +347,167 @@ class Buffer { } } index += 1; - return buf.host + index * buf.elem_size; + return (T *)(buf.host + index * buf.elem_size); } /** The total number of bytes spanned by the data in memory. */ size_t size_in_bytes() const { - return (size_t)(end() - begin()); + return (size_t)((uint8_t *)end() - (uint8_t *)begin()); } - Buffer() {} + Image() {} /** Make a buffer from a buffer_t */ - Buffer(const buffer_t &buf) { + Image(const buffer_t &buf) : ty(static_halide_type()) { + static_assert(!T_is_void, "Can't construct an Image from a buffer_t. Type is unknown."); initialize_from_buffer(buf); } - /** Give Buffers access to the members of Buffers of different dimensionalities. */ - template friend class Buffer; + Image(halide_type_t t, const buffer_t &buf) : ty(t) { + initialize_from_buffer(buf); + } - /** Make a Buffer from another Buffer of possibly-different - * dimensionality. Asserts if D is less than the dimensionality of - * the argument. */ - template - Buffer(const Buffer &other) : buf(other.buf), - dims(other.dims), - ty(other.ty), - alloc(other.alloc) { + /** Give Images access to the members of Images of different dimensionalities and types. */ + template friend class Image; + + /** Fail an assertion at runtime or compile-time if an Image + * cannot be constructed from some other Image type. */ + template + void assert_can_convert_from(const Image &other) { + static_assert((std::is_same::type, T2>::value || + T_is_void || + std::is_same::value), + "type mismatch constructing Image"); if (D < D2) { assert(other.dimensions() <= D); } + if (std::is_same::value && !T_is_void) { + assert(other.ty == static_halide_type()); + } + } + + /** Make a Image from another Image of possibly-different + * dimensionality and type. 
Asserts if D is less than the + * dimensionality of the argument, or if there's a type + * mismatch. */ + template + Image(const Image &other) : buf(other.buf), + dims(other.dims), + ty(other.ty), + alloc(other.alloc) { + incref(); + assert_can_convert_from(other); + } + + Image(const Image &other) : buf(other.buf), + dims(other.dims), + ty(other.ty), + alloc(other.alloc) { + incref(); } - /** Move-construct a Buffer from another Buffer of + /** Move-construct an Image from another Image of * possibly-different dimensionality. Asserts if D is less than - * the dimensionality of the argument. */ - template - Buffer(const Buffer &&other) : buf(other.buf), - dims(other.dims), - ty(other.ty), - alloc(std::move(other.alloc)) { - if (D < D2) { - assert(other.dimensions() <= D); - } + * the dimensionality of the argument, or if there's a type + * mismatch. */ + template + Image(Image &&other) : buf(other.buf), + dims(other.dims), + ty(other.ty), + alloc(other.alloc) { + other.alloc = nullptr; + assert_can_convert_from(other); } + Image(Image &&other) : buf(other.buf), + dims(other.dims), + ty(other.ty), + alloc(other.alloc) { + other.alloc = nullptr; + } - /** Assign from another Buffer of possibly-different - * dimensionality. Asserts if D is less than the dimensionality of - * the argument. */ - template - Buffer &operator=(const Buffer &other) { - if (D < D2) { - assert(other.dimensions() <= D); + /** Assign from another Image of possibly-different dimensionality + * and type. Asserts if D is less than the dimensionality of the + * argument, or if there's a type mismatch. 
*/ + template + Image &operator=(const Image &other) { + assert_can_convert_from(other); + buf = other.buf; + ty = other.ty; + dims = other.dims; + if (alloc != other.alloc) { + // Drop existing allocation + decref(); + // Share other allocation + alloc = other.alloc; + incref(); } + return *this; + } + + Image &operator=(const Image &other) { buf = other.buf; ty = other.ty; dims = other.dims; - alloc = other.alloc; + if (alloc != other.alloc) { + // Drop existing allocation + decref(); + // Share other allocation + alloc = other.alloc; + incref(); + } return *this; } - /** Move from another Buffer of possibly-different - * dimensionality. Asserts if D is less than the dimensionality of - * the argument. */ - template - Buffer &operator=(const Buffer &&other) { - if (D < D2) { - assert(other.dimensions() <= D); + /** Move from another Image of possibly-different dimensionality + * and type. Asserts if D is less than the dimensionality of the + * argument, or if there's a type mismatch. */ + template + Image &operator=(Image &&other) { + assert_can_convert_from(other); + buf = other.buf; + ty = other.ty; + dims = other.dims; + if (alloc != other.alloc) { + // Drop existing allocation + decref(); + // Steal other allocation + alloc = other.alloc; + other.alloc = nullptr; } + return *this; + } + + Image &operator=(Image &&other) { buf = other.buf; ty = other.ty; dims = other.dims; - alloc = std::move(other.alloc); + if (alloc != other.alloc) { + // Drop existing allocation + decref(); + // Steal other allocation + alloc = other.alloc; + other.alloc = nullptr; + } return *this; } + /** Check the product of the extents fits in memory. */ + void check_overflow() { + size_t size = 1; + for (int i = 0; i < dimensions(); i++) { + size *= dim(i).extent(); + } + for (int i = 0; i < dimensions(); i++) { + size /= dim(i).extent(); + } + assert(size == 1 && "Error: Overflow computing total size of buffer."); + } + /** Allocate memory for this Image. 
Drops the reference to any * existing memory. */ void allocate(void *(*allocate_fn)(size_t) = nullptr, void (*deallocate_fn)(void *) = nullptr) { - assert(buf.dev == 0); - if (!allocate_fn) { allocate_fn = malloc; } @@ -347,35 +515,80 @@ class Buffer { deallocate_fn = free; } + // Drop any existing allocation + decref(); + // Conservatively align images to 128 bytes. This is enough // alignment for all the platforms we might use. size_t size = size_in_bytes(); const size_t alignment = 128; size = (size + alignment - 1) & ~(alignment - 1); - uint8_t *ptr; - ptr = (uint8_t *)allocate_fn(size + alignment - 1); - alloc.reset(ptr, deallocate_fn); - buf.host = (uint8_t *)((uintptr_t)(ptr + alignment - 1) & ~(alignment - 1)); + alloc = (AllocationHeader *)allocate_fn(size + sizeof(AllocationHeader) + alignment - 1); + alloc->deallocate_fn = deallocate_fn; + alloc->ref_count = 1; + uint8_t *unaligned_ptr = ((uint8_t *)alloc) + sizeof(AllocationHeader); + buf.host = (uint8_t *)((uintptr_t)(unaligned_ptr + alignment - 1) & ~(alignment - 1)); + } + + /** Allocate a new image of the given size with a runtime + * type. Only used when you do know what size you want but you + * don't know statically what type the elements are. Pass zeroes + * to make a buffer suitable for bounds query calls. */ + template + Image(halide_type_t t, int first, Args&&... rest) : ty(t) { + if (!T_is_void) { + assert(static_halide_type() == t); + } + static_assert(sizeof...(rest) < D, + "Too many arguments to constructor. Use Image, " + "where D is at least the desired number of dimensions"); + initialize_shape(0, first, int(rest)...); + buf.elem_size = ty.bytes(); + dims = 1 + (int)(sizeof...(rest)); + if (!any_zero(first, int(rest)...)) { + check_overflow(); + allocate(); + } } - /** Allocate a new image of the given size. Pass zeroes to make a + + /** Allocate a new image of the given size. Pass zeroes to make a * buffer suitable for bounds query calls. 
*/ template - Buffer(halide_type_t t, int first, Args&&... rest) : ty(t) { + Image(int first, Args&&... rest) : ty(static_halide_type()) { + static_assert(!T_is_void, + "To construct an Image, pass a halide_type_t as the first argument to the constructor"); static_assert(sizeof...(rest) < D, - "Too many arguments to constructor. Use Image, where D is at least the desired number of dimensions"); + "Too many arguments to constructor. Use Image, " + "where D is at least the desired number of dimensions"); initialize_shape(0, first, int(rest)...); buf.elem_size = ty.bytes(); dims = 1 + (int)(sizeof...(rest)); if (!any_zero(first, int(rest)...)) { + check_overflow(); + allocate(); + } + } + + /** Allocate a new image of unknown type using a vector of ints as the size. */ + Image(halide_type_t t, const std::vector &sizes) : ty(t) { + if (!T_is_void) { + assert(static_halide_type() == t); + } + assert(sizes.size() <= D); + initialize_shape(sizes); + buf.elem_size = ty.bytes(); + dims = (int)sizes.size(); + if (!any_zero(sizes)) { + check_overflow(); allocate(); } } - /** Make a Buffer that refers to a statically sized array. Does not + /** Make an Image that refers to a statically sized array. Does not * take ownership of the data. */ template - explicit Buffer(Array (&vals)[N]) { + explicit Image(Array (&vals)[N]) { dims = dimensionality_of_array(vals); initialize_shape_from_array_shape(dims - 1, vals); ty = scalar_type_of_array(vals); @@ -383,13 +596,32 @@ class Buffer { buf.host = (uint8_t *)vals; } - /** Initialize a Buffer from a pointer and some sizes. Assumes + /** Initialize an Image of runtime type from a pointer and some + * sizes. Assumes dense row-major packing and a min coordinate of + * zero. Does not take ownership of the data. */ + template + explicit Image(halide_type_t t, void *data, int first, Args&&... rest) { + if (!T_is_void) { + assert(static_halide_type() == t); + } + static_assert(sizeof...(rest) < D, + "Too many arguments to constructor. 
Use Image, " + "where D is at least the desired number of dimensions"); + ty = t; + initialize_shape(0, first, int(rest)...); + buf.elem_size = ty.bytes(); + dims = 1 + (int)(sizeof...(rest)); + buf.host = (uint8_t *)data; + } + + /** Initialize an Image from a pointer and some sizes. Assumes * dense row-major packing and a min coordinate of zero. Does not * take ownership of the data. */ - template - explicit Buffer(T *data, int first, Args&&... rest) { + template + explicit Image(T *data, int first, Args&&... rest) { static_assert(sizeof...(rest) < D, - "Too many arguments to constructor. Use Image, where D is at least the desired number of dimensions"); + "Too many arguments to constructor. Use Image, " + "where D is at least the desired number of dimensions"); ty = halide_type_of::type>(); initialize_shape(0, first, int(rest)...); buf.elem_size = sizeof(T); @@ -400,8 +632,27 @@ class Buffer { /** Initialize an Image from a pointer to the min coordinate and * an array describing the shape. Does not take ownership of the * data. */ - template::type> - explicit Buffer(T *data, halide_dimension_t shape[N]) { + template::type> + explicit Image(halide_type_t t, void *data, halide_dimension_t shape[N]) { + if (!T_is_void) { + assert(static_halide_type() == t); + } + ty = t; + dims = N; + for (int i = 0; i < N; i++) { + buf.min[i] = shape[i].min; + buf.extent[i] = shape[i].extent; + buf.stride[i] = shape[i].stride; + } + buf.elem_size = ty.bytes(); + buf.host = (uint8_t *)data; + } + + /** Initialize an Image from a pointer to the min coordinate and + * an array describing the shape. Does not take ownership of the + * data. 
*/ + template::type> + explicit Image(T *data, halide_dimension_t shape[N]) { ty = halide_type_of::type>(); dims = N; for (int i = 0; i < N; i++) { @@ -413,8 +664,10 @@ class Buffer { buf.host = (uint8_t *)data; } - template - explicit Buffer(T *data, halide_dimension_t shape[D]) { + explicit Image(halide_type_t t, void *data, halide_dimension_t shape[D]) { + if (!T_is_void) { + assert(static_halide_type() == t); + } ty = halide_type_of::type>(); dims = 0; for (int i = 0; i < D; i++) { @@ -428,18 +681,22 @@ class Buffer { buf.host = (uint8_t *)data; } - /** If you use the (x, y, c) indexing convention, then Halide - * Images are stored planar by default. This function constructs - * an interleaved RGB or RGBA image that can still be indexed - * using (x, y, c). Passing it to a generator requires that the - * generator has been compiled with support for interleaved (also - * known as packed or chunky) memory layouts. */ - static Buffer make_interleaved(halide_type_t t, int width, int height, int channels) { - static_assert(D >= 3, "Not enough dimensions to make an interleaved image"); - Buffer im(t, channels, width, height); - im.transpose(0, 1); - im.transpose(1, 2); - return im; + explicit Image(T *data, halide_dimension_t shape[D]) { + ty = halide_type_of::type>(); + dims = 0; + for (int i = 0; i < D; i++) { + if (!shape[i].extent) break; + dims++; + buf.min[i] = shape[i].min; + buf.extent[i] = shape[i].extent; + buf.stride[i] = shape[i].stride; + } + buf.elem_size = sizeof(T); + buf.host = (uint8_t *)data; + } + + ~Image() { + decref(); } /** Get a pointer to the raw buffer_t this wraps. */ @@ -453,6 +710,16 @@ class Buffer { } // @} + /** Access to the untyped host pointer */ + // @{ + const void *host_ptr() const { + return buf.host; + } + void *host_ptr() { + return buf.host; + } + // @} + /** Provide a cast operator to buffer_t *, so that instances can * be passed directly to Halide filters. 
*/ operator buffer_t *() { @@ -476,9 +743,9 @@ class Buffer { * or slice followed by copy to make a copy of only a portion of * the image. The new image uses the same memory layout as the * original, with holes compacted away. */ - Buffer copy(void *(*allocate_fn)(size_t) = nullptr, - void (*deallocate_fn)(void *) = nullptr) const { - Buffer src = *this; + Image copy(void *(*allocate_fn)(size_t) = nullptr, + void (*deallocate_fn)(void *) = nullptr) const { + Image src = *this; // Reorder the dimensions of src to have strides in increasing order int swaps[(D*(D+1))/2]; @@ -493,12 +760,12 @@ class Buffer { } // Make a copy of it using this dimension ordering - Buffer dst = src; + Image dst = src; dst.allocate(allocate_fn, deallocate_fn); // Concatenate dense inner dimensions into contiguous memcpy tasks - Buffer src_slice = src; - Buffer dst_slice = dst; + Image src_slice = src; + Image dst_slice = dst; int64_t slice_size = 1; while (src_slice.dimensions && src_slice.dim(0).stride() == slice_size) { assert(dst_slice.dim(0).stride() == slice_size); @@ -526,10 +793,10 @@ class Buffer { * the given dimension. Does not assert the crop region is within * the existing bounds. The cropped image drops any device * handle. */ - Buffer cropped(int d, int min, int extent) const { + Image cropped(int d, int min, int extent) const { // Make a fresh copy of the underlying buffer (but not a fresh // copy of the allocation, if there is one). - Buffer im = *this; + Image im = *this; // Drop the reference to any device allocation. It won't be // valid for the cropped image. im.buf.dev = 0; @@ -551,10 +818,10 @@ class Buffer { /** Make an image that refers to a sub-rectangle of this image along * the first N dimensions. Does not assert the crop region is within * the existing bounds. The cropped image drops any device handle. 
*/ - Buffer cropped(const std::vector> &rect) const { + Image cropped(const std::vector> &rect) const { // Make a fresh copy of the underlying buffer (but not a fresh // copy of the allocation, if there is one). - Buffer im = *this; + Image im = *this; // Drop the reference to any device allocation. It won't be // valid for the cropped image. im.buf.dev = 0; @@ -573,8 +840,8 @@ class Buffer { * translated coordinates in the given dimension. Positive values * move the image data to the right or down relative to the * coordinate system. Drops any device handle. */ - Buffer translated(int d, int dx) const { - Buffer im = *this; + Image translated(int d, int dx) const { + Image im = *this; im.buf.dev = 0; im.translate(d, dx); return im; @@ -588,7 +855,7 @@ class Buffer { /** Make an image which refers to the same data translated along * the first N dimensions. */ void translated(const std::vector &delta) { - Buffer im = *this; + Image im = *this; im.buf.dev = 0; im.translate(delta); return im; @@ -601,10 +868,21 @@ class Buffer { } } + /** Set the min coordinate of an image in the first N dimensions */ + template + void set_min(Args... args) { + static_assert(sizeof...(args) <= D, "Too many arguments for dimensionality of Image"); + assert(sizeof...(args) <= (size_t)dimensions()); + const int x[] = {args...}; + for (size_t i = 0; i < sizeof...(args); i++) { + buf.min[i] = x[i]; + } + } + /** Make an image which refers to the same data using a different * ordering of the dimensions. */ - Buffer transposed(int d1, int d2) const { - Buffer im = *this; + Image transposed(int d1, int d2) const { + Image im = *this; im.transpose(d1, d2); return im; } @@ -618,11 +896,11 @@ class Buffer { /** Make a lower-dimensional image that refers to one slice of this * image. Drops any device handle. 
*/ - Buffer sliced(int d, int pos) const { - Buffer im = *this; + Image sliced(int d, int pos) const { + Image im = *this; im.buf.dev = 0; im.slice(d, pos); - return Buffer(std::move(im)); + return Image(std::move(im)); } /** Slice an image in-place */ @@ -651,9 +929,9 @@ class Buffer { &im(x, y, c) == &im2(x, 17, y, c); \endcode */ - Buffer embedded(int d, int pos) const { + Image embedded(int d, int pos) const { assert(d >= 0 && d <= dimensions()); - Buffer im(*this); + Image im(*this); im.buf.dev = 0; im.add_dimension(); im.translate(im.dimensions() - 1, pos); @@ -696,7 +974,7 @@ class Buffer { * for_each_element below for more details. */ template void for_each_element(Fn f) const { - Halide::Tools::for_each_element(buf, f); + Halide::for_each_element(buf, f); } /** Methods for managing any GPU allocation. */ @@ -717,157 +995,39 @@ class Buffer { buf.dev_dirty = v; } - void copy_to_host() { + void copy_to_host(void *ctx = nullptr) { if (device_dirty()) { - halide_copy_to_host(NULL, &buf); + halide_copy_to_host(ctx, &buf); } } - void copy_to_device(const struct halide_device_interface *device_interface) { + void copy_to_device(const struct halide_device_interface *device_interface, void *ctx = nullptr) { if (host_dirty()) { - halide_copy_to_device(NULL, &buf, device_interface); + halide_copy_to_device(ctx, &buf, device_interface); } } - void device_free() { - halide_device_free(nullptr, &buf); - } - // @} -}; - -/** A templated Image class that wraps buffer_t and adds - * functionality. When using Halide from C++, this is the preferred - * way to create input and output buffers. The overhead of using this - * class relative to a naked buffer_t is minimal - it uses another - * ~100 bytes on the stack, and does no dynamic allocations when using - * it to represent existing memory. This overhead will shrink further - * in the future once buffer_t is deprecated. - * - * The template parameter T is the element type, and D is the maximum - * number of dimensions. 
It must be less than or equal to 4 for now. - * - * The class optionally allocates and owns memory for the image using - * a std::shared_ptr allocated with the provided allocator. If they - * are null, malloc and free are used. Any device-side allocation is - * not owned, and must be freed manually using device_free. - * - * For accessing the shape and type, this class provides both the - * buffer_t interface (extent[i], min[i], and stride[i] arrays, the - * elem_size field), and also the interface of the yet-to-come - * halide_buffer_t, which will replace buffer_t. This is intended to - * allow a gradual transition to halide_buffer_t. New code should - * access the shape via dim[i].extent, dim[i].min, dim[i].stride, and - * the type via the 'type' field. */ -template -class Image : public Buffer { - static_assert(D <= 4, "buffer_t supports a maximum of four dimensions"); - -public: - typedef T ElemType; - - /** Get the type of the elements. Overridden here because we - * statically know the type. */ - halide_type_t type() const { - return halide_type_of::type>(); - } - - Image() {} - - Image(const buffer_t &buf) : Buffer(buf) {} - - /** Allocate a new image of the given size. Pass zeroes to make a - * buffer suitable for bounds query calls. */ - template - Image(int first, Args&&... rest) : - Buffer(halide_type_of::type>(), first, int(rest)...) {} - - /** Make an image that refers to a statically sized array. Does not - * take ownership of the data. */ - template - explicit Image(Array (&vals)[N]) : - Buffer(vals) {} - - /** Initialize an Image from a pointer and some sizes. Assumes - * dense row-major packing and a min coordinate of zero. Does not - * take ownership of the data. */ - template - explicit Image(T *data, int first, Args&&... rest) : - Buffer(data, first, int(rest)...) {} - - /** Initialize an Image from a pointer to the min coordinate and - * an array describing the shape. Does not take ownership of the - * data. 
*/ - template::type> - explicit Image(T *data, halide_dimension_t shape[N]) : Buffer(data, shape) {} - - /** Initialize an Image from a pointer to the min coordinate and - * an array describing the shape. Does not take ownership of the - * data. This version exists so that there's a non-templated - * version to use in case the Image is a derived type and so N - * can't be inferred in the version above. */ - explicit Image(T *data, halide_dimension_t shape[D]) : - Buffer(data, shape) {} - - /** Construct a typed Image from an untyped Buffer. Asserts at - * runtime if there's a type mismatch, or if the dimensionality of - * the buffer is less than D. */ - template - Image(const Buffer &buf) : Buffer(buf) { - assert(halide_type_of::type>() == buf.type()); - } - - /** Move-construct a typed Image from an untyped Buffer. Asserts - * at runtime if there's a type mismatch, or if the dimensionality - * of the buffer is less than D. */ - template - Image(const Buffer &&buf) : Buffer(buf) { - assert(halide_type_of::type>() == buf.type()); - } - - /** Construct an Image from an Image of a different - * dimensionality. Asserts at runtime the other dimensionality is - * greater than D. Asserts at compile-time if the element type - * doesn't match. This constructor is templated on the element - * type of the argument so that the Buffer constructor above is - * not used for Images with mismatched types. - */ - template - Image(const Image &buf) : Buffer(buf) { - static_assert(std::is_same::type, T2>::value, - "Can't construct an Image from an Image of different element type, " - "with the exception of casting an Image to an Image."); - } - - /** Move-construct an Image from an Image of a different - * dimensionality. Asserts at runtime the other dimensionality is - * greater than D. Asserts at compile-time if the element type - * doesn't match. 
- */ - template - Image(const Image &&buf) : Buffer(buf) { - static_assert(std::is_same::type, T2>::value, - "Can't construct an Image from an Image of different element type, " - "with the exception of casting an Image to an Image."); + void device_free(void *ctx = nullptr) { + halide_device_free(ctx, &buf); } - /** Assign an Image from an Image of a different - * dimensionality. Asserts at runtime the other dimensionality is - * greater than D. - */ - template - Image &operator=(const Image &other) { - Buffer::operator=(other); - return *this; + void device_sync(void *ctx = nullptr) { + halide_device_sync(ctx, &buf); } + // @} - /** Move-assign an Image from an Image of a different - * dimensionality. Asserts at runtime the other dimensionality is - * greater than D. - */ - template - Image &operator=(const Image &&other) { - Buffer::operator=(other); - return *this; + /** If you use the (x, y, c) indexing convention, then Halide + * Images are stored planar by default. This function constructs + * an interleaved RGB or RGBA image that can still be indexed + * using (x, y, c). Passing it to a generator requires that the + * generator has been compiled with support for interleaved (also + * known as packed or chunky) memory layouts. */ + static Image make_interleaved(halide_type_t t, int width, int height, int channels) { + static_assert(D >= 3, "Not enough dimensions to make an interleaved image"); + Image im(t, channels, width, height); + im.transpose(0, 1); + im.transpose(1, 2); + return im; } /** If you use the (x, y, c) indexing convention, then Halide @@ -884,12 +1044,36 @@ class Image : public Buffer { return im; } + /** Wrap an existing interleaved image. 
*/ + static Image make_interleaved(halide_type_t t, T *data, int width, int height, int channels) { + static_assert(D >= 3, "Not enough dimensions to make an interleaved image"); + Image im(t, data, channels, width, height); + im.transpose(0, 1); + im.transpose(1, 2); + return im; + } + + /** Wrap an existing interleaved image. */ + static Image make_interleaved(T *data, int width, int height, int channels) { + static_assert(D >= 3, "Not enough dimensions to make an interleaved image"); + Image im(data, channels, width, height); + im.transpose(0, 1); + im.transpose(1, 2); + return im; + } + + /** Make a zero-dimensional Image */ + static Image make_scalar(halide_type_t t) { + return Image(t, 1).sliced(0, 0); + } + /** Make a zero-dimensional Image */ static Image make_scalar() { return Image(1).sliced(0, 0); } private: + template __attribute__((always_inline)) T *address_of(int d, int first, Args... rest) const { @@ -931,57 +1115,72 @@ class Image : public Buffer { //@{ template __attribute__((always_inline)) - typename std::enable_if::value, const T &>::type + typename std::enable_if::value, const not_void_T &>::type operator()(int first, Args... 
rest) const { - return *(address_of(0, first, rest...)); + static_assert(!T_is_void, + "Cannot use operator() on Image types"); + return *((const not_void_T *)(address_of(0, first, rest...))); } __attribute__((always_inline)) - const T &operator()() const { - return *(address_of(0)); + const not_void_T & + operator()() const { + static_assert(!T_is_void, + "Cannot use operator() on Image types"); + return *((const not_void_T *)(data())); } __attribute__((always_inline)) - const T &operator()(const int *pos) const { - return *((T *)address_of(pos)); + const not_void_T & + operator()(const int *pos) const { + static_assert(!T_is_void, + "Cannot use operator() on Image types"); + return *((const not_void_T *)(address_of(pos))); } template __attribute__((always_inline)) - typename std::enable_if::value, T &>::type + typename std::enable_if::value, not_void_T &>::type operator()(int first, Args... rest) { - return *(address_of(0, first, rest...)); + static_assert(!T_is_void, + "Cannot use operator() on Image types"); + return *((not_void_T *)(address_of(0, first, rest...))); } __attribute__((always_inline)) - T &operator()() { - return *(address_of(0)); + not_void_T & + operator()() { + static_assert(!T_is_void, + "Cannot use operator() on Image types"); + return *((not_void_T *)(data())); } __attribute__((always_inline)) - T &operator()(const int *pos) { - return *((T *)address_of(pos)); + not_void_T & + operator()(const int *pos) { + static_assert(!T_is_void, + "Cannot use operator() on Image types"); + return *((not_void_T *)(address_of(pos))); } // @} /** Other calls to operator()(Args...) get redirected to a call to - * ImageAccessor::operator(const Image &, - * Args...). This makes it possible for later code to add new - * Image access methods for types not convertible to int - * (e.g. Exprs). To add a custom accessor, define a template - * specialization of ImageAccessor with an operator() method that - * takes the expected arguments. 
See + * image_accessor(const Image &, Args...). This makes it + * possible for later code to add new Image access methods for + * types not convertible to int (e.g. Exprs). To add a custom + * accessor, define an overload of image_accessor that takes the + * expected arguments. See * test/correctness/custom_image_accessor.cpp for an example. */ template auto operator()(Args... args) const -> - decltype(ImageAccessor()(*this, args...)) { - return ImageAccessor()(*this, args...); + decltype(image_accessor(*this, args...)) { + return image_accessor(*this, args...); } template auto operator()(Args... args) -> - decltype(ImageAccessor()(*this, args...)) { - return ImageAccessor()(*this, args...); + decltype(image_accessor(*this, args...)) { + return image_accessor(*this, args...); } private: @@ -989,9 +1188,9 @@ class Image : public Buffer { // lambda of the correct dimensionality. template typename std::enable_if<(sizeof...(Args) < D)>::type - fill_helper(T val, Args... args) { - if (sizeof...(Args) == Buffer::dimensions()) { - Buffer::for_each_element([&](Args... args) {(*this)(args...) = val;}); + fill_helper(not_void_T val, Args... args) { + if (sizeof...(Args) == dimensions()) { + for_each_element([&](Args... args) {(*this)(args...) = val;}); } else { fill_helper(val, 0, args...); } @@ -999,74 +1198,18 @@ class Image : public Buffer { template typename std::enable_if<(sizeof...(Args) == D)>::type - fill_helper(T val, Args...) { - Buffer::for_each_element([&](Args... args) {(*this)(args...) = val;}); + fill_helper(not_void_T val, Args...) { + for_each_element([&](Args... args) {(*this)(args...) = val;}); } public: /** Set every value in the buffer to the given value */ - void fill(T val) { + template> + void fill(not_void_T val) { fill_helper(val); } - /** Make a new image which is a deep copy of this image. Use crop - * or slice followed by copy to make a copy of only a portion of - * the image. 
The new image uses the same memory layout as the - * original, with holes compacted away. */ - Image copy(void *(*allocate_fn)(size_t) = nullptr, - void (*deallocate_fn)(void *) = nullptr) const { - return Image(Buffer::copy(allocate_fn, deallocate_fn)); - } - - /** Make an image that refers to a sub-range of this image along - * the given dimension. Does not assert the crop region is within - * the existing bounds. */ - Image cropped(int d, int min, int extent) const { - return Image(Buffer::cropped(d, min, extent)); - } - - /** Make an image that refers to a sub-rectangle of this image along - * the first N dimensions. Does not assert the crop region is within - * the existing bounds. The cropped image drops any device handle. */ - Image cropped(const std::vector> &rect) const { - return Image(Buffer::cropped(rect)); - } - - /** Make an image which refers to the same data with using - * translated coordinates in the given dimension. Positive values - * move the image data to the right or down relative to the - * coordinate system. */ - Image translated(int d, int dx) const { - return Image(Buffer::translated(d, dx)); - } - - /** Make an image which refers to the same data with using - * translated coordinates along the first N dimensions. Positive - * values move the image data to the right or down relative to the - * coordinate system. */ - Image translated(const std::vector &delta) const { - return Image(Buffer::translated(delta)); - } - - /** Make an image which refers to the same data using a different - * ordering of the dimensions. */ - Image transposed(int d1, int d2) const { - return Image(Buffer::transposed(d1, d2)); - } - - /** Make a lower-dimensional image that refers to one slice of this - * image. */ - Image sliced(int d, int pos) const { - return Image(Buffer::sliced(d, pos)); - } - - /** Make a higher-dimensional image in which this image is one - * slice. The opposite of sliced. 
*/ - Image embedded(int d, int pos) const { - return Image(Buffer::embedded(d, pos)); - } - }; /** Some helpers for for_each_element. */ @@ -1263,7 +1406,8 @@ void for_each_element(const buffer_t &buf, Fn &&f) { for_each_element_helpers::for_each_element(0, buf, std::forward(f)); } -} // namespace Tools + + } // namespace Halide #endif // HALIDE_RUNTIME_IMAGE_H diff --git a/test/correctness/constant_type.cpp b/test/correctness/constant_type.cpp index fa3e868a3de4..411812189507 100644 --- a/test/correctness/constant_type.cpp +++ b/test/correctness/constant_type.cpp @@ -16,13 +16,13 @@ bool test_type() { return false; } - Expr add_one = im + 1; + Expr add_one = im(_) + 1; if (add_one.type() != t) { std::cout << "Add 1 changed type from " << t << " to " << add_one.type() << "\n"; return false; } - Expr one_add = 1 + im; + Expr one_add = 1 + im(_); if (one_add.type() != t) { std::cout << "Pre-add 1 changed type from " << t << " to " << one_add.type() << "\n"; return false; diff --git a/test/correctness/custom_image_accessor.cpp b/test/correctness/custom_image_accessor.cpp index 9cd911dd7624..a7dc745d33f9 100644 --- a/test/correctness/custom_image_accessor.cpp +++ b/test/correctness/custom_image_accessor.cpp @@ -1,7 +1,7 @@ -#include "halide_image.h" +#include "Halide.h" #include -using namespace Halide::Tools; +using namespace Halide; // First a very simple example. We'll make it possible to access an // image with a custom 3D coordinate type. @@ -9,8 +9,10 @@ struct Coord { int x, y, z; }; + namespace Halide { -namespace Tools { + +/* template struct ImageAccessor { // We want to use this accessor to assign to locations too, so @@ -22,8 +24,18 @@ struct ImageAccessor { return im(c.x, c.y, c.z); } }; +*/ +template +T image_accessor(const Image &im, Coord c) { + return im(c.x, c.y, c.z); } + +template +T &image_accessor(Image &im, Coord c) { + return im(c.x, c.y, c.z); +} + } // Next we'll use a more complex variadic example. 
We'll extend @@ -71,11 +83,11 @@ struct AllFloatConvertible { }; namespace Halide { -namespace Tools { // Then we define a partial specialization of // Halide::Tools::ImageAccessor that catches any access where the all // args are float-convertible. +/* template struct ImageAccessor::value, float>::type, D, Args...> { float operator()(const Image &im, Args... args) { @@ -83,8 +95,15 @@ struct ImageAccessor::value return MultiLinearSampler()(im, coords); } }; +*/ +template +typename std::enable_if::value, float>::type +image_accessor(const Image &im, Args... args) { + float coords[] = {float(args)...}; + return MultiLinearSampler()(im, coords); } + } int main(int argc, char **argv) { diff --git a/test/correctness/div_mod.cpp b/test/correctness/div_mod.cpp index 3da609d6177b..4194ef30f744 100644 --- a/test/correctness/div_mod.cpp +++ b/test/correctness/div_mod.cpp @@ -312,7 +312,7 @@ bool div_mod(int vector_width, ScheduleVariant scheduling, const Target &target) f.compute_root().hexagon(); break; }; - + Realization R = f.realize(WIDTH, HEIGHT, target); Image q(R[0]); Image r(R[1]); @@ -395,7 +395,7 @@ bool f_mod() { // Compute modulus result and check it. Func f; - f(_) = a % b; // Using Halide mod operation. + f(_) = a(_) % b(_); // Using Halide mod operation. 
f.realize(out); // Explicit checks of the simplifier for consistency with runtime computation diff --git a/test/correctness/for_each_element.cpp b/test/correctness/for_each_element.cpp index a034a167a95f..e622b2e711d1 100644 --- a/test/correctness/for_each_element.cpp +++ b/test/correctness/for_each_element.cpp @@ -1,6 +1,6 @@ -#include "halide_image.h" +#include "Halide.h" -using namespace Halide::Tools; +using namespace Halide; int main(int argc, char **argv) { // Try several different ways of accessing a the pixels of an image, diff --git a/test/correctness/gpu_non_contiguous_copy.cpp b/test/correctness/gpu_non_contiguous_copy.cpp index e81d85825320..50d96a8e1950 100644 --- a/test/correctness/gpu_non_contiguous_copy.cpp +++ b/test/correctness/gpu_non_contiguous_copy.cpp @@ -26,7 +26,7 @@ int main(int argc, char **argv) { cropped.stride[1] *= 2; cropped.stride[2] *= 2; cropped.stride[3] *= 2; - Buffer out(Int(32), &cropped); + Image out(cropped); // Make a bitmask representing the region inside the crop. 
Image in_subregion(80, 60, 10, 10); diff --git a/test/correctness/input_larger_than_two_gigs.cpp b/test/correctness/input_larger_than_two_gigs.cpp index a90c01ec6fcd..737f6443599a 100644 --- a/test/correctness/input_larger_than_two_gigs.cpp +++ b/test/correctness/input_larger_than_two_gigs.cpp @@ -25,7 +25,7 @@ int main(int argc, char **argv) { buf.stride[2] = 0; buf.elem_size = 1; - Buffer param_buf(UInt(8), &buf); + Image param_buf(buf); ImageParam input(UInt(8), 3); input.set(param_buf); diff --git a/test/correctness/interleave.cpp b/test/correctness/interleave.cpp index 2c8b87082333..435ac294ec12 100644 --- a/test/correctness/interleave.cpp +++ b/test/correctness/interleave.cpp @@ -145,23 +145,19 @@ int main(int argc, char **argv) { .set_stride(1, 1) .set_extent(1, 3); - Buffer buff3; - buff3 = Buffer(Float(32), 16, 3, 0, 0, (uint8_t *)0); - buff3.raw_buffer()->stride[0] = 3; - buff3.raw_buffer()->stride[1] = 1; + Image buff3(3, 16); + buff3.transpose(0, 1); - Realization r3({buff3}); - interleaved.realize(r3); + interleaved.realize(buff3); check_interleave_count(interleaved, 1); - Image result3 = r3[0]; for (int x = 0; x < 16; x++) { for (int y = 0; y < 3; y++) { float correct = 3*x + y; - float delta = result3(x,y) - correct; + float delta = buff3(x, y) - correct; if (delta > 0.01 || delta < -0.01) { - printf("result(%d) = %f instead of %f\n", x, result3(x,y), correct); + printf("result(%d) = %f instead of %f\n", x, buff3(x,y), correct); return -1; } } @@ -197,21 +193,17 @@ int main(int argc, char **argv) { check_interleave_count(output4, 1); - Buffer buff4; - buff4 = Buffer(Float(32), 16, 4, 0, 0, (uint8_t *)0); - buff4.raw_buffer()->stride[0] = 4; - buff4.raw_buffer()->stride[1] = 1; + Image buff4(4, 16); + buff4.transpose(0, 1); - Realization r4({buff4}); - output4.realize(r4); + output4.realize(buff4); - Image result4 = r4[0]; for (int x = 0; x < 16; x++) { for (int y = 0; y < 4; y++) { float correct = sin((y+1)*x); - float delta = result4(x,y) - correct; 
+ float delta = buff4(x, y) - correct; if (delta > 0.01 || delta < -0.01) { - printf("result(%d) = %f instead of %f\n", x, result4(x,y), correct); + printf("result(%d) = %f instead of %f\n", x, buff4(x,y), correct); return -1; } } @@ -240,21 +232,17 @@ int main(int argc, char **argv) { check_interleave_count(output5, 1); - Buffer buff5; - buff5 = Buffer(Float(32), 16, 5, 0, 0, (uint8_t *)0); - buff5.raw_buffer()->stride[0] = 5; - buff5.raw_buffer()->stride[1] = 1; + Image buff5(5, 16); + buff5.transpose(0, 1); - Realization r5({buff5}); - output5.realize(r5); + output5.realize(buff5); - Image result5 = r5[0]; for (int x = 0; x < 16; x++) { for (int y = 0; y < 5; y++) { float correct = sin((y+1)*x); - float delta = result5(x,y) - correct; + float delta = buff5(x, y) - correct; if (delta > 0.01 || delta < -0.01) { - printf("result(%d) = %f instead of %f\n", x, result5(x,y), correct); + printf("result(%d) = %f instead of %f\n", x, buff5(x,y), correct); return -1; } } @@ -382,8 +370,8 @@ int main(int argc, char **argv) { .set_stride(0,1).set_stride(1,8) .set_extent(0,8).set_extent(1,8); - Image result6(8,8); - Image result7(8,8); + Image result6(8, 8); + Image result7(8, 8); trans1.realize(result6); trans2.realize(result7); diff --git a/test/correctness/interleave_rgb.cpp b/test/correctness/interleave_rgb.cpp index f3cca9176c2b..9b66a71f2ddd 100644 --- a/test/correctness/interleave_rgb.cpp +++ b/test/correctness/interleave_rgb.cpp @@ -25,20 +25,14 @@ bool test_interleave() { } else { interleaved.vectorize(x, target.natural_vector_size()).unroll(c); } - Buffer buff(type_of(), 256, 128, 3); - buff.raw_buffer()->stride[0] = 3; - buff.raw_buffer()->stride[1] = 3 * buff.extent(0); - buff.raw_buffer()->stride[2] = 1; - - Realization r({buff}); - interleaved.realize(r, target); - Image out = r[0]; - for (int y = 0; y < out.height(); y++) { - for (int x = 0; x < out.width(); x++) { + Image buff = Image::make_interleaved(256, 128, 3); + interleaved.realize(buff, target); + for 
(int y = 0; y < buff.height(); y++) { + for (int x = 0; x < buff.width(); x++) { for (int c = 0; c < 3; c++) { T correct = x * 3 + y * 5 + c; - if (out(x, y, c) != correct) { - printf("out(%d, %d, %d) = %d instead of %d\n", x, y, c, out(x, y, c), correct); + if (buff(x, y, c) != correct) { + printf("out(%d, %d, %d) = %d instead of %d\n", x, y, c, buff(x, y, c), correct); return false; } } diff --git a/test/correctness/math.cpp b/test/correctness/math.cpp index 46dd39f950d4..a617bd145a2f 100644 --- a/test/correctness/math.cpp +++ b/test/correctness/math.cpp @@ -63,7 +63,7 @@ uint32_t absd(uint32_t a, uint32_t b) { return a < b ? b - a : a - b; } Var x("x"); \ ImageParam input(type_of(), 1); \ test_##name(x) = name(input(x)); \ - Buffer in_buffer(type_of(), in_buf); \ + Image in_buffer(*in_buf); \ input.set(in_buffer); \ if (target.has_gpu_feature()) { \ test_##name.gpu_tile(x, 8); \ @@ -89,7 +89,7 @@ uint32_t absd(uint32_t a, uint32_t b) { return a < b ? b - a : a - b; } Var x("x"); \ ImageParam input(type_of(), 2); \ test_##name(x) = name(input(0, x), input(1, x)); \ - Buffer in_buffer(type_of(), in_buf); \ + Image in_buffer(*in_buf); \ input.set(in_buffer); \ if (target.has_gpu_feature()) { \ test_##name.gpu_tile(x, 8); \ diff --git a/test/correctness/min_extent.cpp b/test/correctness/min_extent.cpp index 182aebea4d38..5548d90631a7 100644 --- a/test/correctness/min_extent.cpp +++ b/test/correctness/min_extent.cpp @@ -21,12 +21,8 @@ int main(int argc, char **argv) { // but we initialize them anyway. 
Image input(5); Image out(10); - for (int i = 0; i < input.width(); i++) { - input(i) = 0; - } - for (int i = 0; i < out.width(); i++) { - out(i) = 0; - } + input.fill(0); + out.fill(0); // Change coordinate origin of input and output buffer so that they are // aligned as follows: @@ -34,14 +30,14 @@ int main(int argc, char **argv) { // out |-----------------| const int INOFF = 4; const int OUTOFF = 1; - in.set(input); input.set_min(INOFF); out.set_min(OUTOFF); + in.set(input); f.realize(out); // Check correctness of result int expected[] = { -10, -20, -30, 4, 5, 6, 7, 8, 90, 100 }; - for (int i=0; i out_buf(nullptr, 7, 8); + out_buf.set_min(2, 2); out.infer_input_bounds(out_buf); diff --git a/test/correctness/output_larger_than_two_gigs.cpp b/test/correctness/output_larger_than_two_gigs.cpp index da85bc5ae763..fe97381c698a 100644 --- a/test/correctness/output_larger_than_two_gigs.cpp +++ b/test/correctness/output_larger_than_two_gigs.cpp @@ -31,7 +31,7 @@ int main(int argc, char **argv) { identity_uint8.set_error_handler(&halide_error); - Buffer output_buf(UInt(8), &buf); + Image output_buf(buf); Target t = get_jit_target_from_environment(); if (t.bits != 32) { diff --git a/test/correctness/process_some_tiles.cpp b/test/correctness/process_some_tiles.cpp index e3d3a1b24067..b13cea3f8715 100644 --- a/test/correctness/process_some_tiles.cpp +++ b/test/correctness/process_some_tiles.cpp @@ -68,7 +68,8 @@ int main(int argc, char **argv) { output.compile_jit(); Image bitmap_buf(10, 10); - bitmap_buf(5, 5) = 1; + bitmap_buf.fill(false); + bitmap_buf(5, 5) = true; bitmap.set(bitmap_buf); Image image_buf = lambda(x, y, (sin(x+y)+1)/2).realize(10 * tile_size, 10 * tile_size); diff --git a/test/correctness/realize_over_shifted_domain.cpp b/test/correctness/realize_over_shifted_domain.cpp index dc10035e4501..9174bd479ba5 100644 --- a/test/correctness/realize_over_shifted_domain.cpp +++ b/test/correctness/realize_over_shifted_domain.cpp @@ -4,7 +4,7 @@ using namespace Halide; 
int main(int argc, char **argv) { - Image input(100, 50, "input"); + Image input(100, 50); // This image represents the range [100, 199]*[50, 99] input.set_min(100, 50); diff --git a/test/correctness/rfactor.cpp b/test/correctness/rfactor.cpp index e588f669aeab..9e77d38a94f5 100644 --- a/test/correctness/rfactor.cpp +++ b/test/correctness/rfactor.cpp @@ -455,13 +455,15 @@ int histogram_rfactor_test(bool compile_module) { reference_hist[uint8_t(in(x, y))] += 1; } } + // Wrap the image in a buffer, so that we know its name. + Buffer in_buf(in); Func hist("hist"), g("g"); Var x("x"); RDom r(in); hist(x) = 0; - hist(clamp(cast(in(r.x, r.y)), 0, 255)) += 1; + hist(clamp(cast(in_buf(r.x, r.y)), 0, 255)) += 1; hist.compute_root(); Var u("u"); @@ -482,7 +484,7 @@ int histogram_rfactor_test(bool compile_module) { {hist.name(), {}}, {hist.update(0).name(), {intm.name(), hist.name()}}, {intm.name(), {}}, - {intm.update(0).name(), {in.name(), intm.name()}}, + {intm.update(0).name(), {in_buf.name(), intm.name()}}, }; if (check_call_graphs(checker.calls, expected) != 0) { diff --git a/test/correctness/shifted_image.cpp b/test/correctness/shifted_image.cpp index 66fbdd1eca7a..db00650e77c9 100644 --- a/test/correctness/shifted_image.cpp +++ b/test/correctness/shifted_image.cpp @@ -20,7 +20,7 @@ int main(int argc, char **argv) { buf.stride[3] = 1000; buf.elem_size = 4; - Image im(&buf); + Image im(buf); ((int *)buf.host)[0] = 17; buf.host[0] = 17; diff --git a/test/correctness/simd_op_check.cpp b/test/correctness/simd_op_check.cpp index 1ad77e71786d..7b018f9dede7 100644 --- a/test/correctness/simd_op_check.cpp +++ b/test/correctness/simd_op_check.cpp @@ -166,8 +166,8 @@ void check(string op, int vector_width, Expr e) { bool can_run_the_code = can_run_code(); if (can_run_the_code) { - Realization r = error.realize(0, target.without_feature(Target::NoRuntime)); - double e = Image(r[0])(0); + Realization r = error.realize(target.without_feature(Target::NoRuntime)); + double e = 
Image(r[0])(); // Use a very loose tolerance for floating point tests. The // kinds of bugs we're looking for are codegen bugs that // return the wrong value entirely, not floating point diff --git a/test/error/bad_host_alignment.cpp b/test/error/bad_host_alignment.cpp index 332d61c7fce5..29e7740fedb5 100644 --- a/test/error/bad_host_alignment.cpp +++ b/test/error/bad_host_alignment.cpp @@ -8,21 +8,11 @@ IRPrinter irp(std::cerr); int main(int argc, char **argv) { Func f; Var x, y; - int arr[11][10]; - uint8_t *ptr = reinterpret_cast(arr); - ptr += 1; - buffer_t buf; - buf.host = ptr; - buf.extent[0] = 10; - buf.extent[1] = 10; - buf.stride[0] = 1; - buf.stride[1] = 10; - buf.elem_size = 1; - buf.min[0] = 0; - buf.min[1] = 0; - Buffer param_buf(UInt(8), &buf); ImageParam in(UInt(8), 2); + Image param_buf(11, 10); + param_buf.crop(0, 1, 10); + in.set_host_alignment(512); f(x, y) = in(x, y); f.compute_root(); diff --git a/test/error/buffer_larger_than_two_gigs.cpp b/test/error/buffer_larger_than_two_gigs.cpp index c84386bb9291..d6d97fa415a5 100644 --- a/test/error/buffer_larger_than_two_gigs.cpp +++ b/test/error/buffer_larger_than_two_gigs.cpp @@ -4,9 +4,9 @@ using namespace Halide; int main(int argc, char **argv) { if (sizeof(void *) == 8) { - Buffer result(UInt(8), 1 << 24, 1 << 24, 1 << 24); + Image result(1 << 24, 1 << 24, 1 << 24); } else { - Buffer result(UInt(8), 1 << 12, 1 << 12, 1 << 8); + Image result(1 << 12, 1 << 12, 1 << 8); } printf("Success!\n"); } diff --git a/test/generator/acquire_release_aottest.cpp b/test/generator/acquire_release_aottest.cpp index b191a1b1a91d..e3f36efa8f39 100644 --- a/test/generator/acquire_release_aottest.cpp +++ b/test/generator/acquire_release_aottest.cpp @@ -10,14 +10,14 @@ int main(int argc, char **argv) { #include #include "HalideRuntime.h" -#include "halide_image.h" +#include "HalideImage.h" #include #include #include "acquire_release.h" -using namespace Halide::Tools; +using namespace Halide; const int W = 256, H = 256; 
diff --git a/test/generator/argvcall_aottest.cpp b/test/generator/argvcall_aottest.cpp index 4ed40f22311d..03892e89f469 100644 --- a/test/generator/argvcall_aottest.cpp +++ b/test/generator/argvcall_aottest.cpp @@ -1,12 +1,12 @@ #include "HalideRuntime.h" -#include "halide_image.h" +#include "HalideImage.h" #include #include #include "argvcall.h" -using namespace Halide::Tools; +using namespace Halide; const int kSize = 32; diff --git a/test/generator/cleanup_on_error_aottest.cpp b/test/generator/cleanup_on_error_aottest.cpp index ddba4fa2c54d..88a29addabf4 100644 --- a/test/generator/cleanup_on_error_aottest.cpp +++ b/test/generator/cleanup_on_error_aottest.cpp @@ -1,5 +1,5 @@ #include "HalideRuntime.h" -#include "halide_image.h" +#include "HalideImage.h" // Grab the internal device_interface functions #define WEAK @@ -10,13 +10,13 @@ #include "cleanup_on_error.h" -using namespace Halide::Tools; +using namespace Halide; const int size = 64; int successful_mallocs = 0, failed_mallocs = 0, frees = 0, errors = 0, device_mallocs = 0, device_frees = 0; - void *my_halide_malloc(void *user_context, size_t x) { +void *my_halide_malloc(void *user_context, size_t x) { // Only the first malloc succeeds if (successful_mallocs) { failed_mallocs++; diff --git a/test/generator/cxx_mangling_aottest.cpp b/test/generator/cxx_mangling_aottest.cpp index 8f61819668c6..12a1320dcd46 100644 --- a/test/generator/cxx_mangling_aottest.cpp +++ b/test/generator/cxx_mangling_aottest.cpp @@ -1,13 +1,13 @@ #include #include "HalideRuntime.h" -#include "halide_image.h" +#include "HalideImage.h" #include #include #include "cxx_mangling.h" -using namespace Halide::Tools; +using namespace Halide; int32_t extract_value_global(int32_t *arg) { return *arg; diff --git a/test/generator/cxx_mangling_define_extern_aottest.cpp b/test/generator/cxx_mangling_define_extern_aottest.cpp index 823b81e0dcb4..265fa93f8cdb 100644 --- a/test/generator/cxx_mangling_define_extern_aottest.cpp +++ 
b/test/generator/cxx_mangling_define_extern_aottest.cpp @@ -1,14 +1,14 @@ #include #include "HalideRuntime.h" -#include "halide_image.h" +#include "HalideImage.h" #include #include #include "cxx_mangling_define_extern.h" -using namespace Halide::Tools; +using namespace Halide; int32_t extract_value_global(int32_t *arg) { return *arg; diff --git a/test/generator/embed_image_aottest.cpp b/test/generator/embed_image_aottest.cpp index 31d29185396d..6274ea764a2d 100644 --- a/test/generator/embed_image_aottest.cpp +++ b/test/generator/embed_image_aottest.cpp @@ -2,9 +2,9 @@ #include #include "embed_image.h" -#include "halide_image.h" +#include "HalideImage.h" -using namespace Halide::Tools; +using namespace Halide; int main(int argc, char **argv) { Image input(10, 10, 3); diff --git a/test/generator/example_aottest.cpp b/test/generator/example_aottest.cpp index 3412279fa164..e16ac188852e 100644 --- a/test/generator/example_aottest.cpp +++ b/test/generator/example_aottest.cpp @@ -1,12 +1,12 @@ #include "HalideRuntime.h" -#include "halide_image.h" +#include "HalideImage.h" #include #include #include "example.h" -using namespace Halide::Tools; +using namespace Halide; const int kSize = 32; diff --git a/test/generator/extended_buffer_t_aottest.cpp b/test/generator/extended_buffer_t_aottest.cpp index b9f5df47c63c..15df4172c6a9 100644 --- a/test/generator/extended_buffer_t_aottest.cpp +++ b/test/generator/extended_buffer_t_aottest.cpp @@ -3,9 +3,9 @@ #include "extended_buffer_t_common.h" #include "extended_buffer_t.h" -#include "halide_image.h" +#include "HalideImage.h" -using namespace Halide::Tools; +using namespace Halide; int main(int argc, char **argv) { Image input(10, 10); diff --git a/test/generator/gpu_object_lifetime_aottest.cpp b/test/generator/gpu_object_lifetime_aottest.cpp index 52f92d34b0e6..0a8a4809fad0 100644 --- a/test/generator/gpu_object_lifetime_aottest.cpp +++ b/test/generator/gpu_object_lifetime_aottest.cpp @@ -1,7 +1,7 @@ #include #include #include 
"HalideRuntime.h" -#include "halide_image.h" +#include "HalideImage.h" #include #if COMPILING_FOR_CUDA @@ -14,7 +14,7 @@ #include "../common/gpu_object_lifetime.h" -using namespace Halide::Tools; +using namespace Halide; void my_halide_print(void *user_context, const char *str) { printf("%s", str); diff --git a/test/generator/gpu_only_aottest.cpp b/test/generator/gpu_only_aottest.cpp index d65dc45a1a55..7ae8c8ec447e 100644 --- a/test/generator/gpu_only_aottest.cpp +++ b/test/generator/gpu_only_aottest.cpp @@ -1,7 +1,7 @@ #include #include #include "HalideRuntime.h" -#include "halide_image.h" +#include "HalideImage.h" #include #if defined(TEST_OPENCL) #include "HalideRuntimeOpenCL.h" @@ -10,7 +10,7 @@ #endif #include "gpu_only.h" -using namespace Halide::Tools; +using namespace Halide; int main(int argc, char **argv) { #if defined(TEST_OPENCL) || defined(TEST_CUDA) diff --git a/test/generator/image_from_array_aottest.cpp b/test/generator/image_from_array_aottest.cpp index d8cae5a9d0b9..b21d8697630b 100644 --- a/test/generator/image_from_array_aottest.cpp +++ b/test/generator/image_from_array_aottest.cpp @@ -1,4 +1,4 @@ -#include "halide_image.h" +#include "HalideImage.h" #include #include @@ -7,7 +7,7 @@ #include using namespace std; -using namespace Halide::Tools; +using namespace Halide; //----------------------------------------------------------------------------- // Returns the dimension sizes of a statically sized array from inner to outer. 
diff --git a/test/generator/mandelbrot_aottest.cpp b/test/generator/mandelbrot_aottest.cpp index 77dbad24941f..b2ede4d1f0c0 100644 --- a/test/generator/mandelbrot_aottest.cpp +++ b/test/generator/mandelbrot_aottest.cpp @@ -3,9 +3,9 @@ #include #include "mandelbrot.h" -#include "halide_image.h" +#include "HalideImage.h" -using namespace Halide::Tools; +using namespace Halide; int main(int argc, char **argv) { Image output(100, 30); diff --git a/test/generator/memory_profiler_mandelbrot_aottest.cpp b/test/generator/memory_profiler_mandelbrot_aottest.cpp index 3299267d7e26..c6f52a2c8160 100644 --- a/test/generator/memory_profiler_mandelbrot_aottest.cpp +++ b/test/generator/memory_profiler_mandelbrot_aottest.cpp @@ -6,10 +6,10 @@ #include #include "HalideRuntime.h" -#include "halide_image.h" +#include "HalideImage.h" #include "memory_profiler_mandelbrot.h" -using namespace Halide::Tools; +using namespace Halide; using std::map; using std::string; diff --git a/test/generator/metadata_tester_aottest.cpp b/test/generator/metadata_tester_aottest.cpp index 8fbb43a47989..aa770ec73e91 100644 --- a/test/generator/metadata_tester_aottest.cpp +++ b/test/generator/metadata_tester_aottest.cpp @@ -1,5 +1,5 @@ #include "HalideRuntime.h" -#include "halide_image.h" +#include "HalideImage.h" #include #include @@ -10,7 +10,7 @@ #include "metadata_tester.h" #include "metadata_tester_ucon.h" -using namespace Halide::Tools; +using namespace Halide; const int kSize = 32; diff --git a/test/generator/multitarget_aottest.cpp b/test/generator/multitarget_aottest.cpp index 4303b6bdb422..9bb01b1b0250 100644 --- a/test/generator/multitarget_aottest.cpp +++ b/test/generator/multitarget_aottest.cpp @@ -3,9 +3,9 @@ #include #include "HalideRuntime.h" #include "multitarget.h" -#include "halide_image.h" +#include "HalideImage.h" -using namespace Halide::Tools; +using namespace Halide; void my_error_handler(void *user_context, const char *message) { printf("Saw Error: (%s)\n", message); diff --git 
a/test/generator/pyramid_aottest.cpp b/test/generator/pyramid_aottest.cpp index e10a5cae80e8..0afef4fcd1bc 100644 --- a/test/generator/pyramid_aottest.cpp +++ b/test/generator/pyramid_aottest.cpp @@ -2,11 +2,11 @@ #include #include "pyramid.h" -#include "halide_image.h" +#include "HalideImage.h" #include using std::vector; -using namespace Halide::Tools; +using namespace Halide; int main(int argc, char **argv) { Image input(1024, 1024); diff --git a/test/generator/tiled_blur_aottest.cpp b/test/generator/tiled_blur_aottest.cpp index a7625460e6fd..7c3b1beb9130 100644 --- a/test/generator/tiled_blur_aottest.cpp +++ b/test/generator/tiled_blur_aottest.cpp @@ -1,12 +1,12 @@ #include #include #include "HalideRuntime.h" -#include "halide_image.h" +#include "HalideImage.h" #include #include "tiled_blur.h" -using namespace Halide::Tools; +using namespace Halide; const int W = 80, H = 80; diff --git a/test/generator/tiled_blur_interleaved_aottest.cpp b/test/generator/tiled_blur_interleaved_aottest.cpp index 284cb64f4a53..648d27035617 100644 --- a/test/generator/tiled_blur_interleaved_aottest.cpp +++ b/test/generator/tiled_blur_interleaved_aottest.cpp @@ -1,12 +1,12 @@ #include #include #include "HalideRuntime.h" -#include "halide_image.h" +#include "HalideImage.h" #include #include "tiled_blur_interleaved.h" -using namespace Halide::Tools; +using namespace Halide; const int W = 80, H = 80; diff --git a/test/generator/user_context_aottest.cpp b/test/generator/user_context_aottest.cpp index 23e11e3510af..5ec05a6aa7ad 100644 --- a/test/generator/user_context_aottest.cpp +++ b/test/generator/user_context_aottest.cpp @@ -3,10 +3,10 @@ #include #include "HalideRuntime.h" -#include "halide_image.h" +#include "HalideImage.h" #include "user_context.h" -using namespace Halide::Tools; +using namespace Halide; static void *context_pointer = (void *)0xf00dd00d; diff --git a/test/generator/user_context_insanity_aottest.cpp b/test/generator/user_context_insanity_aottest.cpp index 
a28c198dab9d..651bf30bd81a 100644 --- a/test/generator/user_context_insanity_aottest.cpp +++ b/test/generator/user_context_insanity_aottest.cpp @@ -3,10 +3,10 @@ #include #include "HalideRuntime.h" -#include "halide_image.h" +#include "HalideImage.h" #include "user_context_insanity.h" -using namespace Halide::Tools; +using namespace Halide; const int num_launcher_tasks = 1000; diff --git a/test/generator/variable_num_threads_aottest.cpp b/test/generator/variable_num_threads_aottest.cpp index 8ab5b21e56fb..b39cbd272a2f 100644 --- a/test/generator/variable_num_threads_aottest.cpp +++ b/test/generator/variable_num_threads_aottest.cpp @@ -1,5 +1,5 @@ #include "HalideRuntime.h" -#include "halide_image.h" +#include "HalideImage.h" #include #include @@ -10,7 +10,7 @@ bool stop = false; int max_threads = 1; -using namespace Halide::Tools; +using namespace Halide; void mess_with_num_threads(void *) { while (!stop) { diff --git a/test/opengl/lut.cpp b/test/opengl/lut.cpp index e4534ef6d65d..590cd8454791 100644 --- a/test/opengl/lut.cpp +++ b/test/opengl/lut.cpp @@ -13,9 +13,9 @@ int test_lut1d() { Var y("y"); Var c("c"); - Image input(8, 8, 3, "input"); - for (int y=0; y input(8, 8, 3); + for (int y = 0; y < input.height(); y++) { + for (int x = 0; x < input.width(); x++) { float v = (1.0f/16.0f) + (float)x/8.0f; input(x, y, 0) = (uint8_t)(v * 255.0f); input(x, y, 1) = (uint8_t)((1.0f - v)*255.0f); @@ -24,7 +24,7 @@ int test_lut1d() { } // 1D Look Up Table case - Image lut1d(8, 1, 3, "lut1d"); + Image lut1d(8, 1, 3); for (int c = 0; c != 3; ++c) { for (int i = 0; i != 8; ++i) { lut1d(i, 0, c) = (float)(1 + i); @@ -36,7 +36,7 @@ int test_lut1d() { f0(x, y, c) = lut1d(clamp(e, 0, 7), 0, c); - Image out0(8, 8, 3,"out"); + Image out0(8, 8, 3); f0.bound(c, 0, 3); f0.glsl(x, y, c); diff --git a/test/opengl/produce.cpp b/test/opengl/produce.cpp index 2384496562e5..2de9155eaf40 100644 --- a/test/opengl/produce.cpp +++ b/test/opengl/produce.cpp @@ -14,7 +14,7 @@ int test_lut1d() { 
Var y("y"); Var c("c"); - Image input(8, 8, 3, "input"); + Image input(8, 8, 3); for (int y = 0; y < input.height(); y++) { for (int x = 0; x < input.width(); x++) { float v = (1.0f / 16.0f) + (float)x / 8.0f; @@ -37,7 +37,7 @@ int test_lut1d() { f0.bound(c, 0, 3); f0.glsl(x, y, c); - Image out0(8, 8, 3, "out"); + Image out0(8, 8, 3); f0.realize(out0); out0.copy_to_host(); diff --git a/test/opengl/rewrap_texture.cpp b/test/opengl/rewrap_texture.cpp index 870eccc4c468..d37b43acea24 100644 --- a/test/opengl/rewrap_texture.cpp +++ b/test/opengl/rewrap_texture.cpp @@ -37,9 +37,9 @@ int main() { const int height = 10; Image input(width, height, 3); - Buffer out1(UInt(8), width, height, 3); - Buffer out2(UInt(8), width, height, 3); - Buffer out3(UInt(8), width, height, 3); + Image out1(width, height, 3); + Image out2(width, height, 3); + Image out3(width, height, 3); Var x, y, c; Func g; diff --git a/test/opengl/save_state.cpp b/test/opengl/save_state.cpp index 39bf45626d09..8493297fc940 100644 --- a/test/opengl/save_state.cpp +++ b/test/opengl/save_state.cpp @@ -277,7 +277,7 @@ int main() { KnownState known_state; Image input(255, 10, 3); - Buffer out(UInt(8), 255, 10, 3); + Image out(UInt(8), 255, 10, 3); Var x, y, c; Func g; diff --git a/test/performance/boundary_conditions.cpp b/test/performance/boundary_conditions.cpp index 4a152f72181b..e0e394752447 100644 --- a/test/performance/boundary_conditions.cpp +++ b/test/performance/boundary_conditions.cpp @@ -28,11 +28,10 @@ struct Test { Image out = g.realize(W, H); - Buffer buf(out); // best of 10 x 5 runs. time = benchmark(10, 5, [&]() { - g.realize(buf); - buf.device_sync(); + g.realize(out); + out.device_sync(); }); printf("%-20s: %f us\n", name, time * 1e6); @@ -55,10 +54,9 @@ struct Test { Image out = g.realize(W, H); // best of 3 x 3 runs. 
- Buffer buf(out); time = benchmark(3, 3, [&]() { - g.realize(buf); - buf.device_sync(); + g.realize(out); + out.device_sync(); }); printf("%-20s: %f us\n", name, time * 1e6); diff --git a/test/performance/packed_planar_fusion.cpp b/test/performance/packed_planar_fusion.cpp index 1dc9fb0e128c..c16f45f38f16 100644 --- a/test/performance/packed_planar_fusion.cpp +++ b/test/performance/packed_planar_fusion.cpp @@ -39,29 +39,11 @@ double test_copy(Image src, Image dst) { } Image make_packed(uint8_t *host, int W, int H) { - buffer_t buf = {0}; - buf.host = host; - buf.extent[0] = W; - buf.stride[0] = 3; - buf.extent[1] = H; - buf.stride[1] = buf.stride[0] * buf.extent[0]; - buf.extent[2] = 3; - buf.stride[2] = 1; - buf.elem_size = 1; - return Image(&buf); + return Image::make_interleaved(host, W, H, 3); } Image make_planar(uint8_t *host, int W, int H) { - buffer_t buf = {0}; - buf.host = host; - buf.extent[0] = W; - buf.stride[0] = 1; - buf.extent[1] = H; - buf.stride[1] = buf.stride[0] * buf.extent[0]; - buf.extent[2] = 3; - buf.stride[2] = buf.stride[1] * buf.extent[1]; - buf.elem_size = 1; - return Image(&buf); + return Image(host, W, H, 3); } int main(int argc, char **argv) { diff --git a/test/performance/rgb_interleaved.cpp b/test/performance/rgb_interleaved.cpp index 2236b946c6c8..6cd614a5fc62 100644 --- a/test/performance/rgb_interleaved.cpp +++ b/test/performance/rgb_interleaved.cpp @@ -12,63 +12,32 @@ void test_deinterleave() { dst(x, y, c) = src(x, y, c); - src.set_stride(0, 3); - src.set_stride(2, 1); - src.set_extent(2, 3); + src.dim(0).set_stride(3) + .dim(2).set_stride(1).set_bounds(0, 3); // This is the default format for Halide, but made explicit for illustration. 
- dst.output_buffer().set_stride(0, 1); - dst.output_buffer().set_extent(2, 3); + dst.output_buffer() + .dim(0).set_stride(1) + .dim(2).set_extent(3); dst.reorder(c, x, y).unroll(c); dst.vectorize(x, 16); - // Run test many times to avoid timing jitter - const int iterations = 20; // Allocate two 16 megapixel, 3 channel, 8-bit images -- input and output - const int32_t buffer_side_length = (1 << 12); - const int32_t buffer_size = buffer_side_length * buffer_side_length; - - uint8_t *src_storage(new uint8_t[buffer_size * 3]); - uint8_t *dst_storage(new uint8_t[buffer_size * 3]); - - buffer_t src_buffer; - buffer_t dst_buffer; // Setup src to be RGB interleaved, with no extra padding between channels or rows. - memset(&src_buffer, 0, sizeof(src_buffer)); - src_buffer.host = src_storage; - src_buffer.extent[0] = buffer_side_length; - src_buffer.stride[0] = 3; - src_buffer.extent[1] = buffer_side_length; - src_buffer.stride[1] = src_buffer.stride[0] * src_buffer.extent[0]; - src_buffer.extent[2] = 3; - src_buffer.stride[2] = 1; - src_buffer.elem_size = 1; + Image src_image = Image::make_interleaved(1 << 12, 1 << 12, 3); // Setup dst to be planar, with no extra padding between channels or rows. 
- memset(&dst_buffer, 0, sizeof(dst_buffer)); - dst_buffer.host = dst_storage; - dst_buffer.extent[0] = buffer_side_length; - dst_buffer.stride[0] = 1; - dst_buffer.extent[1] = buffer_side_length; - dst_buffer.stride[1] = dst_buffer.stride[0] * dst_buffer.extent[0]; - dst_buffer.extent[2] = 3; - dst_buffer.stride[2] = dst_buffer.stride[1] * dst_buffer.extent[1]; - dst_buffer.elem_size = 1; - - Image src_image(&src_buffer, "src_image"); - Image dst_image(&dst_buffer, "dst_image"); - - for (int32_t x = 0; x < buffer_side_length; x++) { - for (int32_t y = 0; y < buffer_side_length; y++) { - src_image(x, y, 0) = 0; - src_image(x, y, 1) = 128; - src_image(x, y, 2) = 255; - } - } - memset(dst_storage, 0, buffer_size); + Image dst_image(1 << 12, 1 << 12, 3); + + src_image.for_each_element([&](int x, int y) { + src_image(x, y, 0) = 0; + src_image(x, y, 1) = 128; + src_image(x, y, 2) = 255; + }); + dst_image.fill(0); src.set(src_image); @@ -77,49 +46,36 @@ void test_deinterleave() { // Warm up caches, etc. dst.realize(dst_image); - double t1 = benchmark(1, iterations, [&]() { + double t1 = benchmark(1, 20, [&]() { dst.realize(dst_image); }); - printf("Interleaved to planar bandwidth %.3e byte/s.\n", buffer_size / t1); + printf("Interleaved to planar bandwidth %.3e byte/s.\n", + dst_image.number_of_elements() / t1); - for (int32_t x = 0; x < buffer_side_length; x++) { - for (int32_t y = 0; y < buffer_side_length; y++) { + dst_image.for_each_element([&](int x, int y) { assert(dst_image(x, y, 0) == 0); assert(dst_image(x, y, 1) == 128); assert(dst_image(x, y, 2) == 255); - } - } + }); // Setup a semi-planar output case. 
- memset(&dst_buffer, 0, sizeof(dst_buffer)); - dst_buffer.host = dst_storage; - dst_buffer.extent[0] = buffer_side_length; - dst_buffer.stride[0] = 1; - dst_buffer.extent[1] = buffer_side_length; - dst_buffer.stride[1] = dst_buffer.stride[0] * dst_buffer.extent[0] * 3; - dst_buffer.extent[2] = 3; - dst_buffer.stride[2] = dst_buffer.extent[0]; - dst_buffer.elem_size = 1; - - memset(dst_storage, 0, buffer_size); - - double t2 = benchmark(1, iterations, [&]() { + dst_image = Image(1 << 12, 3, 1 << 12); + dst_image.transpose(1, 2); + dst_image.fill(0); + + double t2 = benchmark(1, 20, [&]() { dst.realize(dst_image); }); - for (int32_t x = 0; x < buffer_side_length; x++) { - for (int32_t y = 0; y < buffer_side_length; y++) { + dst_image.for_each_element([&](int x, int y) { assert(dst_image(x, y, 0) == 0); assert(dst_image(x, y, 1) == 128); assert(dst_image(x, y, 2) == 255); - } - } + }); - printf("Interleaved to semi-planar bandwidth %.3e byte/s.\n", buffer_size / t2); - - delete[] src_storage; - delete[] dst_storage; + printf("Interleaved to semi-planar bandwidth %.3e byte/s.\n", + dst_image.number_of_elements() / t2); } void test_interleave(bool fast) { @@ -130,13 +86,11 @@ void test_interleave(bool fast) { dst(x, y, c) = src(x, y, c); // This is the default format for Halide, but made explicit for illustration. 
- src.set_stride(0, 1); - src.set_extent(2, 3); + src.dim(0).set_stride(1).dim(2).set_extent(3); - dst.output_buffer().set_min(2, 0); - dst.output_buffer().set_stride(0, 3); - dst.output_buffer().set_stride(2, 1); - dst.output_buffer().set_extent(2, 3); + dst.output_buffer() + .dim(0).set_stride(3) + .dim(2).set_stride(1).set_bounds(0, 3); if( fast ) { dst.reorder(c, x, y).bound(c, 0, 3).unroll(c); @@ -145,81 +99,44 @@ void test_interleave(bool fast) { dst.reorder(c, x, y).vectorize(x, 16); } - // Run test many times to avoid timing jitter - const int iterations = 20; - // Allocate two 16 megapixel, 3 channel, 8-bit images -- input and output - const int32_t buffer_side_length = (1 << 12); - const int32_t buffer_size = buffer_side_length * buffer_side_length; - uint8_t *src_storage(new uint8_t[buffer_size * 3]); - uint8_t *dst_storage(new uint8_t[buffer_size * 3]); + // Setup src to be planar + Image src_image(1 << 12, 1 << 12, 3); - buffer_t src_buffer; - buffer_t dst_buffer; + // Setup dst to be interleaved + Image dst_image = Image::make_interleaved(1 << 12, 1 << 12, 3); - // Setup src to be RGB interleaved, with no extra padding between channels or rows. - memset(&src_buffer, 0, sizeof(src_buffer)); - src_buffer.host = src_storage; - src_buffer.extent[0] = buffer_side_length; - src_buffer.stride[0] = 1; - src_buffer.extent[1] = buffer_side_length; - src_buffer.stride[1] = src_buffer.stride[0] * src_buffer.extent[0]; - src_buffer.extent[2] = 3; - src_buffer.stride[2] = src_buffer.stride[1] * src_buffer.extent[1]; - src_buffer.elem_size = 1; - - // Setup dst to be planar, with no extra padding between channels or rows. 
- memset(&dst_buffer, 0, sizeof(dst_buffer)); - dst_buffer.host = dst_storage; - dst_buffer.extent[0] = buffer_side_length; - dst_buffer.stride[0] = 3; - dst_buffer.extent[1] = buffer_side_length; - dst_buffer.stride[1] = dst_buffer.stride[0] * dst_buffer.extent[0]; - dst_buffer.extent[2] = 3; - dst_buffer.stride[2] = 1; - dst_buffer.elem_size = 1; - - Image src_image(&src_buffer, "src_image"); - Image dst_image(&dst_buffer, "dst_image"); - - for (int32_t x = 0; x < buffer_side_length; x++) { - for (int32_t y = 0; y < buffer_side_length; y++) { - src_image(x, y, 0) = 0; - src_image(x, y, 1) = 128; - src_image(x, y, 2) = 255; - } - } - memset(dst_storage, 0, buffer_size); + src_image.for_each_element([&](int x, int y) { + src_image(x, y, 0) = 0; + src_image(x, y, 1) = 128; + src_image(x, y, 2) = 255; + }); + dst_image.fill(0); src.set(src_image); - if( fast ) { + if (fast) { dst.compile_to_lowered_stmt("rgb_interleave_fast.stmt", dst.infer_arguments()); } else { dst.compile_to_lowered_stmt("rgb_interleave_slow.stmt", dst.infer_arguments()); } - dst.compile_jit(); // Warm up caches, etc. 
dst.realize(dst_image); - double t = benchmark(1, iterations, [&]() { + double t = benchmark(1, 20, [&]() { dst.realize(dst_image); }); - printf("Planar to interleaved bandwidth %.3e byte/s.\n", buffer_size / t); + printf("Planar to interleaved bandwidth %.3e byte/s.\n", + dst_image.number_of_elements() / t); - for (int32_t x = 0; x < buffer_side_length; x++) { - for (int32_t y = 0; y < buffer_side_length; y++) { + dst_image.for_each_element([&](int x, int y) { assert(dst_image(x, y, 0) == 0); assert(dst_image(x, y, 1) == 128); assert(dst_image(x, y, 2) == 255); - } - } - - delete[] src_storage; - delete[] dst_storage; + }); } int main(int argc, char **argv) { diff --git a/test/performance/wrap.cpp b/test/performance/wrap.cpp index 639b577889be..e3ea91e51ffa 100644 --- a/test/performance/wrap.cpp +++ b/test/performance/wrap.cpp @@ -125,23 +125,20 @@ int main(int argc, char **argv) { Image out1(1000, 1000); Image out2(1000, 1000); Image out3(1000, 1000); - Buffer buf1(out1); - Buffer buf2(out2); - Buffer buf3(out3); double shared_time = benchmark(5, 5, [&]() { - use_shared.realize(buf1); - buf1.device_sync(); + use_shared.realize(out1); + out1.device_sync(); }); double l1_time = benchmark(5, 5, [&]() { - use_l1.realize(buf2); - buf2.device_sync(); + use_l1.realize(out2); + out2.device_sync(); }); double wrap_time = benchmark(5, 5, [&]() { - use_wrap_for_shared.realize(buf3); - buf3.device_sync(); + use_wrap_for_shared.realize(out3); + out3.device_sync(); }); // Check correctness of the wrapper version diff --git a/test/renderscript/aot_copy.cpp b/test/renderscript/aot_copy.cpp index e01c4bdcff71..6827e1343e62 100644 --- a/test/renderscript/aot_copy.cpp +++ b/test/renderscript/aot_copy.cpp @@ -3,50 +3,34 @@ using namespace Halide; using namespace Halide::Internal; -Image make_interleaved_image(uint8_t *host, int W, int H, int channels) { - buffer_t buf = {0}; - buf.host = host; - buf.extent[0] = W; - buf.stride[0] = channels; - buf.extent[1] = H; - buf.stride[1] = 
buf.stride[0] * buf.extent[0]; - buf.extent[2] = channels; - buf.stride[2] = 1; - buf.elem_size = 1; - return Image(&buf); -} - void copy_interleaved(bool vectorize, int channels) { ImageParam input8(UInt(8), 3, "input"); - input8.set_stride(0, channels) - .set_stride(1, Halide::Expr()) - .set_stride(2, 1) - .set_bounds(2, 0, channels); // expecting interleaved image - uint8_t *in_buf = new uint8_t[128 * 128 * channels]; - uint8_t *out_buf = new uint8_t[128 * 128 * channels]; - Image in = make_interleaved_image(in_buf, 128, 128, channels); - Image out = make_interleaved_image(out_buf, 128, 128, channels); + input8 + .dim(0).set_stride(channels) + .dim(2).set_stride(1).set_bounds(0, channels); + + Image in = Image::make_interleaved(128, 128, channels); + Image out = Image::make_interleaved(128, 128, channels); input8.set(in); Var x, y, c; Func result("result"); result(x, y, c) = input8(x, y, c); + result.output_buffer() - .set_stride(0, channels) - .set_stride(1, Halide::Expr()) - .set_stride(2, 1) - .set_bounds(2, 0, channels); // expecting interleaved image + .dim(0).set_stride(channels) + .dim(2).set_stride(1).set_bounds(0, channels); result.bound(c, 0, channels); result.shader(x, y, c, DeviceAPI::Renderscript); - if (vectorize) result.vectorize(c); + if (vectorize) { + result.vectorize(c); + } std::vector args; args.push_back(input8); result.compile_to_file("aot_copy", args); - delete[] in_buf; - delete[] out_buf; } int main(int argc, char **argv) { diff --git a/test/renderscript/aot_copy_error.cpp b/test/renderscript/aot_copy_error.cpp index 102a0abd92aa..d1321af0e9ce 100644 --- a/test/renderscript/aot_copy_error.cpp +++ b/test/renderscript/aot_copy_error.cpp @@ -3,58 +3,37 @@ using namespace Halide; using namespace Halide::Internal; -Image make_interleaved_image(uint8_t *host, int W, int H, - int nChannels) { - buffer_t buf = { 0 }; - buf.host = host; - buf.extent[0] = W; - buf.stride[0] = nChannels; - buf.extent[1] = H; - buf.stride[1] = buf.stride[0] * 
buf.extent[0]; - buf.extent[2] = nChannels; - buf.stride[2] = 1; - buf.elem_size = 1; - return Image(&buf); -} - void copy_interleaved(bool vectorize, int channels) { ImageParam input8(UInt(8), 3, "input"); - input8.set_stride(0, channels) - .set_stride(1, Halide::Expr()) - .set_stride(2, 1) - .set_bounds(2, 0, channels); // expecting interleaved image - uint8_t *in_buf = new uint8_t[128 * 128 * channels]; - uint8_t *out_buf = new uint8_t[128 * 128 * channels]; - Image in = make_interleaved_image(in_buf, 128, 128, channels); - Image out = make_interleaved_image(out_buf, 128, 128, channels); + input8 + .dim(0).set_stride(channels) + .dim(2).set_stride(1).set_bounds(0, channels); // expecting interleaved image + Image in = Image::make_interleaved(128, 128, channels); + Image out = Image::make_interleaved(128, 128, channels); input8.set(in); Var x, y, c; Func result("result"); result(x, y, c) = input8(x, y, c); result.output_buffer() - .set_stride(0, channels) - .set_stride(1, Halide::Expr()) - .set_stride(2, 1) - .set_bounds(2, 0, channels); // expecting interleaved image + .dim(0).set_stride(channels) + .dim(2).set_stride(1).set_bounds(0, channels); result.bound(c, 0, channels); result.shader(x, y, c, DeviceAPI::Renderscript); - if (vectorize) result.vectorize(c); + if (vectorize) { + result.vectorize(c); + } std::vector args; args.push_back(input8); result.compile_to_file("aot_copy_error", args); - delete[] in_buf; - delete[] out_buf; } int main(int argc, char **argv) { - const bool VECTORIZE = true; - - copy_interleaved(VECTORIZE, 3); + copy_interleaved(true, 3); - std::cout << "Done!" 
<< std::endl; + printf("Success!\n"); return 0; } diff --git a/test/renderscript/jit_copy.cpp b/test/renderscript/jit_copy.cpp index 57e8398c99a8..659cb4f5c4ca 100644 --- a/test/renderscript/jit_copy.cpp +++ b/test/renderscript/jit_copy.cpp @@ -180,27 +180,13 @@ class ValidateInterleavedVectorizedPipeline: public ValidateInterleavedPipeline ValidateInterleavedVectorizedPipeline(int _channels) : ValidateInterleavedPipeline(_channels) {} }; -Image make_interleaved_image(uint8_t *host, int W, int H, int channels) { - buffer_t buf = { 0 }; - buf.host = host; - buf.extent[0] = W; - buf.stride[0] = channels; - buf.extent[1] = H; - buf.stride[1] = buf.stride[0] * buf.extent[0]; - buf.extent[2] = channels; - buf.stride[2] = 1; - buf.elem_size = 1; - return Image(&buf); -} - void copy_interleaved(bool vectorized = false, int channels = 4) { ImageParam input8(UInt(8), 3, "input"); input8.set_stride(0, channels) .set_stride(1, Halide::Expr()) .set_stride(2, 1) .set_bounds(2, 0, channels); // expecting interleaved image - uint8_t *in_buf = new uint8_t[128 * 128 * channels]; - Image in = make_interleaved_image(in_buf, 128, 128, channels); + Image in = Image::make_interleaved(128, 128, channels); input8.set(in); Var x, y, c; @@ -214,16 +200,16 @@ void copy_interleaved(bool vectorized = false, int channels = 4) { result.bound(c, 0, channels); result.shader(x, y, c, DeviceAPI::Renderscript); - if (vectorized) result.vectorize(c); + if (vectorized) { + result.vectorize(c); + } result.add_custom_lowering_pass( - vectorized? + vectorized ? 
new ValidateInterleavedVectorizedPipeline(channels): new ValidateInterleavedPipeline(channels)); result.compile_jit(); - - delete[] in_buf; } int main(int argc, char **argv) { diff --git a/tutorial/lesson_12_using_the_gpu.cpp b/tutorial/lesson_12_using_the_gpu.cpp index d9fc1209db21..e26aaca07a51 100644 --- a/tutorial/lesson_12_using_the_gpu.cpp +++ b/tutorial/lesson_12_using_the_gpu.cpp @@ -162,7 +162,7 @@ class MyPipeline { // Use the GPU threads for the x and y coordinates of the // padded input. padded.gpu_threads(x, y); - + // JIT-compile the pipeline for the GPU. CUDA, OpenCL, or // Metal are not enabled by default. We have to construct a // Target object, enable one of them, and then pass that @@ -201,14 +201,7 @@ class MyPipeline { void test_performance() { // Test the performance of the scheduled MyPipeline. - // If we realize curved into a Halide::Image, that will - // unfairly penalize GPU performance by including a GPU->CPU - // copy in every run. Halide::Image objects always exist on - // the CPU. - - // Halide::Buffer, however, represents a buffer that may - // exist on either CPU or GPU or both. - Buffer output(UInt(8), input.width(), input.height(), input.channels()); + Image output(input.width(), input.height(), input.channels()); // Run the filter once to initialize any GPU runtime state. curved.realize(output); @@ -242,6 +235,13 @@ class MyPipeline { Image output = curved.realize(input.width(), input.height(), input.channels()); + // Halide by default does not copy the data back from the GPU + // (you might want to keep it there if you're going to feed it + // into another GPU pipeline). We can request that it be + // copied back like so: + printf("%llx\n", output.raw_buffer()->dev); + output.copy_to_host(); + // Check against the reference output. for (int c = 0; c < input.channels(); c++) { for (int y = 0; y < input.height(); y++) {