forked from halide/Halide
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add and reschedule harris corner detector
- Loading branch information
Showing
4 changed files
with
168 additions
and
127 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
include ../support/Makefile.inc | ||
|
||
# `all`, `test` and `clean` name actions, not files. Declaring them phony keeps
# a stray file with one of these names from making the target look up to date.
.PHONY: all test clean

# Default target: build the benchmark executable for the selected target.
all: $(BIN)/$(HL_TARGET)/filter

# Run the benchmark executable to produce the output image.
test: $(BIN)/$(HL_TARGET)/out.png

# Build the generator executable. Header prerequisites are filtered out of the
# compile line so that only sources and libraries are passed to the compiler.
$(GENERATOR_BIN)/harris.generator: harris_generator.cpp $(GENERATOR_DEPS)
	@mkdir -p $(@D)
	$(CXX) $(CXXFLAGS) -g $(filter-out %.h,$^) -o $@ $(LDFLAGS) $(HALIDE_SYSTEM_LIBS) $(USE_EXPORT_DYNAMIC)

# Run the generator with auto_schedule=false; the pattern stem ($*) is passed
# through as the Halide target and as the output subdirectory.
$(BIN)/%/harris.a: $(GENERATOR_BIN)/harris.generator
	@mkdir -p $(@D)
	$< -g harris -f harris -o $(BIN)/$* target=$* auto_schedule=false

# Same generator, emitted under a second function name with auto_schedule=true.
$(BIN)/%/harris_auto_schedule.a: $(GENERATOR_BIN)/harris.generator
	@mkdir -p $(@D)
	$< -g harris -f harris_auto_schedule -o $(BIN)/$* target=$* auto_schedule=true

# Link the benchmark driver against both generated libraries.
$(BIN)/%/filter: filter.cpp $(BIN)/%/harris.a $(BIN)/%/harris_auto_schedule.a
	@mkdir -p $(@D)
	$(CXX) $(CXXFLAGS) -I$(BIN)/$* -Wall -O3 $^ -o $@ $(LDFLAGS) $(IMAGE_IO_FLAGS) $(CUDA_LDFLAGS) $(OPENCL_LDFLAGS) $(OPENGL_LDFLAGS)

# Run the driver on the sample image to produce out.png.
$(BIN)/%/out.png: $(BIN)/%/filter
	$< ../images/rgb.png $(BIN)/$*/out.png

# Remove every build product.
clean:
	rm -rf $(BIN)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
#include <cstdio> | ||
#include <cstdlib> | ||
#include <cassert> | ||
|
||
#include "HalideRuntime.h" | ||
#include "HalideBuffer.h" | ||
|
||
#include "harris.h" | ||
#include "harris_auto_schedule.h" | ||
|
||
#include "halide_benchmark.h" | ||
#include "halide_image_io.h" | ||
|
||
using namespace Halide::Tools; | ||
|
||
int main(int argc, char **argv) { | ||
if (argc != 3) { | ||
printf("Usage: %s in out\n", argv[0]); | ||
return 1; | ||
} | ||
|
||
Halide::Runtime::Buffer<float> input = load_and_convert_image(argv[1]); | ||
|
||
// The harris app doesn't use a boundary condition | ||
Halide::Runtime::Buffer<float> output(input.width() - 6, input.height() - 6); | ||
output.set_min(3, 3); | ||
|
||
|
||
double best_manual = benchmark([&]() { | ||
harris(input, output); | ||
output.device_sync(); | ||
}); | ||
printf("Manually-tuned time: %gms\n", best_manual * 1e3); | ||
|
||
double best_auto = benchmark([&]() { | ||
harris_auto_schedule(input, output); | ||
output.device_sync(); | ||
}); | ||
printf("Auto-scheduled time: %gms\n", best_auto * 1e3); | ||
|
||
convert_and_save_image(output, argv[2]); | ||
|
||
return 0; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,97 @@ | ||
#include "Halide.h" | ||
|
||
namespace { | ||
|
||
using namespace Halide; | ||
|
||
// Builds the expression that adds up the nine samples of `f` in the 3x3
// window centred on (x, y).
//
// The taps are accumulated strictly left to right, with the x offset
// outermost (x-1, x, x+1) and the y offset innermost (y-1, y, y+1), so the
// floating-point association of the sum is fixed.
Expr sum3x3(Func f, Var x, Var y) {
    const Expr x_prev = x - 1;
    const Expr x_next = x + 1;
    const Expr y_prev = y - 1;
    const Expr y_next = y + 1;

    Expr total = f(x_prev, y_prev);
    total = total + f(x_prev, y);
    total = total + f(x_prev, y_next);

    total = total + f(x, y_prev);
    total = total + f(x, y);
    total = total + f(x, y_next);

    total = total + f(x_next, y_prev);
    total = total + f(x_next, y);
    total = total + f(x_next, y_next);

    return total;
}
|
||
// Halide generator for a Harris corner response.
//
// Input:  3-dimensional float buffer; channels 0, 1 and 2 are combined into a
//         single grayscale plane.
// Output: 2-dimensional float buffer holding det(S) - 0.04 * trace(S)^2, where
//         S is the 3x3-summed matrix of gradient products.
//
// No boundary condition is applied to the input, so the caller must request an
// output region that lies far enough inside the input.
class Harris : public Halide::Generator<Harris> {
public:
    Input<Buffer<float>> input{"input", 3};
    Output<Buffer<float>> output{"output", 2};

    // Defines the algorithm, supplies size estimates for the autoscheduler and,
    // when auto_schedule is off, applies the hand-written schedule.
    void generate() {
        Var x("x"), y("y");

        // Algorithm

        // Weighted sum of the three input channels.
        Func gray("gray");
        gray(x, y) = (0.299f * input(x, y, 0) +
                      0.587f * input(x, y, 1) +
                      0.114f * input(x, y, 2));

        // 3x3 Sobel-style derivative along y, scaled by 1/12.
        Func Iy("Iy");
        Iy(x, y) = gray(x-1, y-1)*(-1.0f/12) + gray(x-1, y+1)*(1.0f/12) +
                   gray(x, y-1)*(-2.0f/12) + gray(x, y+1)*(2.0f/12) +
                   gray(x+1, y-1)*(-1.0f/12) + gray(x+1, y+1)*(1.0f/12);

        // 3x3 Sobel-style derivative along x, scaled by 1/12.
        Func Ix("Ix");
        Ix(x, y) = gray(x-1, y-1)*(-1.0f/12) + gray(x+1, y-1)*(1.0f/12) +
                   gray(x-1, y)*(-2.0f/12) + gray(x+1, y)*(2.0f/12) +
                   gray(x-1, y+1)*(-1.0f/12) + gray(x+1, y+1)*(1.0f/12);

        // Per-pixel products of the gradients.
        Func Ixx("Ixx");
        Ixx(x, y) = Ix(x, y) * Ix(x, y);

        Func Iyy("Iyy");
        Iyy(x, y) = Iy(x, y) * Iy(x, y);

        Func Ixy("Ixy");
        Ixy(x, y) = Ix(x, y) * Iy(x, y);

        // Gradient products summed over a 3x3 window.
        Func Sxx("Sxx");
        Sxx(x, y) = sum3x3(Ixx, x, y);

        Func Syy("Syy");
        Syy(x, y) = sum3x3(Iyy, x, y);

        Func Sxy("Sxy");
        Sxy(x, y) = sum3x3(Ixy, x, y);

        // Determinant and trace of the 2x2 matrix [[Sxx, Sxy], [Sxy, Syy]].
        Func det("det");
        det(x, y) = Sxx(x, y) * Syy(x, y) - Sxy(x, y) * Sxy(x, y);

        Func trace("trace");
        trace(x, y) = Sxx(x, y) + Syy(x, y);

        // Corner response with the constant k fixed at 0.04.
        output(x, y) = det(x, y) - 0.04f * trace(x, y) * trace(x, y);

        // Estimates (for autoscheduler; ignored otherwise)
        {
            const int kWidth = 1536;
            const int kHeight = 2560;
            input.dim(0).set_estimate(0, kWidth)
                 .dim(1).set_estimate(0, kHeight)
                 .dim(2).set_estimate(0, 3);
            // The estimated output region is the input shrunk by 3 pixels on
            // every side.
            output.dim(0).set_estimate(3, kWidth - 6)
                  .dim(1).set_estimate(3, kHeight - 6);
        }

        // Schedule
        if (!auto_schedule) {
            Var xi("xi"), yi("yi");
            if (get_target().has_gpu_feature()) {
                // Tile the output 30x14 and compute both gradients per tile,
                // mapped onto GPU threads.
                // NOTE(review): 30x14 presumably makes the gradient region
                // (tile plus a 1-pixel halo) 32x16 - confirm.
                output.gpu_tile(x, y, xi, yi, 30, 14);
                Ix.compute_at(output, x).gpu_threads(x, y);
                Iy.compute_at(output, x).gpu_threads(x, y);
                // Compute Ix inside Iy's loop nest, fused at x.
                Ix.compute_with(Iy, x);
            } else {
                const int vec = natural_vector_size<float>();
                // Split rows into strips of 32, run the strips in parallel and
                // vectorize along x.
                output.split(y, y, yi, 32).parallel(y).vectorize(x, vec);
                // Storage for the intermediates is allocated once per strip,
                // while their values are computed at each row of the strip.
                gray.store_at(output, y).compute_at(output, yi).vectorize(x, vec);
                Ix.store_at(output, y).compute_at(output, yi).vectorize(x, vec);
                Iy.store_at(output, y).compute_at(output, yi).vectorize(x, vec);
                // Compute Ix inside Iy's loop nest, fused at x.
                Ix.compute_with(Iy, x);
            }
        }
    }
};
|
||
} // namespace | ||
|
||
HALIDE_REGISTER_GENERATOR(Harris, harris) |
This file was deleted.
Oops, something went wrong.