Skip to content

Commit

Permalink
Add files via upload
Browse files Browse the repository at this point in the history
  • Loading branch information
fqnchina committed Nov 7, 2018
1 parent e29aa6c commit 154a024
Show file tree
Hide file tree
Showing 17 changed files with 18,161 additions and 0 deletions.
62 changes: 62 additions & 0 deletions compilation/EdgeComputation.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
#include "THCUNN.h"
#include "common.h"
#include "THCHalf.h"
#include "THCHalfAutoNumerics.cuh"

#include "EdgeComputation.h"

void THNN_CudaEdgeComputation_updateOutput(THCState *state, THCudaTensor *input, THCudaTensor *output) {

long batchSize = input->size[0];
long plane = input->size[1];
long height = input->size[2];
long width = input->size[3];

THCudaTensor *input_n = THCudaTensor_new(state);
THCudaTensor *output_n = THCudaTensor_new(state);

// For each elt in batch, do:
for (int elt = 0; elt < batchSize; elt ++) {
// Matrix mulitply per output:
THCudaTensor_select(state, input_n, input, 0, elt);
THCudaTensor_select(state, output_n, output, 0, elt);

EdgeComputation(THCState_getCurrentStream(state),
THCudaTensor_data(state, input_n),
THCudaTensor_data(state, output_n),
height, width);
}

THCudaTensor_free(state, input_n);
THCudaTensor_free(state, output_n);
}

void THNN_CudaEdgeComputation_updateGradInput(THCState *state, THCudaTensor *input, THCudaTensor *gradOutput, THCudaTensor *gradInput) {

long batchSize = input->size[0];
long plane = input->size[1];
long height = input->size[2];
long width = input->size[3];

THCudaTensor *input_n = THCudaTensor_new(state);
THCudaTensor *gradOutput_n = THCudaTensor_new(state);
THCudaTensor *gradInput_n = THCudaTensor_new(state);

// For each elt in batch, do:
for (int elt = 0; elt < batchSize; elt ++) {
// Matrix mulitply per output:
THCudaTensor_select(state, input_n, input, 0, elt);
THCudaTensor_select(state, gradOutput_n, gradOutput, 0, elt);
THCudaTensor_select(state, gradInput_n, gradInput, 0, elt);

EdgeComputation_backward(THCState_getCurrentStream(state),
THCudaTensor_data(state, input_n),
THCudaTensor_data(state, gradOutput_n),
THCudaTensor_data(state, gradInput_n),
height, width);
}

THCudaTensor_free(state, input_n);
THCudaTensor_free(state, gradOutput_n);
THCudaTensor_free(state, gradInput_n);
}
101 changes: 101 additions & 0 deletions compilation/EdgeComputation.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
#include "common.h"

#define MAX(x, y) (((x) > (y)) ? (x) : (y))
#define MIN(x, y) (((x) < (y)) ? (x) : (y))

template<typename Dtype>
__global__ void EdgeComputation_kernel(const int num_kernels, Dtype* input, Dtype* output, int height, int width) {

CUDA_KERNEL_LOOP(index, num_kernels)
{
int point_offset = index;
int x = index % width;
int y = index / width;

int window_size = 1;
for (int m = -window_size; m <= window_size; m++) {
for (int n = -window_size; n <= window_size; n++) {
if (y+m < 0 || y+m >= height || x+n < 0 || x+n >= width)
continue;
int image_offset = (y + m) * width + x + n;
*(output + point_offset) += fabs(*(input + point_offset)-*(input + image_offset));
}
}

if (y-2 >= 0)
*(output + point_offset) += fabs(*(input + point_offset)-*(input + (y - 2) * width + x));
if (y+2 < height)
*(output + point_offset) += fabs(*(input + point_offset)-*(input + (y + 2) * width + x));
if (x-2 >= 0)
*(output + point_offset) += fabs(*(input + point_offset)-*(input + y * width + x - 2));
if (x+2 < width)
*(output + point_offset) += fabs(*(input + point_offset)-*(input + y * width + x + 2));

*(output + point_offset) = *(output + point_offset)/6;
}
}

template<typename Dtype>
void EdgeComputation(cudaStream_t stream, Dtype* input, Dtype* output, int height, int width)
{
int dimSize = 1024;
int num_kernels = height * width;
int grid = (num_kernels + dimSize - 1) / dimSize;
EdgeComputation_kernel<<<grid, dimSize, 0, stream>>>(num_kernels, input, output, height, width);
}

template<typename Dtype>
__global__ void EdgeComputation_backward_kernel(const int num_kernels, Dtype* input, Dtype* gradOutput, Dtype* gradInput, int height, int width) {

CUDA_KERNEL_LOOP(index, num_kernels)
{
int point_offset = index;
int x = index % width;
int y = index / width;

int window_size = 1;
for (int m = -window_size; m <= window_size; m++) {
for (int n = -window_size; n <= window_size; n++) {
if (y+m < 0 || y+m >= height || x+n < 0 || x+n >= width)
continue;
int image_offset = (y + m) * width + x + n;

*(gradInput + point_offset) += (*(input + point_offset) > *(input + image_offset) ? 1 : -1) * *(gradOutput + point_offset);
*(gradInput + point_offset) += (*(input + point_offset) > *(input + image_offset) ? 1 : -1) * *(gradOutput + image_offset);
}
}

if (y-2 >= 0)
{
*(gradInput + point_offset) += (*(input + point_offset) > *(input + (y - 2) * width + x) ? 1 : -1) * *(gradOutput + point_offset);
*(gradInput + point_offset) += (*(input + point_offset) > *(input + (y - 2) * width + x) ? 1 : -1) * *(gradOutput + (y - 2) * width + x);
}
if (y+2 < height)
{
*(gradInput + point_offset) += (*(input + point_offset) > *(input + (y + 2) * width + x) ? 1 : -1) * *(gradOutput + point_offset);
*(gradInput + point_offset) += (*(input + point_offset) > *(input + (y + 2) * width + x) ? 1 : -1) * *(gradOutput + (y + 2) * width + x);
}
if (x-2 >= 0)
{
*(gradInput + point_offset) += (*(input + point_offset) > *(input + y * width + x - 2) ? 1 : -1) * *(gradOutput + point_offset);
*(gradInput + point_offset) += (*(input + point_offset) > *(input + y * width + x - 2) ? 1 : -1) * *(gradOutput + y * width + x - 2);
}
if (x+2 < width)
{
*(gradInput + point_offset) += (*(input + point_offset) > *(input + y * width + x + 2) ? 1 : -1) * *(gradOutput + point_offset);
*(gradInput + point_offset) += (*(input + point_offset) > *(input + y * width + x + 2) ? 1 : -1) * *(gradOutput + y * width + x + 2);
}

*(gradInput + point_offset) = *(gradInput + point_offset)/6;

}
}

template<typename Dtype>
void EdgeComputation_backward(cudaStream_t stream, Dtype* input, Dtype* gradOutput, Dtype* gradInput, int height, int width)
{
int dimSize = 1024;
int num_kernels = height * width;
int grid = (num_kernels + dimSize - 1) / dimSize;
EdgeComputation_backward_kernel<<<grid, dimSize, 0, stream>>>(num_kernels, input, gradOutput, gradInput, height, width);
}
36 changes: 36 additions & 0 deletions compilation/EdgeComputation.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
local THNN = require 'nn.THNN'
local EdgeComputation, parent = torch.class('nn.EdgeComputation', 'nn.Module')

function EdgeComputation:__init(scale)
parent.__init(self)
self.scale = scale or 1
end

function EdgeComputation:updateOutput(input)
self.scale = self.scale or 1
input = input / self.scale

local bs,dim,height,width = input:size(1),input:size(2),input:size(3),input:size(4)
input = torch.sum(input,2)
self.output = torch.CudaTensor():resizeAs(input):fill(0)

input.THNN.EdgeComputation_updateOutput(
input:cdata(),
self.output:cdata()
)
return self.output
end

function EdgeComputation:updateGradInput(input, gradOutput)
local bs,dim,height,width = input:size(1),input:size(2),input:size(3),input:size(4)
self.gradInput = torch.CudaTensor():resizeAs(gradOutput):zero()

input.THNN.EdgeComputation_updateGradInput(
input:cdata(),
gradOutput:cdata(),
self.gradInput:cdata()
)
self.gradInput = torch.expand(self.gradInput,bs,dim,height,width) / self.scale

return self.gradInput
end
38 changes: 38 additions & 0 deletions compilation/EdgeDetector.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
#include "THCUNN.h"
#include "common.h"
#include "THCHalf.h"
#include "THCHalfAutoNumerics.cuh"

#include "EdgeDetector.h"

void THNN_CudaEdgeDetector_updateOutput(THCState *state, THCudaTensor *input_image, THCudaTensor *input_edge, THCudaTensor *label_preserve, THCudaTensor *label_eliminate, int isSmoothing) {

long batchSize = input_image->size[0];
long plane = input_image->size[1];
long height = input_image->size[2];
long width = input_image->size[3];

THCudaTensor *input_image_n = THCudaTensor_new(state);
THCudaTensor *input_edge_n = THCudaTensor_new(state);
THCudaTensor *label_preserve_n = THCudaTensor_new(state);
THCudaTensor *label_eliminate_n = THCudaTensor_new(state);

for (int elt = 0; elt < batchSize; elt ++) {
THCudaTensor_select(state, input_image_n, input_image, 0, elt);
THCudaTensor_select(state, input_edge_n, input_edge, 0, elt);
THCudaTensor_select(state, label_preserve_n, label_preserve, 0, elt);
THCudaTensor_select(state, label_eliminate_n, label_eliminate, 0, elt);

EdgeDetector(THCState_getCurrentStream(state),
THCudaTensor_data(state, input_image_n),
THCudaTensor_data(state, input_edge_n),
THCudaTensor_data(state, label_preserve_n),
THCudaTensor_data(state, label_eliminate_n),
height, width, isSmoothing);
}

THCudaTensor_free(state, input_image_n);
THCudaTensor_free(state, input_edge_n);
THCudaTensor_free(state, label_preserve_n);
THCudaTensor_free(state, label_eliminate_n);
}
Loading

0 comments on commit 154a024

Please sign in to comment.