hip concat forward kernel match header
temp remove outdated hip kernels to get compiling

remove forward concat kernel call

remove backward concat kernel call

Pool2d kernel fixes
williamberman committed Nov 3, 2022
1 parent 484cd59 commit 633f7a9
Showing 3 changed files with 40 additions and 179 deletions.
177 changes: 24 additions & 153 deletions src/ops/concat.cpp
@@ -41,172 +41,43 @@ void calc_blk_size(coord_t &num_blocks,
 }
 
 /*static*/
-void Concat::forward_kernel(float *output,
-                            float const *const *inputs,
+void Concat::forward_kernel(GenericTensorAccessorW const &output,
+                            GenericTensorAccessorR const *inputs,
                             int num_inputs,
                             int axis,
-                            Domain const &out_domain,
-                            Domain const *in_domain,
                             hipStream_t stream) {
-  coord_t num_blocks = 1, output_blk_size = 1, input_blk_sizes[MAX_NUM_INPUTS];
-  assert(num_inputs <= MAX_NUM_INPUTS);
-  switch (out_domain.get_dim()) {
-#define DIMFUNC(DIM) \
-  case DIM: { \
-    Rect<DIM> rect = out_domain; \
-    calc_blk_size<DIM>(num_blocks, output_blk_size, rect, axis); \
-    for (int i = 0; i < num_inputs; i++) { \
-      rect = in_domain[i]; \
-      coord_t input_num_blocks = 1; \
-      calc_blk_size<DIM>(input_num_blocks, input_blk_sizes[i], rect, axis); \
-      assert(input_num_blocks == num_blocks); \
-    } \
-    break; \
-  }
-    LEGION_FOREACH_N(DIMFUNC)
-#undef DIMFUNC
-    default:
-      fprintf(stderr, "Unsupported concat dimension number");
-      assert(false);
-  }
-
-  for (int i = 0; i < num_inputs; i++) {
-    hipLaunchKernelGGL(copy_with_stride,
-                       GET_BLOCKS(input_blk_sizes[i] * num_blocks),
-                       CUDA_NUM_THREADS,
-                       0,
-                       stream,
-                       output,
-                       inputs[i],
-                       num_blocks,
-                       output_blk_size,
-                       input_blk_sizes[i]);
-    // printf("output = %x num_blocks=%d output_blk_size=%d
-    // input_blk_size[%d]=%d\n",
-    //        output, num_blocks, output_blk_size, i, input_blk_sizes[i]);
-    output += input_blk_sizes[i];
-  }
+  // TODO
+  assert(false);
 }
 
 /*static*/
 void Concat::forward_kernel_wrapper(ConcatMeta const *m,
-                                    float *output,
-                                    float const *const *inputs,
+                                    GenericTensorAccessorW const &output,
+                                    GenericTensorAccessorR const *inputs,
                                     int num_inputs,
-                                    int axis,
-                                    Domain const &out_domain,
-                                    Domain const *in_domain) {
-  hipStream_t stream;
-  checkCUDA(get_legion_stream(&stream));
-
-  hipEvent_t t_start, t_end;
-  if (m->profiling) {
-    hipEventCreate(&t_start);
-    hipEventCreate(&t_end);
-    hipEventRecord(t_start, stream);
-  }
-  Concat::forward_kernel(
-      output, inputs, num_inputs, axis, out_domain, in_domain, stream);
-  if (m->profiling) {
-    hipEventRecord(t_end, stream);
-    checkCUDA(hipEventSynchronize(t_end));
-    // print_tensor<4, float>(output - output_blk_size, output_rect,
-    // "[Concat:forward:output]"); printf("output_blk_size=%zu\n",
-    // output_blk_size); print_tensor<4, float>(inputs[0], input_rect[0],
-    // "[Concat:forward:input0]"); print_tensor<4, float>(inputs[1],
-    // input_rect[1], "[Concat:forward:input1]");
-    float elapsed = 0;
-    checkCUDA(hipEventElapsedTime(&elapsed, t_start, t_end));
-    printf("[%s] forward time = %.4f ms\n", m->op_name, elapsed);
-    hipEventDestroy(t_start);
-    hipEventDestroy(t_end);
-  }
+                                    int axis) {
+  // TODO
+  assert(false);
 }
 
 /*static*/
-void Concat::backward_kernel(float const *output_grad,
-                             float **input_grads,
-                             int num_inputs,
-                             int axis,
-                             Domain const &out_grad_domain,
-                             Domain const *in_grad_domain,
-                             hipStream_t stream) {
-  coord_t num_blocks = 1, output_blk_size = 1, input_blk_sizes[MAX_NUM_INPUTS];
-  assert(num_inputs <= MAX_NUM_INPUTS);
-  switch (out_grad_domain.get_dim()) {
-#define DIMFUNC(DIM) \
-  case DIM: { \
-    Rect<DIM> rect = out_grad_domain; \
-    calc_blk_size<DIM>(num_blocks, output_blk_size, rect, axis); \
-    for (int i = 0; i < num_inputs; i++) { \
-      rect = in_grad_domain[i]; \
-      coord_t input_num_blocks = 1; \
-      calc_blk_size<DIM>(input_num_blocks, input_blk_sizes[i], rect, axis); \
-      assert(input_num_blocks == num_blocks); \
-    } \
-    break; \
-  }
-    LEGION_FOREACH_N(DIMFUNC)
-#undef DIMFUNC
-    default:
-      fprintf(stderr, "Unsupported concat dimension number");
-      assert(false);
-  }
-
-  for (int i = 0; i < num_inputs; i++) {
-    hipLaunchKernelGGL(add_with_stride,
-                       GET_BLOCKS(input_blk_sizes[i] * num_blocks),
-                       CUDA_NUM_THREADS,
-                       0,
-                       stream,
-                       input_grads[i],
-                       output_grad,
-                       num_blocks,
-                       input_blk_sizes[i],
-                       output_blk_size);
-    output_grad += input_blk_sizes[i];
-  }
-
-  // Rect<2> output_rect(Point<2>(0, 0), Point<2>(output_blk_size-1, batch_size
-  // - 1)); Rect<2> input_rect(Point<2>(0, 0), Point<2>(input_blk_sizes[0]-1,
-  // batch_size - 1)); print_tensor<2, float>(output_grad - output_blk_size,
-  // output_rect, "[Concat:backward:output]"); print_tensor<2,
-  // float>(input_grads[0], input_rect, "[Concat:backward:input0]");
-}
+void Concat::backward_kernel(GenericTensorAccessorR const &output_grad,
+                             GenericTensorAccessorW const *input_grads,
+                             int num_inputs,
+                             int axis,
+                             ffStream_t stream) {
+  // TODO
+  assert(false);
+}
 
 /*static*/
 void Concat::backward_kernel_wrapper(ConcatMeta const *m,
-                                     float const *output_grad,
-                                     float **input_grads,
-                                     int num_inputs,
-                                     int axis,
-                                     Domain const &out_grad_domain,
-                                     Domain const *in_grad_domain) {
-  hipStream_t stream;
-  checkCUDA(get_legion_stream(&stream));
-
-  hipEvent_t t_start, t_end;
-  if (m->profiling) {
-    hipEventCreate(&t_start);
-    hipEventCreate(&t_end);
-    hipEventRecord(t_start, stream);
-  }
-  Concat::backward_kernel(output_grad,
-                          input_grads,
-                          num_inputs,
-                          axis,
-                          out_grad_domain,
-                          in_grad_domain,
-                          stream);
-  if (m->profiling) {
-    hipEventRecord(t_end, stream);
-    checkCUDA(hipEventSynchronize(t_end));
-    float elapsed = 0;
-    checkCUDA(hipEventElapsedTime(&elapsed, t_start, t_end));
-    printf("[%s] forward time = %.4f ms\n", m->op_name, elapsed);
-    hipEventDestroy(t_start);
-    hipEventDestroy(t_end);
-  }
-}
+                                     GenericTensorAccessorR const &output_grad,
+                                     GenericTensorAccessorW const *input_grads,
+                                     int num_inputs,
+                                     int axis) {
+  // TODO
+  assert(false);
+}
 
 }; // namespace FlexFlow
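
For orientation, here is one way the new forward stub could later be filled in. This sketch is not part of the commit; it assumes the accessors carry a Legion domain and a raw float pointer (the names `domain` and `get_float_ptr()` are assumptions) and reuses `calc_blk_size`, `copy_with_stride`, `MAX_NUM_INPUTS`, `GET_BLOCKS`, and `CUDA_NUM_THREADS` from the surrounding file.

// Sketch only -- not committed code; accessor member names are assumed.
void Concat::forward_kernel(GenericTensorAccessorW const &output,
                            GenericTensorAccessorR const *inputs,
                            int num_inputs,
                            int axis,
                            hipStream_t stream) {
  coord_t num_blocks = 1, output_blk_size = 1, input_blk_sizes[MAX_NUM_INPUTS];
  assert(num_inputs <= MAX_NUM_INPUTS);
  Domain out_domain = output.domain; // assumed accessor field
  switch (out_domain.get_dim()) {
#define DIMFUNC(DIM) \
  case DIM: { \
    Rect<DIM> rect = out_domain; \
    calc_blk_size<DIM>(num_blocks, output_blk_size, rect, axis); \
    for (int i = 0; i < num_inputs; i++) { \
      rect = inputs[i].domain; \
      coord_t input_num_blocks = 1; \
      calc_blk_size<DIM>(input_num_blocks, input_blk_sizes[i], rect, axis); \
      assert(input_num_blocks == num_blocks); \
    } \
    break; \
  }
    LEGION_FOREACH_N(DIMFUNC)
#undef DIMFUNC
    default:
      fprintf(stderr, "Unsupported concat dimension number");
      assert(false);
  }
  // Copy each input into its slice of the output, block by block.
  float *out_ptr = output.get_float_ptr(); // assumed accessor method
  for (int i = 0; i < num_inputs; i++) {
    hipLaunchKernelGGL(copy_with_stride,
                       GET_BLOCKS(input_blk_sizes[i] * num_blocks),
                       CUDA_NUM_THREADS,
                       0,
                       stream,
                       out_ptr,
                       inputs[i].get_float_ptr(),
                       num_blocks,
                       output_blk_size,
                       input_blk_sizes[i]);
    out_ptr += input_blk_sizes[i];
  }
}
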
18 changes: 4 additions & 14 deletions src/ops/fused.cpp
@@ -154,13 +154,8 @@ __host__ void FusedOp::forward_task(Task const *task,
         assert(fused->op_num_outputs[op] == 1);
         ConcatMeta *m = (ConcatMeta *)metas->meta[op];
         int num_inputs = fused->op_num_inputs[op];
-        Concat::forward_kernel(my_op[0],
-                               my_ip,
-                               num_inputs,
-                               m->legion_axis,
-                               my_od[0],
-                               my_id,
-                               stream);
+        // TODO
+        assert(false);
         break;
       }
       case OP_CONV2D: {
@@ -487,13 +482,8 @@ __host__ void FusedOp::backward_task(Task const *task,
         assert(fused->op_num_outputs[op] == 1);
         ConcatMeta *m = (ConcatMeta *)metas->meta[op];
         int num_inputs = fused->op_num_inputs[op];
-        Concat::backward_kernel(my_grad_op[0],
-                                my_grad_ip,
-                                num_inputs,
-                                m->legion_axis,
-                                my_grad_od[0],
-                                my_grad_id,
-                                stream);
+        // TODO
+        assert(false);
         break;
       }
       case OP_CONV2D: {
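
Once the accessor-based Concat kernels are implemented, the stubbed fused-op call sites above could be restored along these lines. This is a sketch only: `my_output_accessor` and `my_input_accessor` are hypothetical names for accessor arrays the fused task would need to build, since the old `my_op`/`my_ip` raw-pointer arrays no longer match the new signatures. The backward case would be restored analogously with `Concat::backward_kernel`.

      case OP_CONCAT: {
        // Sketch: assumes GenericTensorAccessor arrays for this operator's
        // inputs/outputs have been sliced out of the task regions.
        ConcatMeta *m = (ConcatMeta *)metas->meta[op];
        int num_inputs = fused->op_num_inputs[op];
        Concat::forward_kernel(my_output_accessor[0],
                               my_input_accessor,
                               num_inputs,
                               m->legion_axis,
                               stream);
        break;
      }
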
24 changes: 12 additions & 12 deletions src/ops/pool_2d.cpp
@@ -72,8 +72,8 @@ void Pool2D::init_kernel(Pool2D const *pool,
 
 /*static*/
 void Pool2D::forward_kernel(Pool2DMeta const *m,
-                            float const *input_ptr,
-                            float *output_ptr,
+                            void const *input_ptr,
+                            void *output_ptr,
                             hipStream_t stream) {
   checkCUDNN(miopenSetStream(m->handle.dnn, stream));
 
@@ -93,8 +93,8 @@ void Pool2D::forward_kernel(Pool2DMeta const *m,
 
 /*static*/
 void Pool2D::forward_kernel_wrapper(Pool2DMeta const *m,
-                                    float const *input_ptr,
-                                    float *output_ptr) {
+                                    void const *input_ptr,
+                                    void *output_ptr) {
   hipStream_t stream;
   checkCUDA(get_legion_stream(&stream));
 
@@ -121,10 +121,10 @@ void Pool2D::forward_kernel_wrapper(Pool2DMeta const *m,
 
 /*static*/
 void Pool2D::backward_kernel(Pool2DMeta const *m,
-                             float const *input_ptr,
-                             float *input_grad_ptr,
-                             float const *output_ptr,
-                             float const *output_grad_ptr,
+                             void const *input_ptr,
+                             void *input_grad_ptr,
+                             void const *output_ptr,
+                             void const *output_grad_ptr,
                              hipStream_t stream) {
   checkCUDNN(miopenSetStream(m->handle.dnn, stream));
 
@@ -147,10 +147,10 @@ void Pool2D::backward_kernel(Pool2DMeta const *m,
 
 /*static*/
 void Pool2D::backward_kernel_wrapper(Pool2DMeta const *m,
-                                     float const *input_ptr,
-                                     float *input_grad_ptr,
-                                     float const *output_ptr,
-                                     float const *output_grad_ptr) {
+                                     void const *input_ptr,
+                                     void *input_grad_ptr,
+                                     void const *output_ptr,
+                                     void const *output_grad_ptr) {
   hipStream_t stream;
   checkCUDA(get_legion_stream(&stream));
 
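
The Pool2D changes only widen the pointer parameters from float to void so the implementation matches the updated header. The MIOpen descriptors held in Pool2DMeta still determine the element type, and the MIOpen pooling calls already take void pointers, so the kernel bodies need no casts. Call sites keep passing typed device buffers, which convert implicitly; the helper below is a sketch with hypothetical names, not code from this commit.

// Sketch: typed device pointers convert implicitly to the void parameters.
void run_pool2d_forward(Pool2DMeta const *m,
                        float const *input_dev,
                        float *output_dev) {
  Pool2D::forward_kernel_wrapper(m, input_dev, output_dev);
}
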
