This repository has been archived by the owner on Jan 7, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 40
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
add utest compiler_block_motion_estimate_intel for extension cl_intel…
…_device_side_avc_motion_estimation. fix build warnings. Signed-off-by: Chuanbo Weng <chuanbo.weng@intel.com> Signed-off-by: Xionghu Luo <xionghu.luo@intel.com> Reviewed-by: Yang Rong <rong.r.yang@intel.com>
- Loading branch information
Showing
3 changed files
with
224 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
|
||
__kernel __attribute__((intel_reqd_sub_group_size(16))) | ||
void compiler_block_motion_estimate_intel( | ||
__read_only image2d_t src_img, | ||
__read_only image2d_t ref_img, | ||
__global short2* motion_vector_buffer, | ||
__global ushort* residuals_buffer, | ||
__global uchar* mj_shape_buffer, | ||
__global uchar* mn_shapes_buffer, | ||
__global uchar* directions_buffer, | ||
__global uint* dwo_buffer, | ||
__global uint* pld_buffer) { | ||
|
||
int gr_id0 = get_group_id(0); | ||
int gr_id1 = get_group_id(1); | ||
|
||
ushort2 src_coord = 0; | ||
src_coord.x = gr_id0 * 16; | ||
src_coord.y = gr_id1 * 16; | ||
uchar partition_mask = CLK_AVC_ME_PARTITION_MASK_16x16_INTEL; | ||
uchar sad_adjustment = CLK_AVC_ME_SAD_ADJUST_MODE_NONE_INTEL; | ||
intel_sub_group_avc_ime_payload_t payload = intel_sub_group_avc_ime_initialize(src_coord, partition_mask, sad_adjustment); | ||
short2 ref_offset = 0; | ||
uchar search_window_config = CLK_AVC_ME_SEARCH_WINDOW_EXTRA_TINY_INTEL; | ||
payload = intel_sub_group_avc_ime_set_single_reference(ref_offset, search_window_config, payload); | ||
|
||
//mv cost penalty setting | ||
ulong packed_cc_delta = 0; | ||
uint2 packed_cost_table = intel_sub_group_avc_mce_get_default_medium_penalty_cost_table(); | ||
uchar cost_precision = CLK_AVC_ME_COST_PRECISION_QPEL_INTEL; | ||
payload = intel_sub_group_avc_ime_set_motion_vector_cost_function( | ||
packed_cc_delta, packed_cost_table, cost_precision, payload); | ||
|
||
//ime shape penalty | ||
ulong packed_shape_cost = (1 << 4 | 2); | ||
packed_shape_cost <<= 32; | ||
payload = intel_sub_group_avc_ime_set_inter_shape_penalty(packed_shape_cost ,payload); | ||
|
||
sampler_t vs = 0; | ||
intel_sub_group_avc_ime_result_t i_result = | ||
intel_sub_group_avc_ime_evaluate_with_single_reference(src_img, ref_img, vs, payload); | ||
|
||
//Get ime related result | ||
ulong mvs = intel_sub_group_avc_ime_get_motion_vectors(i_result); | ||
ushort distortions = intel_sub_group_avc_ime_get_inter_distortions(i_result); | ||
uchar major_shape = intel_sub_group_avc_ime_get_inter_major_shape(i_result); | ||
uchar minor_shapes = intel_sub_group_avc_ime_get_inter_minor_shapes(i_result); | ||
uchar directions = intel_sub_group_avc_ime_get_inter_directions(i_result); | ||
|
||
int lid_x = get_local_id(0); | ||
int mb = gr_id0 + gr_id1 * get_num_groups(0); | ||
int2 bi_mvs = as_int2(mvs); | ||
if(lid_x == 0){ | ||
motion_vector_buffer[mb] = as_short2(bi_mvs.s0); | ||
residuals_buffer[mb] = distortions; | ||
mj_shape_buffer[mb] = major_shape; | ||
mn_shapes_buffer[mb] = minor_shapes; | ||
directions_buffer[mb] = directions; | ||
} | ||
//fme setting and evaluate | ||
uchar pixel_mode = CLK_AVC_ME_SUBPIXEL_MODE_QPEL_INTEL; | ||
intel_sub_group_avc_ref_payload_t r_payload = | ||
intel_sub_group_avc_fme_initialize( | ||
src_coord, mvs, major_shape, minor_shapes, | ||
directions, pixel_mode, sad_adjustment); | ||
intel_sub_group_avc_ref_result_t r_result = | ||
intel_sub_group_avc_ref_evaluate_with_single_reference(src_img, ref_img, vs, r_payload); | ||
mvs = intel_sub_group_avc_ref_get_motion_vectors(r_result); | ||
distortions = intel_sub_group_avc_ref_get_inter_distortions(r_result); | ||
|
||
dwo_buffer[mb*16*4 + lid_x + 16*0] = i_result.s0; | ||
dwo_buffer[mb*16*4 + lid_x + 16*1] = i_result.s1; | ||
dwo_buffer[mb*16*4 + lid_x + 16*2] = i_result.s2; | ||
dwo_buffer[mb*16*4 + lid_x + 16*3] = i_result.s3; | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,147 @@ | ||
#include "utest_helper.hpp" | ||
#include <string.h> | ||
|
||
void compiler_block_motion_estimate_intel(void) | ||
{ | ||
if (!cl_check_device_side_avc_motion_estimation()) { | ||
return; | ||
} | ||
if (!cl_check_reqd_subgroup()) | ||
return; | ||
|
||
|
||
OCL_CREATE_KERNEL("compiler_block_motion_estimate_intel"); | ||
|
||
const size_t w = 80; | ||
const size_t h = 48; | ||
const size_t mv_w = (w + 15) / 16; | ||
const size_t mv_h = (h + 15) / 16; | ||
|
||
cl_image_format format; | ||
cl_image_desc desc; | ||
|
||
memset(&desc, 0x0, sizeof(cl_image_desc)); | ||
memset(&format, 0x0, sizeof(cl_image_format)); | ||
|
||
uint8_t *image_data1 = (uint8_t *)malloc(w * h); // src | ||
uint8_t *image_data2 = (uint8_t *)malloc(w * h); // ref | ||
for (size_t j = 0; j < h; j++) { | ||
for (size_t i = 0; i < w; i++) { | ||
if (i >= 32 && i <= 47 && j >= 16 && j <= 31) | ||
image_data1[w * j + i] = 100; | ||
else | ||
image_data1[w * j + i] = 0; | ||
if (i >= 30 && i <= 45 && j >= 18 && j <= 33) | ||
image_data2[w * j + i] = 98; | ||
else | ||
image_data2[w * j + i] = 0; | ||
} | ||
} | ||
|
||
format.image_channel_order = CL_R; | ||
format.image_channel_data_type = CL_UNORM_INT8; | ||
desc.image_type = CL_MEM_OBJECT_IMAGE2D; | ||
desc.image_width = w; | ||
desc.image_height = h; | ||
desc.image_row_pitch = 0; | ||
OCL_CREATE_IMAGE(buf[0], CL_MEM_COPY_HOST_PTR, &format, &desc, image_data1); // src | ||
OCL_CREATE_IMAGE(buf[1], CL_MEM_COPY_HOST_PTR, &format, &desc, image_data2); // ref | ||
|
||
OCL_CREATE_BUFFER(buf[2], 0, mv_w * mv_h * sizeof(int16_t) * 2, NULL); | ||
OCL_CREATE_BUFFER(buf[3], 0, mv_w * mv_h * sizeof(uint16_t), NULL); | ||
OCL_CREATE_BUFFER(buf[4], 0, mv_w * mv_h * sizeof(uint8_t), NULL); | ||
OCL_CREATE_BUFFER(buf[5], 0, mv_w * mv_h * sizeof(uint8_t), NULL); | ||
OCL_CREATE_BUFFER(buf[6], 0, mv_w * mv_h * sizeof(uint8_t), NULL); | ||
OCL_CREATE_BUFFER(buf[7], 0, mv_w * mv_h * sizeof(uint32_t) * 16 * 8, NULL); | ||
OCL_CREATE_BUFFER(buf[8], 0, mv_w * mv_h * sizeof(uint32_t) * 8 * 8, NULL); | ||
|
||
OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); | ||
OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); | ||
OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]); | ||
OCL_SET_ARG(3, sizeof(cl_mem), &buf[3]); | ||
OCL_SET_ARG(4, sizeof(cl_mem), &buf[4]); | ||
OCL_SET_ARG(5, sizeof(cl_mem), &buf[5]); | ||
OCL_SET_ARG(6, sizeof(cl_mem), &buf[6]); | ||
OCL_SET_ARG(7, sizeof(cl_mem), &buf[7]); | ||
OCL_SET_ARG(8, sizeof(cl_mem), &buf[8]); | ||
|
||
globals[0] = w; | ||
globals[1] = h / 16; | ||
locals[0] = 16; | ||
locals[1] = 1; | ||
OCL_NDRANGE(2); | ||
|
||
int16_t expected[] = {-8, -8, // S13.2 fixed point value | ||
-8, -8, -8, -8, -8, -8, -8, -8, -8, -8, -8, -8, -8, 4, | ||
-8, -8, -8, -8, -8, -8, -8, -8, 4, 4, -8, -8, -8, -8}; | ||
OCL_MAP_BUFFER(2); | ||
OCL_MAP_BUFFER(3); | ||
OCL_MAP_BUFFER(4); | ||
OCL_MAP_BUFFER(5); | ||
OCL_MAP_BUFFER(6); | ||
OCL_MAP_BUFFER(7); | ||
OCL_MAP_BUFFER(8); | ||
int16_t *mv = (int16_t *)buf_data[2]; | ||
#define VME_DEBUG 0 | ||
#if VME_DEBUG | ||
uint16_t *residual = (uint16_t *)buf_data[3]; | ||
uint8_t *major_shape = (uint8_t *)buf_data[4]; | ||
uint8_t *minor_shape = (uint8_t *)buf_data[5]; | ||
uint8_t *direction = (uint8_t *)buf_data[6]; | ||
uint32_t *dwo = (uint32_t *)buf_data[7]; | ||
uint32_t *pld = (uint32_t *)buf_data[8]; | ||
std::cout << std::endl; | ||
for (uint32_t j = 0; j <= mv_h - 1; ++j) { | ||
for (uint32_t i = 0; i <= mv_w - 1; ++i) { | ||
uint32_t mv_num = j * mv_w + i; | ||
std::cout << "******* mv num = " << mv_num << ": " << std::endl; | ||
std::cout << "payload register result: " << std::endl; | ||
for (uint32_t row_num = 0; row_num < 8; row_num++) { | ||
for (int32_t idx = 7; idx >= 0; idx--) | ||
printf("%.8x ", pld[mv_num * 64 + row_num * 8 + idx]); | ||
printf("\n"); | ||
} | ||
std::cout << std::endl; | ||
std::cout << "writeback register result: " << std::endl; | ||
for (uint32_t row_num = 0; row_num < 4; row_num++) { | ||
for (int32_t wi = 7; wi >= 0; wi--) | ||
printf("%.8x ", dwo[mv_num * 16 * 4 + row_num * 16 + wi]); | ||
printf("\n"); | ||
for (int32_t wi = 15; wi >= 8; wi--) | ||
printf("%.8x ", dwo[mv_num * 16 * 4 + row_num * 16 + wi]); | ||
printf("\n"); | ||
} | ||
std::cout << std::endl; | ||
std::cout << "mv: "; | ||
std::cout << "(" << mv[mv_num * 2] << ", " << mv[mv_num * 2 + 1] << ") "; | ||
std::cout << std::endl; | ||
std::cout << "residual: "; | ||
std::cout << residual[mv_num] << " "; | ||
std::cout << std::endl; | ||
printf("major shape: %u\n", major_shape[mv_num]); | ||
printf("minor shape: %u\n", minor_shape[mv_num]); | ||
printf("direction: %u\n", direction[mv_num]); | ||
std::cout << std::endl; | ||
} | ||
} | ||
#endif | ||
for (uint32_t j = 0; j <= mv_h - 1; ++j) { | ||
for (uint32_t i = 0; i <= mv_w - 1; ++i) { | ||
uint32_t mv_num = j * mv_w + i; | ||
OCL_ASSERT(mv[mv_num * 2] == expected[mv_num * 2]); | ||
OCL_ASSERT(mv[mv_num * 2 + 1] == expected[mv_num * 2 + 1]); | ||
} | ||
} | ||
OCL_UNMAP_BUFFER(2); | ||
OCL_UNMAP_BUFFER(3); | ||
OCL_UNMAP_BUFFER(4); | ||
OCL_UNMAP_BUFFER(5); | ||
OCL_UNMAP_BUFFER(6); | ||
OCL_UNMAP_BUFFER(7); | ||
OCL_UNMAP_BUFFER(8); | ||
|
||
free(image_data1); | ||
free(image_data2); | ||
} | ||
|
||
MAKE_UTEST_FROM_FUNCTION(compiler_block_motion_estimate_intel); |