Permalink
Browse files

Move CDEF stuff out of deblock data and other cleanup

  • Loading branch information...
stemidts authored and Thomas Davies committed Jan 8, 2018
1 parent 5cbc81e commit 159803f3fcaaeb5262b34cb993ac733c30c526e2
Showing with 53 additions and 63 deletions.
  1. +9 −6 common/common_frame.c
  2. +2 −2 common/common_frame.h
  3. +2 −5 common/types.h
  4. +0 −6 dec/decode_block.c
  5. +4 −5 dec/decode_frame.c
  6. +0 −6 enc/encode_block.c
  7. +36 −33 enc/encode_frame.c
@@ -823,7 +823,7 @@ int cdef_allskip(int xoff, int yoff, int width, int height, deblock_data_t *debl
}
#endif
void TEMPLATE(cdef_frame)(const yuv_frame_t *frame, const yuv_frame_t *org, deblock_data_t *deblock_data, void *stream, int cdef_bits, int bitdepth, unsigned int plane) {
void TEMPLATE(cdef_frame)(cdef_strengths *cdef_strengths, const yuv_frame_t *frame, const yuv_frame_t *org, deblock_data_t *deblock_data, void *stream, int cdef_bits, int bitdepth, unsigned int plane) {
int c, k, l;
const int fb_size_log2 = 6;
@@ -863,6 +863,8 @@ void TEMPLATE(cdef_frame)(const yuv_frame_t *frame, const yuv_frame_t *org, debl
cdef_init(stride16, cdef_directions_copy);
cdef_init(sstride, cdef_directions);
int ci = 0;
// Iterate over all filter blocks
for (k = 0; k < num_fb_ver; k++) {
for (l = 0; l < num_fb_hor; l++) {
@@ -878,7 +880,7 @@ void TEMPLATE(cdef_frame)(const yuv_frame_t *frame, const yuv_frame_t *org, debl
w += !w << fb_size_log2;
int index = (yoff/MIN_PB_SIZE)*(width/MIN_PB_SIZE) + (xoff/MIN_PB_SIZE);
cdef_strength *cdef = &deblock_data[index].cdef->plane[plane != 0];
cdef_strength *cdef = &cdef_strengths[ci].plane[plane != 0];
int coeff_shift = bitdepth - 8;
int pri_strength = cdef->level;
@@ -895,7 +897,7 @@ void TEMPLATE(cdef_frame)(const yuv_frame_t *frame, const yuv_frame_t *org, debl
index = ((yoff + m * 8) / MIN_PB_SIZE) * (width/MIN_PB_SIZE) + ((xoff + n * 8) / MIN_PB_SIZE);
if (plane == 0)
deblock_data[index].cdef_dir = (use_simd ? TEMPLATE(cdef_find_dir_simd) : TEMPLATE(cdef_find_dir))(src_buffer + ypos * sstride + xpos, sstride, &deblock_data[index].cdef_var, coeff_shift);
cdef_strengths[ci].dir[m * (bs << sub) + n] = (use_simd ? TEMPLATE(cdef_find_dir_simd) : TEMPLATE(cdef_find_dir))(src_buffer + ypos * sstride + xpos, sstride, &cdef_strengths[ci].var[m * bs + n], coeff_shift);
if (deblock_data[index].mode != MODE_SKIP) {
@@ -948,26 +950,27 @@ void TEMPLATE(cdef_frame)(const yuv_frame_t *frame, const yuv_frame_t *org, debl
TEMPLATE(cdef_prepare_input)(sizex, sizey, xpos, ypos, bt, padding, src16 + offset16, stride16, src_buffer, sstride);
int adj_str = plane ? pri_strength : adjust_strength(pri_strength, deblock_data[index].cdef_var);
int adj_str = plane ? pri_strength : adjust_strength(pri_strength, cdef_strengths[ci].var[m * (bs << sub) + n]);
int pri_damping = adj_str ? max(log2i(adj_str), cdef->pri_damping - !!plane) : cdef->pri_damping - !!plane;
int sec_damping = cdef->sec_damping - !!plane;
// Apply the filter.
#ifdef HBD
(use_simd ? cdef_filter_block_simd : cdef_filter_block)(NULL, dst_buffer + ypos * dstride + xpos, dstride, src16 + offset16, stride16,
adj_str << coeff_shift, sec_strength << coeff_shift,
pri_strength ? deblock_data[index].cdef_dir : 0, pri_damping + coeff_shift, sec_damping + coeff_shift, sizex,
pri_strength ? cdef_strengths[ci].dir[m * (bs << sub) + n] : 0, pri_damping + coeff_shift, sec_damping + coeff_shift, sizex,
cdef_directions_copy, coeff_shift);
#else
(use_simd ? cdef_filter_block_simd : cdef_filter_block)(dst_buffer + ypos * dstride + xpos, NULL, dstride, src16 + offset16, stride16,
adj_str << coeff_shift, sec_strength << coeff_shift,
pri_strength ? deblock_data[index].cdef_dir : 0, pri_damping + coeff_shift, sec_damping + coeff_shift, sizex,
pri_strength ? cdef_strengths[ci].dir[m * (bs << sub) + n] : 0, pri_damping + coeff_shift, sec_damping + coeff_shift, sizex,
cdef_directions_copy, coeff_shift);
#endif
}
}
}
}
ci++;
}
}
@@ -52,8 +52,8 @@ void clpf_frame_lbd(const yuv_frame_t *frame, const yuv_frame_t *org, const debl
void clpf_frame_hbd(const yuv_frame_t *frame, const yuv_frame_t *org, const deblock_data_t *deblock_data, void *stream,int enable_sb_flag, unsigned int strength, unsigned int fb_size_log2, int bitdepth, plane_t plane, int qp,
int(*decision)(int, int, const yuv_frame_t *, const yuv_frame_t *, const deblock_data_t *, int, int, int, void *, unsigned int, unsigned int, unsigned int, unsigned int, int));
#if CDEF
void cdef_frame_lbd(const yuv_frame_t *frame, const yuv_frame_t *org, deblock_data_t *deblock_data, void *stream, int cdef_bits, int bitdepth, unsigned int plane);
void cdef_frame_hbd(const yuv_frame_t *frame, const yuv_frame_t *org, deblock_data_t *deblock_data, void *stream, int cdef_bits, int bitdepth, unsigned int plane);
void cdef_frame_lbd(cdef_strengths *cdef_strengths, const yuv_frame_t *frame, const yuv_frame_t *org, deblock_data_t *deblock_data, void *stream, int cdef_bits, int bitdepth, unsigned int plane);
void cdef_frame_hbd(cdef_strengths *cdef_strengths, const yuv_frame_t *frame, const yuv_frame_t *org, deblock_data_t *deblock_data, void *stream, int cdef_bits, int bitdepth, unsigned int plane);
int cdef_allskip(int xoff, int yoff, int width, int height, deblock_data_t *deblock_data, int fb_size_log2);
void cdef_prepare_input_lbd(int sizex, int sizey, int xpos, int ypos, boundary_type bt, int padding, uint16_t *src16, int stride16, uint8_t *src_buffer, int sstride);
void cdef_prepare_input_hbd(int sizex, int sizey, int xpos, int ypos, boundary_type bt, int padding, uint16_t *src16, int stride16, uint16_t *src_buffer, int sstride);
@@ -169,6 +169,8 @@ typedef struct
// CDEF state kept per filter block: callers index an array of these by a
// running filter-block counter (e.g. cdef_strengths[ci], cdef[k*nvfb+l]).
typedef struct
{
// Direction returned by cdef_find_dir; one entry per 64 samples of a
// CDEF_BLOCKSIZE x CDEF_BLOCKSIZE area (presumably one per 8x8 block -- confirm).
int dir[CDEF_BLOCKSIZE * CDEF_BLOCKSIZE / 64];
// Value cdef_find_dir writes through its output pointer; later passed to
// adjust_strength() for plane 0 (presumably a variance measure -- confirm).
int var[CDEF_BLOCKSIZE * CDEF_BLOCKSIZE / 64];
// Strength settings; callers index with (plane != 0), so [0] is used for
// plane 0 and [1] is shared by the remaining planes.
cdef_strength plane[2];
} cdef_strengths;
#endif
@@ -182,11 +184,6 @@ typedef struct
part_t pb_part;
inter_pred_t inter_pred;
inter_pred_t inter_pred_arr[16]; //TODO: MAX_GOP_SIZE
#if CDEF
cdef_strengths *cdef;
int cdef_dir;
int cdef_var;
#endif
} deblock_data_t;
typedef enum {
@@ -218,12 +218,6 @@ static void copy_deblock_data(decoder_info_t *decoder_info, block_info_dec_t *bl
decoder_info->deblock_data[block_index].inter_pred.ref_idx0 = block_info->block_param.ref_idx0;
decoder_info->deblock_data[block_index].inter_pred.ref_idx1 = block_info->block_param.ref_idx1;
decoder_info->deblock_data[block_index].inter_pred.bipred_flag = block_info->block_param.dir;
#if CDEF
int xpos = block_info->block_pos.xpos + n * MIN_PB_SIZE;
int ypos = block_info->block_pos.ypos + m * MIN_PB_SIZE;
cdef_strengths *cdef = decoder_info->cdef + (ypos>>CDEF_BLOCKSIZE_LOG2)*((decoder_info->width + CDEF_BLOCKSIZE - 1)>>CDEF_BLOCKSIZE_LOG2) + (xpos>>CDEF_BLOCKSIZE_LOG2);
decoder_info->deblock_data[block_index].cdef = cdef;
#endif
}
}
}
@@ -157,7 +157,6 @@ void decode_frame(decoder_info_t *decoder_info, yuv_frame_t* rec_buffer)
for (int l = 0; l < nvfb; l++) {
int xpos = l << fb_size_log2;
int ypos = k << fb_size_log2;
int index = (ypos / MIN_PB_SIZE)*(width / MIN_PB_SIZE) + xpos / MIN_PB_SIZE;
int preset = 0;
if (decoder_info->cdef_bits) {
int allskip = cdef_allskip(xpos, ypos, width, height, decoder_info->deblock_data, fb_size_log2);
@@ -166,17 +165,17 @@ void decode_frame(decoder_info_t *decoder_info, yuv_frame_t* rec_buffer)
}
}
for (int plane = 0; plane < 2; plane++) {
cdef_strength *cdef = &decoder_info->deblock_data[index].cdef->plane[plane != 0];
cdef_strength *cdef = &decoder_info->cdef[k*nvfb+l].plane[plane != 0];
cdef->level = decoder_info->cdef_presets[preset].pri_strength[plane] * 2 + decoder_info->cdef_presets[preset].skip_condition[plane];
cdef->sec_strength = decoder_info->cdef_presets[preset].sec_strength[plane];
cdef->pri_damping = decoder_info->cdef_damping[0];
cdef->sec_damping = decoder_info->cdef_damping[1];
}
}
}
TEMPLATE(cdef_frame)(decoder_info->rec, 0, decoder_info->deblock_data, stream, 0, decoder_info->bitdepth, 0);
TEMPLATE(cdef_frame)(decoder_info->rec, 0, decoder_info->deblock_data, stream, 0, decoder_info->bitdepth, 1);
TEMPLATE(cdef_frame)(decoder_info->rec, 0, decoder_info->deblock_data, stream, 0, decoder_info->bitdepth, 2);
TEMPLATE(cdef_frame)(decoder_info->cdef, decoder_info->rec, 0, decoder_info->deblock_data, stream, 0, decoder_info->bitdepth, 0);
TEMPLATE(cdef_frame)(decoder_info->cdef, decoder_info->rec, 0, decoder_info->deblock_data, stream, 0, decoder_info->bitdepth, 1);
TEMPLATE(cdef_frame)(decoder_info->cdef, decoder_info->rec, 0, decoder_info->deblock_data, stream, 0, decoder_info->bitdepth, 2);
}
#endif
@@ -1608,12 +1608,6 @@ static void copy_deblock_data(encoder_info_t *encoder_info, block_info_t *block_
encoder_info->deblock_data[block_index].inter_pred.ref_idx0 = block_info->block_param.ref_idx0;
encoder_info->deblock_data[block_index].inter_pred.ref_idx1 = block_info->block_param.ref_idx1;
encoder_info->deblock_data[block_index].inter_pred.bipred_flag = block_info->block_param.dir;
#if CDEF
int xpos = block_info->block_pos.xpos + n * MIN_PB_SIZE;
int ypos = block_info->block_pos.ypos + m * MIN_PB_SIZE;
cdef_strengths *cdef = encoder_info->cdef + (ypos>>CDEF_BLOCKSIZE_LOG2)*((encoder_info->width + CDEF_BLOCKSIZE - 1)>>CDEF_BLOCKSIZE_LOG2) + (xpos>>CDEF_BLOCKSIZE_LOG2);
encoder_info->deblock_data[block_index].cdef = cdef;
#endif
}
}
}
@@ -43,7 +43,7 @@ extern double squared_lambda_QP[52];
#if CDEF
int TEMPLATE(cdef_search)(yuv_frame_t *rec, yuv_frame_t *org, deblock_data_t *deblock_data, const frame_info_t *frame_info, encoder_info_t *encoder_info,
int cdef_strengths[8], int cdef_uv_strengths[8], int speed);
int strengths[8], int uv_strengths[8], int speed);
#define TOTAL_STRENGTHS (CDEF_PRI_STRENGTHS * CDEF_SEC_STRENGTHS)
@@ -222,12 +222,12 @@ static uint64_t dist_8x8(SAMPLE *dst, int dstride, SAMPLE *src,
int TEMPLATE(cdef_search)(yuv_frame_t *rec, yuv_frame_t *org, deblock_data_t *deblock_data, const frame_info_t *frame_info, encoder_info_t *encoder_info,
int cdef_strengths[8], int cdef_uv_strengths[8], int speed) {
int strengths[8], int uv_strengths[8], int speed) {
int width = rec->width;
int height = rec->height;
const int fb_size_log2 = CDEF_BLOCKSIZE_LOG2;
const int nhfb = (height+CDEF_BLOCKSIZE-1)>>CDEF_BLOCKSIZE_LOG2;
const int nvfb = (width+CDEF_BLOCKSIZE-1)>>CDEF_BLOCKSIZE_LOG2;
const int num_fb_hor = (width + (1 << fb_size_log2) - 1) >> fb_size_log2;
const int num_fb_ver = (height + (1 << fb_size_log2) - 1) >> fb_size_log2;
uint64_t best_tot_mse = (uint64_t)1 << 63;
uint64_t tot_mse;
uint64_t(*mse[2])[TOTAL_STRENGTHS];
@@ -245,23 +245,25 @@ int TEMPLATE(cdef_search)(yuv_frame_t *rec, yuv_frame_t *org, deblock_data_t *de
SAMPLE *dst = thor_alloc(bs * bs * sizeof(SAMPLE), 32);
int cdef_directions[8][2 + CDEF_FULL];
int cdef_directions_copy[8][2 + CDEF_FULL];
int *sb_index = thor_alloc(nvfb * nhfb * sizeof(*sb_index), 16);
int *selected_strength = thor_alloc(nvfb * nhfb * sizeof(*sb_index), 16);
int *ci_index = thor_alloc(num_fb_hor * num_fb_ver * sizeof(*ci_index), 16);
int *selected_strength = thor_alloc(num_fb_hor * num_fb_ver * sizeof(*ci_index), 16);
stream_t *stream = encoder_info->stream;
const int bitdepth = encoder_info->params->bitdepth;
mse[0] = thor_alloc(sizeof(**mse) * nvfb * nhfb, 32);
mse[1] = thor_alloc(sizeof(**mse) * nvfb * nhfb, 32);
mse[0] = thor_alloc(sizeof(**mse) * num_fb_hor * num_fb_ver, 32);
mse[1] = thor_alloc(sizeof(**mse) * num_fb_hor * num_fb_ver, 32);
cdef_init(stride16, cdef_directions_copy);
for (int k = 0; k < nhfb; k++) {
for (int l = 0; l < nvfb; l++) {
int ci = -1;
for (int k = 0; k < num_fb_ver; k++) {
for (int l = 0; l < num_fb_hor; l++) {
int h, w;
const int xoff = l << fb_size_log2;
const int yoff = k << fb_size_log2;
int allskip = cdef_allskip(xoff, yoff, width, height, deblock_data, fb_size_log2);
ci++;
if (allskip)
continue;
@@ -316,24 +318,25 @@ int TEMPLATE(cdef_search)(yuv_frame_t *rec, yuv_frame_t *org, deblock_data_t *de
sizey = min((height >> sub) - ypos, bs);
index = ((yoff + m * 8) / MIN_PB_SIZE) * (width/MIN_PB_SIZE) + ((xoff + n * 8) / MIN_PB_SIZE);
if (plane == 0)
deblock_data[index].cdef_dir = (use_simd ? TEMPLATE(cdef_find_dir_simd) : TEMPLATE(cdef_find_dir))(src_buffer + ypos * sstride + xpos, sstride, &deblock_data[index].cdef_var, coeff_shift);
if (plane == 0 && gi == 0)
encoder_info->cdef[ci].dir[m * bs + n] = (use_simd ? TEMPLATE(cdef_find_dir_simd) : TEMPLATE(cdef_find_dir))(src_buffer + ypos * sstride + xpos, sstride, &encoder_info->cdef[ci].var[m * bs + n], coeff_shift);
if (deblock_data[index].mode != MODE_SKIP) {
int adj_str = plane ? pri_strength : adjust_strength(pri_strength, deblock_data[index].cdef_var);
int adj_str = plane ? pri_strength : adjust_strength(pri_strength, encoder_info->cdef[ci].var[m * bs + n]);
int adj_pri_damping = adj_str ? max(log2i(adj_str), pri_damping - !!plane) : pri_damping - !!plane;
int adj_sec_damping = sec_damping - !!plane;
// Apply the filter.
#ifdef HBD
(use_simd ? cdef_filter_block_simd : cdef_filter_block)(NULL, dst, sizex, src16 + offset16 + n * bs + m * bs * stride16, stride16,
adj_str << coeff_shift, sec_strength << coeff_shift,
pri_strength ? deblock_data[index].cdef_dir : 0, adj_pri_damping + coeff_shift, adj_sec_damping + coeff_shift, sizex,
pri_strength ? encoder_info->cdef[ci].dir[m * bs + n] : 0, adj_pri_damping + coeff_shift, adj_sec_damping + coeff_shift, sizex,
cdef_directions_copy, coeff_shift);
#else
(use_simd ? cdef_filter_block_simd : cdef_filter_block)(dst, NULL, sizex, src16 + offset16 + n * bs + m * bs * stride16, stride16,
adj_str << coeff_shift, sec_strength << coeff_shift,
pri_strength ? deblock_data[index].cdef_dir : 0, adj_pri_damping + coeff_shift, adj_sec_damping + coeff_shift, sizex,
pri_strength ? encoder_info->cdef[ci].dir[m * bs + n] : 0, adj_pri_damping + coeff_shift, adj_sec_damping + coeff_shift, sizex,
cdef_directions_copy, coeff_shift);
#endif
@@ -351,7 +354,7 @@ int TEMPLATE(cdef_search)(yuv_frame_t *rec, yuv_frame_t *org, deblock_data_t *de
}
}
}
sb_index[sb_count++] = ((k << fb_size_log2)/MIN_PB_SIZE)*(rec->width/MIN_PB_SIZE) + ((l << fb_size_log2)/MIN_PB_SIZE);
ci_index[sb_count++] = ci;
}
}
@@ -379,8 +382,8 @@ int TEMPLATE(cdef_search)(yuv_frame_t *rec, yuv_frame_t *org, deblock_data_t *de
best_tot_mse = tot_mse;
nb_strength_bits = i;
for (j = 0; j < 1 << nb_strength_bits; j++) {
cdef_strengths[j] = best_lev0[j];
cdef_uv_strengths[j] = best_lev1[j];
strengths[j] = best_lev0[j];
uv_strengths[j] = best_lev1[j];
}
}
}
@@ -394,8 +397,8 @@ int TEMPLATE(cdef_search)(yuv_frame_t *rec, yuv_frame_t *org, deblock_data_t *de
uint64_t best_mse = (uint64_t)1 << 63;
best_gi = 0;
for (gi = 0; gi < (1 << nb_strength_bits); gi++) {
uint64_t curr = mse[0][i][cdef_strengths[gi]];
if (encoder_info->params->subsample != 400) curr += mse[1][i][cdef_uv_strengths[gi]];
uint64_t curr = mse[0][i][strengths[gi]];
if (encoder_info->params->subsample != 400) curr += mse[1][i][uv_strengths[gi]];
if (curr < best_mse) {
best_gi = gi;
best_mse = curr;
@@ -407,21 +410,21 @@ int TEMPLATE(cdef_search)(yuv_frame_t *rec, yuv_frame_t *org, deblock_data_t *de
}
for (int j = 0; j < nb_strengths; j++) {
cdef_strengths[j] =
priconv[speed][cdef_strengths[j] / CDEF_SEC_STRENGTHS] *
strengths[j] =
priconv[speed][strengths[j] / CDEF_SEC_STRENGTHS] *
CDEF_SEC_STRENGTHS +
(cdef_strengths[j] % CDEF_SEC_STRENGTHS);
cdef_uv_strengths[j] =
priconv[speed][cdef_uv_strengths[j] / CDEF_SEC_STRENGTHS] *
(strengths[j] % CDEF_SEC_STRENGTHS);
uv_strengths[j] =
priconv[speed][uv_strengths[j] / CDEF_SEC_STRENGTHS] *
CDEF_SEC_STRENGTHS +
(cdef_uv_strengths[j] % CDEF_SEC_STRENGTHS);
(uv_strengths[j] % CDEF_SEC_STRENGTHS);
}
for (int i = 0; i < sb_count; i++) {
for (int plane = 0; plane < 2; plane++) {
cdef_strength *cdef = &deblock_data[sb_index[i]].cdef->plane[plane != 0];
cdef->level = (plane ? cdef_uv_strengths[selected_strength[i]] : cdef_strengths[selected_strength[i]]) >> 2;
cdef->sec_strength = (plane ? cdef_uv_strengths[selected_strength[i]] : cdef_strengths[selected_strength[i]]) & 3;
cdef_strength *cdef = &encoder_info->cdef[ci_index[i]].plane[plane != 0];
cdef->level = (plane ? uv_strengths[selected_strength[i]] : strengths[selected_strength[i]]) >> 2;
cdef->sec_strength = (plane ? uv_strengths[selected_strength[i]] : strengths[selected_strength[i]]) & 3;
cdef->pri_damping = cdef->sec_damping = encoder_info->cdef_damping;
}
}
@@ -431,7 +434,7 @@ int TEMPLATE(cdef_search)(yuv_frame_t *rec, yuv_frame_t *org, deblock_data_t *de
thor_free(src16);
thor_free(dst);
thor_free(sb_index);
thor_free(ci_index);
thor_free(selected_strength);
return nb_strength_bits;
@@ -718,9 +721,9 @@ void TEMPLATE(encode_frame)(encoder_info_t *encoder_info)
int cdef_bits = TEMPLATE(cdef_search)(encoder_info->rec, encoder_info->orig, encoder_info->deblock_data, frame_info, encoder_info, encoder_info->cdef_strengths, encoder_info->cdef_uv_strengths, encoder_info->params->cdef - 1);
// Apply the filter using the chosen strengths
TEMPLATE(cdef_frame)(encoder_info->rec, encoder_info->orig, encoder_info->deblock_data, stream, 0, encoder_info->params->bitdepth, 0);
TEMPLATE(cdef_frame)(encoder_info->rec, encoder_info->orig, encoder_info->deblock_data, stream, 0, encoder_info->params->bitdepth, 1);
TEMPLATE(cdef_frame)(encoder_info->rec, encoder_info->orig, encoder_info->deblock_data, stream, 0, encoder_info->params->bitdepth, 2);
TEMPLATE(cdef_frame)(encoder_info->cdef, encoder_info->rec, encoder_info->orig, encoder_info->deblock_data, stream, 0, encoder_info->params->bitdepth, 0);
TEMPLATE(cdef_frame)(encoder_info->cdef, encoder_info->rec, encoder_info->orig, encoder_info->deblock_data, stream, 0, encoder_info->params->bitdepth, 1);
TEMPLATE(cdef_frame)(encoder_info->cdef, encoder_info->rec, encoder_info->orig, encoder_info->deblock_data, stream, 0, encoder_info->params->bitdepth, 2);
// Modify the uncompressed header
stream_pos_t cur_stream_pos;

0 comments on commit 159803f

Please sign in to comment.