Skip to content

Commit

Permalink
Finally fix the horrible backsub bug
Browse files Browse the repository at this point in the history
  • Loading branch information
catid committed Mar 16, 2012
1 parent 03920d0 commit fbd8fb7
Show file tree
Hide file tree
Showing 3 changed files with 82 additions and 79 deletions.
6 changes: 4 additions & 2 deletions Tester.cpp
Expand Up @@ -176,8 +176,10 @@ int main()
++drop_seed;
}

double avg_time = time_sum/trials;
cout << "N=" << decoder.BlockCount() << " decoder.Decode in " << avg_time << " usec, " << message_bytes / avg_time << " MB/s. Average overhead = " << overhead_sum / (double)overhead_trials << endl;
double avg_time = time_sum / trials;
double avg_overhead = overhead_sum / (double)overhead_trials;
double avg_bytes = message_bytes * (decoder.BlockCount() + avg_overhead) / (double)decoder.BlockCount() - message_bytes;
cout << "N=" << decoder.BlockCount() << " decoder.Decode in " << avg_time << " usec, " << message_bytes / avg_time << " MB/s. Average overhead = " << avg_overhead << " (" << avg_bytes << " bytes)" << endl;
}

m_clock.OnFinalize();
Expand Down
153 changes: 77 additions & 76 deletions codec_source/Wirehair.cpp
Expand Up @@ -3070,6 +3070,7 @@ void Codec::BackSubstituteAboveDiagonal()
const int pivot_count = _defer_count + _mix_count;
int pivot_i = pivot_count - 1;
const u16 first_heavy_row = _defer_count + _dense_count;
const u16 first_heavy_column = _first_heavy_column;

#if defined(CAT_WINDOWED_BACKSUB)
// Build temporary storage space if windowing is to be used
Expand Down Expand Up @@ -3144,11 +3145,11 @@ void Codec::BackSubstituteAboveDiagonal()

// If diagonal element is heavy,
u16 ge_row_i = _pivots[src_pivot_i];
if (ge_row_i >= first_heavy_row && src_pivot_i >= _first_heavy_column)
if (ge_row_i >= first_heavy_row && src_pivot_i >= first_heavy_column)
{
// Look up row value
u16 heavy_row_i = ge_row_i - first_heavy_row;
u16 heavy_col_i = src_pivot_i - _first_heavy_column;
u16 heavy_col_i = src_pivot_i - first_heavy_column;
u8 code_value = _heavy_matrix[_heavy_pitch * heavy_row_i + heavy_col_i];

// Normalize code value, setting it to 1 (implicitly nonzero)
Expand All @@ -3163,35 +3164,35 @@ void Codec::BackSubstituteAboveDiagonal()

CAT_IF_DUMP(cout << "Back-substituting small triangle from pivot " << src_pivot_i << "[" << (int)src[0] << "] :";)

// For each row in the upper triangle,
// For each row above the diagonal,
u64 *ge_row = _ge_matrix + (src_pivot_i >> 6);
for (int dest_pivot_i = backsub_i; dest_pivot_i < src_pivot_i; ++dest_pivot_i)
{
// If row is heavy,
u16 dest_row_i = _pivots[dest_pivot_i];
if (dest_row_i >= first_heavy_row && dest_pivot_i >= _first_heavy_column)
if (dest_row_i >= first_heavy_row && src_pivot_i >= first_heavy_column)
{
// If column is zero,
u16 heavy_row_i = dest_row_i - first_heavy_row;
u16 heavy_col_i = dest_pivot_i - _first_heavy_column;
u16 heavy_col_i = src_pivot_i - first_heavy_column;
u8 code_value = _heavy_matrix[_heavy_pitch * heavy_row_i + heavy_col_i];
if (!code_value) continue; // Skip it

// Back-substitute
u8 *dest = _recovery_blocks + _block_bytes * _ge_col_map[dest_pivot_i];
if (code_value == 1)
if (code_value != 1)
{
memxor(dest, src, _block_bytes);
CAT_IF_ROWOP(++rowops;)
GF256MemMulAdd(dest, code_value, src, _block_bytes);
CAT_IF_ROWOP(++heavyops;)

CAT_IF_DUMP(cout << " *" << dest_pivot_i;)
CAT_IF_DUMP(cout << " h" << dest_pivot_i;)
}
else
{
GF256MemMulAdd(dest, code_value, src, _block_bytes);
CAT_IF_ROWOP(++heavyops;)
memxor(dest, src, _block_bytes);
CAT_IF_ROWOP(++rowops;)

CAT_IF_DUMP(cout << " h" << dest_pivot_i;)
CAT_IF_DUMP(cout << " *" << dest_pivot_i;)
}
}
else
Expand All @@ -3212,13 +3213,13 @@ void Codec::BackSubstituteAboveDiagonal()
CAT_IF_DUMP(cout << endl;)
} // next pivot

// If pivot row is heavy,
// Normalize the final diagonal element
u16 ge_row_i = _pivots[backsub_i];
if (ge_row_i >= first_heavy_row && backsub_i >= _first_heavy_column)
if (ge_row_i >= first_heavy_row && backsub_i >= first_heavy_column)
{
// Look up row value
u16 heavy_row_i = ge_row_i - first_heavy_row;
u16 heavy_col_i = backsub_i - _first_heavy_column;
u16 heavy_col_i = backsub_i - first_heavy_column;
u8 code_value = _heavy_matrix[_heavy_pitch * heavy_row_i + heavy_col_i];

// Divide by this code value (implicitly nonzero)
Expand All @@ -3230,7 +3231,51 @@ void Codec::BackSubstituteAboveDiagonal()
}
}

const u16 first_heavy_column = _first_heavy_column;
CAT_IF_DUMP(cout << "-- Generating window table with " << w << " bits" << endl;)

// Generate window table: 2 bits
win_table[1] = _recovery_blocks + _block_bytes * _ge_col_map[backsub_i];
win_table[2] = _recovery_blocks + _block_bytes * _ge_col_map[backsub_i + 1];
memxor_set(win_table[3], win_table[1], win_table[2], _block_bytes);
CAT_IF_ROWOP(++rowops;)

// Generate window table: 3 bits
win_table[4] = _recovery_blocks + _block_bytes * _ge_col_map[backsub_i + 2];
memxor_set(win_table[5], win_table[1], win_table[4], _block_bytes);
memxor_set(win_table[6], win_table[2], win_table[4], _block_bytes);
memxor_set(win_table[7], win_table[1], win_table[6], _block_bytes);
CAT_IF_ROWOP(rowops += 3;)

// Generate window table: 4 bits
win_table[8] = _recovery_blocks + _block_bytes * _ge_col_map[backsub_i + 3];
for (int ii = 1; ii < 8; ++ii)
memxor_set(win_table[8 + ii], win_table[ii], win_table[8], _block_bytes);
CAT_IF_ROWOP(rowops += 7;)

// Generate window table: 5+ bits
if (w >= 5)
{
win_table[16] = _recovery_blocks + _block_bytes * _ge_col_map[backsub_i + 4];
for (int ii = 1; ii < 16; ++ii)
memxor_set(win_table[16 + ii], win_table[ii], win_table[16], _block_bytes);
CAT_IF_ROWOP(rowops += 15;)

if (w >= 6)
{
win_table[32] = _recovery_blocks + _block_bytes * _ge_col_map[backsub_i + 5];
for (int ii = 1; ii < 32; ++ii)
memxor_set(win_table[32 + ii], win_table[ii], win_table[32], _block_bytes);
CAT_IF_ROWOP(rowops += 31;)

if (w >= 7)
{
win_table[64] = _recovery_blocks + _block_bytes * _ge_col_map[backsub_i + 6];
for (int ii = 1; ii < 64; ++ii)
memxor_set(win_table[64 + ii], win_table[ii], win_table[64], _block_bytes);
CAT_IF_ROWOP(rowops += 63;)
}
}
}

// If a row above the window may be heavy,
if (pivot_i >= first_heavy_column)
Expand Down Expand Up @@ -3266,8 +3311,8 @@ void Codec::BackSubstituteAboveDiagonal()
}

// For each heavy column,
u16 heavy_col_j = ge_column_j - first_heavy_column;
u16 heavy_row_i = ge_row_i - first_heavy_row;
u16 heavy_col_j = ge_column_j - first_heavy_column;
u8 *heavy_row = &_heavy_matrix[_heavy_pitch * heavy_row_i + heavy_col_j];
for (; ge_column_j <= pivot_i; ++ge_column_j)
{
Expand All @@ -3291,52 +3336,6 @@ void Codec::BackSubstituteAboveDiagonal()
} // next pivot in window
} // end if contains heavy

CAT_IF_DUMP(cout << "-- Generating window table with " << w << " bits" << endl;)

// Generate window table: 2 bits
win_table[1] = _recovery_blocks + _block_bytes * _ge_col_map[backsub_i];
win_table[2] = _recovery_blocks + _block_bytes * _ge_col_map[backsub_i + 1];
memxor_set(win_table[3], win_table[1], win_table[2], _block_bytes);
CAT_IF_ROWOP(++rowops;)

// Generate window table: 3 bits
win_table[4] = _recovery_blocks + _block_bytes * _ge_col_map[backsub_i + 2];
memxor_set(win_table[5], win_table[1], win_table[4], _block_bytes);
memxor_set(win_table[6], win_table[2], win_table[4], _block_bytes);
memxor_set(win_table[7], win_table[1], win_table[6], _block_bytes);
CAT_IF_ROWOP(rowops += 3;)

// Generate window table: 4 bits
win_table[8] = _recovery_blocks + _block_bytes * _ge_col_map[backsub_i + 3];
for (int ii = 1; ii < 8; ++ii)
memxor_set(win_table[8 + ii], win_table[ii], win_table[8], _block_bytes);
CAT_IF_ROWOP(rowops += 7;)

// Generate window table: 5+ bits
if (w >= 5)
{
win_table[16] = _recovery_blocks + _block_bytes * _ge_col_map[backsub_i + 4];
for (int ii = 1; ii < 16; ++ii)
memxor_set(win_table[16 + ii], win_table[ii], win_table[16], _block_bytes);
CAT_IF_ROWOP(rowops += 15;)

if (w >= 6)
{
win_table[32] = _recovery_blocks + _block_bytes * _ge_col_map[backsub_i + 5];
for (int ii = 1; ii < 32; ++ii)
memxor_set(win_table[32 + ii], win_table[ii], win_table[32], _block_bytes);
CAT_IF_ROWOP(rowops += 31;)

if (w >= 7)
{
win_table[64] = _recovery_blocks + _block_bytes * _ge_col_map[backsub_i + 6];
for (int ii = 1; ii < 64; ++ii)
memxor_set(win_table[64 + ii], win_table[ii], win_table[64], _block_bytes);
CAT_IF_ROWOP(rowops += 63;)
}
}
}

// Only add window table entries for rows under this limit
u16 window_row_limit = (pivot_i >= first_heavy_column) ? first_heavy_row : 0x7fff;

Expand Down Expand Up @@ -3365,7 +3364,8 @@ void Codec::BackSubstituteAboveDiagonal()
CAT_IF_DUMP(cout << "Adding window table " << win_bits << " to pivot " << above_pivot_i << endl;)

// Back-substitute
memxor(_recovery_blocks + _block_bytes * _ge_col_map[above_pivot_i], win_table[win_bits], _block_bytes);
u8 *dest = _recovery_blocks + _block_bytes * _ge_col_map[above_pivot_i];
memxor(dest, win_table[win_bits], _block_bytes);
CAT_IF_ROWOP(++rowops;)
}
}
Expand All @@ -3392,7 +3392,8 @@ void Codec::BackSubstituteAboveDiagonal()
CAT_IF_DUMP(cout << "Adding window table " << win_bits << " to pivot " << above_pivot_i << endl;)

// Back-substitute
memxor(_recovery_blocks + _block_bytes * _ge_col_map[above_pivot_i], win_table[win_bits], _block_bytes);
u8 *dest = _recovery_blocks + _block_bytes * _ge_col_map[above_pivot_i];
memxor(dest, win_table[win_bits], _block_bytes);
CAT_IF_ROWOP(++rowops;)
}
}
Expand Down Expand Up @@ -3435,11 +3436,11 @@ void Codec::BackSubstituteAboveDiagonal()

// If diagonal element is heavy,
u16 ge_row_i = _pivots[pivot_i];
if (ge_row_i >= first_heavy_row && pivot_i >= _first_heavy_column)
if (ge_row_i >= first_heavy_row && pivot_i >= first_heavy_column)
{
// Look up row value
u16 heavy_row_i = ge_row_i - first_heavy_row;
u16 heavy_col_i = pivot_i - _first_heavy_column;
u16 heavy_col_i = pivot_i - first_heavy_column;
u8 code_value = _heavy_matrix[_heavy_pitch * heavy_row_i + heavy_col_i];

// Normalize code value, setting it to 1 (implicitly nonzero)
Expand All @@ -3460,29 +3461,29 @@ void Codec::BackSubstituteAboveDiagonal()
{
// If element is heavy,
u16 up_row_i = _pivots[ge_up_i];
if (up_row_i >= first_heavy_row && ge_up_i >= _first_heavy_column)
if (up_row_i >= first_heavy_row && ge_up_i >= first_heavy_column)
{
// If column is zero,
u16 heavy_row_i = up_row_i - first_heavy_row;
u16 heavy_col_i = pivot_i - _first_heavy_column;
u16 heavy_col_i = pivot_i - first_heavy_column;
u8 code_value = _heavy_matrix[_heavy_pitch * heavy_row_i + heavy_col_i];
if (!code_value) continue; // Skip it

// Back-substitute
u8 *dest = _recovery_blocks + _block_bytes * _ge_col_map[ge_up_i];
if (code_value == 1)
if (code_value != 1)
{
memxor(dest, src, _block_bytes);
CAT_IF_ROWOP(++rowops;)
GF256MemMulAdd(dest, code_value, src, _block_bytes);
CAT_IF_ROWOP(++heavyops;)

CAT_IF_DUMP(cout << " *" << up_row_i;)
CAT_IF_DUMP(cout << " h" << up_row_i;)
}
else
{
GF256MemMulAdd(dest, code_value, src, _block_bytes);
CAT_IF_ROWOP(++heavyops;)
memxor(dest, src, _block_bytes);
CAT_IF_ROWOP(++rowops;)

CAT_IF_DUMP(cout << " h" << up_row_i;)
CAT_IF_DUMP(cout << " *" << up_row_i;)
}
}
else
Expand Down
2 changes: 1 addition & 1 deletion codec_source/WirehairDetails.hpp
Expand Up @@ -47,7 +47,7 @@
// Optimization options:
#define CAT_COPY_FIRST_N /* Copy the first N rows from the input (faster) */
#define CAT_HEAVY_WIN_MULT /* Use 4-bit table and multiplication optimization (faster) */
//#define CAT_WINDOWED_BACKSUB /* Use window optimization for back-substitution (faster) */
#define CAT_WINDOWED_BACKSUB /* Use window optimization for back-substitution (faster) */

// Heavy rows:
#define CAT_HEAVY_ROWS 6 /* Number of heavy rows to add - Tune for desired overhead / performance trade-off */
Expand Down

0 comments on commit fbd8fb7

Please sign in to comment.