Skip to content

Commit

Permalink
Remove dead code
Browse files Browse the repository at this point in the history
Several giant blocks of code are never used. If they are still needed, we should gate them behind conditional compilation and build those configurations in CI to avoid bitrot.
  • Loading branch information
nyurik authored and danielrh committed Apr 25, 2024
1 parent d1f356c commit f53535a
Show file tree
Hide file tree
Showing 3 changed files with 66 additions and 286 deletions.
166 changes: 16 additions & 150 deletions src/enc/bit_cost.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@ use core::cmp::{max, min};
#[cfg(feature = "simd")]
use core::simd::prelude::SimdPartialOrd;

use super::util::{floatX, FastLog2, FastLog2u16};
use super::vectorization::{cast_f32_to_i32, cast_i32_to_f32, log2i, sum8, v256, v256i, Mem256i};
use super::util::{FastLog2, FastLog2u16};
use super::vectorization::Mem256i;

static kCopyBase: [u32; 24] = [
2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 14, 18, 22, 30, 38, 54, 70, 102, 134, 198, 326, 582, 1094, 2118,
Expand Down Expand Up @@ -79,167 +79,33 @@ fn CostComputation<T: SliceWrapper<Mem256i>>(
depth_histo: &mut [u32; BROTLI_CODE_LENGTH_CODES],
nnz_data: &T,
nnz: usize,
total_count: super::util::floatX,
_total_count: super::util::floatX,
log2total: super::util::floatX,
) -> super::util::floatX {
let mut bits: super::util::floatX = 0.0 as super::util::floatX;
if true {
let mut max_depth: usize = 1;
for i in 0..nnz {
// Compute -log2(P(symbol)) = -log2(count(symbol)/total_count) =
// = log2(total_count) - log2(count(symbol))
let element = nnz_data.slice()[i >> 3][i & 7];
let log2p = log2total - FastLog2u16(element as u16);
// Approximate the bit depth by round(-log2(P(symbol)))
let depth = min((log2p + 0.5) as u8, 15u8);
bits += element as super::util::floatX * log2p;
if (depth as usize > max_depth) {
max_depth = depth as usize;
}
depth_histo[depth as usize] += 1;
}

// Add the estimated encoding cost of the code length code histogram.
bits += (18 + 2 * max_depth) as super::util::floatX;
// Add the entropy of the code length code histogram.
bits += BitsEntropy(depth_histo, BROTLI_CODE_LENGTH_CODES);
//println_stderr!("{:?} {:?}", &depth_histo[..], bits);
return bits;
}
let rem = nnz & 7;
let nnz_srl_3 = nnz >> 3;
if true {
let mut vec_max_depth: [i32; 8] = [1; 8];
let mut depth_histo_vec = [[0i32; BROTLI_CODE_LENGTH_CODES]; 8];
for nnz_data_vec in nnz_data.slice().split_at(nnz_srl_3).0.iter() {
for i in 0..8 {
// Compute -log2(P(symbol)) = -log2(count(symbol)/total_count) =
// = log2(total_count) - log2(count(symbol))
let ele = nnz_data_vec[i];
let log2p = log2total - FastLog2u16(ele as u16);
// Approximate the bit depth by round(-log2(P(symbol)))
let depth = min((log2p + 0.5) as i32, 15) as i32;
bits += ele as super::util::floatX * log2p;
vec_max_depth[i] = max(vec_max_depth[i], depth);
depth_histo_vec[i][depth as usize] += 1;
}
}
let mut max_depth = vec_max_depth[7];
for i in 0..8 {
for j in 0..BROTLI_CODE_LENGTH_CODES {
depth_histo[j] += depth_histo_vec[i][j] as u32;
}
max_depth = max(vec_max_depth[i], max_depth);
}
if rem != 0 {
let last_vec = nnz_data.slice()[nnz_srl_3];
for i in 0..rem {
// remainder won't have last element for sure
let element = last_vec[i];
let log2p = log2total - FastLog2u16(element as u16);
// Approximate the bit depth by round(-log2(P(symbol)))
let depth = min((log2p + 0.5) as i32, 15);
bits += element as super::util::floatX * log2p;
max_depth = max(depth, max_depth);
depth_histo[depth as usize] += 1;
}
}
// Add the estimated encoding cost of the code length code histogram.
bits += (18 + 2 * max_depth) as super::util::floatX;
// Add the entropy of the code length code histogram.
bits += BitsEntropy(depth_histo, BROTLI_CODE_LENGTH_CODES);
//println_stderr!("{:?} {:?}", &depth_histo[..], bits);
return bits;
}
let pow2l: v256 = [
1.0/*0.7071067811865476*/ as floatX,
0.3535533905932738 as floatX,
0.1767766952966369 as floatX,
0.0883883476483184 as floatX,
0.0441941738241592 as floatX,
0.0220970869120796 as floatX,
0.0110485434560398 as floatX,
0.0055242717280199 as floatX,
]
.into();
let pow2h: v256 = [
//FIXME: setr
0.0027621358640100 as floatX,
0.0013810679320050 as floatX,
0.0006905339660025 as floatX,
0.0003452669830012 as floatX,
0.0001726334915006 as floatX,
0.0000863167457503 as floatX,
0.0000431583728752 as floatX,
/*0.0000215791864376f*/ 0.0 as floatX,
]
.into();
let ymm_tc = v256::splat(total_count as floatX);
let search_depthl = cast_f32_to_i32(pow2l * ymm_tc);
let search_depthh = cast_f32_to_i32(pow2h * ymm_tc);
let mut suml = v256i::splat(0);
let mut sumh = v256i::splat(0);
for nnz_data_vec in nnz_data.slice().split_at(nnz_srl_3).0.iter() {
for sub_data_item_index in 0..8 {
let count = v256i::splat(nnz_data_vec[sub_data_item_index]);
let cmpl: v256i = count.simd_gt(search_depthl).to_int();
let cmph: v256i = count.simd_gt(search_depthh).to_int();
suml = suml + cmpl;
sumh = sumh + cmph;
}
}
if rem != 0 {
let last_element = nnz_data.slice()[nnz >> 3];
for sub_index in 0..rem {
let count = v256i::splat(last_element[sub_index & 7]);
let cmpl: v256i = count.simd_gt(search_depthl).to_int();
let cmph: v256i = count.simd_gt(search_depthh).to_int();
suml = suml + cmpl;
sumh = sumh + cmph;
}
}
let mut max_depth: usize = 1;
// Deal with depth_histo and max_depth
{
let cumulative_sum: [Mem256i; 2] = [suml, sumh];
let mut prev = cumulative_sum[0][0];
for j in 1..16 {
let cur = cumulative_sum[(j & 8) >> 3][j & 7];
let delta = cur - prev;
prev = cur;
let cur = &mut depth_histo[j];
*cur = (*cur as i32 + delta) as u32; // depth_histo[j] += delta
if delta != 0 {
max_depth = j;
}
}
}
let ymm_log2total = v256::splat(log2total);
let mut bits_cumulative = v256::splat(0.0 as floatX);
for nnz_data_item in nnz_data.slice().split_at(nnz_srl_3).0.iter() {
let counts = cast_i32_to_f32(*nnz_data_item);
let log_counts = log2i(*nnz_data_item);
let log2p = ymm_log2total - log_counts;
let tmp = counts * log2p;
bits_cumulative += tmp;
}
bits += sum8(bits_cumulative);
if rem != 0 {
let last_vec = nnz_data.slice()[nnz_srl_3];
for i in 0..rem {
let last_item = last_vec[i];
let log2p = log2total - FastLog2u16(last_item as u16);
bits += last_item as super::util::floatX * log2p;
for i in 0..nnz {
// Compute -log2(P(symbol)) = -log2(count(symbol)/total_count) =
// = log2(total_count) - log2(count(symbol))
let element = nnz_data.slice()[i >> 3][i & 7];
let log2p = log2total - FastLog2u16(element as u16);
// Approximate the bit depth by round(-log2(P(symbol)))
let depth = min((log2p + 0.5) as u8, 15u8);
bits += element as super::util::floatX * log2p;
if (depth as usize > max_depth) {
max_depth = depth as usize;
}
depth_histo[depth as usize] += 1;
}

// Add the estimated encoding cost of the code length code histogram.
bits += (18 + 2 * max_depth) as super::util::floatX;
// Add the entropy of the code length code histogram.
bits += BitsEntropy(depth_histo, BROTLI_CODE_LENGTH_CODES);
//println_stderr!("{:?} {:?}", depth_histo, bits);
//println_stderr!("{:?} {:?}", &depth_histo[..], bits);
bits
}

use alloc::SliceWrapperMut;

pub fn BrotliPopulationCost<HistogramType: SliceWrapper<u32> + CostAccessors>(
Expand Down
120 changes: 35 additions & 85 deletions src/enc/block_splitter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -53,40 +53,6 @@ fn update_cost_and_signal(
cost: &mut [Mem256f],
switch_signal: &mut [u8],
) {
if (false) {
// scalar mode
for k in 0..((num_histograms32 as usize + 7) >> 3 << 3) {
cost[k >> 3][k & 7] -= min_cost;
if (cost[k >> 3][k & 7] >= block_switch_cost) {
let mask = (1_u8 << (k & 7));
cost[k >> 3][k & 7] = block_switch_cost;
switch_signal[ix + (k >> 3)] |= mask;
}
}
return;
}
if (false) {
// scalar mode

for k in 0..((num_histograms32 as usize + 7) >> 3 << 3) {
cost[k >> 3][k & 7] -= min_cost;
let cmpge = if (cost[k >> 3][k & 7] >= block_switch_cost) {
0xff
} else {
0
};
let mask = (1_u8 << (k & 7));
let bits = cmpge & mask;
if block_switch_cost < cost[k >> 3][k & 7] {
cost[k >> 3][k & 7] = block_switch_cost;
}
switch_signal[ix + (k >> 3)] |= bits;
//if (((k + 1)>> 3) != (k >>3)) {
// println_stderr!("{:} ss {:} c {:?}", k, switch_signal[ix + (k >> 3)],cost[k>>3]);
//}
}
return;
}
let ymm_min_cost = v256::splat(min_cost);
let ymm_block_switch_cost = v256::splat(block_switch_cost);
let ymm_and_mask = v256i::from([
Expand Down Expand Up @@ -311,60 +277,44 @@ where
u64::from(data_byte_ix.clone()).wrapping_mul(num_histograms as u64) as usize;
let mut min_cost: super::util::floatX = 1e38 as super::util::floatX;
let mut block_switch_cost: super::util::floatX = block_switch_bitcost;
if false {
// nonvectorized version: same code below
for (k, insert_cost_iter) in insert_cost
[insert_cost_ix..(insert_cost_ix + num_histograms)]
.iter()
.enumerate()
{
let cost_iter = &mut cost[(k >> 3)][k & 7];
*cost_iter += *insert_cost_iter;
if *cost_iter < min_cost {
min_cost = *cost_iter;
*block_id_ptr = k as u8;
}
}
} else {
// main (vectorized) loop
let insert_cost_slice = insert_cost.split_at(insert_cost_ix).1;
for (v_index, cost_iter) in cost
.split_at_mut(num_histograms >> 3)
.0
.iter_mut()
.enumerate()
{
let base_index = v_index << 3;
let mut local_insert_cost = [0.0 as super::util::floatX; 8];
local_insert_cost
.clone_from_slice(insert_cost_slice.split_at(base_index).1.split_at(8).0);
for sub_index in 0usize..8usize {
cost_iter[sub_index] += local_insert_cost[sub_index];
let final_cost = cost_iter[sub_index];
if final_cost < min_cost {
min_cost = final_cost;
*block_id_ptr = (base_index + sub_index) as u8;
}
// main (vectorized) loop
let insert_cost_slice = insert_cost.split_at(insert_cost_ix).1;
for (v_index, cost_iter) in cost
.split_at_mut(num_histograms >> 3)
.0
.iter_mut()
.enumerate()
{
let base_index = v_index << 3;
let mut local_insert_cost = [0.0 as super::util::floatX; 8];
local_insert_cost
.clone_from_slice(insert_cost_slice.split_at(base_index).1.split_at(8).0);
for sub_index in 0usize..8usize {
cost_iter[sub_index] += local_insert_cost[sub_index];
let final_cost = cost_iter[sub_index];
if final_cost < min_cost {
min_cost = final_cost;
*block_id_ptr = (base_index + sub_index) as u8;
}
}
let vectorized_offset = ((num_histograms >> 3) << 3);
let mut k = vectorized_offset;
//remainder loop for
for insert_cost_iter in insert_cost
.split_at(insert_cost_ix + vectorized_offset)
.1
.split_at(num_histograms & 7)
.0
.iter()
{
let cost_iter = &mut cost[(k >> 3)];
cost_iter[k & 7] += *insert_cost_iter;
if cost_iter[k & 7] < min_cost {
min_cost = cost_iter[k & 7];
*block_id_ptr = k as u8;
}
k += 1;
}
let vectorized_offset = ((num_histograms >> 3) << 3);
let mut k = vectorized_offset;
//remainder loop for
for insert_cost_iter in insert_cost
.split_at(insert_cost_ix + vectorized_offset)
.1
.split_at(num_histograms & 7)
.0
.iter()
{
let cost_iter = &mut cost[(k >> 3)];
cost_iter[k & 7] += *insert_cost_iter;
if cost_iter[k & 7] < min_cost {
min_cost = cost_iter[k & 7];
*block_id_ptr = k as u8;
}
k += 1;
}
if byte_ix < 2000usize {
block_switch_cost *= (0.77 as super::util::floatX
Expand Down

0 comments on commit f53535a

Please sign in to comment.