Skip to content

Commit

Permalink
remove chunked output for now (#132)
Browse files Browse the repository at this point in the history
It adds too much complexity. I want to focus on the basics.

Change-Id: I45c0da75d8a3c81eb20a982c8ccba6a1c87b2374
  • Loading branch information
garymm committed Mar 10, 2024
1 parent e9ddce1 commit 4c37384
Show file tree
Hide file tree
Showing 4 changed files with 45 additions and 61 deletions.
14 changes: 10 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,15 +42,21 @@ Otherwise, copy the clangd args from the [.vscode/settings.json](.vscode/setting

### TODO

* Get SYCL building with bazel. Already have OpenSYCL building for CPU only [here](https://github.com/garymm/xpu).
Would be nicer to use [intel's LLVM](https://github.com/intel/llvm) which supports lots of GPUs.
* (maybe?) Implement LZ77 with C++ std lib.
#### Basic

* Implement Deflate decompression with C++ std lib.
* Port Deflate to SYCL.
* Benchmark it on CPU.
* Build system work to get it to run on GPU.
* Port Deflate to GPU.
* Benchmark it on GPU.

#### Nice to have

* Support chunked output. Started in
[2e6a83d622a](https://github.com/garymm/starflate/commit/2e6a83d622a0bbe6b65c757199b64511156b516c),
but removed because it was adding too much complexity and I wanted to focus on getting the
basics working.

## References

* [DEFLATE Compressed Data Format Specification version 1.3](https://tools.ietf.org/html/rfc1951)
Expand Down
24 changes: 12 additions & 12 deletions src/decompress.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,16 +15,16 @@ auto valid(BlockType type) -> bool
}

auto read_header(huffman::bit_span& compressed_bits)
-> std::expected<BlockHeader, DecompressError>
-> std::expected<BlockHeader, DecompressStatus>
{
if (std::ranges::size(compressed_bits) < 3) {
return std::unexpected{DecompressError::InvalidBlockHeader};
return std::unexpected{DecompressStatus::InvalidBlockHeader};
}
auto type = static_cast<BlockType>(
std::uint8_t{static_cast<bool>(compressed_bits[1])} |
(std::uint8_t{static_cast<bool>(compressed_bits[2])} << 1));
if (not valid(type)) {
return std::unexpected{DecompressError::InvalidBlockHeader};
return std::unexpected{DecompressStatus::InvalidBlockHeader};
}
const bool final{static_cast<bool>(compressed_bits[0])};
compressed_bits.consume(3);
Expand All @@ -34,16 +34,16 @@ auto read_header(huffman::bit_span& compressed_bits)
} // namespace detail

auto decompress(std::span<const std::byte> src, std::span<std::byte> dst)
-> std::expected<DecompressResult, DecompressError>
-> DecompressStatus
{
using enum detail::BlockType;

huffman::bit_span src_bits{src};
std::size_t dst_written{};
// std::size_t dst_written{};
for (bool was_final = false; not was_final;) {
const auto header = detail::read_header(src_bits);
if (not header) {
return std::unexpected{header.error()};
return header.error();
}
was_final = header->final;
if (header->type == NoCompression) { // no compression
Expand All @@ -52,32 +52,32 @@ auto decompress(std::span<const std::byte> src, std::span<std::byte> dst)
const std::uint16_t len = src_bits.pop_16();
const std::uint16_t nlen = src_bits.pop_16();
if (len != static_cast<std::uint16_t>(~nlen)) {
return std::unexpected{DecompressError::NoCompressionLenMismatch};
return DecompressStatus::NoCompressionLenMismatch;
}
// TODO: should we return an error instead of assert?
assert(
std::cmp_greater_equal(
src_bits.size(), std::size_t{len} * CHAR_BIT) and
"not enough bits in src");

if (std::ranges::size(dst) < len) {
return DecompressResult{src, dst_written, len};
if (dst.size() < len) {
return DecompressStatus::DstTooSmall;
}

std::copy_n(src_bits.byte_data(), len, dst.begin());
src_bits.consume(CHAR_BIT * len);
dst = dst.subspan(len);
dst_written += len;
// dst_written += len;
} else {
// TODO: implement
return std::unexpected{DecompressError::Error};
return DecompressStatus::Error;
}
const auto distance =
std::distance(std::ranges::data(src), src_bits.byte_data());
assert(distance >= 0 and "distance must be positive");
src = src.subspan(static_cast<size_t>(distance));
}
return DecompressResult{src, dst_written, 0};
return DecompressStatus::Success;
}

} // namespace starflate
26 changes: 7 additions & 19 deletions src/decompress.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,13 @@
namespace starflate {

// status code enum (Success plus the individual failure reasons)
enum class DecompressError : std::uint8_t
enum class DecompressStatus : std::uint8_t
{
Error,
Success,
Error, // TODO: remove
InvalidBlockHeader,
NoCompressionLenMismatch,
DstTooSmall,
};

namespace detail {
Expand All @@ -33,31 +35,17 @@ struct BlockHeader
};

auto read_header(huffman::bit_span& compressed_bits)
-> std::expected<BlockHeader, DecompressError>;
-> std::expected<BlockHeader, DecompressStatus>;
} // namespace detail

/// The result of decompress.
///
struct DecompressResult
{
std::span<const std::byte> remaining_src; ///< Remaining source data after
///< decompression.
std::size_t dst_written; ///< Number of bytes written to dst.
std::size_t min_next_dst_size; ///< Minimum number of bytes required in dst
///< for the next decompression. This is only
///< enough space for decompression of a
///< single block
};

/// Decompresses the given source data into the destination buffer.
///
/// @param src The source data to decompress.
/// @param dst The destination buffer to store the decompressed data.
/// @return An expected value containing the decompression result if successful,
/// or an error code if failed.
/// @return A status code indicating the result of the decompression.
///
auto decompress(std::span<const std::byte> src, std::span<std::byte> dst)
-> std::expected<DecompressResult, DecompressError>;
-> DecompressStatus;

template <std::ranges::contiguous_range R>
requires std::same_as<std::ranges::range_value_t<R>, std::byte>
Expand Down
42 changes: 16 additions & 26 deletions src/test/decompress_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,12 +57,12 @@ auto main(int, char* argv[]) -> int
test("read_header") = [] -> void {
huffman::bit_span empty{nullptr, 0, 0};
expect(detail::read_header(empty).error() ==
DecompressError::InvalidBlockHeader);
DecompressStatus::InvalidBlockHeader);

constexpr auto bad_block_type = huffman::byte_array(0b111);
huffman::bit_span bad_block_type_span{bad_block_type};
expect(detail::read_header(bad_block_type_span).error() ==
DecompressError::InvalidBlockHeader);
DecompressStatus::InvalidBlockHeader);

constexpr auto fixed = huffman::byte_array(0b010);
huffman::bit_span fixed_span{fixed};
Expand All @@ -84,9 +84,9 @@ auto main(int, char* argv[]) -> int
};

test("decompress invalid header") = [] -> void {
const auto result =
const auto status =
decompress(std::span<const std::byte>{}, std::span<std::byte>{});
expect(result.error() == DecompressError::InvalidBlockHeader);
expect(status == DecompressStatus::InvalidBlockHeader);
};

test("no compression") = [] {
Expand All @@ -108,31 +108,21 @@ auto main(int, char* argv[]) -> int
'b',
'u',
'd');
std::span<const std::byte> src{compressed};
const std::span<const std::byte> src{compressed};

constexpr auto expected_0 = huffman::byte_array('r', 'o', 's', 'e');
constexpr auto expected_1 = huffman::byte_array('b', 'u', 'd');
const std::array<std::span<const std::byte>, 2> expecteds{
expected_0, expected_1};
constexpr auto expected =
huffman::byte_array('r', 'o', 's', 'e', 'b', 'u', 'd');

std::array<std::byte, expected.size()> dst_array{};
const std::span<std::byte> dst_too_small{
dst_array.data(), dst_array.size() - 1};
const auto status_too_small = decompress(src, dst_too_small);
expect(status_too_small == DecompressStatus::DstTooSmall);

std::array<std::byte, 4> dst_array{};
const std::span<std::byte> dst{dst_array};
for (std::size_t i = 0; i < expecteds.size(); ++i) {
const auto result = decompress(src, dst);
expect(result.has_value())
<< "got error code: " << static_cast<std::int32_t>(result.error());
if (i == 0) {
expect(not result->remaining_src.empty());
expect(result->min_next_dst_size == expecteds.at(1).size());
} else {
expect(result->remaining_src.empty());
expect(result->min_next_dst_size == 0);
}
const auto expected = expecteds.at(i);
expect(result->dst_written == expected.size());
expect(std::ranges::equal(dst.subspan(0, expected.size()), expected));
src = result->remaining_src;
}
const auto status = decompress(src, dst);
expect(status == DecompressStatus::Success);
expect(std::ranges::equal(dst, expected));
};

test("fixed huffman") = [argv] {
Expand Down

0 comments on commit 4c37384

Please sign in to comment.