Skip to content

Commit

Permalink
libtrellis: Add experimental support for packing compressed bitstreams
Browse files Browse the repository at this point in the history
Signed-off-by: David Shah <dave@ds0.me>
  • Loading branch information
gatecat committed Nov 7, 2019
1 parent 9129ca0 commit dc10b8a
Show file tree
Hide file tree
Showing 2 changed files with 165 additions and 22 deletions.
178 changes: 158 additions & 20 deletions libtrellis/src/Bitstream.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
#include <iomanip>
#include <fstream>
#include <array>

#include <queue>
namespace Trellis {

static const uint16_t CRC16_POLY = 0x8005;
Expand Down Expand Up @@ -175,6 +175,122 @@ class BitstreamReadWriter {
// if remaining bits > 0 they are just padding bits added to the end so we can ignore them
}

// Return the index (0 = least significant, 7 = most significant) of the single
// set bit in a one-hot byte. Yields -1 when the byte is zero or has more than
// one bit set.
int decode_onehot(uint8_t in) {
    for (int bit = 0; bit < 8; bit++) {
        // Compare against each of the eight possible one-hot patterns in turn
        if (in == uint8_t(1u << bit))
            return bit;
    }
    return -1;
}

// Write every frame in frames_in using the compressed frame encoding.
//
// Output order:
//   1. LSC_WRITE_COMP_DIC, insert_zeros(3), then the 8 dictionary bytes
//      (entry 7 first, entry 0 last).
//   2. LSC_PROG_INCR_CMP, 0x91, then the frame count as a big-endian 16-bit
//      value (only the low 16 bits of frames_in.size() are kept).
//   3. For each frame: a bit-packed body flushed to a byte boundary, followed
//      by insert_crc16() and a 0xFF byte.
//
// Per-byte codes, emitted MSB first:
//   0            -> zero byte
//   100xxx       -> byte with only bit xxx set
//   101xxx       -> dictionary entry xxx
//   11xxxxxxxx   -> literal byte
void write_compressed_frames(const std::vector<std::vector<uint8_t>> &frames_in) {
    // Tally how often each byte value occurs across all frames
    int byte_counts[256] = {0};
    for (const auto &frame : frames_in)
        for (uint8_t value : frame)
            byte_counts[value]++;

    // Rank dictionary candidates by (count, value). Zero and one-hot bytes
    // already have dedicated short codes, so they never enter the dictionary.
    std::priority_queue<std::pair<int, uint8_t>> ranked;
    for (int value = 1; value < 256; value++) {
        if (decode_onehot(uint8_t(value)) != -1)
            continue;
        ranked.emplace(byte_counts[value], uint8_t(value));
    }

    // Take the 8 best candidates and build a reverse map (byte -> dictionary
    // slot, -1 if absent). Each byte value is queued at most once, so the
    // entries are distinct and the map matches a first-match linear search.
    uint8_t dict_entries[8];
    int dict_slot[256];
    for (int value = 0; value < 256; value++)
        dict_slot[value] = -1;
    for (int slot = 0; slot < 8; slot++) {
        dict_entries[slot] = ranked.top().second;
        ranked.pop();
        dict_slot[dict_entries[slot]] = slot;
    }

    // Dictionary command: entries are written from slot 7 down to slot 0
    write_byte(uint8_t(BitstreamCommand::LSC_WRITE_COMP_DIC));
    insert_zeros(3);
    for (int slot = 7; slot >= 0; slot--)
        write_byte(dict_entries[slot]);

    // Compressed data command header
    write_byte(uint8_t(BitstreamCommand::LSC_PROG_INCR_CMP));
    write_byte(0x91); //CRC check, 1 dummy byte
    const uint16_t frame_count = uint16_t(frames_in.size());
    write_byte(uint8_t((frame_count >> 8) & 0xFF));
    write_byte(uint8_t(frame_count & 0xFF));

    // MSB-first bit writer: bits are shifted in from the right, and a partial
    // byte is left-aligned (zero padded on the right) when flushed.
    uint8_t pending = 0;
    int pending_count = 0;
    auto flush_bits = [&]() {
        if (pending_count == 0)
            return;
        write_byte(uint8_t(pending << (8 - pending_count)));
        pending = 0;
        pending_count = 0;
    };
    auto add_bit = [&](bool bit) {
        pending = uint8_t((pending << 1) | (bit ? 1 : 0));
        if (++pending_count == 8)
            flush_bits();
    };
    auto add_bits = [&](uint32_t value, int len) {
        for (int pos = len - 1; pos >= 0; pos--)
            add_bit(((value >> pos) & 1u) != 0);
    };

    for (const auto &frame : frames_in) {
        // Pad the frame to a multiple of 8 bytes (64 bits) with leading zero
        // bytes; each zero byte costs a single 0 bit in the compressed stream.
        const int leftover = int(frame.size()) % 8;
        const int pad_bits = (leftover == 0) ? 0 : (8 - leftover);
        for (int k = 0; k < pad_bits; k++)
            add_bit(false);

        for (uint8_t value : frame) {
            const int onehot_bit = decode_onehot(value);
            const int slot = dict_slot[value];
            if (value == 0) {
                add_bit(false); // 0 bit -> 0 byte
            } else if (onehot_bit != -1) {
                add_bits((0b100u << 3) | uint32_t(onehot_bit), 6); // 0b100xxx -> only bit xxx set
            } else if (slot != -1) {
                add_bits((0b101u << 3) | uint32_t(slot), 6); // 0b101xxx -> dictionary entry xxx
            } else {
                add_bits((0b11u << 8) | value, 10); // 0b11xxxxxxxx -> literal byte
            }
        }

        // Byte-align the compressed frame before the trailer
        flush_bits();
        // Post-frame CRC and 0xFF byte
        insert_crc16();
        write_byte(0xFF);
    }
}

// Write multiple bytes from an InputIterator and update CRC
template<typename T>
void write_bytes(T in, size_t count) {
Expand Down Expand Up @@ -620,27 +736,49 @@ Bitstream Bitstream::serialise_chip(const Chip &chip, const map<string, string>
// Init address
wr.write_byte(uint8_t(BitstreamCommand::LSC_INIT_ADDRESS));
wr.insert_zeros(3);
// Bitstream data
wr.write_byte(uint8_t(BitstreamCommand::LSC_PROG_INCR_RTI));
wr.write_byte(0x91); //CRC check, 1 dummy byte
uint16_t frames = uint16_t(chip.info.num_frames);
wr.write_byte(uint8_t((frames >> 8) & 0xFF));
wr.write_byte(uint8_t(frames & 0xFF));
size_t bytes_per_frame = (chip.info.bits_per_frame + chip.info.pad_bits_after_frame +
chip.info.pad_bits_before_frame) / 8U;
unique_ptr<uint8_t[]> frame_bytes = make_unique<uint8_t[]>(bytes_per_frame);
for (size_t i = 0; i < frames; i++) {
fill(frame_bytes.get(), frame_bytes.get() + bytes_per_frame, 0x00);
for (int j = 0; j < chip.info.bits_per_frame; j++) {
size_t ofs = j + chip.info.pad_bits_after_frame;
assert(((bytes_per_frame - 1) - (ofs / 8)) < bytes_per_frame);
frame_bytes[(bytes_per_frame - 1) - (ofs / 8)] |=
(chip.cram.bit((chip.info.num_frames - 1) - i, j) & 0x01) << (ofs % 8);
if (options.count("compress") && options.at("compress") == "yes") {
// First create an uncompressed array of frames
std::vector<std::vector<uint8_t>> frames_data;
uint16_t frames = uint16_t(chip.info.num_frames);
size_t bytes_per_frame = (chip.info.bits_per_frame + chip.info.pad_bits_after_frame +
chip.info.pad_bits_before_frame) / 8U;
for (size_t i = 0; i < frames; i++) {
frames_data.emplace_back();
auto &frame_bytes = frames_data.back();
frame_bytes.resize(bytes_per_frame);
for (int j = 0; j < chip.info.bits_per_frame; j++) {
size_t ofs = j + chip.info.pad_bits_after_frame;
assert(((bytes_per_frame - 1) - (ofs / 8)) < bytes_per_frame);
frame_bytes[(bytes_per_frame - 1) - (ofs / 8)] |=
(chip.cram.bit((chip.info.num_frames - 1) - i, j) & 0x01) << (ofs % 8);
}
}
// Then compress and write
wr.write_compressed_frames(frames_data);
} else {
// Bitstream data
wr.write_byte(uint8_t(BitstreamCommand::LSC_PROG_INCR_RTI));
wr.write_byte(0x91); //CRC check, 1 dummy byte
uint16_t frames = uint16_t(chip.info.num_frames);
wr.write_byte(uint8_t((frames >> 8) & 0xFF));
wr.write_byte(uint8_t(frames & 0xFF));
size_t bytes_per_frame = (chip.info.bits_per_frame + chip.info.pad_bits_after_frame +
chip.info.pad_bits_before_frame) / 8U;
unique_ptr<uint8_t[]> frame_bytes = make_unique<uint8_t[]>(bytes_per_frame);
for (size_t i = 0; i < frames; i++) {
fill(frame_bytes.get(), frame_bytes.get() + bytes_per_frame, 0x00);
for (int j = 0; j < chip.info.bits_per_frame; j++) {
size_t ofs = j + chip.info.pad_bits_after_frame;
assert(((bytes_per_frame - 1) - (ofs / 8)) < bytes_per_frame);
frame_bytes[(bytes_per_frame - 1) - (ofs / 8)] |=
(chip.cram.bit((chip.info.num_frames - 1) - i, j) & 0x01) << (ofs % 8);
}
wr.write_bytes(frame_bytes.get(), bytes_per_frame);
wr.insert_crc16();
wr.write_byte(0xFF);
}
wr.write_bytes(frame_bytes.get(), bytes_per_frame);
wr.insert_crc16();
wr.write_byte(0xFF);
}

// Post-bitstream space for SECURITY and SED (not used here)
wr.insert_dummy(12);
// Program Usercode
Expand Down
9 changes: 7 additions & 2 deletions libtrellis/tools/ecppack.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ int main(int argc, char *argv[])
options.add_options()("freq", po::value<std::string>(), "config frequency in MHz");
options.add_options()("svf", po::value<std::string>(), "output SVF file");
options.add_options()("svf-rowsize", po::value<int>(), "SVF row size in bits (default 8000)");
options.add_options()("compress", "compress bitstream to reduce size");
options.add_options()("spimode", po::value<std::string>(), "SPI Mode to use (fast-read, dual-spi, qspi)");
options.add_options()("background", "enable background reconfiguration in bitstream");
options.add_options()("delta", po::value<std::string>(), "create a delta partial bitstream given a reference config");
Expand Down Expand Up @@ -132,6 +133,9 @@ int main(int argc, char *argv[])
if (vm.count("spimode"))
bitopts["spimode"] = vm["spimode"].as<string>();

if (vm.count("compress"))
bitopts["compress"] = "yes";

if (vm.count("background")) {
auto tile_db = get_tile_bitdata(TileLocator{c.info.family, c.info.name, "EFB0_PICB0"});
auto esb = tile_db->get_data_for_enum("SYSCONFIG.BACKGROUND_RECONFIG");
Expand Down Expand Up @@ -209,15 +213,16 @@ int main(int argc, char *argv[])
}

if (vm.count("svf")) {

// Create JTAG bitstream without SPI flash related settings, as these
// seem to confuse the chip sometimes when configuring over JTAG
if (!bitopts.empty()) {
if (!bitopts.empty() && !(bitopts.size() == 1 && bitopts.count("compress"))) {
bitopts.clear();
if (vm.count("background"))
bitopts["background"] = "yes";
if (vm.count("bootaddr"))
bitopts["multiboot"] = "yes";
if (vm.count("compress"))
bitopts["compress"] = "yes";
b = Bitstream::serialise_chip(c, bitopts);
}

Expand Down

0 comments on commit dc10b8a

Please sign in to comment.