Skip to content

Commit

Permalink
feat(singlejar): Add Log4j2Plugins combiner
Browse files Browse the repository at this point in the history
  • Loading branch information
stevebarrau committed May 29, 2024
1 parent 8a6aa56 commit 3ac4920
Show file tree
Hide file tree
Showing 9 changed files with 308 additions and 35 deletions.
3 changes: 3 additions & 0 deletions src/tools/singlejar/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,9 @@ cc_test(
":zlib_interface",
],
data = [
"data/log4j2_plugins_set_1.jar",
"data/log4j2_plugins_set_2.jar",
"data/log4j2_plugins_set_result.dat",
"data/multi_release.jar",
],
# Requires at least 5 GiB of memory
Expand Down
220 changes: 185 additions & 35 deletions src/tools/singlejar/combiners.cc
Original file line number Diff line number Diff line change
Expand Up @@ -14,53 +14,38 @@

#include "src/tools/singlejar/combiners.h"

#include <algorithm>
#include <arpa/inet.h>
#include <cctype>
#include <cstring>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <iterator>
#include <map>
#include <memory>
#include <sstream>
#include <string>
#include <vector>

#include "src/tools/singlejar/diag.h"

// Out-of-line definition of the Combiner destructor; it performs no work of
// its own.
Combiner::~Combiner() = default;

// Out-of-line destructor definition; the body is empty, so nothing is released
// explicitly here.
Concatenator::~Concatenator() {}

// Appends the contents of one archive entry to this concatenator's buffer.
//
// cdh: central directory header of the entry; only passed on to
//      DecompressEntryContents for deflated entries.
// lh:  local header of the entry; supplies the compression method and is
//      handed to the buffer to read the entry's data.
//
// Always returns true. An entry that is neither stored nor deflated is
// reported through diag_errx.
bool Concatenator::Merge(const CDH *cdh, const LH *lh) {
// When newline insertion is enabled and the buffer already holds data that
// does not end in '\n', add one so the new entry starts on its own line.
if (insert_newlines_ && buffer_.get() && buffer_->data_size() &&
'\n' != buffer_->last_byte()) {
Append("\n", 1);
}
// NOTE(review): presumably allocates buffer_ if it does not exist yet --
// confirm against CreateBuffer's definition.
CreateBuffer();
if (Z_NO_COMPRESSION == lh->compression_method()) {
buffer_->ReadEntryContents(lh);
} else if (Z_DEFLATED == lh->compression_method()) {
// The inflater is created on first use and kept for later entries.
if (!inflater_) {
inflater_.reset(new Inflater());
}
buffer_->DecompressEntryContents(cdh, lh, inflater_.get());
} else {
diag_errx(2, "%s is neither stored nor deflated", filename_.c_str());
}
return true;
}

void *Concatenator::OutputEntry(bool compress) {
if (!buffer_) {
return nullptr;
}

void *outputEntryFromBuffer(const std::string filename,
std::unique_ptr<TransientBytes> &buffer,
bool compress) {
// Allocate a contiguous buffer for the local file header and
// deflated data. We assume that deflate decreases the size, so if
// the deflater reports overflow, we just save original data.
// the deflater reports overflow, we just save original data.
size_t deflated_buffer_size =
sizeof(LH) + filename_.size() + buffer_->data_size();
sizeof(LH) + filename.size() + buffer->data_size();

// Huge entry (>4GB) needs Zip64 extension field with 64-bit original
// and compressed size values.
uint8_t
zip64_extension_buffer[sizeof(Zip64ExtraField) + 2 * sizeof(uint64_t)];
bool huge_buffer = ziph::zfield_needs_ext64(buffer_->data_size());
bool huge_buffer = ziph::zfield_needs_ext64(buffer->data_size());
if (huge_buffer) {
deflated_buffer_size += sizeof(zip64_extension_buffer);
}
Expand All @@ -75,7 +60,7 @@ void *Concatenator::OutputEntry(bool compress) {
lh->last_mod_file_date(30 << 9 | 1 << 5 | 1); // 2010-01-01
lh->crc32(0x12345678);
lh->compressed_file_size32(0);
lh->file_name(filename_.c_str(), filename_.size());
lh->file_name(filename.c_str(), filename.size());

if (huge_buffer) {
// Add Z64 extension if this is a huge entry.
Expand All @@ -84,22 +69,22 @@ void *Concatenator::OutputEntry(bool compress) {
reinterpret_cast<Zip64ExtraField *>(zip64_extension_buffer);
z64->signature();
z64->payload_size(2 * sizeof(uint64_t));
z64->attr64(0, buffer_->data_size());
z64->attr64(0, buffer->data_size());
lh->extra_fields(reinterpret_cast<uint8_t *>(z64), z64->size());
} else {
lh->uncompressed_file_size32(buffer_->data_size());
lh->uncompressed_file_size32(buffer->data_size());
lh->extra_fields(nullptr, 0);
}

uint32_t checksum;
uint64_t compressed_size;
uint16_t method;
if (compress) {
method = buffer_->CompressOut(lh->data(), &checksum, &compressed_size);
method = buffer->CompressOut(lh->data(), &checksum, &compressed_size);
} else {
buffer_->CopyOut(lh->data(), &checksum);
buffer->CopyOut(lh->data(), &checksum);
method = Z_NO_COMPRESSION;
compressed_size = buffer_->data_size();
compressed_size = buffer->data_size();
}
lh->crc32(checksum);
lh->compression_method(method);
Expand All @@ -118,6 +103,35 @@ void *Concatenator::OutputEntry(bool compress) {
return reinterpret_cast<void *>(lh);
}

// Out-of-line definition of the Concatenator destructor; it performs no work
// of its own.
Concatenator::~Concatenator() = default;

// Appends the contents of one archive entry to this concatenator's buffer.
//
// cdh: central directory header of the entry (used only when inflating).
// lh:  local header of the entry; supplies the compression method and the
//      entry data.
//
// Always returns true. An entry that is neither stored nor deflated is
// reported through diag_errx.
bool Concatenator::Merge(const CDH *cdh, const LH *lh) {
  // A separator is needed only when newline insertion is on and the buffer
  // already holds data that does not end in '\n'.
  const bool needs_separator = insert_newlines_ && buffer_.get() &&
                               buffer_->data_size() &&
                               '\n' != buffer_->last_byte();
  if (needs_separator) {
    Append("\n", 1);
  }

  // NOTE(review): presumably allocates buffer_ if it does not exist yet --
  // confirm against CreateBuffer's definition.
  CreateBuffer();

  const auto method = lh->compression_method();
  if (Z_DEFLATED == method) {
    // The inflater is created on first use and kept for later entries.
    if (!inflater_) {
      inflater_.reset(new Inflater());
    }
    buffer_->DecompressEntryContents(cdh, lh, inflater_.get());
  } else if (Z_NO_COMPRESSION == method) {
    buffer_->ReadEntryContents(lh);
  } else {
    diag_errx(2, "%s is neither stored nor deflated", filename_.c_str());
  }
  return true;
}

// Builds the output entry for everything merged so far.
//
// Returns nullptr when no buffer exists; otherwise returns whatever
// outputEntryFromBuffer produces for filename_ and buffer_, compressing the
// data when `compress` is true.
void *Concatenator::OutputEntry(bool compress) {
  return buffer_ ? outputEntryFromBuffer(filename_, buffer_, compress)
                 : nullptr;
}

// Out-of-line definition of the NullCombiner destructor; it performs no work
// of its own.
NullCombiner::~NullCombiner() = default;

bool NullCombiner::Merge(const CDH * /*cdh*/, const LH * /*lh*/) {
Expand Down Expand Up @@ -284,3 +298,139 @@ void *ManifestCombiner::OutputEntry(bool compress) {
concatenator_->Append("\r\n");
return concatenator_->OutputEntry(compress);
}

// Reads one boolean, encoded as a single byte, from `stream`.
//
// Any non-zero byte is treated as true. If the stream has no byte left, the
// read sets the stream's fail state and this function returns false.
bool readBool(std::istringstream &stream) {
  // Read into a char instead of directly into a bool: storing a byte other
  // than 0 or 1 in a bool object is undefined behaviour, and a failed read
  // would otherwise leave the result uninitialized.
  char byte = 0;
  stream.read(&byte, 1);
  return byte != 0;
}

// Reads a 32-bit big-endian (network byte order) unsigned integer from
// `stream`.
//
// Returns 0 when fewer than four bytes are available; the stream is left in
// its fail state so callers can detect the truncation.
uint32_t readInt(std::istringstream &stream) {
  unsigned char bytes[4] = {0, 0, 0, 0};
  stream.read(reinterpret_cast<char *>(bytes), sizeof(bytes));
  if (!stream) {
    // A short read would otherwise yield a partly garbage value that callers
    // use as a loop count.
    return 0;
  }
  // Assemble the value explicitly so the result does not depend on the host's
  // byte order.
  return (static_cast<uint32_t>(bytes[0]) << 24) |
         (static_cast<uint32_t>(bytes[1]) << 16) |
         (static_cast<uint32_t>(bytes[2]) << 8) |
         static_cast<uint32_t>(bytes[3]);
}

// Reads a length-prefixed string from `stream`: a 16-bit big-endian byte
// count followed by that many bytes of payload.
//
// Returns an empty string when the length prefix cannot be read. When the
// payload is shorter than announced, only the bytes actually read are
// returned. In both cases the stream is left in its fail state.
std::string readUTFString(std::istringstream &stream) {
  unsigned char prefix[2] = {0, 0};
  stream.read(reinterpret_cast<char *>(prefix), sizeof(prefix));
  if (!stream) {
    // Without this guard an unread prefix would size the result from
    // uninitialized memory.
    return std::string();
  }

  const size_t length = (static_cast<size_t>(prefix[0]) << 8) | prefix[1];
  std::string result(length, '\0');
  if (length == 0) {
    return result;
  }
  stream.read(&result[0], static_cast<std::streamsize>(length));
  // Drop the NUL padding left behind by a short read.
  result.resize(static_cast<size_t>(stream.gcount()));
  return result;
}

// Appends `value` to `buffer` as a single byte: 1 for true, 0 for false.
void writeBoolean(TransientBytes &buffer, bool value) {
  uint8_t encoded = 0;
  if (value) {
    encoded = 1;
  }
  buffer.Append(&encoded, sizeof(encoded));
}

// Appends the object representation of `value` to `buffer`, i.e. its
// sizeof(int) bytes in host memory order. No byte-order conversion happens
// here; callers that need a particular byte order must convert first.
void writeInt(TransientBytes &buffer, int value) {
  const uint8_t *raw = reinterpret_cast<const uint8_t *>(&value);
  buffer.Append(raw, sizeof(value));
}

// Appends `str` to `buffer` as a length-prefixed string: a 16-bit byte count
// in network byte order followed by the string's bytes.
//
// The prefix can express at most 65535 bytes. Longer strings are truncated to
// that size so the prefix and the payload always agree; writing the full
// payload behind a wrapped-around prefix would corrupt every record that
// follows it in the file.
// NOTE(review): truncation is silent -- consider reporting over-long strings
// as a fatal error at the call site if they can occur in practice.
void writeUTFString(TransientBytes &buffer, const std::string &str) {
  const size_t kMaxPayloadSize = 0xFFFF;
  const size_t payload_size =
      str.size() < kMaxPayloadSize ? str.size() : kMaxPayloadSize;
  const uint16_t length = htons(static_cast<uint16_t>(payload_size));
  buffer.Append(reinterpret_cast<const uint8_t *>(&length), sizeof(length));
  buffer.Append(reinterpret_cast<const uint8_t *>(str.data()), payload_size);
}

// Write Log4j2 plugin cache file.
//
// Modeled after the Java canonical implementation here:
// https://github.com/apache/logging-log4j2/blob/8573ef778d2fad2bbec50a687955dccd2a616cc5/log4j-core/src/main/java/org/apache/logging/log4j/core/config/plugins/processor/PluginCache.java#L66-L85
std::unique_ptr<TransientBytes> writeLog4j2PluginCacheFile(std::map<std::string, std::map<std::string, PluginEntry>> categories) {
std::unique_ptr<TransientBytes> buffer;
buffer.reset(new TransientBytes());
writeInt(*buffer, htonl(static_cast<int>(categories.size())));
for (const auto &categoryPair : categories) {
writeUTFString(*buffer, categoryPair.first);
writeInt(*buffer, htonl(static_cast<int>(categoryPair.second.size())));
for (const auto &pluginPair : categoryPair.second) {
const PluginEntry &plugin = pluginPair.second;
writeUTFString(*buffer, plugin.key);
writeUTFString(*buffer, plugin.className);
writeUTFString(*buffer, plugin.name);
writeBoolean(*buffer, plugin.printable);
writeBoolean(*buffer, plugin.defer);
}
}

return buffer;
}

// Parses a Log4j2 plugin cache file held in `transientBytes`.
//
// Modeled after the Java canonical implementation here:
// https://github.com/apache/logging-log4j2/blob/8573ef778d2fad2bbec50a687955dccd2a616cc5/log4j-core/src/main/java/org/apache/logging/log4j/core/config/plugins/processor/PluginCache.java#L93-L124
//
// Returns a map from category name to that category's plugins keyed by plugin
// key. When a key occurs more than once within a category, the first
// occurrence is kept. Parsing stops at the first read that fails (truncated or
// malformed data); everything fully read up to that point is returned.
std::map<std::string, std::map<std::string, PluginEntry>> loadLog4j2PluginCacheFile(TransientBytes &transientBytes) {
  // Copy the bytes straight into the string that backs the stream instead of
  // staging them in a separate vector first.
  std::string raw(transientBytes.data_size(), '\0');
  uint32_t checksum = 0;  // Required by CopyOut; the value is not used here.
  transientBytes.CopyOut(reinterpret_cast<uint8_t *>(&raw[0]), &checksum);
  std::istringstream buffer(raw);

  std::map<std::string, std::map<std::string, PluginEntry>> categories;
  const uint32_t categoriesCount = readInt(buffer);
  for (uint32_t i = 0; i < categoriesCount; ++i) {
    const std::string category = readUTFString(buffer);
    const uint32_t entries = readInt(buffer);
    if (!buffer) {
      // The category header is incomplete; `entries` cannot be trusted.
      break;
    }
    for (uint32_t j = 0; j < entries; ++j) {
      // All five fields are always read, even for a key that is already
      // present, so the stream stays aligned with the next record.
      const std::string key = readUTFString(buffer);
      const std::string className = readUTFString(buffer);
      const std::string name = readUTFString(buffer);
      const bool printable = readBool(buffer);
      const bool defer = readBool(buffer);
      if (!buffer) {
        // The record is incomplete; do not store a partly read entry.
        return categories;
      }
      categories[category].emplace(
          key, PluginEntry(key, className, name, printable, defer, category));
    }
  }

  return categories;
}

// Out-of-line definition of the Log4J2PluginDatCombiner destructor; it
// performs no work of its own.
Log4J2PluginDatCombiner::~Log4J2PluginDatCombiner() = default;

bool Log4J2PluginDatCombiner::Merge(const CDH *cdh, const LH *lh) {
TransientBytes bytes_;
if (lh->compression_method() == Z_NO_COMPRESSION) {
bytes_.ReadEntryContents(lh);
} else if (lh->compression_method() == Z_DEFLATED) {
if (!inflater_) {
inflater_.reset(new Inflater());
}
bytes_.DecompressEntryContents(cdh, lh, inflater_.get());
} else {
diag_errx(2, "neither stored nor deflated");
}

auto newCategories = loadLog4j2PluginCacheFile(bytes_);
for (const auto &newCategoryPair : newCategories) {
auto newCategoryId = newCategoryPair.first;
auto newPlugins = newCategoryPair.second;

if (auto existingCategoryPair = categories_.find(newCategoryId); existingCategoryPair != categories_.end()) {
for (const auto &pluginPair : newPlugins) {
auto newPluginKey = pluginPair.first;
auto newPlugin = pluginPair.second;

if (auto existingPluginKey = categories_[newCategoryId].find(newPluginKey); existingPluginKey != categories_[newCategoryId].end() && no_duplicates_) {
diag_errx(1, "%s:%d: Log4J2 plugin %s.%s is present in multiple jars", __FILE__, __LINE__, newCategoryId.c_str(), newPluginKey.c_str());
}

categories_[newCategoryId].insert(pluginPair);
}
} else {
categories_[newCategoryId] = newPlugins;
}
}

return true;
}

// Builds the output entry holding the merged plugin cache.
//
// Returns nullptr when no plugin categories have been collected, mirroring
// Concatenator::OutputEntry, which returns nullptr when it has nothing
// buffered. Otherwise serializes categories_ and returns whatever
// outputEntryFromBuffer produces for filename_, compressing the data when
// `compress` is true.
// NOTE(review): presumably the caller skips null entries, so no empty plugin
// cache file is emitted -- confirm against the OutputEntry call site.
void *Log4J2PluginDatCombiner::OutputEntry(bool compress) {
  if (categories_.empty()) {
    return nullptr;
  }

  auto buffer = writeLog4j2PluginCacheFile(categories_);
  return outputEntryFromBuffer(filename_, buffer, compress);
}
31 changes: 31 additions & 0 deletions src/tools/singlejar/combiners.h
Original file line number Diff line number Diff line change
Expand Up @@ -173,4 +173,35 @@ class ManifestCombiner : public Combiner {
std::unique_ptr<Inflater> inflater_;
};

// One plugin record of a Log4j2 plugin cache file. All fields are public and
// stored exactly as passed to the constructor.
class PluginEntry {
 public:
  std::string key;        // Key the plugin is stored under in its category.
  std::string className;  // Class name recorded for the plugin.
  std::string name;       // Name recorded for the plugin.
  bool printable;         // "printable" flag of the record.
  bool defer;             // "defer" flag of the record.
  std::string category;   // Category the plugin was listed under.

  // Copies every argument into the field of the same name.
  PluginEntry(const std::string &key, const std::string &className,
              const std::string &name, bool printable, bool defer,
              const std::string &category)
      : key{key},
        className{className},
        name{name},
        printable{printable},
        defer{defer},
        category{category} {}
};

// Combiner that accumulates the plugin categories of Log4j2 plugin cache
// entries and emits them as a single entry.
class Log4J2PluginDatCombiner : public Combiner {
 public:
  // filename:      stored as the name used for the output entry.
  // no_duplicates: stored flag; Merge consults it when a plugin key is
  //                already present in its category.
  Log4J2PluginDatCombiner(const std::string &filename, bool no_duplicates)
      : filename_{filename}, no_duplicates_{no_duplicates} {}

  ~Log4J2PluginDatCombiner() override;

  // Folds the plugins of one input entry into the accumulated categories.
  bool Merge(const CDH *cdh, const LH *lh) override;

  // Produces the output entry for the accumulated categories.
  void *OutputEntry(bool compress) override;

 private:
  const std::string filename_;
  const bool no_duplicates_;
  // Created on first use when an input entry is deflated.
  std::unique_ptr<Inflater> inflater_;
  // Category name -> (plugin key -> plugin record).
  std::map<std::string, std::map<std::string, PluginEntry>> categories_;
};

#endif // SRC_TOOLS_SINGLEJAR_COMBINERS_H_
Loading

0 comments on commit 3ac4920

Please sign in to comment.