Skip to content

Commit

Permalink
Add .hash section to written ELF files
Browse files Browse the repository at this point in the history
Summary:
This is necessary for functions like dlsym() to actually find symbols in a file.
A .dynamic section must reference a hash section for the .dynamic section to be
valid.

Reviewed By: swtaarrs

Differential Revision: D56575852

fbshipit-source-id: 26935dc5eea23ff01fb42572ec9afd0fae4b58df
  • Loading branch information
Alex Malyshev authored and facebook-github-bot committed May 1, 2024
1 parent 25f31c2 commit db19f16
Show file tree
Hide file tree
Showing 2 changed files with 186 additions and 33 deletions.
105 changes: 78 additions & 27 deletions cinderx/Jit/elf.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,26 @@ void initDynamicSection(Object& elf) {
header.size = elf.dynamic.bytes().size();
header.link = raw(SectionIdx::kDynstr);
header.entry_size = sizeof(Dyn);
header.align = 0x8;

elf.section_offset += header.size;
}

// Fill in the section header for .hash, placing the section at the current
// section offset, then advance the offset past it. The recorded size is
// whatever elf.hash reports at the time of the call, and the section is
// linked to .dynsym.
void initHashSection(Object& elf) {
  const auto start = elf.section_offset;
  JIT_CHECK(
      isAligned(start, 0x8),
      "Hash section starts at unaligned address {:#x}",
      start);

  SectionHeader& hdr = elf.getSectionHeader(SectionIdx::kHash);

  // Identity of the section.
  hdr.name_offset = elf.shstrtab.insert(".hash");
  hdr.type = kHash;
  hdr.flags = kSectionAlloc;
  hdr.link = raw(SectionIdx::kDynsym);

  // Placement: the same value is used for the address and the file offset.
  hdr.address = start;
  hdr.offset = start;
  hdr.size = elf.hash.size_bytes();
  hdr.align = 0x8;

  // Whatever gets laid out next starts right after this section.
  elf.section_offset += hdr.size;
}
Expand Down Expand Up @@ -169,18 +189,19 @@ void initTextSegment(Object& elf) {
}

void initReadonlySegment(Object& elf) {
// Starts at .dynsym and ends at .dynamic.
SectionHeader& dynsym = elf.getSectionHeader(SectionIdx::kDynsym);
SectionHeader& dynstr = elf.getSectionHeader(SectionIdx::kDynstr);
SectionHeader& dynamic = elf.getSectionHeader(SectionIdx::kDynamic);
JIT_CHECK(
dynsym.address < dynstr.address,
dynsym.address < dynamic.address,
"Expecting sections to be in a specific order");

SegmentHeader& header = elf.getSegmentHeader(SegmentIdx::kReadonly);
header.type = kSegmentLoadable;
header.flags = kSegmentReadable;
header.offset = dynsym.offset;
header.address = dynsym.address;
header.file_size = dynsym.size + dynstr.size;
header.file_size = dynamic.offset - dynsym.offset;
header.mem_size = header.file_size;
header.align = 0x1000;

Expand Down Expand Up @@ -216,13 +237,15 @@ void initDynamicSegment(Object& elf) {
}

void initDynamics(Object& elf) {
// Has to be run after .dynsym and .dynstr are mapped out.
// Has to be run after .dynsym, .dynstr, and .hash are mapped out.
SectionHeader& dynsym = elf.getSectionHeader(SectionIdx::kDynsym);
SectionHeader& dynstr = elf.getSectionHeader(SectionIdx::kDynstr);
SectionHeader& hash = elf.getSectionHeader(SectionIdx::kHash);

// TODO(T183002717): kNeeded for _cinderx.so and kHash for .hash.
// TODO(T183002717): kNeeded for _cinderx.so.
elf.dynamic.insert(DynTag::kNeeded, elf.libpython_name);

elf.dynamic.insert(DynTag::kHash, hash.address);
elf.dynamic.insert(DynTag::kStrtab, dynstr.address);
elf.dynamic.insert(DynTag::kStrSz, dynstr.size);
elf.dynamic.insert(DynTag::kSymtab, dynsym.address);
Expand All @@ -245,6 +268,45 @@ void pad(std::ostream& os, size_t size) {
}
}

// Serialize the hash table to `os`: the bucket count and the chain count as
// 32-bit values, followed by the bucket array and then the chain array.
void writeHash(std::ostream& os, const HashTable& hash) {
  const auto buckets = hash.buckets();
  const auto chains = hash.chains();

  // The two counts come first, each stored as a uint32_t.
  const uint32_t bucket_count = buckets.size();
  const uint32_t chain_count = chains.size();
  write(os, &bucket_count, sizeof(bucket_count));
  write(os, &chain_count, sizeof(chain_count));

  // Then the raw contents of both arrays.
  write(os, std::as_bytes(buckets));
  write(os, std::as_bytes(chains));
}

// Stream the complete ELF image to `os`. The pieces are emitted in this order:
// file header, section headers, segment headers, the machine code of every
// entry, .dynsym, .dynstr, .hash, .dynamic and finally .shstrtab. The padding
// amounts stored in `elf` are written between the groups.
void writeElf(
    std::ostream& os,
    const Object& elf,
    const std::vector<CodeEntry>& entries) {
  // Headers, followed by the padding recorded for them.
  write(os, &elf.file_header, sizeof(elf.file_header));
  write(os, &elf.section_headers, sizeof(elf.section_headers));
  write(os, &elf.segment_headers, sizeof(elf.segment_headers));
  pad(os, elf.header_padding);

  // Text: the code of each entry, back to back.
  for (const CodeEntry& code_entry : entries) {
    const auto& code = code_entry.code;
    write(os, code.data(), code.size());
  }
  pad(os, elf.text_padding);

  // Dynamic symbols and their string table share a single padding amount.
  write(os, elf.dynsym.bytes());
  write(os, elf.dynstr.bytes());
  pad(os, elf.dynsym_padding);

  // Symbol hash table.
  writeHash(os, elf.hash);
  pad(os, elf.hash_padding);

  // Dynamic table.
  write(os, elf.dynamic.bytes());
  pad(os, elf.dynamic_padding);

  // Section-name string table goes last, with no trailing padding.
  write(os, elf.shstrtab.bytes());
}

} // namespace

void writeEntries(std::ostream& os, const std::vector<CodeEntry>& entries) {
Expand Down Expand Up @@ -272,8 +334,8 @@ void writeEntries(std::ostream& os, const std::vector<CodeEntry>& entries) {

// The headers are all limited to the zeroth page, sections begin on the next
// page.
elf.section_offset = offsetof(Object, header_stop);
uint64_t header_padding = alignOffset(elf, kPageSize);
elf.section_offset = offsetof(Object, header_padding);
elf.header_padding = alignOffset(elf, kPageSize);
JIT_CHECK(
elf.section_offset == kTextStartAddress,
"ELF headers were too big and went past the zeroth page: {:#x}",
Expand All @@ -282,40 +344,29 @@ void writeEntries(std::ostream& os, const std::vector<CodeEntry>& entries) {
// Null section needs no extra initialization.

initTextSection(elf, text_size);
uint64_t text_padding = alignOffset(elf, kPageSize);
elf.text_padding = alignOffset(elf, kPageSize);

initDynsymSection(elf);
initDynstrSection(elf);
uint64_t dynsym_padding = alignOffset(elf, kPageSize);
elf.dynsym_padding = alignOffset(elf, 0x8);

elf.hash.build(elf.dynsym, elf.dynstr);
initHashSection(elf);
elf.hash_padding = alignOffset(elf, kPageSize);

initDynamics(elf);

initDynamicSection(elf);
elf.dynamic_padding = alignOffset(elf, 0x8);

initShstrtabSection(elf);

initTextSegment(elf);
initReadonlySegment(elf);
initReadwriteSegment(elf);
initDynamicSegment(elf);

// Write out all the headers.
write(os, &elf.file_header, sizeof(elf.file_header));
write(os, &elf.section_headers, sizeof(elf.section_headers));
write(os, &elf.segment_headers, sizeof(elf.segment_headers));
pad(os, header_padding);

// Write out the actual sections themselves.
for (const CodeEntry& entry : entries) {
write(os, entry.code.data(), entry.code.size());
}
pad(os, text_padding);

write(os, elf.dynsym.bytes());
write(os, elf.dynstr.bytes());
pad(os, dynsym_padding);

write(os, elf.dynamic.bytes());
write(os, elf.shstrtab.bytes());
writeElf(os, elf, entries);
}

} // namespace jit::elf
114 changes: 108 additions & 6 deletions cinderx/Jit/elf.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ namespace jit::elf {
constexpr uint32_t kProgram = 0x01;
constexpr uint32_t kSymbolTable = 0x02;
constexpr uint32_t kStringTable = 0x03;
constexpr uint32_t kHash = 0x05;
constexpr uint32_t kDynamic = 0x06;

// Section header flags.
Expand Down Expand Up @@ -49,6 +50,7 @@ enum class SectionIdx : uint32_t {
kText = 1,
kDynsym,
kDynstr,
kHash,
kDynamic,
kShstrtab,
kTotal,
Expand Down Expand Up @@ -211,6 +213,10 @@ class StringTable {
return static_cast<uint32_t>(start_off);
}

std::string_view string_at(size_t idx) const {
return reinterpret_cast<const char*>(&bytes_[idx]);
}

std::span<const std::byte> bytes() const {
return std::as_bytes(std::span<const uint8_t>{bytes_});
}
Expand Down Expand Up @@ -247,10 +253,18 @@ class SymbolTable {
syms_.emplace_back(std::forward<T&&>(sym));
}

// Read-only access to the symbol stored at position `idx`. The index is not
// range-checked.
const Symbol& operator[](size_t idx) const {
  const Symbol& sym = syms_[idx];
  return sym;
}

std::span<const std::byte> bytes() const {
return std::as_bytes(std::span{syms_});
}

// Number of symbols currently held in the table.
size_t size() const {
  const size_t count = syms_.size();
  return count;
}

private:
std::vector<Symbol> syms_;
};
Expand Down Expand Up @@ -297,22 +311,110 @@ class DynamicTable {
std::vector<Dyn> dyns_;
};

// Represents an ELF object/file.
// This is the hash function defined by the ELF standard for symbol names in a
// .hash section.
//
// `name` must point to a NUL-terminated string. Returns the 32-bit hash; the
// top four bits of the result are always zero.
inline uint32_t hash(const char* name) {
  uint32_t h = 0;
  for (; *name != '\0'; ++name) {
    // The standard function walks the name as unsigned bytes. Plain `char` is
    // signed on many platforms, so a byte >= 0x80 would otherwise be
    // sign-extended and yield a hash that other ELF tools don't agree with.
    h = (h << 4) + static_cast<unsigned char>(*name);
    // Fold the top nibble back into the low bits, then clear it.
    const uint32_t g = h & 0xf0000000;
    if (g != 0) {
      h ^= g >> 24;
    }
    h &= ~g;
  }
  return h;
}

// Hash table of symbols. The table is split into two arrays: the buckets array
// and the chains array. The buckets array holds symbol table indices, and if
// those don't match, then the lookup starts chasing through the chains array,
// trying each index until it hits 0, which is always the undefined symbol.
//
// The headers are laid out in the exact order that they will appear in the
// file.
// See
// https://docs.oracle.com/cd/E23824_01/html/819-0690/chapter6-48031.html#scrolltoc
class HashTable {
 public:
  // Populate the bucket and chain arrays from `syms`, hashing each symbol's
  // name as found in `strings` at the symbol's name_offset. Any previous
  // contents are discarded first, so the table always reflects exactly the
  // symbols passed in.
  void build(const SymbolTable& syms, const StringTable& strings) {
    const size_t nsyms = syms.size();

    // Use a load factor of 2 for the hash table. It will never be resized
    // after it is created. Always keep at least one bucket: a lookup reduces
    // the name hash modulo the bucket count, so writing out a bucket count of
    // zero would make a reader of the table divide by zero.
    const size_t nbuckets = nsyms < 2 ? 1 : nsyms / 2;
    buckets_.assign(nbuckets, 0);

    // One chain slot per symbol table entry; 0 terminates a chain.
    chains_.assign(nsyms, 0);

    // Skip element zero as that's the undefined symbol.
    for (size_t i = 1; i < nsyms; ++i) {
      const char* name = strings.string_at(syms[i].name_offset).data();
      uint32_t& head = buckets_[hash(name) % nbuckets];
      const auto sym_idx = static_cast<uint32_t>(i);
      if (head == 0) {
        // Empty bucket: this symbol becomes the first entry.
        head = sym_idx;
      } else {
        // Occupied bucket: append to the end of that bucket's chain.
        chains_[chaseChainIdx(head)] = sym_idx;
      }
    }
  }

  // View of the bucket array; each element is a symbol table index, or 0 for
  // an empty bucket.
  constexpr std::span<const uint32_t> buckets() const {
    return std::span{buckets_};
  }

  // View of the chain array; element i holds the next symbol table index to
  // try after symbol i, or 0 at the end of a chain.
  constexpr std::span<const uint32_t> chains() const {
    return std::span{chains_};
  }

  // Total number of bytes the table occupies once serialized.
  constexpr size_t size_bytes() const {
    // Hash table serializes the lengths of both tables as uint32_t values
    // before writing out the tables.
    return (sizeof(uint32_t) * 2) + buckets().size_bytes() +
        chains().size_bytes();
  }

 private:
  // Follow the chain starting at `idx` and return the index of its last
  // element, i.e. the one whose chain slot is still 0. Aborts via JIT_CHECK
  // if the end is not reached within chains_.size() steps, which would mean
  // the chain loops back on itself.
  uint32_t chaseChainIdx(uint32_t idx) const {
    const auto limit = static_cast<uint32_t>(chains_.size());

    uint32_t count;
    for (count = 0; chains_[idx] != 0 && count < limit; ++count) {
      idx = chains_[idx];
    }
    JIT_CHECK(
        count < limit,
        "Can't find end of hash table chain, infinite loop, last index {}",
        idx);

    return idx;
  }

  std::vector<uint32_t> buckets_;
  std::vector<uint32_t> chains_;
};

// Represents an ELF object/file.
struct Object {
FileHeader file_header;
std::array<SectionHeader, raw(SectionIdx::kTotal)> section_headers;
std::array<SegmentHeader, raw(SegmentIdx::kTotal)> segment_headers;

// Zero-length field that gives the offset into the ELF object where the
// headers stop.
char header_stop[0];
// Amount of padding to put after the headers. When used with offsetof, tells
// us the total size of the headers.
uint32_t header_padding{0};

// This is the padding for the text section, which doesn't show up in this
// struct. It's the vector of CodeEntry objects passed to writeEntries().
uint32_t text_padding{0};

SymbolTable dynsym;
StringTable dynstr;
uint32_t dynsym_padding{0};

HashTable hash;
uint32_t hash_padding{0};

DynamicTable dynamic;
uint32_t dynamic_padding{0};

StringTable shstrtab;

uint32_t section_offset{0};
Expand Down

0 comments on commit db19f16

Please sign in to comment.