Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,7 @@ filegroup(
"Headers/DebugServer2/Target/ThreadBase.h",
"Headers/DebugServer2/Utils/Backtrace.h",
"Headers/DebugServer2/Utils/Bits.h",
"Headers/DebugServer2/Utils/crc32.h",
"Headers/DebugServer2/Utils/CompilerSupport.h",
"Headers/DebugServer2/Utils/Daemon.h",
"Headers/DebugServer2/Utils/Enums.h",
Expand Down Expand Up @@ -348,6 +349,7 @@ filegroup(
"Sources/Target/Common/ProcessBase.cpp",
"Sources/Target/Common/ThreadBase.cpp",
"Sources/Utils/Backtrace.cpp",
"Sources/Utils/crc32.cpp",
"Sources/Utils/Log.cpp",
"Sources/Utils/OptParse.cpp",
"Sources/Utils/Stringify.cpp",
Expand Down
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,7 @@ add_executable(ds2
Sources/Target/Common/${DS2_ARCHITECTURE}/ProcessBase${DS2_ARCHITECTURE}.cpp

Sources/Utils/Backtrace.cpp
Sources/Utils/crc32.c
Sources/Utils/Log.cpp
Sources/Utils/OptParse.cpp
Sources/Utils/Stringify.cpp)
Expand Down
1 change: 1 addition & 0 deletions Headers/DebugServer2/Host/File.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ class File {
public:
static ErrorCode fileSize(std::string const &path, uint64_t &size);
static ErrorCode fileMode(std::string const &path, uint32_t &mode);
// Computes a CRC-32 checksum over the entire contents of the file at `path`
// and stores it in `crc`. The POSIX implementation returns a translated error
// when the file cannot be opened; the Windows implementation always returns
// kErrorUnsupported.
static ErrorCode crc32(std::string const &path, uint32_t &crc);

protected:
int _fd;
Expand Down
12 changes: 10 additions & 2 deletions Headers/DebugServer2/Support/POSIX/ELFSupport.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,16 @@ class ELFSupport {
NotesHeader &nhdr, ByteVector &id);

template <typename ELFHeader, typename SectionHeader>
static bool ReadSectionHeader(int fd, const ELFHeader &ehdr, SectionHeader &shdr,
size_t idx);
static bool ReadSectionHeader(int fd, const ELFHeader &ehdr,
SectionHeader &shdr, size_t idx);

// Reads the section-name string table (the section selected by
// `ehdr.e_shstrndx`) from `fd` into `table`. `shdr` is used as scratch storage
// and holds the string table's section header on return. Returns false when
// the header cannot be read, the section is not SHT_STRTAB, or the read of
// the table contents comes up short.
template <typename ELFHeader, typename SectionHeader>
static bool ReadStringTable(int fd, const ELFHeader &ehdr,
SectionHeader &shdr, std::vector<char> &table);
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure how I feel about this. This is effectively a full copy of the string table. Do we need a copy? Is the mapped view not guaranteed to outlive the reference? That is, more specifically, can we get away with a std::span<char> &?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is no mmap of the file at this point, just an open fd. This string table should just include section names so I don't expect it to ever be very large-- just one string per section.

Let me rename the method to ReadSectionNameStringTable for clarity.


// Looks for a SHT_PROGBITS section named ".gnu_debuglink" and, when found,
// replaces the contents of `crc` with the four checksum bytes stored at the
// end of that section (copied in file byte order). `shdr` is used as scratch
// storage while iterating over the section headers. Returns false when no
// such section exists or the file cannot be parsed.
template <typename ELFHeader, typename SectionHeader>
static bool ReadDebugLinkCRC(int fd, const ELFHeader &ehdr,
SectionHeader &shdr, ByteVector &crc);
};
} // namespace Support
} // namespace ds2
19 changes: 19 additions & 0 deletions Headers/DebugServer2/Utils/crc32.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
/*-
* COPYRIGHT (C) 1986 Gary S. Brown. You may use this program, or
* code or tables extracted from it, as desired without restriction.
*/

#pragma once

#include <stddef.h>
#include <stdint.h>

#ifdef __cplusplus
extern "C" {
#endif

/*
 * Updates a running CRC-32 (reflected polynomial 0xedb88320) with `size`
 * bytes starting at `buf` and returns the new checksum.
 *
 * Pass 0 as `crc` for the first chunk and the previous return value for each
 * following chunk; the result is then the same as a single call over the
 * concatenated data. A call with `size` == 0 returns `crc` unchanged and does
 * not dereference `buf`.
 */
uint32_t crc32(uint32_t crc, const void *buf, size_t size);

#ifdef __cplusplus
}
#endif
28 changes: 18 additions & 10 deletions Sources/GDBRemote/Mixins/FileOperationsMixin.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -130,18 +130,26 @@ ErrorCode FileOperationsMixin<T>::onQueryModuleInfo(Session &session,
std::string &triple,
ModuleInfo &info) const {
ByteVector buildId;
if (Platform::GetExecutableFileBuildID(path, buildId)) {
// format the uuid as an upper-case string with two hex chars per byte
std::ostringstream ss;
for(const auto b : buildId)
ss << std::uppercase << std::hex << std::setfill('0') << std::setw(2) << int(b);

info.uuid = ss.str();
if (!Platform::GetExecutableFileBuildID(path, buildId)) {
// Not all executable files contain an embedded build ID. In this case,
// return a crc32 of the contents of the file. The documentation for
// `qModuleInfo` suggests an md5 hash can be returned as "md5" in the
// response instead of "uuid." However, returning a crc32 is consistent
// with behavior of lldb-server.
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Technically, MD5 is the right hash here. The default build ID hash was MD5, though it is not required to be MD5. The modern default has changed to SHA1, though UUID is becoming somewhat common too. The user also has the ability to specify a custom (hex) string of an even length as well. Is there some expectation that it is the CRC? I'd prefer MD5/SHA1 over CRC32.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would prefer to follow the docs and use md5 here, but there are a number of lldb tests that start failing when we report an md5 hash instead of uuid. I don't know if sha1 is allowed-- there is nothing in the documentation.

This crc implementation is what I can observe lldb-server doing while running tests, and it gets the most tests passing. I will go back to md5 and see if there's anything more I can do to get tests passing.

uint32_t crc;
CHK(File::crc32(path, crc));
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Technically, the computation is over the normative portions of the file, not the entire file. I suppose that there is the performance aspect - digesting the entire file is faster than parsing and processing the normative parts. Not doing the slower process means that we would not be able to correctly identify files, which leaves this partially incorrect.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

From what I've observed, the CRCs calculated this way exactly match those sent by lldb-server on the same files. Am I getting lucky? Is there any documentation on what parts of the file should be covered by the crc/hash?

Copy link
Owner

@compnerd compnerd Nov 5, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Documentation? Ha! The source code to the linker is the documentation 😢. IIRC, it was .text, .data, and any custom sections that are digested.

std::copy(reinterpret_cast<uint8_t*>(&crc),
reinterpret_cast<uint8_t*>(&crc) + sizeof(crc),
std::back_inserter(buildId));
}

// TODO(andrurogerz): Not all executable files contain an embedded build ID.
// If GetExecutableFileBuildID fails, calculate an md5 hash of the file
// contents and return that as an "md5" field instead of the "uuid" field.
// format the uuid as an upper-case string with two hex chars per byte
std::ostringstream ss;
for(const auto b : buildId)
ss << std::uppercase << std::hex << std::setfill('0') << std::setw(2) << int(b);

info.uuid = ss.str();


auto error = File::fileSize(path, info.file_size);
if (error != kSuccess)
Expand Down
19 changes: 19 additions & 0 deletions Sources/Host/POSIX/File.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

#include "DebugServer2/Host/File.h"
#include "DebugServer2/Host/Platform.h"
#include "DebugServer2/Utils/crc32.h"

#include <fcntl.h>
#include <limits>
Expand Down Expand Up @@ -216,5 +217,23 @@ ErrorCode File::fileMode(std::string const &path, uint32_t &mode) {
mode = static_cast<uint32_t>(ALLPERMS & stbuf.st_mode);
return kSuccess;
}

// Computes the CRC-32 of the entire contents of the file at `path`.
//
// On success, stores the checksum in `crc` and returns kSuccess. If the file
// cannot be opened, or a read fails part-way through, the translated error is
// returned and `crc` is left untouched so callers never observe a checksum of
// a partially read file.
ErrorCode File::crc32(std::string const &path, uint32_t &crc) {
  int fd = ::open(path.c_str(), O_RDONLY);
  if (fd < 0)
    return Platform::TranslateError();

  // Stream the file one page at a time instead of loading it whole.
  ByteVector buffer(getpagesize());
  uint32_t checksum = ::crc32(0L, nullptr, 0);
  ErrorCode error = kSuccess;

  for (;;) {
    const ssize_t bytesRead = ::read(fd, buffer.data(), buffer.size());
    if (bytesRead == 0)
      break; // end of file

    if (bytesRead < 0) {
      // Translate the failure before calling close(), which may overwrite
      // the error state of the failed read.
      error = Platform::TranslateError();
      break;
    }

    checksum =
        ::crc32(checksum, buffer.data(), static_cast<size_t>(bytesRead));
  }

  ::close(fd);

  if (error != kSuccess)
    return error;

  crc = checksum;
  return kSuccess;
}

} // namespace Host
} // namespace ds2
4 changes: 4 additions & 0 deletions Sources/Host/Windows/File.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,5 +51,9 @@ ErrorCode File::fileMode(std::string const &path, uint32_t &mode) {
return kErrorUnsupported;
}

// File checksumming is not implemented for Windows hosts: the request is
// rejected unconditionally, `path` is never opened and `crc` is never written.
ErrorCode File::crc32(std::string const &path, uint32_t &crc) {
  (void)path;
  (void)crc;
  return kErrorUnsupported;
}

} // namespace Host
} // namespace ds2
107 changes: 87 additions & 20 deletions Sources/Support/POSIX/ELFSupport.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,8 @@ bool ELFSupport::GetELFFileBuildID(std::string const &path, ByteVector &buildId)
case ELFCLASS32: {
Elf32_Shdr shdr;
Elf32_Nhdr nhdr;
result = ReadBuildID(fd, ehdr, shdr, nhdr, buildId);
if (!(result = ReadBuildID(fd, ehdr, shdr, nhdr, buildId)))
result = ReadDebugLinkCRC(fd, ehdr, shdr, buildId);
break;
}

Expand All @@ -102,7 +103,8 @@ bool ELFSupport::GetELFFileBuildID(std::string const &path, ByteVector &buildId)

Elf64_Shdr shdr;
Elf64_Nhdr nhdr;
result = ReadBuildID(fd, ehdr, shdr, nhdr, buildId);
if (!(result = ReadBuildID(fd, ehdr, shdr, nhdr, buildId)))
result = ReadDebugLinkCRC(fd, ehdr, shdr, buildId);
break;
}

Expand All @@ -123,30 +125,30 @@ bool ELFSupport::GetELFFileBuildID(std::string const &path, ByteVector &buildId)
template <typename ELFHeader, typename SectionHeader, typename NotesHeader>
bool ELFSupport::ReadBuildID(int fd, const ELFHeader &ehdr, SectionHeader &shdr,
NotesHeader &nhdr, ByteVector &id) {
// Build ID is found in a note section with note type NT_GNU_BUILD_ID.
// The section is typically named .note.gnu.build-id.
for (size_t i = 0; i < ehdr.e_shnum; i++) {
if (!ReadSectionHeader(fd, ehdr, shdr, i))
return false;
// Build ID is found in a note section with note type NT_GNU_BUILD_ID.
// The section is typically named .note.gnu.build-id.
for (size_t i = 0; i < ehdr.e_shnum; i++) {
if (!ReadSectionHeader(fd, ehdr, shdr, i))
return false;

if (shdr.sh_type != SHT_NOTE)
continue;
if (shdr.sh_type != SHT_NOTE)
continue;

if (::pread(fd, &nhdr, sizeof(nhdr), shdr.sh_offset) != sizeof(nhdr))
return false;
if (::pread(fd, &nhdr, sizeof(nhdr), shdr.sh_offset) != sizeof(nhdr))
return false;

if (nhdr.n_type != NT_GNU_BUILD_ID)
continue;
if (nhdr.n_type != NT_GNU_BUILD_ID)
continue;

id.resize(nhdr.n_descsz);
const off_t pos = shdr.sh_offset + sizeof(nhdr) + nhdr.n_namesz;
if (::pread(fd, id.data(), nhdr.n_descsz, pos) != nhdr.n_descsz)
return false;
id.resize(nhdr.n_descsz);
const off_t pos = shdr.sh_offset + sizeof(nhdr) + nhdr.n_namesz;
if (::pread(fd, id.data(), nhdr.n_descsz, pos) != nhdr.n_descsz)
return false;

return true;
}
return true;
}

return false;
return false;
}

template <typename ELFHeader, typename SectionHeader>
Expand All @@ -161,5 +163,70 @@ bool ELFSupport::ReadSectionHeader(int fd, const ELFHeader &ehdr, SectionHeader

return true;
}

// Reads the section-name string table of an ELF file into `table`.
//
// fd    - open descriptor for the ELF file.
// ehdr  - ELF header; `e_shstrndx` selects the string table section.
// shdr  - scratch storage; holds the string table's section header on return.
// table - receives the raw table bytes. On success it is non-empty and always
//         ends with a NUL byte, so any in-range offset refers to a terminated
//         C string.
//
// Returns false if the section header cannot be read, the section is not a
// string table, the table is empty, or its contents cannot be read in full.
template <typename ELFHeader, typename SectionHeader>
bool ELFSupport::ReadStringTable(int fd, const ELFHeader &ehdr,
                                 SectionHeader &shdr,
                                 std::vector<char> &table) {
  // e_shstrndx is the index of the string table containing section names
  if (!ReadSectionHeader(fd, ehdr, shdr, ehdr.e_shstrndx))
    return false;

  if (shdr.sh_type != SHT_STRTAB)
    return false;

  // An empty table cannot name any section, and there is no valid buffer to
  // hand to pread() for it.
  if (shdr.sh_size == 0)
    return false;

  table.resize(shdr.sh_size);
  const ssize_t bytesRead =
      ::pread(fd, table.data(), table.size(), shdr.sh_offset);
  if (bytesRead < 0 || static_cast<size_t>(bytesRead) != table.size())
    return false;

  // A well-formed string table ends with a NUL byte. Enforce that here so a
  // truncated or corrupt file cannot make callers read past the buffer when
  // they treat an entry as a C string.
  if (table.back() != '\0')
    table.push_back('\0');

  return true;
}

// Extracts the CRC stored in the `.gnu_debuglink` section of an ELF file.
//
// fd   - open descriptor for the ELF file.
// ehdr - ELF header, used to locate and count the section headers.
// shdr - scratch storage, overwritten while walking the section headers.
// crc  - on success, replaced with the four checksum bytes, in the byte order
//        in which they appear in the file.
//
// Returns true when a well-formed `.gnu_debuglink` section was found. Returns
// false when the section is missing, malformed, or the file cannot be read.
template <typename ELFHeader, typename SectionHeader>
bool ELFSupport::ReadDebugLinkCRC(int fd, const ELFHeader &ehdr,
                                  SectionHeader &shdr, ByteVector &crc) {
  static const char kDebugLinkSectionName[] = ".gnu_debuglink";

  std::vector<char> table;
  if (!ReadStringTable(fd, ehdr, shdr, table))
    return false;

  for (size_t i = 0; i < ehdr.e_shnum; i++) {
    if (!ReadSectionHeader(fd, ehdr, shdr, i))
      return false;

    if (shdr.sh_type != SHT_PROGBITS)
      continue;

    // The name offset comes from the file; ignore sections whose name lies
    // outside the string table instead of reading out of bounds.
    if (shdr.sh_name >= table.size())
      continue;

    // Bound the name by the end of the table in case it is not terminated.
    const char *const tableEnd = table.data() + table.size();
    const char *const nameBegin = table.data() + shdr.sh_name;
    const char *const nameEnd = std::find(nameBegin, tableEnd, '\0');
    const std::string sectionName(nameBegin, nameEnd);
    if (sectionName != kDebugLinkSectionName)
      continue;

    // The .gnu_debuglink section contains the following:
    // 1) A filename with any leading directory components removed (base name)
    // 2) A string-terminating zero byte
    // 3) Zero to three bytes of padding to ensure the next value is four-byte
    //    aligned
    // 4) A four byte crc32 checksum of the debug information file's contents
    //    (same endianness as used for the containing executable file)
    ByteVector section(shdr.sh_size);
    if (section.empty())
      return false; // malformed: nothing to read

    const ssize_t bytesRead =
        ::pread(fd, section.data(), section.size(), shdr.sh_offset);
    if (bytesRead < 0 || static_cast<size_t>(bytesRead) != section.size())
      return false;

    // The file name must be terminated inside the section; never scan past
    // the bytes that were actually read.
    const auto terminator = std::find(section.begin(), section.end(), 0);
    if (terminator == section.end())
      return false; // malformed: unterminated file name

    const std::string name(section.begin(), terminator);

    // Skip the name and its terminator, rounded up to a four-byte boundary.
    const size_t offset = (name.length() + 4) & ~static_cast<size_t>(3);

    if (offset + sizeof(uint32_t) > section.size())
      return false; // malformed: no room for the checksum

    crc.clear();
    auto start = section.begin() + offset;
    std::copy(start, start + sizeof(uint32_t), std::back_inserter(crc));

    DS2LOG(Info, "using .gnu_debuglink (%s) as build id", name.c_str());
    return true;
  }

  return false;
}
} // namespace Support
} // namespace ds2
101 changes: 101 additions & 0 deletions Sources/Utils/crc32.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
/*-
* COPYRIGHT (C) 1986 Gary S. Brown. You may use this program, or
* code or tables extracted from it, as desired without restriction.
*/

/*
* First, the polynomial itself and its table of feedback terms. The
* polynomial is
* X^32+X^26+X^23+X^22+X^16+X^12+X^11+X^10+X^8+X^7+X^5+X^4+X^2+X^1+X^0
*
* Note that we take it "backwards" and put the highest-order term in
* the lowest-order bit. The X^32 term is "implied"; the LSB is the
* X^31 term, etc. The X^0 term (usually shown as "+1") results in
* the MSB being 1
*
* Note that the usual hardware shift register implementation, which
* is what we're using (we're merely optimizing it by doing eight-bit
* chunks at a time) shifts bits into the lowest-order term. In our
* implementation, that means shifting towards the right. Why do we
* do it this way? Because the calculated CRC must be transmitted in
* order from highest-order term to lowest-order term. UARTs transmit
* characters in order from LSB to MSB. By storing the CRC this way
* we hand it to the UART in the order low-byte to high-byte; the UART
* sends each low-bit to high-bit; and the result is transmission bit
* by bit from highest- to lowest-order term without requiring any bit
* shuffling on our part. Reception works similarly
*
* The feedback terms table consists of 256, 32-bit entries. Notes
*
* The table can be generated at runtime if desired; code to do so
* is shown later. It might not be obvious, but the feedback
* terms simply represent the results of eight shift/xor operations
* for all combinations of data and CRC register values
*
* The values must be right-shifted by eight bits by the "updcrc"
* logic; the shift must be unsigned (bring in zeroes). On some
* hardware you could probably optimize the shift in assembler by
* using byte-swap instructions
* polynomial $edb88320
*
*
* CRC32 code derived from work by Gary S. Brown.
*/

#include <stddef.h>
#include <stdint.h>

static const uint32_t crc32_tab[] = {
0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, 0x706af48f,
0xe963a535, 0x9e6495a3, 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988,
0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91, 0x1db71064, 0x6ab020f2,
0xf3b97148, 0x84be41de, 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7,
0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9,
0xfa0f3d63, 0x8d080df5, 0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172,
0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b, 0x35b5a8fa, 0x42b2986c,
0xdbbbc9d6, 0xacbcf940, 0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59,
0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423,
0xcfba9599, 0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,
0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, 0x76dc4190, 0x01db7106,
0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433,
0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818, 0x7f6a0dbb, 0x086d3d2d,
0x91646c97, 0xe6635c01, 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e,
0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950,
0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65,
0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, 0x4adfa541, 0x3dd895d7,
0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0,
0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa,
0xbe0b1010, 0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17, 0x2eb40d81,
0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a,
0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683, 0xe3630b12, 0x94643b84,
0x0d6d6a3e, 0x7a6a5aa8, 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1,
0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb,
0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc,
0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, 0xd6d6a3e8, 0xa1d1937e,
0x38d8c2c4, 0x4fdff252, 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b,
0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55,
0x316e8eef, 0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236,
0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe, 0xb2bd0b28,
0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d,
0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a, 0x9c0906a9, 0xeb0e363f,
0x72076785, 0x05005713, 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38,
0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242,
0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777,
0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, 0x8f659eff, 0xf862ae69,
0x616bffd3, 0x166ccf45, 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2,
0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc,
0x40df0b66, 0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605, 0xcdd70693,
0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94,
0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d
};

uint32_t
crc32(uint32_t crc, const void *buf, size_t size) {
const uint8_t *p = buf;
crc = crc ^ 0xffffffffUL;
while (size--)
crc = crc32_tab[(crc ^ *p++) & 0xFF] ^ (crc >> 8);
return (crc ^ ~0U);
}
Loading
Loading