-
Notifications
You must be signed in to change notification settings - Fork 150
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fix BinaryEdit::getResolvedLibraryPath for Ubuntu 22.04 #1362
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -43,6 +43,12 @@ | |
#include "linux.h" | ||
#include <dlfcn.h> | ||
|
||
#include <exception> | ||
#include <sstream> | ||
#include <string> | ||
#include <utility> | ||
#include <vector> | ||
|
||
#include "boost/shared_ptr.hpp" | ||
|
||
#include "pcEventMuxer.h" | ||
|
@@ -174,118 +180,167 @@ bool PCEventMuxer::useCallback(Dyninst::ProcControlAPI::EventType et) | |
return false; | ||
} | ||
|
||
bool BinaryEdit::getResolvedLibraryPath(const string &filename, std::vector<string> &paths) { | ||
char *libPathStr, *libPath; | ||
std::vector<string> libPaths; | ||
struct stat dummy; | ||
char buffer[512]; | ||
char *pos, *key, *val; | ||
namespace | ||
{ | ||
template <typename ContainerT = std::vector<std::string>, | ||
typename PredicateT = std::string (*)(const std::string&)> | ||
inline ContainerT | ||
delimit( | ||
const std::string& line, const std::string& delimiters = ":", | ||
PredicateT&& predicate = [](const std::string& s) -> std::string { return s; }) | ||
{ | ||
size_t _beginp = 0; // position that is the beginning of the new string | ||
size_t _delimp = 0; // position of the delimiter in the string | ||
ContainerT _result = {}; | ||
while(_beginp < line.length() && _delimp < line.length()) | ||
{ | ||
// find the first character (starting at _delimp) that is not a delimiter | ||
_beginp = line.find_first_not_of(delimiters, _delimp); | ||
// if no a character after or at _end that is not a delimiter is not found | ||
// then we are done | ||
if(_beginp == std::string::npos) | ||
break; | ||
// starting at the position of the new string, find the next delimiter | ||
_delimp = line.find_first_of(delimiters, _beginp); | ||
std::string _tmp{}; | ||
try | ||
{ | ||
// starting at the position of the new string, get the characters | ||
// between this position and the next delimiter | ||
if(_beginp < line.length()) | ||
_tmp = line.substr(_beginp, _delimp - _beginp); | ||
} catch(std::exception& e) | ||
{ | ||
// print the exception but don't fail, unless maybe it should? | ||
fprintf(stderr, "[%s:%i] %s (delimiters: %s) :: %s\n", __FILE__, __LINE__, | ||
line.c_str(), delimiters.c_str(), e.what()); | ||
} | ||
// don't add empty strings | ||
if(!_tmp.empty()) | ||
{ | ||
_result.emplace_back(std::forward<PredicateT>(predicate)(_tmp)); | ||
} | ||
} | ||
return _result; | ||
} | ||
} // namespace | ||
|
||
bool | ||
BinaryEdit::getResolvedLibraryPath(const string& filename, std::vector<string>& paths) | ||
{ | ||
auto _path_exists = [](const std::string& _filename) { | ||
struct stat dummy; | ||
return (_filename.empty()) ? false : (stat(_filename.c_str(), &dummy) == 0); | ||
}; | ||
|
||
auto _emplace_if_exists = [&paths, filename, | ||
_path_exists](const std::string& _directory) { | ||
auto _filename = _directory + "/" + filename; | ||
if(_path_exists(_filename)) | ||
paths.emplace_back(std::move(_filename)); | ||
}; | ||
|
||
// prefer qualified file paths | ||
if (stat(filename.c_str(), &dummy) == 0) { | ||
paths.push_back(filename); | ||
} | ||
if(_path_exists(filename)) | ||
paths.emplace_back(filename); | ||
|
||
// For cross-rewriting | ||
char *dyn_path = getenv("DYNINST_REWRITER_PATHS"); | ||
if (dyn_path) { | ||
libPathStr = strdup(dyn_path); | ||
libPath = strtok(libPathStr, ":"); | ||
while (libPath != NULL) { | ||
libPaths.push_back(string(libPath)); | ||
libPath = strtok(NULL, ":"); | ||
} | ||
free(libPathStr); | ||
char* dyn_path = getenv("DYNINST_REWRITER_PATHS"); | ||
if(dyn_path) | ||
{ | ||
for(const auto& itr : delimit(dyn_path, ":")) | ||
_emplace_if_exists(itr); | ||
} | ||
|
||
// search paths from environment variables | ||
char *ld_path = getenv("LD_LIBRARY_PATH"); | ||
if (ld_path) { | ||
libPathStr = strdup(ld_path); | ||
libPath = strtok(libPathStr, ":"); | ||
while (libPath != NULL) { | ||
libPaths.push_back(string(libPath)); | ||
libPath = strtok(NULL, ":"); | ||
} | ||
free(libPathStr); | ||
char* ld_path = getenv("LD_LIBRARY_PATH"); | ||
if(ld_path) | ||
{ | ||
for(const auto& itr : delimit(ld_path, ":")) | ||
_emplace_if_exists(itr); | ||
} | ||
|
||
#ifdef DYNINST_COMPILER_SEARCH_DIRS | ||
// search compiler-specific library paths | ||
// NB1: DYNINST_COMPILER_SEARCH_DIRS is defined at build time | ||
// NB2: This is explicitly done _after_ adding directories from | ||
// LD_LIBRARY_PATH so that the user can override these paths. | ||
{ | ||
#define xstr(s) str(s) | ||
#define str(s) #s | ||
|
||
libPathStr = strdup(xstr(DYNINST_COMPILER_SEARCH_DIRS)); | ||
libPath = strtok(libPathStr, ":"); | ||
while (libPath != NULL) { | ||
libPaths.emplace_back(libPath); | ||
libPath = strtok(NULL, ":"); | ||
} | ||
free(libPathStr); | ||
|
||
#undef str | ||
#undef xstr | ||
} | ||
#endif | ||
//libPaths.push_back(string(getenv("PWD"))); | ||
for (unsigned int i = 0; i < libPaths.size(); i++) { | ||
string str = libPaths[i] + "/" + filename; | ||
if (stat(str.c_str(), &dummy) == 0) { | ||
paths.push_back(str); | ||
} | ||
{ | ||
# define xstr(s) str(s) | ||
# define str(s) # s | ||
|
||
for(const auto& itr : delimit(xstr(DYNINST_COMPILER_SEARCH_DIRS), ":")) | ||
_emplace_if_exists(itr); | ||
|
||
# undef str | ||
# undef xstr | ||
} | ||
#endif | ||
|
||
// search ld.so.cache | ||
// apparently ubuntu doesn't like pclosing NULL, so a shared pointer custom | ||
// destructor is out. Ugh. | ||
FILE* ldconfig = popen("/sbin/ldconfig -p", "r"); | ||
if (ldconfig) { | ||
if(!fgets(buffer, 512, ldconfig)) { // ignore first line | ||
return false; | ||
} | ||
while (fgets(buffer, 512, ldconfig) != NULL) { | ||
pos = buffer; | ||
while (*pos == ' ' || *pos == '\t') pos++; | ||
key = pos; | ||
while (*pos != ' ') pos++; | ||
*pos = '\0'; | ||
while (*pos != '=' && *(pos + 1) != '>') pos++; | ||
pos += 2; | ||
while (*pos == ' ' || *pos == '\t') pos++; | ||
val = pos; | ||
while (*pos != '\n' && *pos != '\0') pos++; | ||
*pos = '\0'; | ||
if (strcmp(key, filename.c_str()) == 0) { | ||
paths.push_back(val); | ||
if(ldconfig) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is just terrible, and I'm honestly surprised it took this long to get a nasty bug here. I vote we replace this and the manual directory list lookup below by just using
The only caveat here is that
If we want to work around this, we could just continue to do the manual search of LD_LIBRARY_PATH first to let users override. Begrudgingly, the DYNINST_COMPILER_SEARCH_DIRS has to stay because ldconfig doesn't always have the compiler-specific search directories available. At least on my system, With this, we'd have the heuristic:
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You want me to proceed with this? I'll probably first use with the RTLD_NOLOAD to make sure it's not already open bc I'm concerned whether there is a guarantee that loading the library will not have any unintended side effects outside of just finding the library, e.g., cause Dyninst to start parsing that library. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We had an internal discussion about this yesterday. The confounding factors are the RTLD_NOLOAD won't let you open a file unless it's already open. This means the only way to inspect RTLD_DI_ORIGIN is to actually load the library into Dyninst's address space. Aside from the security implications, it's just an inefficient way of doing it. We're pretty much stuck to parsing the output of ldconfig. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What about NOLOAD on the library containing this function, grabbing that DI_ORIGIN and searching that? In many cases, that could give us the path we want. We could also NOLOAD dlopen the executable, get the link path, and search the dirs of the libs it is linked to There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, I fully understand what NOLOAD does. My point is that since this function is being called, unless the function is statically linked, the library is loaded so it will return a handle. And the same goes for passing nullptr to dlopen (which returns the handle to the exe). As long as those are dynamic objects, you can get the handle via NOLOAD There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If this helps to clarify: auto* _handle = dlopen("libdyninstAPI.so", RTLD_LAZY|RTLD_NOLOAD);
// ... get origin of handle ...
auto _lib_directory = dirname(...); In other words, the RT library you want is extremely likely to be installed in the same directory as the dyninstAPI library There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This function is supposed to resolve any library name, so we have to at least consider everything in ld.so.cache which is likely a larger set than the union of DYNINST_REWRITER_PATHS, LD_LIBRARY_PATH, DYNINST_COMPILER_SEARCH_DIRS, and $ORIGIN. @kupsch @bigtrak There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Ok well, then how about adding a vector hints parameter which gets searched after LD_LIBRARY_PATH for situations like finding the RT library. If you are concerned about the ABI, we can just implement one with the same signature that calls the new one with zero hints. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
What kinds of hints were you thinking? I think adding the "self" $ORIGIN would be good to do regardless. P.S. |
||
{ | ||
constexpr size_t buffer_size = 512; | ||
char buffer[buffer_size]; | ||
// ignore first line | ||
if(fgets(buffer, buffer_size, ldconfig)) | ||
{ | ||
// each line constaining relevant info should be in form: | ||
// <LIBRARY_BASENAME> (...) => <RESOLVED_ABSOLUTE_PATH> | ||
// example: | ||
// libz.so (libc6,x86-64) => /lib/x86_64-linux-gnu/libz.so | ||
auto _get_entry = [](const std::string& _inp) { | ||
auto _paren_pos = _inp.find('('); | ||
auto _arrow_pos = _inp.find("=>", _paren_pos); | ||
if(_arrow_pos == std::string::npos || _paren_pos == std::string::npos) | ||
return std::string{}; | ||
if(_arrow_pos + 2 < _inp.length()) | ||
{ | ||
auto _pos = _inp.find_first_not_of(" \t", _arrow_pos + 2); | ||
if(_pos < _inp.length()) | ||
return _inp.substr(_pos); | ||
} | ||
return std::string{}; | ||
}; | ||
|
||
auto _data = std::stringstream{}; | ||
while(fgets(buffer, buffer_size, ldconfig) != nullptr) | ||
{ | ||
_data << buffer; | ||
auto _len = strnlen(buffer, buffer_size); | ||
if(_len > 0 && buffer[_len - 1] == '\n') | ||
{ | ||
auto _v = _data.str(); | ||
if(!_v.empty()) | ||
{ | ||
_v = _v.substr(_v.find_first_not_of(" \t")); | ||
if(_v.length() > 1) | ||
{ | ||
auto _entry = _get_entry(_v.substr(0, _v.length() - 1)); | ||
if(!_entry.empty()) | ||
_emplace_if_exists(_entry); | ||
} | ||
} | ||
_data = std::stringstream{}; | ||
} | ||
} | ||
} | ||
pclose(ldconfig); | ||
} | ||
|
||
// search hard-coded system paths | ||
libPaths.clear(); | ||
libPaths.push_back("/usr/local/lib"); | ||
libPaths.push_back("/usr/share/lib"); | ||
libPaths.push_back("/usr/lib"); | ||
libPaths.push_back("/usr/lib64"); | ||
libPaths.push_back("/usr/lib/x86_64-linux-gnu"); | ||
libPaths.push_back("/lib"); | ||
libPaths.push_back("/lib64"); | ||
libPaths.push_back("/lib/x86_64-linux-gnu"); | ||
libPaths.push_back("/usr/lib/i386-linux-gnu"); | ||
libPaths.push_back("/usr/lib32"); | ||
for (unsigned int i = 0; i < libPaths.size(); i++) { | ||
string str = libPaths[i] + "/" + filename; | ||
if (stat(str.c_str(), &dummy) == 0) { | ||
paths.push_back(str); | ||
} | ||
for(const char* itr : | ||
{ "/usr/local/lib", "/usr/share/lib", "/usr/lib", "/usr/lib64", | ||
"/usr/lib/x86_64-linux-gnu", "/lib", "/lib64", "/lib/x86_64-linux-gnu", | ||
"/usr/lib/i386-linux-gnu", "/usr/lib32" }) | ||
{ | ||
_emplace_if_exists(itr); | ||
} | ||
|
||
return ( 0 < paths.size() ); | ||
return (!paths.empty()); | ||
} | ||
|
||
bool BinaryEdit::archSpecificMultithreadCapable() { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
For as much as I hate in/out parameters, I'd rather replace this with
just to have one less hand-rolled solution in this codebase.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Makes perfect sense. I've actually had this fix for a while but hadn't submitted it bc I figured that would be the case and hadn't looked around for any existing solution.