From 1408afb8c011b4198212fb3e16254d72fd462aa1 Mon Sep 17 00:00:00 2001 From: Dehao Chen Date: Thu, 27 Jul 2017 10:49:11 -0700 Subject: [PATCH 1/3] Update README for the profiling binary requirement. --- README | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README b/README index 0df76d8..7078c7a 100644 --- a/README +++ b/README @@ -25,7 +25,8 @@ architectures, BR_INST_EXEC:TAKEN also works. --binary: BINARY with debug info. You need to make sure the binary name is the same as the binary you run during profiling. Additionally, you will need to have debug info (i.e. line table) availabe in the binary. This means that -you need to compile the binary with "-gmlt" +you need to compile the binary with "-gmlt" or "-g1". For LLVM, you also need +to have -fdebug-info-for-profiling. Output: From 72b7f86b920a35b02faed94afc685fd2d517fc78 Mon Sep 17 00:00:00 2001 From: Dehao Chen Date: Thu, 27 Jul 2017 14:47:19 -0700 Subject: [PATCH 2/3] Rebase autofdo toolchain. --- create_llvm_prof.cc | 1 + dump_gcov.cc | 2 +- llvm_profile_writer.cc | 9 +-- module_grouper.cc | 109 ++++++++++++++++++----------- module_grouper.h | 27 ++++---- profile.cc | 41 ++++++----- profile_creator.cc | 8 +-- profile_creator.h | 11 ++- profile_diff.cc | 4 +- profile_merger.cc | 3 +- profile_reader.cc | 11 +-- profile_reader.h | 27 +++++--- profile_update.cc | 20 ++++-- profile_writer.cc | 110 +++++++++++++++-------------- profile_writer.h | 9 ++- source_info.h | 40 ++++++++--- symbol_map.cc | 153 +++++++++++++++++++++++++++-------------- symbol_map.h | 83 ++++++++++++++++------ 18 files changed, 430 insertions(+), 238 deletions(-) diff --git a/create_llvm_prof.cc b/create_llvm_prof.cc index dc1cdb3..5ab0521 100644 --- a/create_llvm_prof.cc +++ b/create_llvm_prof.cc @@ -75,6 +75,7 @@ int main(int argc, char **argv) { } autofdo::ProfileCreator creator(FLAGS_binary); + creator.set_use_discriminator_encoding(true); if (creator.CreateProfile(FLAGS_profile, FLAGS_profiler, writer.get(), 
FLAGS_out)) return 0; diff --git a/dump_gcov.cc b/dump_gcov.cc index 2af7bbf..0b64928 100644 --- a/dump_gcov.cc +++ b/dump_gcov.cc @@ -55,7 +55,7 @@ int main(int argc, char **argv) { autofdo::SymbolMap symbol_map; autofdo::ModuleMap module_map; autofdo::AutoFDOProfileReader reader( - &symbol_map, &module_map); + &symbol_map, &module_map, true); reader.ReadFromFile(argv[1]); symbol_map.Dump(); PrintModuleProfiles(module_map); diff --git a/llvm_profile_writer.cc b/llvm_profile_writer.cc index 62d29f2..1431e9f 100644 --- a/llvm_profile_writer.cc +++ b/llvm_profile_writer.cc @@ -31,7 +31,6 @@ #include "profile_writer.h" DECLARE_bool(debug_dump); -DECLARE_string(format); namespace autofdo { @@ -95,10 +94,11 @@ void LLVMProfileBuilder::VisitCallsite(const Callsite &callsite) { inline_stack_.pop_back(); } auto &caller_profile = *(inline_stack_.back()); + auto CalleeName = GetNameRef(Symbol::Name(callsite.second)); auto &callee_profile = caller_profile.functionSamplesAt(llvm::sampleprof::LineLocation( - line, discriminator)); - callee_profile.setName(GetNameRef(callsite.second)); + line, discriminator))[CalleeName]; + callee_profile.setName(CalleeName); inline_stack_.push_back(&callee_profile); } @@ -142,7 +142,8 @@ void LLVMProfileBuilder::Visit(const Symbol *node) { } llvm::StringRef LLVMProfileBuilder::GetNameRef(const string &str) { - StringIndexMap::const_iterator ret = name_table_.find(str); + StringIndexMap::const_iterator ret = + name_table_.find(Symbol::Name(str.c_str())); CHECK(ret != name_table_.end()); return llvm::StringRef(ret->first.c_str()); } diff --git a/module_grouper.cc b/module_grouper.cc index 65ab56b..42c638e 100644 --- a/module_grouper.cc +++ b/module_grouper.cc @@ -33,7 +33,7 @@ DEFINE_int32(max_ggc_memory, 3 << 20, namespace autofdo { // in_func is not a const pointer, but it's not modified in the function. 
void Function::AddInEdgeCount(int64 count, Function *in_func) { - pair ret = in_edge_count.insert( + std::pair ret = in_edge_count.insert( EdgeCount::value_type(in_func, 0)); ret.first->second += count; total_in_count += count; @@ -41,17 +41,20 @@ void Function::AddInEdgeCount(int64 count, Function *in_func) { // out_func is not a const pointer, but it's not modified in the function. void Function::AddOutEdgeCount(int64 count, Function *out_func) { - pair ret = out_edge_count.insert( + std::pair ret = out_edge_count.insert( EdgeCount::value_type(out_func, 0)); ret.first->second += count; total_out_count += count; } -ModuleGrouper *ModuleGrouper::GroupModule( +ModuleGrouper::ModuleGrouper(const SymbolMap *symbol_map) + : total_count_(0), symbol_map_(symbol_map) {} + +std::unique_ptr ModuleGrouper::GroupModule( const string &binary, const string §ion_prefix, const SymbolMap *symbol_map) { - ModuleGrouper *grouper = new ModuleGrouper(symbol_map); + std::unique_ptr grouper(new ModuleGrouper(symbol_map)); grouper->ReadModuleOptions(binary, section_prefix); if (grouper->module_map().size() == 0) { LOG(WARNING) << "Cannot read compilation info from binary. " @@ -76,16 +79,26 @@ void ModuleGrouper::Group() { continue; } const string base_module_name = symbol->ModuleName(); - vector queue; + std::vector queue; queue.push_back(symbol); while (!queue.empty()) { const Symbol *s = queue.back(); queue.pop_back(); + if (s->total_count == 0) { + continue; + } for (const auto &pos_symbol : s->callsites) { queue.push_back(pos_symbol.second); } - if (s->IsFromHeader() || s->ModuleName() == base_module_name || - s->total_count == 0) { + // If we don't have module info for the symbol, try to find it from + // top level symbol map. 
+ if (s->ModuleName().empty()) { + s = symbol_map_->GetSymbolByName(s->info.func_name); + if (s == nullptr) { + continue; + } + } + if (s->IsFromHeader() || s->ModuleName() == base_module_name) { continue; } legacy_group[base_module_name].insert(s->ModuleName()); @@ -100,15 +113,11 @@ void ModuleGrouper::Group() { continue; } for (const auto &name : name_modules.second) { - if (module_map_.find(name) == module_map_.end()) { - LOG(ERROR) << "Module " << name.c_str() - << " is not found in the profile binary"; - continue; + if (module_map_.find(name) != module_map_.end()) { + module_map_[name].is_exported = true; + module_map_[name_modules.first].aux_modules.insert(name); } - module_map_[name].is_exported = true; } - module_map_[name_modules.first].aux_modules.insert( - name_modules.second.begin(), name_modules.second.end()); } for (int64 accumulate_count = GetMaxEdge(&max_edge); @@ -164,10 +173,10 @@ void ModuleGrouper::RecursiveBuildGraph(const string &caller_name, total_count_ += target_count.second; string caller_module_name = UpdateModuleMap(caller->ModuleName()); string callee_module_name = UpdateModuleMap(callee->ModuleName()); - pair caller_ret = + std::pair caller_ret = function_map_.insert(FunctionMap::value_type( caller_name, Function(caller_name, caller_module_name))); - pair callee_ret = + std::pair callee_ret = function_map_.insert(FunctionMap::value_type( callee_name, Function(callee_name, callee_module_name))); AddEdgeCount( @@ -191,7 +200,7 @@ void ModuleGrouper::BuildGraph() { void ModuleGrouper::AddEdgeCount(const CallEdge &edge, int64 count) { edge.from->AddOutEdgeCount(count, edge.to); edge.to->AddInEdgeCount(count, edge.from); - pair ret = edge_map_.insert( + std::pair ret = edge_map_.insert( EdgeMap::value_type(edge, 0)); ret.first->second += count; } @@ -221,17 +230,30 @@ void ModuleGrouper::IntegrateEdge(const CallEdge &edge) { AddEdgeCount(CallEdge(edge.to, callee_count.first), scaled_count * -1); } } - // Add the callee's module as the 
caller's module's aux-module. + + // Add the callee's module as the caller and parent module's aux-module. ModuleMap::iterator from_module_iter = module_map_.find(edge.from->module); ModuleMap::iterator to_module_iter = module_map_.find(edge.to->module); - if (from_module_iter->first != to_module_iter->first) { + if (from_module_iter->first == to_module_iter->first) { + return; + } + to_module_iter->second.is_exported = true; + std::set primary_modules = from_module_iter->second.parent_modules; + primary_modules.insert(from_module_iter->first); + for (const auto &primary_module : primary_modules) { + if (to_module_iter->first == primary_module) { + continue; + } if (!to_module_iter->second.is_fake) { - from_module_iter->second.aux_modules.insert(to_module_iter->first); + module_map_[primary_module].aux_modules.insert(to_module_iter->first); + to_module_iter->second.parent_modules.insert(primary_module); + } + for (const auto &aux_module : to_module_iter->second.aux_modules) { + if (aux_module != primary_module) { + module_map_[primary_module].aux_modules.insert(aux_module); + module_map_[aux_module].parent_modules.insert(primary_module); + } } - from_module_iter->second.aux_modules.insert( - to_module_iter->second.aux_modules.begin(), - to_module_iter->second.aux_modules.end()); - to_module_iter->second.is_exported = true; } } @@ -274,14 +296,12 @@ bool ModuleGrouper::ShouldIntegrate(const string &from_module, || !module_map_[to_module].is_valid) { return false; } + if (skipped_modules_.find(to_module) != skipped_modules_.end()) { + return false; + } if (from_module == to_module) { return true; } - // Never integrate tcmalloc as auxilary module. - if (to_module == "tcmalloc/tcmalloc_or_debug.cc" - || to_module == "tcmalloc/tcmalloc.cc") { - return false; - } // We preprocess faked module first because it does not have lang field and // flag_values fields. 
if (!module_map_[to_module].is_fake && !module_map_[from_module].is_fake) { @@ -293,14 +313,21 @@ bool ModuleGrouper::ShouldIntegrate(const string &from_module, return false; } } - set modules; - modules.insert(from_module); - modules.insert(to_module); - modules.insert(module_map_[from_module].aux_modules.begin(), - module_map_[from_module].aux_modules.end()); - modules.insert(module_map_[to_module].aux_modules.begin(), - module_map_[to_module].aux_modules.end()); - return GetTotalMemory(modules) < FLAGS_max_ggc_memory; + std::set from_modules = module_map_[from_module].parent_modules; + from_modules.insert(from_module); + for (const auto &module : from_modules) { + std::set modules; + modules.insert(module); + modules.insert(to_module); + modules.insert(module_map_[module].aux_modules.begin(), + module_map_[module].aux_modules.end()); + modules.insert(module_map_[to_module].aux_modules.begin(), + module_map_[to_module].aux_modules.end()); + if (GetTotalMemory(modules) > FLAGS_max_ggc_memory) { + return false; + } + } + return true; } int64 ModuleGrouper::GetMaxEdge(CallEdge *edge) { @@ -392,7 +419,6 @@ void ModuleGrouper::ReadOptionsByType(const string &binary, } break; case SYSTEM_PATHS: - module->has_system_paths_field = true; if (!sect_data || section_size == 0) return; @@ -463,11 +489,14 @@ void ModuleGrouper::ReadOptionsByType(const string &binary, } else if (module->options[module->options.size() - option_num + j].second != curr) { module->is_valid = false; - LOG(ERROR) << "Duplicated module entry for " << module_name; - break; } curr += strlen(curr) + 1; } + if (!module->is_valid) { + LOG(ERROR) << "Duplicated module(" << module_name + << ") has inconsistent option data, it will not be included " + << "in module grouping"; + } } } } // namespace autofdo diff --git a/module_grouper.h b/module_grouper.h index a05ed25..e75bcfc 100644 --- a/module_grouper.h +++ b/module_grouper.h @@ -21,6 +21,7 @@ #include #include +#include #include #include @@ -43,22 +44,20 
@@ enum OptionType { class Function; class SymbolMap; class Symbol; -typedef pair Option; -typedef map EdgeCount; +typedef std::pair Option; +typedef std::map EdgeCount; // The structure to store the auxilary information for each module. class Module { public: explicit Module() : num_quote_paths(0), num_bracket_paths(0), num_system_paths(0), - num_cpp_defines(0), num_cpp_includes(0), num_cl_args(0), - has_system_paths_field(false), + num_cpp_defines(0), num_cpp_includes(0), num_cl_args(0), id(0), is_exported(false), is_fake(false), is_valid(true), lang(0), ggc_memory_in_kb(0) {} explicit Module(bool is_fake) : num_quote_paths(0), num_bracket_paths(0), num_system_paths(0), - num_cpp_defines(0), num_cpp_includes(0), num_cl_args(0), - has_system_paths_field(false), + num_cpp_defines(0), num_cpp_includes(0), num_cl_args(0), id(0), is_exported(false), is_fake(is_fake), is_valid(true), lang(0), ggc_memory_in_kb(0) {} @@ -68,9 +67,7 @@ class Module { int num_cpp_defines; int num_cpp_includes; int num_cl_args; - // Binary compatibility flag -- crosstool v17 introduces - // a new field in GCDA file to record system include paths. - bool has_system_paths_field; + int id; // If the module is the auxilary module of other modules. bool is_exported; // If the module is a fake module. @@ -85,9 +82,11 @@ class Module { // Total GC memory consumed by compiler in KiB. uint32 ggc_memory_in_kb; // The module option information originally designed in LIPO. - vector