Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Memory and performance optimizations. #6

Merged
merged 2 commits into from
May 8, 2014
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
17 changes: 12 additions & 5 deletions addr2line.cc
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,12 @@ void GetSection(const autofdo::SectionMap &sections,
namespace autofdo {

Addr2line *Addr2line::Create(const string &binary_name) {
Addr2line *addr2line = new Google3Addr2line(binary_name);
return CreateWithSampledFunctions(binary_name, NULL);
}

Addr2line *Addr2line::CreateWithSampledFunctions(
const string &binary_name, const map<uint64, uint64> *sampled_functions) {
Addr2line *addr2line = new Google3Addr2line(binary_name, sampled_functions);
if (!addr2line->Prepare()) {
delete addr2line;
return NULL;
Expand All @@ -66,9 +71,11 @@ Addr2line *Addr2line::Create(const string &binary_name) {
}
}

Google3Addr2line::Google3Addr2line(const string &binary_name)
Google3Addr2line::Google3Addr2line(const string &binary_name,
const map<uint64, uint64> *sampled_functions)
: Addr2line(binary_name), line_map_(new AddressToLineMap()),
inline_stack_handler_(NULL), elf_(new ElfReader(binary_name)) {}
inline_stack_handler_(NULL), elf_(new ElfReader(binary_name)),
sampled_functions_(sampled_functions) {}

Google3Addr2line::~Google3Addr2line() {
delete line_map_;
Expand Down Expand Up @@ -115,7 +122,7 @@ bool Google3Addr2line::Prepare() {
debug_ranges_size,
&reader);
inline_stack_handler_ = new InlineStackHandler(
&debug_ranges, sections, &reader);
&debug_ranges, sections, &reader, sampled_functions_);

// Extract the line information
// If .debug_info section is available, we will locate .debug_line using
Expand All @@ -126,7 +133,7 @@ bool Google3Addr2line::Prepare() {
while (debug_info_pos < debug_info_size) {
DirectoryVector dirs;
FileVector files;
CULineInfoHandler handler(&files, &dirs, line_map_);
CULineInfoHandler handler(&files, &dirs, line_map_, sampled_functions_);
inline_stack_handler_->set_directory_names(&dirs);
inline_stack_handler_->set_file_names(&files);
inline_stack_handler_->set_line_handler(&handler);
Expand Down
7 changes: 6 additions & 1 deletion addr2line.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,9 @@ class Addr2line {

static Addr2line *Create(const string &binary_name);

static Addr2line *CreateWithSampledFunctions(
const string &binary_name, const map<uint64, uint64> *sampled_functions);

// Reads the binary and prepares the necessary data.
// Returns True on success.
virtual bool Prepare() = 0;
Expand All @@ -54,7 +57,8 @@ typedef map<uint64, LineIdentifier> AddressToLineMap;

class Google3Addr2line : public Addr2line {
public:
explicit Google3Addr2line(const string &binary_name);
explicit Google3Addr2line(const string &binary_name,
const map<uint64, uint64> *sampled_functions);
virtual ~Google3Addr2line();
virtual bool Prepare();
virtual void GetInlineStack(uint64 address, SourceStack *stack) const;
Expand All @@ -63,6 +67,7 @@ class Google3Addr2line : public Addr2line {
AddressToLineMap *line_map_;
InlineStackHandler *inline_stack_handler_;
ElfReader *elf_;
const map<uint64, uint64> *sampled_functions_;
DISALLOW_COPY_AND_ASSIGN(Google3Addr2line);
};
} // namespace autofdo
Expand Down
21 changes: 17 additions & 4 deletions profile.cc
Original file line number Diff line number Diff line change
Expand Up @@ -78,12 +78,25 @@ void Profile::AggregatePerFunctionProfile() {
}
}

// Returns the sum of all counts stored for this symbol. When
// range_count_map holds any entries, only its counts are summed; otherwise
// the counts in address_count_map are summed instead.
uint64 Profile::ProfileMaps::GetAggregatedCount() const {
  uint64 total = 0;

  if (range_count_map.empty()) {
    // No range entries: fall back to the per-address counts.
    for (const auto &entry : address_count_map) {
      total += entry.second;
    }
    return total;
  }

  for (const auto &entry : range_count_map) {
    total += entry.second;
  }
  return total;
}

void Profile::ProcessPerFunctionProfile(string func_name,
const ProfileMaps &maps) {
if (sample_reader_->GetAggregateSampleCount(
maps.start_addr - symbol_map_->base_addr(),
maps.end_addr - symbol_map_->base_addr())
<= sample_reader_->GetMaxCount() / FLAGS_sample_threshold) {
if (maps.GetAggregatedCount() <=
sample_reader_->GetMaxCount() / FLAGS_sample_threshold) {
return;
}

Expand Down
1 change: 1 addition & 0 deletions profile.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ class Profile {
// Internal data structure that aggregates profile for each symbol.
struct ProfileMaps {
ProfileMaps(uint64 start, uint64 end) : start_addr(start), end_addr(end) {}
uint64 GetAggregatedCount() const;
uint64 start_addr;
uint64 end_addr;
AddressCountMap address_count_map;
Expand Down
8 changes: 6 additions & 2 deletions profile_creator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -86,14 +86,18 @@ bool ProfileCreator::ReadSample(const string &input_profile_name,

bool ProfileCreator::CreateProfileFromSample(const string &output_profile_name,
const string &output_format) {
Addr2line *addr2line =Addr2line::Create(binary_);
SymbolMap symbol_map(binary_);
set<uint64> sampled_addrs = sample_reader_->GetSampledAddresses();;
map<uint64, uint64> sampled_functions =
symbol_map.GetSampledSymbolStartAddressSizeMap(sampled_addrs);
Addr2line *addr2line =Addr2line::CreateWithSampledFunctions(
binary_, &sampled_functions);

if (addr2line == NULL) {
LOG(ERROR) << "Error reading binary " << binary_;
return false;
}

SymbolMap symbol_map(binary_);
Profile profile(sample_reader_, binary_, addr2line, &symbol_map);
profile.ComputeProfile();

Expand Down
17 changes: 5 additions & 12 deletions sample_reader.cc
Original file line number Diff line number Diff line change
Expand Up @@ -51,25 +51,18 @@ string GetFileNameFromBuildID(quipper::PerfParser *parser,
} // namespace

namespace autofdo {
uint64 SampleReader::GetAggregateSampleCount(uint64 addr_low,
uint64 addr_high) const {
uint64 ret = 0;

set<uint64> SampleReader::GetSampledAddresses() const {
set<uint64> addrs;
if (range_count_map_.size() > 0) {
for (const auto &range_count : range_count_map_) {
if (range_count.first.first >= addr_low
&& range_count.first.first < addr_high) {
ret += range_count.second;
}
addrs.insert(range_count.first.first);
}
} else {
for (const auto &addr_count : address_count_map_) {
if (addr_count.first >= addr_low && addr_count.first < addr_high) {
ret += addr_count.second;
}
addrs.insert(addr_count.first);
}
}
return ret;
return addrs;
}

uint64 SampleReader::GetSampleCountOrZero(uint64 addr) const {
Expand Down
7 changes: 3 additions & 4 deletions sample_reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#define AUTOFDO_SAMPLE_READER_H_

#include <map>
#include <set>
#include <string>
#include <vector>
#include <utility>
Expand Down Expand Up @@ -58,10 +59,8 @@ class SampleReader {
return branch_count_map_;
}

// For a given address range [addr_low, addr_high], returns the sum of
// aggregated counts for all instructions within the range. This is
// typically used to check if a function is hot enough to be disassembled.
uint64 GetAggregateSampleCount(uint64 addr_low, uint64 addr_high) const;
set<uint64> GetSampledAddresses() const;

// Returns the sample count for a given instruction.
uint64 GetSampleCountOrZero(uint64 addr) const;
// Returns the total sampled count.
Expand Down
25 changes: 24 additions & 1 deletion symbol_map.cc
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ ProfileInfo& ProfileInfo::operator+=(const ProfileInfo &s) {
}

struct TargetCountCompare {
bool operator()(const TargetCountPair &t1, const TargetCountPair &t2) {
bool operator()(const TargetCountPair &t1, const TargetCountPair &t2) const {
if (t1.second != t2.second) {
return t1.second > t2.second;
} else {
Expand Down Expand Up @@ -561,4 +561,27 @@ void SymbolMap::ComputeWorkingSets() {
accumulated_count += num_inst * count;
}
}

// Builds a map from the start address of each symbol that contains a sampled
// address to the size of that symbol.
//
// Each entry of sampled_addrs is shifted by base_addr_ before it is looked up
// in address_symbol_map_. The symbol chosen for an address is the last one
// whose start address is not greater than the shifted address; addresses that
// precede every known symbol are ignored.
::map<uint64, uint64> SymbolMap::GetSampledSymbolStartAddressSizeMap(
    const set<uint64> &sampled_addrs) const {
  ::map<uint64, uint64> start_to_size;

  // One past the last address of the most recently recorded symbol. Because
  // sampled_addrs is iterated in ascending order, any address below this
  // bound falls inside a symbol that was already recorded.
  uint64 covered_end = 0;

  for (const auto &sampled : sampled_addrs) {
    const uint64 target = sampled + base_addr_;
    if (target >= covered_end) {
      AddressSymbolMap::const_iterator symbol =
          address_symbol_map_.upper_bound(target);
      if (symbol != address_symbol_map_.begin()) {
        // upper_bound yields the first symbol starting after target, so the
        // preceding entry is the candidate symbol for target.
        --symbol;
        const uint64 symbol_start = symbol->first;
        const uint64 symbol_size = symbol->second.second;
        start_to_size.insert(make_pair(symbol_start, symbol_size));
        covered_end = symbol_start + symbol_size;
      }
    }
  }
  return start_to_size;
}
} // namespace autofdo
5 changes: 5 additions & 0 deletions symbol_map.h
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,11 @@ class SymbolMap {
// * re-groups the module from the updated module info.
void UpdateSymbolMap(const string &binary, const Addr2line *addr2line);

// Returns a map from start addresses of functions that have been sampled to
// the size of the function.
::map<uint64, uint64> GetSampledSymbolStartAddressSizeMap(
const set<uint64> &sampled_addrs) const;

void Dump() const;
void DumpFuncLevelProfileCompare(const SymbolMap &map) const;

Expand Down
72 changes: 71 additions & 1 deletion symbolize/addr2line_inlinestack.cc
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,53 @@ bool InlineStackHandler::StartCompilationUnit(uint64 offset,
return true;
}

void InlineStackHandler::CleanupUnusedSubprograms() {
SubprogramsByOffsetMap* subprograms_by_offset =
subprograms_by_offset_maps_.back();
vector<const SubprogramInfo *> worklist;
for (const auto &offset_subprogram : *subprograms_by_offset) {
if (offset_subprogram.second->used()) {
worklist.push_back(offset_subprogram.second);
}
}

while (worklist.size()) {
const SubprogramInfo *info = worklist.back();
worklist.pop_back();
uint64 specification = info->specification();
uint64 abstract_origin = info->abstract_origin();
if (specification) {
SubprogramInfo *info =
subprograms_by_offset->find(specification)->second;
if (!info->used()) {
info->set_used();
worklist.push_back(info);
}
}
if (abstract_origin) {
SubprogramInfo *info =
subprograms_by_offset->find(abstract_origin)->second;
if (!info->used()) {
info->set_used();
worklist.push_back(info);
}
}
}

// Moves the actually used subprograms into a new map so that we can remove
// the entire original map to free memory.
SubprogramsByOffsetMap* new_map = new SubprogramsByOffsetMap();
for (const auto &offset_subprogram : *subprograms_by_offset) {
if (offset_subprogram.second->used()) {
new_map->insert(offset_subprogram);
} else {
delete offset_subprogram.second;
}
}
delete subprograms_by_offset;
subprograms_by_offset_maps_.back() = new_map;
}

bool InlineStackHandler::StartDIE(uint64 offset,
enum DwarfTag tag,
const AttributeList& attrs) {
Expand Down Expand Up @@ -93,9 +140,19 @@ void InlineStackHandler::EndDIE(uint64 offset) {
die_stack_.pop_back();
if (die == DW_TAG_subprogram ||
die == DW_TAG_inlined_subroutine) {
subprogram_insert_order_.push_back(subprogram_stack_.back());
// If the top level subprogram is used, we mark all subprograms in
// the subprogram_stack_ as used.
if (subprogram_stack_.front()->used()) {
subprogram_stack_.back()->set_used();
}
if (!sampled_functions_ || subprogram_stack_.front()->used()) {
subprogram_insert_order_.push_back(subprogram_stack_.back());
}
subprogram_stack_.pop_back();
}
if (die == DW_TAG_compile_unit && sampled_functions_ != NULL) {
CleanupUnusedSubprograms();
}
}

void InlineStackHandler::ProcessAttributeString(
Expand Down Expand Up @@ -171,6 +228,13 @@ void InlineStackHandler::ProcessAttributeUnsigned(
break;
case DW_AT_low_pc:
subprogram_stack_.back()->SetSingletonRangeLow(data);
// If a symbol's start address is in sampled_functions, we will
// mark the top level subprogram of this symbol as used.
if (sampled_functions_ != NULL &&
subprogram_stack_.size() == 1 &&
sampled_functions_->find(data) != sampled_functions_->end()) {
subprogram_stack_.front()->set_used();
}
break;
case DW_AT_high_pc:
subprogram_stack_.back()->SetSingletonRangeHigh(
Expand All @@ -181,6 +245,12 @@ void InlineStackHandler::ProcessAttributeUnsigned(
AddressRangeList::RangeList ranges;
address_ranges_->ReadRangeList(data, compilation_unit_base_, &ranges);
subprogram_stack_.back()->SwapAddressRanges(&ranges);
if (sampled_functions_ != NULL &&
subprogram_stack_.size() == 1 &&
sampled_functions_->find(AddressRangeList::RangesMin(&ranges))
!= sampled_functions_->end()) {
subprogram_stack_.front()->set_used();
}
break;
}
case DW_AT_decl_line: {
Expand Down