Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 15 additions & 22 deletions src/cpu/o3/commit.cc
Original file line number Diff line number Diff line change
Expand Up @@ -165,35 +165,27 @@ Commit::CommitStats::CommitStats(CPU *cpu, Commit *commit)
ADD_STAT(committedInstType, statistics::units::Count::get(),
"Class of committed instruction"),
ADD_STAT(commitEligibleSamples, statistics::units::Cycle::get(),
"number cycles where commit BW limit reached")
{
using namespace statistics;
"number cycles where commit BW limit reached"),
ADD_STAT(committedInst, statistics::units::Count::get(),
"Required for Top-Down, number of committed instructions") {
using namespace statistics;

commitSquashedInsts.prereq(commitSquashedInsts);
commitNonSpecStalls.prereq(commitNonSpecStalls);
branchMispredicts.prereq(branchMispredicts);
commitSquashedInsts.prereq(commitSquashedInsts);
commitNonSpecStalls.prereq(commitNonSpecStalls);
branchMispredicts.prereq(branchMispredicts);

numCommittedDist
.init(0,commit->commitWidth,1)
.flags(statistics::pdf);
numCommittedDist.init(0, commit->commitWidth, 1).flags(statistics::pdf);

amos
.init(cpu->numThreads)
.flags(total);
amos.init(cpu->numThreads).flags(total);

membars
.init(cpu->numThreads)
.flags(total);
membars.init(cpu->numThreads).flags(total);

functionCalls
.init(commit->numThreads)
.flags(total);
functionCalls.init(commit->numThreads).flags(total);

committedInstType
.init(commit->numThreads,enums::Num_OpClass)
.flags(total | pdf | dist);
committedInstType.init(commit->numThreads, enums::Num_OpClass)
.flags(total | pdf | dist);

committedInstType.ysubnames(enums::OpClassStrings);
committedInstType.ysubnames(enums::OpClassStrings);
}

void
Expand Down Expand Up @@ -1104,6 +1096,7 @@ Commit::commitInsts()

DPRINTF(CommitRate, "%i\n", num_committed);
stats.numCommittedDist.sample(num_committed);
stats.committedInst += num_committed;

if (num_committed == commitWidth) {
stats.commitEligibleSamples++;
Expand Down
5 changes: 5 additions & 0 deletions src/cpu/o3/commit.hh
Original file line number Diff line number Diff line change
Expand Up @@ -490,7 +490,12 @@ class Commit

/** Number of cycles where the commit bandwidth limit is reached. */
statistics::Scalar commitEligibleSamples;
/** Top-Down methodology: number of committed instructions. */
statistics::Scalar committedInst;
} stats;

public:
const CommitStats &getStats() const { return stats; }
};

} // namespace o3
Expand Down
195 changes: 194 additions & 1 deletion src/cpu/o3/cpu.cc
Original file line number Diff line number Diff line change
Expand Up @@ -355,7 +355,8 @@ CPU::CPUStats::CPUStats(CPU *cpu)
"to idling"),
ADD_STAT(quiesceCycles, statistics::units::Cycle::get(),
"Total number of cycles that CPU has spent quiesced or waiting "
"for an interrupt")
"for an interrupt"),
topDownStats(cpu)
{
// Register any of the O3CPU's stats here.
timesIdled
Expand All @@ -368,6 +369,198 @@ CPU::CPUStats::CPUStats(CPU *cpu)
.prereq(quiesceCycles);
}

/**
 * Creates the "TopDownStats" statistics group and constructs each Top-Down
 * sub-group, handing every one of them the same CPU pointer.
 */
CPU::CPUStats::TopDownStats::TopDownStats(CPU *cpu)
    : statistics::Group(cpu, "TopDownStats"),
      topDownL1(cpu),
      topDownFbL2(cpu),
      topDownBsL2(cpu),
      topDownBbL2(cpu),
      topDownBbMem(cpu)
{
}

// Registers the four level-1 Top-Down formulas (frontendBound,
// badSpeculation, backendBound, retiring) in the "TopDownL1" stats group and
// defines each of them as a fraction of totalSlots.
CPU::CPUStats::TopDownStats::TopDownL1::TopDownL1(CPU *cpu)
: statistics::Group(cpu, "TopDownL1"),
ADD_STAT(frontendBound,
statistics::units::Rate<statistics::units::Count,
statistics::units::Count>::get(),
"Frontend Bound, fraction of slots lost due to frontend "
"undersupplying the backend"),
ADD_STAT(badSpeculation,
statistics::units::Rate<statistics::units::Count,
statistics::units::Count>::get(),
"Bad Speculation, fraction of slots lost due to mispeculation"),
ADD_STAT(backendBound,
statistics::units::Rate<statistics::units::Count,
statistics::units::Count>::get(),
"Backend Bound, fraction of slots lost due to backend resource"
" constraints."),
ADD_STAT(
retiring,
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you format it as done above

statistics::units::Rate<statistics::units::Count,
statistics::units::Count>::get(),
"Retiring, fraction of slots successfully retired by the backend") {

// Total slots: the rename width multiplied by the CPU cycle count. This is
// the common denominator of the frontendBound, badSpeculation and retiring
// formulas below.
statistics::Temp totalSlots =
cpu->rename.getWidth() * cpu->baseStats.numCycles;

// L1 Frontend Bound: fetch bubbles as a fraction of all slots.
frontendBound = cpu->fetch.getStats().fetchBubbles / (totalSlots);

// L1 Bad Speculation
// Recovery cycles for mispredictions detected at Decode
int recoveryCycleToDecode = cpu->decode.getFetchToDecodeDelay();

// Decode-detected branch mispredictions weighted by their recovery cycles.
// NOTE(review): the (int) cast is redundant, recoveryCycleToDecode is
// already declared as int.
auto decodeBranchMispred =
(int)recoveryCycleToDecode * cpu->decode.getStats().branchMispred;

// Recovery cycles for mispredictions detected at IEW: the sum of the
// fetch-to-decode, decode-to-rename and rename-to-IEW delays.
int recoveryCycleToIEW = cpu->decode.getFetchToDecodeDelay() +
cpu->rename.getDecodeToRenameDelay() +
cpu->iew.getRenameToIEWDelay();

// IEW branch mispredictions plus memory order violations, weighted by the
// IEW recovery cycles (the (int) cast is again a no-op on an int).
auto iewBadSpec =
(int)recoveryCycleToIEW * (cpu->iew.getStats().branchMispredicts +
cpu->iew.getStats().memOrderViolationEvents);

// Number of wasted slots due to bad speculation: instructions that were
// renamed but never committed.
auto wastedSlots = cpu->rename.getStats().renamedInsts -
cpu->commit.getStats().committedInst;

// The recovery-cycle terms are multiplied by the rename width before being
// added to the wasted slots, then the sum is divided by totalSlots.
badSpeculation = (wastedSlots + (decodeBranchMispred + iewBadSpec) *
cpu->rename.getWidth()) /
(totalSlots);

// L1 Retiring: committed instructions as a fraction of all slots.
retiring = cpu->commit.getStats().committedInst / (totalSlots);

// L1 Backend Bound: the remainder, so the four L1 fractions sum to 1.
backendBound = 1 - (frontendBound + badSpeculation + retiring);
}

// Registers the level-2 frontend formulas (fetchLatency, fetchBandwidth) in
// the "TopDownL2_FrontendBound" stats group.
CPU::CPUStats::TopDownStats::TopDownFrontendBoundL2::TopDownFrontendBoundL2(
CPU *cpu)
: statistics::Group(cpu, "TopDownL2_FrontendBound"),
ADD_STAT(fetchLatency,
statistics::units::Rate<statistics::units::Count,
statistics::units::Count>::get(),
"Fetch Latency Bound, frontend stalls due to instruction cache "
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Its not only instruction cache but also TLB and BTB

"inefficiency"),
ADD_STAT(fetchBandwidth,
statistics::units::Rate<statistics::units::Count,
statistics::units::Count>::get(),
"Fetch Bandwidth Bound, frontend stalls due to decoder "
"inefficiency") {
// Frontend L2
// fetchLatency is the fetchBubblesMax counter divided by the CPU cycle
// count.
fetchLatency =
cpu->fetch.getStats().fetchBubblesMax / (cpu->baseStats.numCycles);
// fetchBandwidth is whatever is left of the L1 frontendBound fraction once
// fetchLatency has been subtracted.
// NOTE(review): this reads topDownL1 through cpu->cpuStats while cpuStats is
// still being constructed; it relies on topDownL1 being declared before
// this group in TopDownStats - confirm the access is safe at this point.
fetchBandwidth =
cpu->cpuStats.topDownStats.topDownL1.frontendBound - fetchLatency;
}

/**
 * Registers the level-2 bad-speculation formulas in the
 * "TopDownL2_BadSpeculation" stats group.
 *
 * The L1 badSpeculation fraction is split in two: branchMissPredicts takes
 * the share given by (IEW + decode branch mispredictions) over
 * (those mispredictions + memory order violations), and machineClears takes
 * the remainder.
 */
CPU::CPUStats::TopDownStats::TopDownBadSpeculationL2::TopDownBadSpeculationL2(
    CPU *cpu)
    : statistics::Group(cpu, "TopDownL2_BadSpeculation"),
      ADD_STAT(branchMissPredicts,
               statistics::units::Rate<statistics::units::Count,
                                       statistics::units::Count>::get(),
               "Branch Miss Predicts"),
      ADD_STAT(machineClears,
               statistics::units::Rate<statistics::units::Count,
                                       statistics::units::Count>::get(),
               "Memory Order Violations")
{
    // Event counters feeding the split.
    auto &mispredAtIew = cpu->iew.getStats().branchMispredicts;
    auto &mispredAtDecode = cpu->decode.getStats().branchMispred;
    auto &orderViolations = cpu->iew.getStats().memOrderViolationEvents;

    // Level-1 fraction that is being broken down here.
    auto &l1BadSpeculation =
        cpu->cpuStats.topDownStats.topDownL1.badSpeculation;

    // Share of all bad-speculation events that are branch mispredictions.
    auto mispredictShare =
        (mispredAtIew + mispredAtDecode) /
        (mispredAtIew + mispredAtDecode + orderViolations);

    branchMissPredicts = mispredictShare * l1BadSpeculation;

    // Whatever is not attributed to branch mispredictions.
    machineClears = l1BadSpeculation - branchMissPredicts;
}

// Registers the level-2 backend formulas (memoryBound, coreBound) in the
// "TopDownL2_BackendBound" stats group and splits the L1 backendBound
// fraction between them.
CPU::CPUStats::TopDownStats::TopDownBackendBoundL2::TopDownBackendBoundL2(
CPU *cpu)
: statistics::Group(cpu, "TopDownL2_BackendBound"),
ADD_STAT(memoryBound,
statistics::units::Rate<statistics::units::Count,
statistics::units::Count>::get(),
"Memory Bound, backend stalls due to memory subsystem"),
ADD_STAT(
coreBound,
statistics::units::Rate<statistics::units::Count,
statistics::units::Count>::get(),
"Core Bound, backend stalls due to functional unit constraints") {
// Backend L2
// executionStalls: (numInstsExec0 - rename idleCycles + numInstsExec1 +
// numInstsExec2) divided by the CPU cycle count.
// NOTE(review): executionStalls is assigned here but, unlike memoryBound and
// coreBound, has no ADD_STAT entry in the initializer list above - confirm
// it is meant to stay an unregistered intermediate formula.
executionStalls = (cpu->iew.instQueue.getStats().numInstsExec0 -
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is not properly indented. Should be consistent. 4 spaces.
Same in the lines below and a lot of other places

cpu->rename.getStats().idleCycles +
cpu->iew.instQueue.getStats().numInstsExec1 +
cpu->iew.instQueue.getStats().numInstsExec2) /
(cpu->baseStats.numCycles);
// Raw memory-bound share: load stall cycles plus rename store stalls, per
// CPU cycle.
auto memoryBoundRaw = (cpu->iew.instQueue.getStats().loadStallCycles +
cpu->rename.getStats().storeStalls) /
(cpu->baseStats.numCycles);
// Raw core-bound share: the execution stalls not attributed to memory, so
// memoryBoundRaw + coreBoundRaw equals executionStalls.
auto coreBoundRaw = executionStalls - memoryBoundRaw;

// Level-1 fraction that is being broken down here.
auto &totalBackendBound = cpu->cpuStats.topDownStats.topDownL1.backendBound;

// Each raw share is normalised by the sum of both raw shares and then scaled
// by the L1 backendBound fraction.
memoryBound =
memoryBoundRaw / (memoryBoundRaw + coreBoundRaw) * (totalBackendBound);
coreBound =
coreBoundRaw / (memoryBoundRaw + coreBoundRaw) * (totalBackendBound);
}

/**
 * Registers the level-3 memory-bound formulas in the
 * "TopDownL3_BackendBound_MemoryBound" stats group.
 *
 * Five raw per-cycle shares (L1, L2, L3, external memory, stores) are
 * computed from instruction-queue and rename counters. Each one is then
 * normalised by the sum of all five and scaled by the level-2 memoryBound
 * fraction.
 */
CPU::CPUStats::TopDownStats::TopDownBackendBoundL3::TopDownBackendBoundL3(
    CPU *cpu)
    : statistics::Group(cpu, "TopDownL3_BackendBound_MemoryBound"),
      ADD_STAT(l1Bound,
               statistics::units::Rate<statistics::units::Count,
                                       statistics::units::Count>::get(),
               "L1 Cache Bound"),
      ADD_STAT(l2Bound,
               statistics::units::Rate<statistics::units::Count,
                                       statistics::units::Count>::get(),
               "L2 Cache Bound"),
      ADD_STAT(l3Bound,
               statistics::units::Rate<statistics::units::Count,
                                       statistics::units::Count>::get(),
               "L3 Cache Bound"),
      ADD_STAT(extMemBound,
               statistics::units::Rate<statistics::units::Count,
                                       statistics::units::Count>::get(),
               "External Memory Bound"),
      ADD_STAT(storeBound,
               statistics::units::Rate<statistics::units::Count,
                                       statistics::units::Count>::get(),
               "Store Bound")
{
    // Level-2 memoryBound fraction that is being broken down here.
    auto &l2MemoryBound = cpu->cpuStats.topDownStats.topDownBbL2.memoryBound;

    const auto &iqStats = cpu->iew.instQueue.getStats();
    auto &cycles = cpu->baseStats.numCycles;

    // Backend Bound / Memory Bound L3: raw shares per CPU cycle. Each cache
    // level is charged the stalls counted at its level minus those counted
    // at the next level's miss counter.
    auto l1Raw = (iqStats.loadStallCycles - iqStats.L1miss) / (cycles);
    auto l2Raw = (iqStats.L1miss - iqStats.L2miss) / (cycles);
    auto l3Raw = (iqStats.L2miss - iqStats.L3miss) / (cycles);
    auto extMemRaw = (iqStats.L3miss) / (cycles);
    auto storeRaw = (cpu->rename.getStats().storeStalls) / (cycles);

    // Normalisation term shared by all five formulas.
    auto rawTotal = l1Raw + l2Raw + l3Raw + extMemRaw + storeRaw;

    l1Bound = l1Raw / rawTotal * l2MemoryBound;
    l2Bound = l2Raw / rawTotal * l2MemoryBound;
    l3Bound = l3Raw / rawTotal * l2MemoryBound;
    extMemBound = extMemRaw / rawTotal * l2MemoryBound;
    storeBound = storeRaw / rawTotal * l2MemoryBound;
}

void
CPU::tick()
{
Expand Down
50 changes: 50 additions & 0 deletions src/cpu/o3/cpu.hh
Original file line number Diff line number Diff line change
Expand Up @@ -601,6 +601,56 @@ class CPU : public BaseCPU
/** Stat for total number of cycles the CPU spends descheduled due to a
* quiesce operation or waiting for an interrupt. */
statistics::Scalar quiesceCycles;

/**
 * Top-Down methodology statistics, one statistics::Group per hierarchy
 * level.
 *
 * The member order below is significant: the sub-groups are constructed in
 * declaration order, and the level-2/level-3 constructors read formulas
 * from the groups declared before them.
 */
struct TopDownStats : public statistics::Group
{
    TopDownStats(CPU *cpu);

    /** Level 1: the four top-level slot fractions. */
    struct TopDownL1 : public statistics::Group
    {
        TopDownL1(CPU *cpu);
        statistics::Formula frontendBound;
        statistics::Formula badSpeculation;
        statistics::Formula backendBound;
        statistics::Formula retiring;
    } topDownL1;

    /** Level 2: breakdown of the frontend-bound fraction. */
    struct TopDownFrontendBoundL2 : public statistics::Group
    {
        TopDownFrontendBoundL2(CPU *cpu);
        statistics::Formula fetchLatency;
        statistics::Formula fetchBandwidth;
    } topDownFbL2;

    /** Level 2: breakdown of the bad-speculation fraction. */
    struct TopDownBadSpeculationL2 : public statistics::Group
    {
        TopDownBadSpeculationL2(CPU *cpu);
        statistics::Formula branchMissPredicts;
        statistics::Formula machineClears;
    } topDownBsL2;

    /** Level 2: breakdown of the backend-bound fraction. */
    struct TopDownBackendBoundL2 : public statistics::Group
    {
        TopDownBackendBoundL2(CPU *cpu);
        statistics::Formula executionStalls;
        statistics::Formula memoryBound;
        statistics::Formula coreBound;
    } topDownBbL2;

    /** Level 3: breakdown of the backend memory-bound fraction. */
    struct TopDownBackendBoundL3 : public statistics::Group
    {
        TopDownBackendBoundL3(CPU *cpu);
        statistics::Formula l1Bound;
        statistics::Formula l2Bound;
        statistics::Formula l3Bound;
        statistics::Formula extMemBound;
        statistics::Formula storeBound;
    } topDownBbMem;

    // Level-3 frontend breakdown, currently disabled:
    // struct TopDownFrontendBoundL3 : statistics::Group {
    //     TopDownFrontendBoundL3(CPU *cpu);
    //     statistics::Formula iTlbMiss;
    //     statistics::Formula iCacheMiss;
    //     statistics::Formula branchResteer;
    //     statistics::Formula others;
    // } topDownFlL3;

} topDownStats;

} cpuStats;

public:
Expand Down
Loading