mem: Atomic ops to same address (#200)
This change augments the DataBlock class with a change log structure that
records the effect of each atomic operation on a data block and serves
those recorded values back when the atomic operations require return values.

Although the operations are atomic, the coalescer need not send unique
memory requests for each operation. Atomic operations within a wavefront
to the same address are now coalesced into a single memory request. The
response to this request carries all the information needed to give each
requesting lane the unique value produced by its individual atomic
operation. This helps reduce contention for request and response queues
in simulation.

Previously, only the final value of the data block after all atomic ops
to the same address was visible to the requesting waves. This change
corrects that behavior by allowing each wave to see the effect of its
individual atomic op if a return value is necessary.
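
For intuition, the following standalone sketch (illustrative only, not part of
this commit) shows the semantics the change log provides: three lanes atomically
add 1, 2 and 3 to the same 32-bit location that starts at 10, and each lane's
return value is the value of the location immediately before its own operation
(10, 11 and 13), while memory ends at 16.

// Illustrative sketch: snapshot-before-apply yields per-lane return values.
#include <cstdint>
#include <iostream>
#include <vector>

int main()
{
    uint32_t memory = 10;                              // shared location
    std::vector<uint32_t> lane_operand = {1, 2, 3};    // one atomic add per lane
    std::vector<uint32_t> lane_return(lane_operand.size());

    // Apply the coalesced ops in lane (packet) order, logging the old value.
    for (size_t lane = 0; lane < lane_operand.size(); ++lane) {
        lane_return[lane] = memory;        // snapshot == one change-log entry
        memory += lane_operand[lane];      // the atomic op itself
    }

    for (size_t lane = 0; lane < lane_return.size(); ++lane)
        std::cout << "lane " << lane << " returns " << lane_return[lane] << "\n";
    std::cout << "final value " << memory << "\n";     // prints 16
    return 0;
}
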
BobbyRBruce committed Aug 31, 2023
2 parents fceb7e0 + f6a4533 commit 0e323bc
Showing 7 changed files with 146 additions and 39 deletions.
66 changes: 61 additions & 5 deletions src/mem/ruby/common/DataBlock.cc
@@ -51,9 +51,19 @@ namespace ruby

DataBlock::DataBlock(const DataBlock &cp)
{
m_data = new uint8_t[RubySystem::getBlockSizeBytes()];
memcpy(m_data, cp.m_data, RubySystem::getBlockSizeBytes());
uint8_t *block_update;
size_t block_bytes = RubySystem::getBlockSizeBytes();
m_data = new uint8_t[block_bytes];
memcpy(m_data, cp.m_data, block_bytes);
m_alloc = true;
// If this data block is involved in an atomic operation, the effects
// of applying the atomic operations on the data block are recorded in
// m_atomicLog. If so, we must copy over every entry in the change log
for (size_t i = 0; i < cp.m_atomicLog.size(); i++) {
block_update = new uint8_t[block_bytes];
memcpy(block_update, cp.m_atomicLog[i], block_bytes);
m_atomicLog.push_back(block_update);
}
}

void
@@ -73,7 +83,20 @@ DataBlock::clear()
bool
DataBlock::equal(const DataBlock& obj) const
{
return !memcmp(m_data, obj.m_data, RubySystem::getBlockSizeBytes());
size_t block_bytes = RubySystem::getBlockSizeBytes();
// Check that the block contents match
if (memcmp(m_data, obj.m_data, block_bytes)) {
return false;
}
if (m_atomicLog.size() != obj.m_atomicLog.size()) {
return false;
}
for (size_t i = 0; i < m_atomicLog.size(); i++) {
if (memcmp(m_atomicLog[i], obj.m_atomicLog[i], block_bytes)) {
return false;
}
}
return true;
}

void
@@ -92,7 +115,7 @@ DataBlock::atomicPartial(const DataBlock &dblk, const WriteMask &mask)
for (int i = 0; i < RubySystem::getBlockSizeBytes(); i++) {
m_data[i] = dblk.m_data[i];
}
mask.performAtomic(m_data);
mask.performAtomic(m_data, m_atomicLog);
}

void
@@ -107,6 +130,28 @@ DataBlock::print(std::ostream& out) const
out << std::dec << "]" << std::flush;
}

int
DataBlock::numAtomicLogEntries() const
{
return m_atomicLog.size();
}
uint8_t*
DataBlock::popAtomicLogEntryFront()
{
assert(m_atomicLog.size() > 0);
auto ret = m_atomicLog.front();
m_atomicLog.pop_front();
return ret;
}
void
DataBlock::clearAtomicLogEntries()
{
for (auto log : m_atomicLog) {
delete [] log;
}
m_atomicLog.clear();
}

const uint8_t*
DataBlock::getData(int offset, int len) const
{
@@ -137,7 +182,18 @@ DataBlock::setData(PacketPtr pkt)
DataBlock &
DataBlock::operator=(const DataBlock & obj)
{
memcpy(m_data, obj.m_data, RubySystem::getBlockSizeBytes());
uint8_t *block_update;
size_t block_bytes = RubySystem::getBlockSizeBytes();
// Copy entire block contents from obj to current block
memcpy(m_data, obj.m_data, block_bytes);
// If this data block is involved in an atomic operation, the effects
// of applying the atomic operations on the data block are recorded in
// m_atomicLog. If so, we must copy over every entry in the change log
for (size_t i = 0; i < obj.m_atomicLog.size(); i++) {
block_update = new uint8_t[block_bytes];
memcpy(block_update, obj.m_atomicLog[i], block_bytes);
m_atomicLog.push_back(block_update);
}
return *this;
}

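The accessors added above give the caller ownership of each popped snapshot. A
minimal usage sketch (hypothetical consumer code, not part of this commit) that
drains the log for a block whose atomic ops were applied via atomicPartial(),
assuming `offset` is the byte offset of the atomically updated location:

#include "mem/ruby/common/DataBlock.hh"

void drainAtomicLog(gem5::ruby::DataBlock &blk, int offset)
{
    while (blk.numAtomicLogEntries() > 0) {
        // Each entry is a heap-allocated snapshot of the whole block taken
        // just before one atomic op; the caller owns it and must free it.
        uint8_t *snapshot = blk.popAtomicLogEntryFront();
        uint8_t old_value = snapshot[offset];   // pre-op value for this requestor
        (void)old_value;                        // would be copied into the response
        delete [] snapshot;
    }
    // Paths that need no return values can call blk.clearAtomicLogEntries()
    // instead to free every entry in one step.
}
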
13 changes: 13 additions & 0 deletions src/mem/ruby/common/DataBlock.hh
@@ -44,6 +44,7 @@
#include <inttypes.h>

#include <cassert>
#include <deque>
#include <iomanip>
#include <iostream>

@@ -71,6 +72,12 @@ class DataBlock
{
if (m_alloc)
delete [] m_data;

// If the data block was involved in atomic
// operations, free all metadata
for (auto log : m_atomicLog) {
delete [] log;
}
}

DataBlock& operator=(const DataBlock& obj);
@@ -80,6 +87,9 @@
void clear();
uint8_t getByte(int whichByte) const;
const uint8_t *getData(int offset, int len) const;
uint8_t* popAtomicLogEntryFront();
int numAtomicLogEntries() const;
void clearAtomicLogEntries();
uint8_t *getDataMod(int offset);
void setByte(int whichByte, uint8_t data);
void setData(const uint8_t *data, int offset, int len);
@@ -94,6 +104,9 @@
void alloc();
uint8_t *m_data;
bool m_alloc;

// Tracks block changes when atomic ops are applied
std::deque<uint8_t*> m_atomicLog;
};

inline void
22 changes: 22 additions & 0 deletions src/mem/ruby/common/WriteMask.cc
@@ -55,5 +55,27 @@ WriteMask::print(std::ostream& out) const
<< std::flush;
}

void
WriteMask::performAtomic(uint8_t * p,
std::deque<uint8_t*>& log) const
{
int offset;
uint8_t *block_update;
// Here, operations occur in FIFO order from the mAtomicOp
// vector. This is done to match the ordering of packets
// that was seen when the initial coalesced request was created.
for (int i = 0; i < mAtomicOp.size(); i++) {
// Save the old value of the data block in case a
// return value is needed
block_update = new uint8_t[mSize];
std::memcpy(block_update, p, mSize);
log.push_back(block_update);
// Perform the atomic operation
offset = mAtomicOp[i].first;
AtomicOpFunctor *fnctr = mAtomicOp[i].second;
(*fnctr)(&p[offset]);
}
}

} // namespace ruby
} // namespace gem5
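
Each entry in mAtomicOp pairs a byte offset with an AtomicOpFunctor that mutates
the location in place via the call (*fnctr)(&p[offset]) above. A minimal sketch of
a compatible operation (simplified; gem5's real AtomicOpFunctor interface has
additional requirements, such as a virtual clone(), not shown here), assuming a
32-bit add:

#include <cstdint>
#include <cstring>

// Simplified stand-in for an atomic-add functor invoked as (*fnctr)(&p[offset]).
struct AtomicAddSketch
{
    uint32_t operand;
    explicit AtomicAddSketch(uint32_t v) : operand(v) {}

    void operator()(uint8_t *p) const
    {
        uint32_t value;
        std::memcpy(&value, p, sizeof(value));   // read the location in place
        value += operand;                        // the atomic update
        std::memcpy(p, &value, sizeof(value));   // write the result back
    }
};

Because performAtomic() snapshots the whole block before each functor runs, popping
entries from the front of the log later pairs every packet with the value the
location held immediately before that packet's own operation.
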
29 changes: 9 additions & 20 deletions src/mem/ruby/common/WriteMask.hh
@@ -222,26 +222,15 @@ class WriteMask

void print(std::ostream& out) const;

void
performAtomic(uint8_t * p) const
{
for (int i = 0; i < mAtomicOp.size(); i++) {
int offset = mAtomicOp[i].first;
AtomicOpFunctor *fnctr = mAtomicOp[i].second;
(*fnctr)(&p[offset]);
}
}

void
performAtomic(DataBlock & blk) const
{
for (int i = 0; i < mAtomicOp.size(); i++) {
int offset = mAtomicOp[i].first;
uint8_t *p = blk.getDataMod(offset);
AtomicOpFunctor *fnctr = mAtomicOp[i].second;
(*fnctr)(p);
}
}
/*
* Performs atomic operations on the data block pointed to by p. The
* atomic operations to perform are in the vector mAtomicOp. The
* effect of each atomic operation is pushed to the atomicChangeLog
* so that each individual atomic requestor may see the results of their
* specific atomic operation.
*/
void performAtomic(uint8_t * p,
std::deque<uint8_t*>& atomicChangeLog) const;

const AtomicOpVector&
getAtomicOps() const
1 change: 1 addition & 0 deletions src/mem/ruby/protocol/GPU_VIPER-TCC.sm
@@ -523,6 +523,7 @@ machine(MachineType:TCC, "TCC Cache")
out_msg.isSLCSet := in_msg.isSLCSet;
}
}
cache_entry.DataBlk.clearAtomicLogEntries();
}

action(bar_sendBypassedAtomicResponse, "bar", desc="send bypassed Atomic Ack") {
2 changes: 2 additions & 0 deletions src/mem/ruby/protocol/RubySlicc_Exports.sm
@@ -77,6 +77,8 @@ structure(DataBlock, external = "yes", desc="..."){
void copyPartial(DataBlock, int, int);
void copyPartial(DataBlock, WriteMask);
void atomicPartial(DataBlock, WriteMask);
int numAtomicLogEntries();
void clearAtomicLogEntries();
}

bool testAndRead(Addr addr, DataBlock datablk, Packet *pkt);
52 changes: 38 additions & 14 deletions src/mem/ruby/system/GPUCoalescer.cc
@@ -554,25 +554,48 @@ GPUCoalescer::hitCallback(CoalescedRequest* crequest,
success, isRegion);
// update the data
//
// MUST AD DOING THIS FOR EACH REQUEST IN COALESCER
// MUST ADD DOING THIS FOR EACH REQUEST IN COALESCER
std::vector<PacketPtr> pktList = crequest->getPackets();

uint8_t* log = nullptr;
DPRINTF(GPUCoalescer, "Responding to %d packets for addr 0x%X\n",
pktList.size(), request_line_address);
uint32_t offset;
int pkt_size;
for (auto& pkt : pktList) {
request_address = pkt->getAddr();
offset = getOffset(pkt->getAddr());
pkt_size = pkt->getSize();
if (pkt->getPtr<uint8_t>()) {
if ((type == RubyRequestType_LD) ||
(type == RubyRequestType_ATOMIC) ||
(type == RubyRequestType_ATOMIC_RETURN) ||
(type == RubyRequestType_IFETCH) ||
(type == RubyRequestType_RMW_Read) ||
(type == RubyRequestType_Locked_RMW_Read) ||
(type == RubyRequestType_Load_Linked)) {
pkt->setData(
data.getData(getOffset(request_address), pkt->getSize()));
} else {
data.setData(pkt->getPtr<uint8_t>(),
getOffset(request_address), pkt->getSize());
switch(type) {
// Store and AtomicNoReturns follow the same path, as the
// data response is not needed.
case RubyRequestType_ATOMIC_NO_RETURN:
assert(pkt->isAtomicOp());
case RubyRequestType_ST:
data.setData(pkt->getPtr<uint8_t>(), offset, pkt_size);
break;
case RubyRequestType_LD:
pkt->setData(data.getData(offset, pkt_size));
break;
case RubyRequestType_ATOMIC_RETURN:
assert(pkt->isAtomicOp());
// Atomic operations are performed by the WriteMask
// in packet order, set by the crequest. Thus, when
// unpacking the changes from the log, we read from
// the front of the log to correctly map response
// data into the packets.

// Log entry contains the old value before the current
// atomic operation occurred.
log = data.popAtomicLogEntryFront();
pkt->setData(&log[offset]);
delete [] log;
log = nullptr;
break;
default:
panic("Unsupported ruby packet type:%s\n",
RubyRequestType_to_string(type));
break;
}
} else {
DPRINTF(MemoryAccess,
@@ -581,6 +604,7 @@ GPUCoalescer::hitCallback(CoalescedRequest* crequest,
RubyRequestType_to_string(type));
}
}
assert(data.numAtomicLogEntries() == 0);

m_outstanding_count--;
assert(m_outstanding_count >= 0);
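
Packet order matters as soon as the coalesced atomics are not all commutative. A
small worked example (illustrative only, not part of this commit): if the location
holds 10, packet 0 carries an atomic exchange to 100 and packet 1 an atomic add of
5, then replaying in packet order hands packet 0 the value 10 and packet 1 the
value 100, with the line ending at 105; replaying in the opposite order would
instead hand packet 1 the value 10 and packet 0 the value 15. Popping from the
front of the log therefore works only because performAtomic() applied the ops in
the same FIFO order in which the packets were coalesced.

#include <cstdint>
#include <deque>
#include <iostream>

int main()
{
    uint32_t loc = 10;             // shared location before any atomic op
    std::deque<uint32_t> log;      // stands in for the per-op change log

    log.push_back(loc); loc = 100; // packet 0: atomic exchange(100), sees 10
    log.push_back(loc); loc += 5;  // packet 1: atomic add(5), sees 100

    // FIFO pairing: packet 0 gets 10, packet 1 gets 100, final value is 105.
    std::cout << log[0] << " " << log[1] << " " << loc << "\n";
    return 0;
}
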

