Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix race condition on Publisher shutdown #2812

Merged
merged 3 commits into from
Aug 7, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
108 changes: 79 additions & 29 deletions gazebo/transport/Publisher.cc
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,53 @@ using namespace transport;

uint32_t Publisher::idCounter = 0;

namespace gazebo
{
/// \brief Private class for Publisher
class PublisherPrivate
{
/// \brief Callback when a publish is completed
/// \param[in] _id ID associated with the publication.
public: void OnPublishComplete(uint32_t _pubId);

/// \brief Current publication ids.
public: std::map<uint32_t, int> pubIds;

/// \brief Mutex to protect pubIds
public: std::mutex mutex;
};
}

/// \brief Mutex to protect publisherMap
static std::mutex pubMapMutex;

/// \brief A map of Publisher id and its shared pointer
/// The PublisherPrivate object has to be stored in a global static map
/// and not within the Publisher class. This is because we need to keep the
/// PublisherPrivate object alive for the caller of the OnPublisherComplete,
/// otherwise we run into a situation in which the caller invokes a
/// function of a destroyed object
static std::map<uint32_t, std::shared_ptr<PublisherPrivate>> publisherMap;

//////////////////////////////////////////////////
void PublisherPrivate::OnPublishComplete(uint32_t _id)
{
try
{
// This is the deeply unsatisfying way of dealing with a race
// condition where the publisher is destroyed before all
// OnPublishComplete callbacks are fired.
std::lock_guard<std::mutex> lock(this->mutex);
std::map<uint32_t, int>::iterator iter = this->pubIds.find(_id);
if (iter != this->pubIds.end() && (--iter->second) <= 0)
this->pubIds.erase(iter);
}
catch(...)
{
return;
}
}

//////////////////////////////////////////////////
Publisher::Publisher(const std::string &_topic, const std::string &_msgType,
unsigned int _limit, double _hzRate)
Expand All @@ -43,6 +90,9 @@ Publisher::Publisher(const std::string &_topic, const std::string &_msgType,
this->queueLimitWarned = false;
this->pubId = 0;
this->id = ++idCounter;

std::lock_guard<std::mutex> lock(pubMapMutex);
publisherMap[this->id] = std::make_shared<PublisherPrivate>();
}

//////////////////////////////////////////////////
Expand Down Expand Up @@ -160,15 +210,32 @@ void Publisher::SendMessage()
std::list<MessagePtr> localBuffer;
std::list<uint32_t> localIds;

std::shared_ptr<PublisherPrivate> pubDataPtr;
{
std::lock_guard<std::mutex> lock2(pubMapMutex);
pubDataPtr = publisherMap[this->id];
}

{
boost::mutex::scoped_lock lock(this->mutex);
if (!this->pubIds.empty() || this->messages.empty())
if (this->messages.empty())
return;

{
std::lock_guard<std::mutex> lock2(pubDataPtr->mutex);
if (!pubDataPtr->pubIds.empty())
return;
}

for (unsigned int i = 0; i < this->messages.size(); ++i)
{
this->pubId = (this->pubId + 1) % 10000;
this->pubIds[this->pubId] = 0;

{
std::lock_guard<std::mutex> lock2(pubDataPtr->mutex);
pubDataPtr->pubIds[this->pubId] = 0;
}

localIds.push_back(this->pubId);
}

Expand All @@ -188,12 +255,14 @@ void Publisher::SendMessage()
{
// Send the latest message.
int result = this->publication->Publish(*iter,
boost::bind(&Publisher::OnPublishComplete, this, _1), *pubIter);
std::bind(&PublisherPrivate::OnPublishComplete,
pubDataPtr, std::placeholders::_1), *pubIter);

std::lock_guard<std::mutex> lock2(pubDataPtr->mutex);
if (result > 0)
this->pubIds[*pubIter] = result;
pubDataPtr->pubIds[*pubIter] = result;
else
this->pubIds.erase(*pubIter);
pubDataPtr->pubIds.erase(*pubIter);
}

// Clear the local buffer.
Expand Down Expand Up @@ -227,30 +296,6 @@ std::string Publisher::GetMsgType() const
return this->msgType;
}

//////////////////////////////////////////////////
void Publisher::OnPublishComplete(uint32_t _id)
{
// A null node indicates that the publisher may have been destroyed
// so do not do anything
if (!this->node)
return;

try {
// This is the deeply unsatisfying way of dealing with a race
// condition where the publisher is destroyed before all
// OnPublishComplete callbacks are fired.
boost::mutex::scoped_lock lock(this->mutex);

std::map<uint32_t, int>::iterator iter = this->pubIds.find(_id);
if (iter != this->pubIds.end() && (--iter->second) <= 0)
this->pubIds.erase(iter);
}
catch(...)
{
return;
}
}

//////////////////////////////////////////////////
void Publisher::SetPublication(PublicationPtr _publication)
{
Expand All @@ -267,6 +312,11 @@ void Publisher::Fini()
if (!this->topic.empty())
TopicManager::Instance()->Unadvertise(this->topic, this->id);

{
std::lock_guard<std::mutex> lock(pubMapMutex);
publisherMap.erase(this->id);
}

this->node.reset();
}

Expand Down
5 changes: 1 addition & 4 deletions gazebo/transport/Publisher.hh
Original file line number Diff line number Diff line change
Expand Up @@ -131,10 +131,6 @@ namespace gazebo
private: void PublishImpl(const google::protobuf::Message &_message,
bool _block);

/// \brief Callback when a publish is completed
/// \param[in] _id ID associated with the publication.
private: void OnPublishComplete(uint32_t _id);

/// \brief Topic on which messages are published.
private: std::string topic;

Expand Down Expand Up @@ -173,6 +169,7 @@ namespace gazebo
private: uint32_t pubId;

/// \brief Current publication ids.
/// This variable is no longer used. Kept here for ABI compatibility.
private: std::map<uint32_t, int> pubIds;

/// \brief Unique ID for this publisher.
Expand Down