Skip to content

Commit

Permalink
Make sure grape-engine exits on CTRL-C without crashing (#2761)
Browse files Browse the repository at this point in the history
## What do these changes do?

- Reset the pointers to prevent a double shutdown, which causes a crash inside
grpc
- Terminate dispatcher (and its queues) correctly when closing

## Related issue number

Fixes #2749

Signed-off-by: Tao He <linzhu.ht@alibaba-inc.com>
  • Loading branch information
sighingnow committed May 29, 2023
1 parent df16284 commit e4bb069
Show file tree
Hide file tree
Showing 4 changed files with 26 additions and 7 deletions.
3 changes: 3 additions & 0 deletions analytical_engine/core/grape_engine.cc
Expand Up @@ -98,16 +98,19 @@ class GrapeEngine {
LOG(INFO) << "grape-engine (master) RPC server is stopping...";
rpc_server_->StopServer();
service_thread_.join();
rpc_server_.reset();
}

if (dispatcher_) {
LOG(INFO) << "grape-engine dispatcher is stopping...";
dispatcher_->Stop();
dispatcher_.reset();
}

if (vineyard_server_) {
LOG(INFO) << "vineyardd instance is stopping...";
vineyard_server_->Stop();
vineyard_server_.reset();
}
}

Expand Down
1 change: 1 addition & 0 deletions analytical_engine/core/launcher.cc
Expand Up @@ -113,6 +113,7 @@ void VineyardServer::Stop() {
if (proc_ && proc_->valid()) {
kill(proc_->id(), SIGTERM);
proc_->wait();
proc_.reset();
}
}

Expand Down
25 changes: 20 additions & 5 deletions analytical_engine/core/server/dispatcher.cc
Expand Up @@ -50,7 +50,9 @@ Dispatcher::Dispatcher(const grape::CommSpec& comm_spec)
auto publisher = comm_spec_.worker_id() == grape::kCoordinatorRank;
// we use blocking queue as synchronizer
if (publisher) {
cmd_queue_.SetProducerNum(1);
cmd_queue_.SetLimit(1);
result_queue_.SetProducerNum(1);
result_queue_.SetLimit(1);
}
}
Expand All @@ -71,12 +73,22 @@ void Dispatcher::Start() {
}
}

void Dispatcher::Stop() { running_ = false; }
// Stops the dispatcher: clears the running_ flag that publisherLoop() tests in
// its while condition, then, for each of the two queues that does not yet
// report End(), decrements that queue's producer count.
//
// NOTE(review): presumably dropping the producer count (set to 1 in the
// constructor on the publisher) makes a consumer blocked in Get() return
// false, which is what lets publisherLoop() break out of its loop -- confirm
// against vineyard::PCBlockingQueue.
// NOTE(review): running_ is declared as a plain bool; if Stop() is called from
// a different thread than the loop, confirm whether synchronization is needed.
void Dispatcher::Stop() {
running_ = false;
// Only decrement when the queue has not already ended, so that repeated calls
// to Stop() do not decrement the producer count again (assumes End() becomes
// true once the producer count reaches zero and the queue is drained -- TODO
// confirm).
if (!cmd_queue_.End()) {
cmd_queue_.DecProducerNum();
}
if (!result_queue_.End()) {
result_queue_.DecProducerNum();
}
}

std::vector<DispatchResult> Dispatcher::Dispatch(
std::shared_ptr<CommandDetail> cmd) {
cmd_queue_.Push(cmd);
return result_queue_.Pop();
cmd_queue_.Put(cmd);
std::vector<DispatchResult> results;
result_queue_.Get(results);
return results;
}

void Dispatcher::Subscribe(std::shared_ptr<Subscriber> subscriber) {
Expand Down Expand Up @@ -178,7 +190,10 @@ void Dispatcher::subscriberPreprocessCmd(rpc::OperationType type,
void Dispatcher::publisherLoop() {
CHECK_EQ(comm_spec_.worker_id(), grape::kCoordinatorRank);
while (running_) {
auto cmd = cmd_queue_.Pop();
std::shared_ptr<CommandDetail> cmd;
if (!cmd_queue_.Get(cmd)) {
break;
}
grape::sync_comm::Bcast(cmd->type, grape::kCoordinatorRank, MPI_COMM_WORLD);
publisherPreprocessCmd(cmd);
// process local event
Expand All @@ -188,7 +203,7 @@ void Dispatcher::publisherLoop() {
results[0] = std::move(*r);
vineyard::_GatherR(results, comm_spec_.comm());

result_queue_.Push(std::move(results));
result_queue_.Put(std::move(results));
}
}

Expand Down
4 changes: 2 additions & 2 deletions analytical_engine/core/server/dispatcher.h
Expand Up @@ -176,8 +176,8 @@ class Dispatcher {
bool running_;
grape::CommSpec comm_spec_;
std::shared_ptr<Subscriber> subscriber_;
vineyard::BlockingQueue<std::shared_ptr<CommandDetail>> cmd_queue_;
vineyard::BlockingQueue<std::vector<DispatchResult>> result_queue_;
vineyard::PCBlockingQueue<std::shared_ptr<CommandDetail>> cmd_queue_;
vineyard::PCBlockingQueue<std::vector<DispatchResult>> result_queue_;
};

} // namespace gs
Expand Down

0 comments on commit e4bb069

Please sign in to comment.