Skip to content

Commit

Permalink
metaserver: Allow re-replication of chunks only after they are really…
Browse files Browse the repository at this point in the history
… removed from evacuated server.
  • Loading branch information
Daniel R. Fiala committed Jul 31, 2020
1 parent f06e7ee commit 3528d7e
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 0 deletions.
9 changes: 9 additions & 0 deletions src/cc/meta/ChunkServer.h
Original file line number Diff line number Diff line change
Expand Up @@ -987,6 +987,13 @@ class ChunkServer :
static int GetMaxChunkServerCount()
{ return sMaxChunkServerCount; }

bool IsRemovePending(chunkId_t const id) const
{ return mChunksRemovePending.Find(id); }
bool EraseRemovePending(chunkId_t const id)
{ return mChunksRemovePending.Erase(id); }
void SetRemovePending(chunkId_t const id)
{ mChunksRemovePending.Insert(id); }

protected:
ChunkServer(const NetConnectionPtr& conn, const string& peerName,
bool replayFlag = false);
Expand Down Expand Up @@ -1071,6 +1078,8 @@ class ChunkServer :

ChunkIdSet mChunksToEvacuate;

ChunkIdSet mChunksRemovePending;

/// Location of the server at which clients can
/// connect to
ServerLocation mLocation;
Expand Down
18 changes: 18 additions & 0 deletions src/cc/meta/LayoutManager.cc
Original file line number Diff line number Diff line change
Expand Up @@ -11046,10 +11046,12 @@ LayoutManager::ReplicateChunk(
continue;
}
const char* reason = "none";
bool isEvacuated = false;
ChunkServerPtr dataServer;
if (iter != servers.end()) {
ChunkServer& ds = **iter;
reason = "evacuation";
isEvacuated = true;
if (recoveryInfo.HasRecovery()) {
reason = "evacuation recovery";
dataServer = c;
Expand Down Expand Up @@ -11122,6 +11124,9 @@ LayoutManager::ReplicateChunk(
SetReplicationState(clli,
CSMap::Entry::kStatePendingReplication);
}
// Evacuation-driven replication: record this chunk id as remove-pending
// on dataServer. CanReplicateChunkNow() consults that flag to postpone
// further re-replication while the flagged server is still mapped to the
// chunk.
// NOTE(review): on the recovery path above dataServer is assigned from c
// rather than from *iter -- confirm that this is the server that should
// be flagged, and that dataServer cannot be null when isEvacuated is set.
if (isEvacuated) {
dataServer->SetRemovePending(clli.GetChunkId());
}
// Do not count synchronous failures.
if (cs.ReplicateChunk(clli.GetFileId(), clli.GetChunkId(),
dataServer, recoveryInfo, tier, maxSTier, recovIt,
Expand Down Expand Up @@ -11513,6 +11518,19 @@ LayoutManager::CanReplicateChunkNow(
" hibernated: " << hibernatedCount <<
" needed: " << extraReplicas <<
KFS_LOG_EOM;

// Refuse re-replication while any server in `servers` still has this
// chunk flagged remove-pending and is still mapped to the chunk, i.e. the
// chunk has not yet really been removed from the evacuated server.
for (auto const& srv: servers) {
if (srv->IsRemovePending(chunkId)) {
if (mChunkToServerMap.HasServer(srv, c)) {
// Flagged server is still mapped to the chunk: replicating now is
// not allowed.
KFS_LOG_STREAM_DEBUG << "Skipping CanReplicate; srv:" << srv->GetServerLocation() << " and chunk:" << chunkId << KFS_LOG_EOM;
return false;
} else {
// The server is no longer mapped to the chunk: the flag is stale, so
// clear it and keep checking the remaining servers.
srv->EraseRemovePending(chunkId);
KFS_LOG_STREAM_DEBUG << "Re-allowing CanReplicate; srv:" << srv->GetServerLocation() << " and chunk:" << chunkId << KFS_LOG_EOM;
}
}
}

if (readLeaseWaitFlag) {
SetReplicationState(c, CSMap::Entry::kStatePendingReplication);
return false;
Expand Down

0 comments on commit 3528d7e

Please sign in to comment.