Skip to content

Commit

Permalink
[BP] MB-39613: Unbalanced fts cluster with insufficient replica count
Browse files Browse the repository at this point in the history
With a higher replica count and insufficient nodes in a cluster,
FTS will always report itself as an unbalanced cluster to the cluster
manager, which results in an active rebalance button.
This can lead to users or automation constantly rebalancing the
cluster and in the worst cases not working at all
(as they'll never move past the rebalance stage)

Change-Id: Icb9b4028e39ca49fdcaac7708e6050c37f009f09
Reviewed-on: http://review.couchbase.org/c/cbgt/+/130105
Well-Formed: Build Bot <build@couchbase.com>
Reviewed-by: Abhinav Dangeti <abhinav@couchbase.com>
Tested-by: Sreekanth Sivasankaran <sreekanth.sivasankaran@couchbase.com>
Reviewed-on: http://review.couchbase.org/c/cbgt/+/130609
Tested-by: Abhinav Dangeti <abhinav@couchbase.com>
  • Loading branch information
sreekanth-cb authored and abhinavdangeti committed Jun 17, 2020
1 parent 9595b8f commit 623cf2f
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 1 deletion.
13 changes: 12 additions & 1 deletion manager_api.go
Original file line number Diff line number Diff line change
Expand Up @@ -97,10 +97,21 @@ func (mgr *Manager) CreateIndex(sourceType,
// Validate maxReplicasAllowed here.
maxReplicasAllowed, _ := strconv.Atoi(mgr.Options()["maxReplicasAllowed"])
if planParams.NumReplicas < 0 || planParams.NumReplicas > maxReplicasAllowed {
return fmt.Errorf("manager_api: CreateIndex, maxReplicasAllowed:"+
return "", fmt.Errorf("manager_api: CreateIndex failed, maxReplicasAllowed:"+
" '%v', but request for '%v'", maxReplicasAllowed, planParams.NumReplicas)
}

nodeDefs, _, err := CfgGetNodeDefs(mgr.cfg, NODE_DEFS_KNOWN)
if err != nil {
return "", fmt.Errorf("manager_api: CreateIndex failed, "+
"CfgGetNodeDefs err: %v", err)
}
if len(nodeDefs.NodeDefs) < planParams.NumReplicas+1 {
return "", fmt.Errorf("manager_api: CreateIndex failed, cluster needs %d "+
"search nodes to support the requested replica count of %d",
planParams.NumReplicas+1, planParams.NumReplicas)
}

tries := 0
version := CfgGetVersion(mgr.cfg)
for {
Expand Down
33 changes: 33 additions & 0 deletions manager_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1348,6 +1348,23 @@ func TestManagerPIndexRestartWithFeedAllotmentOptionChange(t *testing.T) {

}

// registerNode adds nodeDef to the node definitions of the given kind
// held in the manager's cfg. It reads the current definitions with
// CfgGetNodeDefs, starts from a fresh NewNodeDefs set when none is
// returned, stores nodeDef under its UUID, refreshes the set's UUID and
// ImplVersion, and writes the result back with CfgSetNodeDefs using the
// cas value obtained from the read. Any error from the read or the
// write is returned unchanged.
func registerNode(nodeDef *NodeDef, kind string, m *Manager) error {
	defs, cas, err := CfgGetNodeDefs(m.cfg, kind)
	if err != nil {
		return err
	}

	// Nothing registered under this kind yet: begin with an empty set.
	if defs == nil {
		defs = NewNodeDefs(m.version)
	}

	defs.UUID = NewUUID()
	defs.NodeDefs[nodeDef.UUID] = nodeDef
	defs.ImplVersion = CfgGetVersion(m.cfg)

	if _, err = CfgSetNodeDefs(m.cfg, kind, defs, cas); err != nil {
		return err
	}
	return nil
}

func TestManagerPIndexRestartWithReplicaCountChange(t *testing.T) {
emptyDir, _ := ioutil.TempDir("./tmp", "test")
defer os.RemoveAll(emptyDir)
Expand Down Expand Up @@ -1391,6 +1408,15 @@ func TestManagerPIndexRestartWithReplicaCountChange(t *testing.T) {
feeds, pindexes)
}

err = registerNode(&NodeDef{
HostPort: "2",
UUID: "2",
ImplVersion: VERSION,
}, NODE_DEFS_KNOWN, m)
if err != nil {
t.Errorf("failed err: %v", err)
}

// update the replicaCount to "1"
planParams = PlanParams{
MaxPartitionsPerPIndex: 1,
Expand Down Expand Up @@ -1429,6 +1455,13 @@ func TestManagerPIndexRestartWithReplicaCountChange(t *testing.T) {
m.stats.TotJanitorRestartPIndex)
}

err = registerNode(&NodeDef{
HostPort: "3",
UUID: "3", ImplVersion: VERSION}, NODE_DEFS_KNOWN, m)
if err != nil {
t.Errorf("failed err: %v", err)
}

// update the replicaCount to "2"
planParams = PlanParams{
MaxPartitionsPerPIndex: 1,
Expand Down

0 comments on commit 623cf2f

Please sign in to comment.