Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Browse files

MB-5969: Add vBucketServerMap sanity checking

Also, set the verbose and debug modes to true by default
  • Loading branch information...
commit d3e8d7a0c1b76ea417ca4029e518cada38768cc4 1 parent 31224a2
@bcui6611 bcui6611 authored
View
2  cbhealthchecker
@@ -24,7 +24,7 @@ def parse_opt():
time_file_name = time.strftime('%Y-%m-%d_%H-%M-%S')
(bucket, cluster, user, password, txtfile, htmlfile, statsfile, verbose, debug,
inputfile, scale, statsonly) = ('all', '', '', '', time_file_name + '.txt',
- time_file_name + '.html', time_file_name + '.json', False, False, '', 'day', False)
+ time_file_name + '.html', time_file_name + '.json', True, True, '', 'day', False)
try:
(opts, _args) = getopt.getopt(sys.argv[1:],
View
76 cluster_stats.py
@@ -334,6 +334,63 @@ def run(self, accessor, scale, threshold=None):
result[bucket] = trend
return result
class VbucketMapSanity:
    """Sanity checks for the vBucketMap of every bucket.

    Four checks are performed per bucket:

    1. the map has exactly ``accessor["threshold"]`` entries;
    2. every entry (a chain of server indexes) has ``numReplicas + 1``
       elements;
    3. every server index in a chain can be found in ``serverList``;
    4. no server index is assigned twice within one chain.
    """

    # Index treated as the "no server assigned" placeholder. This follows the
    # Couchbase vBucket map convention -- confirm against the server version
    # in use. It is exempt from the range and uniqueness checks, because a
    # chain may legitimately hold several unassigned replicas.
    UNASSIGNED = -1

    def run(self, accessor, scale, threshold=None):
        """Run the vBucketMap sanity checks against ``stats_buffer.bucket_info``.

        ``accessor`` must provide the expected map length under the
        ``"threshold"`` key.  ``scale`` and ``threshold`` are accepted for
        signature parity with the other checkers and are not used here.

        Returns a dict with one entry per key of ``bucket_info``.  Each value
        is a list that is empty when no problem was found, or holds a single
        ``("error", [{"node": <bucket>, "value": <symptom>}, ...])`` tuple.
        """
        result = {}
        for bucket, bucketinfo in stats_buffer.bucket_info.iteritems():
            server_map = bucketinfo['vBucketServerMap']
            num_replica = server_map['numReplicas']
            vbucket_map = server_map['vBucketMap']
            num_servers = len(server_map['serverList'])

            symptoms = []

            # check one - vbucket map length
            map_len = len(vbucket_map)
            if map_len != accessor["threshold"]:
                symptoms.append(
                    "vBucketMap length {0} is not equal to {1}".format(
                        map_len, accessor["threshold"]))

            chain_len = num_replica + 1
            for chain in vbucket_map:
                # Entries that are not lists are skipped, as before.
                if isinstance(chain, list):
                    symptoms.extend(self._chain_symptoms(
                        chain, chain_len, num_replica, num_servers))

            trend = []
            if symptoms:
                errors = [{"node" : bucket, "value" : symptom}
                          for symptom in symptoms]
                trend.append(("error", errors))
            result[bucket] = trend

        return result

    def _chain_symptoms(self, chain, expected_len, num_replica, num_servers):
        """Return the symptom strings found in one vBucketMap chain."""
        symptoms = []

        # check two - each vbucket map correctness
        if len(chain) != expected_len:
            symptoms.append(
                "vBucketMap element length {0} is not consistent to replica {1}".format(
                    len(chain), num_replica))

        # check three - each vbucket index correctness
        for index in chain:
            # Besides indexes past the end of the server list, reject negative
            # indexes other than the UNASSIGNED placeholder.
            if index > num_servers - 1 or index < self.UNASSIGNED:
                symptoms.append(
                    "vBucketMap element server index {0} can not be found in server list".format(
                        index))

        # check four - check uniqueness for vbucket
        # Only assigned indexes must be unique; repeated UNASSIGNED entries
        # are not a violation.
        assigned = [index for index in chain if index != self.UNASSIGNED]
        if len(set(assigned)) < len(assigned):
            symptoms.append(
                "vBucketMap element {0} violates index uniqueness".format(chain))

        return symptoms
+
+
class VbucketServerListSanity:
    """Sanity check for the server list of every bucket's vBucketServerMap."""

    def run(self, accessor, scale, threshold=None):
        """Report buckets whose ``serverList`` holds the same node twice.

        Iterates ``stats_buffer.bucket_info``; ``accessor``, ``scale`` and
        ``threshold`` are accepted but not used by this check.

        Returns a dict with one entry per key of ``bucket_info``.  Each value
        is a list that is empty when the server list has no duplicates, or
        holds a single ``("error", [{"node": <bucket>, "value": <symptom>}])``
        tuple otherwise.
        """
        report = {}
        for name, info in stats_buffer.bucket_info.iteritems():
            servers = info['vBucketServerMap']['serverList']
            findings = []
            # A set drops repeated nodes, so a shorter set means duplicates.
            if len(servers) > len(set(servers)):
                message = "vBucketMap server list {0} violates node uniqueness".format(servers)
                findings.append(("error", [{"node" : name, "value" : message}]))
            report[name] = findings

        return report
+
class RebalanceStuck:
def run(self, accessor, scale, threshold=None):
result = {}
@@ -583,6 +640,25 @@ def run(self, accessor, scale, threshold=None):
"perBucket" : True,
"perNode" : True,
},
+ {"name" : "VBucketServerMap",
+ "ingredients" : [
+ {
+ "name" : "vbucketMap",
+ "description" : "Sanity checks for vBucket map",
+ "code" : "VbucketMapSanity",
+ "threshold" : 1024,
+ "formula" : "",
+ },
+ {
+ "name" : "vbucketServerList",
+ "description" : "Sanity checks for vBucket server list",
+ "code" : "VbucketServerListSanity",
+ "formula" : "",
+ },
+ ],
+ "indicator" : True,
+ "perBucket" : True,
+ },
{"name" : "MemoryUsage",
"ingredients" : [
{
View
2  collector.py
@@ -135,7 +135,7 @@ def get_bucketlist(self, server, port, user, password, bucketname, opts):
bucketinfo['numReplica'] = bucket['replicaNumber']
bucketinfo['ramQuota'] = bucket['quota']['ram']
bucketinfo['master'] = server
-
+ bucketinfo['vBucketServerMap'] = bucket['vBucketServerMap']
bucketStats = bucket['basicStats']
bucketinfo['bucketStats'] = {}
for key in bucketStats.iterkeys():
View
5 prescription.py
@@ -17,6 +17,11 @@
"impact" : "Data is missing and unavailable. Writes to missing vBuckets will fail.",
"action" : "Run rebalance to recreate missing vBuckets. If issue persists please contact support@couchbase.com",
},
+ "VBucketServerMap" : {
+ "cause" : "vBucketServerMap sanity checking fails",
+ "impact" : "Rebalance may fail",
+ "action" : "Please contact support@couchbase.com",
+ },
"RebalancePerformance" : {
"cause" : "Amount of data that should be moved between nodes, called TAP Queue, is is higher than threshold.",
"impact" : "Rebalances will take long time, freeze, or may fail due to timeout.",
Please sign in to comment.
Something went wrong with that request. Please try again.