Generate relative feature complete report

commit 63f75e9bab699961b3a86f3faff1d95a6cff9db5 1 parent e9073c3
bcui6611 authored
4 Makefile.am
@@ -4,9 +4,9 @@ default:
pythonlibdir=$(libdir)/python
-pythonlib_SCRIPTS= cbworkloadgen
+pythonlib_SCRIPTS= healthChecker
-PYTHON_TOOLS= wrapper/cbworkloadgen
+PYTHON_TOOLS= wrapper/healthChecker
${PYTHON_TOOLS}: wrapper/wrapper
cp $< $@
0  README
No changes.
148 analyzer.py
@@ -1,12 +1,11 @@
+import sys
import datetime
-import dbaccessor
-import util
+import logging
+import util_cli as util
import cluster_stats
-import bucket_stats
import diskqueue_stats
import node_stats
-
import stats_buffer
from Cheetah.Template import Template
@@ -29,68 +28,45 @@
cluster_symptoms = {}
bucket_symptoms = {}
bucket_node_symptoms = {}
+bucket_node_status = {}
node_symptoms = {}
indicator_error = {}
indicator_warn = {}
node_disparate = {}
-def format_output(counter, result):
- if len(result) == 1:
- if counter.has_key("unit") and counter["unit"] == "GB":
- return util.pretty_float(result[0])
- else:
- return result[0]
- else:
- return result
-
class StatsAnalyzer:
- def __init__(self):
- self.accessor = dbaccessor.DbAccesor()
+ def __init__(self, log):
+ self.log = log
def run_analysis(self):
- self.accessor.connect_db()
- self.accessor.browse_db()
for bucket in stats_buffer.buckets.iterkeys():
bucket_list.append(bucket)
bucket_symptoms[bucket] = []
bucket_node_symptoms[bucket] = {}
+ bucket_node_status[bucket] = {}
for capsule, package_name in capsules:
for pill in capsule:
- #print pill['name']
+ self.log.debug(pill['name'])
for counter in pill['ingredients']:
- if counter['type'] == 'SQL':
- result = eval("{0}.{1}().run(self.accessor, \"{2}\")".format(package_name, counter['code'], counter['stmt']))
- elif counter['type'] == 'pythonSQL':
- result = eval("{0}.{1}().run(self.accessor)".format(package_name, counter['code']))
- elif counter['type'] == 'python':
- result = eval("{0}.{1}().run(counter)".format(package_name, counter['code']))
-
- #if counter.has_key("unit") and counter["unit"] == "GB":
- # util.pretty_print({counter["description"] : result})
- #else:
- # util.pretty_print({counter["description"] : result})
+ result = eval("{0}.{1}().run(counter)".format(package_name, counter['code']))
- #print counter
+ self.log.debug(counter)
if pill.has_key("clusterwise") and pill["clusterwise"] :
if isinstance(result, dict):
if result.has_key("cluster"):
- if counter.has_key("unit") and counter["unit"] == "GB":
- cluster_symptoms[counter["name"]] = {"description" : counter["description"], "value": util.humanize_bytes(result["cluster"])}
- else:
- cluster_symptoms[counter["name"]] = {"description" : counter["description"], "value":result["cluster"]}
+ cluster_symptoms[counter["name"]] = {"description" : counter["description"], "value":result["cluster"]}
else:
cluster_symptoms[counter["name"]] = {"description" : counter["description"], "value":result}
else:
cluster_symptoms[counter["name"]] = {"description" : counter["description"], "value":result}
if pill.has_key("perBucket") and pill["perBucket"] :
- #bucket_symptoms[counter["name"]] = {"description" : counter["description"], "value":result}
for bucket, values in result.iteritems():
if bucket == "cluster":
continue
for val in values:
- if val[0] == "variance":
+ if val[0] == "variance" or val[0] == "error":
continue
elif val[0] == "total":
bucket_symptoms[bucket].append({"description" : counter["description"], "value" : values[-1][1]})
@@ -104,14 +80,49 @@ def run_analysis(self):
if pill.has_key("nodewise") and pill["nodewise"]:
node_list[counter["name"]] = {"description" : counter["description"], "value":result}
- if pill.has_key("indicator") and pill["indicator"] :
+ if pill.has_key("indicator"):
if len(result) > 0:
for bucket,values in result.iteritems():
- if values.has_key("error"):
- indicator_error[counter["name"]] = {"description" : counter["description"], "bucket": bucket, "value":values["error"]}
- if values.has_key("warn"):
- indicator_warn[counter["name"]] = {"description" : counter["description"], "bucket": bucket, "value":values["warn"]}
-
+ if type(values) is dict:
+ if values.has_key("error"):
+ indicator_error[counter["name"]] = {"description" : counter["description"],
+ "bucket": bucket,
+ "value":values["error"],
+ "cause" : pill["indicator"]["cause"],
+ "impact" : pill["indicator"]["impact"],
+ "action" : pill["indicator"]["action"],
+ }
+ for val in values["error"]:
+ bucket_node_status[bucket][val["node"]] = "error"
+
+ if values.has_key("warn"):
+ indicator_warn[counter["name"]] = {"description" : counter["description"],
+ "bucket": bucket,
+ "value":values["warn"],
+ "cause" : pill["indicator"]["cause"],
+ "impact" : pill["indicator"]["impact"],
+ "action" : pill["indicator"]["action"],
+ }
+ elif type(values) is list:
+ for val in values:
+ if val[0] == "error":
+ indicator_error[counter["name"]] = {"description" : counter["description"],
+ "bucket": bucket,
+ "value":val[1],
+ "cause" : pill["indicator"]["cause"],
+ "impact" : pill["indicator"]["impact"],
+ "action" : pill["indicator"]["action"],
+ }
+ for err in val[1]:
+ bucket_node_status[bucket][err["node"]] = "error"
+ elif val[0] == "warn":
+ indicator_warn[counter["name"]] = {"description" : counter["description"],
+ "bucket": bucket,
+ "value":val[1],
+ "cause" : pill["indicator"]["cause"],
+ "impact" : pill["indicator"]["impact"],
+ "action" : pill["indicator"]["action"],
+ }
if pill.has_key("nodeDisparate") and pill["nodeDisparate"] :
for bucket,values in result.iteritems():
if bucket == "cluster":
@@ -121,42 +132,49 @@ def run_analysis(self):
continue;
if val[0] == "variance" and val[1] != 0:
node_disparate[counter["name"]] = {"description" : counter["description"], "bucket": bucket, "value":values}
-
- self.accessor.close()
- self.accessor.remove_db()
-
- def run_report(self):
+
+ if len(indicator_error) > 0:
+ globals["cluster_health"] = "error"
+ elif len(indicator_warn) > 0:
+ globals["cluster_health"] = "warning"
+
+ def run_report(self, txtfile, htmlfile, verbose):
dict = {
"globals" : globals,
"cluster_symptoms" : cluster_symptoms,
"bucket_symptoms" : bucket_symptoms,
"bucket_node_symptoms" : bucket_node_symptoms,
+ "bucket_node_status" : bucket_node_status,
"node_symptoms" : node_symptoms,
"node_list" : node_list,
"bucket_list" : bucket_list,
"indicator_warn" : indicator_warn,
"indicator_error" : indicator_error,
+ "verbose" : verbose,
}
- debug = True
- if debug:
- print "Nodelist Overview"
- util.pretty_print(node_list)
+ f = open(txtfile, 'w')
+ report = {}
+ report["Report Time"] = globals["report_time"].strftime("%Y-%m-%d %H:%M:%S")
+
+ report["Nodelist Overview"] = node_list
- print "Cluster Overview"
- util.pretty_print(cluster_symptoms)
-
- print "Bucket Metrics"
- util.pretty_print(bucket_symptoms)
-
- print "Bucket Node Metrics"
- util.pretty_print(bucket_node_symptoms)
+ report["Cluster Overview"] = cluster_symptoms
+
+ report["Bucket Metrics"] = bucket_symptoms
+
+ report["Bucket Node Metrics"] = bucket_node_symptoms
- print "Key indicators"
- util.pretty_print(indicator_error)
- util.pretty_print(indicator_warn)
+ report["Key indicators"] = (indicator_error, indicator_warn)
- print "Node disparate"
- util.pretty_print(node_disparate)
- #print Template(file="report-htm.tmpl", searchList=[dict])
+ report["Node disparate"] = node_disparate
+
+ print >> f, util.pretty_print(report)
+ f.close()
+
+ f = open(htmlfile, 'w')
+ print >> f, Template(file="report-htm.tmpl", searchList=[dict])
+ f.close()
+
+ sys.stderr.write("\nThe run finished successfully. Please find the output report at " + htmlfile + "\n")
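
For reference, every ingredient above is dispatched the same way: run_analysis() looks up the class named by counter["code"] in the capsule's module and calls its run() method. A minimal, standalone sketch of one such dispatch, using the TotalDataSize entry defined in cluster_stats.py further down; it assumes the collector has already populated stats_buffer:

import cluster_stats

# One ingredient, copied from the TotalDataSize capsule entry in cluster_stats.py.
counter = {
    "name" : "totalDataSize",
    "description" : "Total Data Size across cluster",
    "code" : "TotalDataSize",
}
package_name = "cluster_stats"

# Same eval-based dispatch used by run_analysis(); requires stats_buffer.nodes
# to have been filled in by collector.StatsCollector beforehand.
result = eval("{0}.{1}().run(counter)".format(package_name, counter["code"]))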
70 buckets.py
@@ -7,11 +7,21 @@
rest_cmds = {
'bucket-list': '/pools/default/buckets',
+ 'bucket-flush': '/pools/default/buckets/',
+ 'bucket-delete': '/pools/default/buckets/',
+ 'bucket-create': '/pools/default/buckets/',
+ 'bucket-edit': '/pools/default/buckets/',
+ 'bucket-get': '/pools/default/buckets',
'bucket-stats': '/pools/default/buckets/{0}/stats?zoom=hour',
'bucket-node-stats': '/pools/default/buckets/{0}/stats/{1}?zoom={2}'
}
methods = {
'bucket-list': 'GET',
+ 'bucket-delete': 'DELETE',
+ 'bucket-create': 'POST',
+ 'bucket-edit': 'POST',
+ 'bucket-flush': 'POST',
+ 'bucket-get': 'GET',
'bucket-stats': 'GET',
'bucket-node-stats': 'GET',
}
@@ -58,13 +68,68 @@ def runCmd(self, cmd, server, port,
# get the parameters straight
+ if cmd in ('bucket-create', 'bucket-edit'):
+ if bucketname:
+ rest.setParam('name', bucketname)
+ if bucketname == "default":
+ if bucketport and bucketport != "11211":
+ usage("default bucket must be on port 11211.")
+ if bucketpassword:
+ usage("default bucket should only have empty password.")
+ authtype = 'sasl'
+ else:
+ if bucketport == "11211":
+ authtype = 'sasl'
+ else:
+ authtype = 'none'
+ if bucketpassword:
+ usage("a sasl bucket is supported only on port 11211.")
+ if buckettype:
+ rest.setParam('bucketType', buckettype)
+ if authtype:
+ rest.setParam('authType', authtype)
+ if bucketport:
+ rest.setParam('proxyPort', bucketport)
+ if bucketpassword:
+ rest.setParam('saslPassword', bucketpassword)
+ if bucketramsize:
+ rest.setParam('ramQuotaMB', bucketramsize)
+ if bucketreplication:
+ rest.setParam('replicaNumber', bucketreplication)
+ if cmd in ('bucket-delete', 'bucket-flush', 'bucket-edit'):
+ self.rest_cmd = self.rest_cmd + bucketname
+ if cmd == 'bucket-flush':
+ self.rest_cmd = self.rest_cmd + '/controller/doFlush'
+
opts = {}
- opts['error_msg'] = "unable to %s" % cmd
+ opts['error_msg'] = "unable to %s; please check your username (-u) and password (-p);" % cmd
opts['success_msg'] = "%s" % cmd
data = rest.restCmd(methods[cmd], self.rest_cmd,
self.user, self.password, opts)
- return rest.getJson(data)
+ if cmd in("bucket-get", "bucket-stats", "bucket-node-stats"):
+ return rest.getJson(data)
+ elif cmd == "bucket-list":
+ if output == 'json':
+ print data
+ else:
+ json = rest.getJson(data)
+ for bucket in json:
+ print '%s' % bucket['name']
+ print ' bucketType: %s' % bucket['bucketType']
+ print ' authType: %s' % bucket['authType']
+ if bucket['authType'] == "sasl":
+ print ' saslPassword: %s' % bucket['saslPassword']
+ else:
+ print ' proxyPort: %s' % bucket['proxyPort']
+ print ' numReplicas: %s' % bucket['replicaNumber']
+ print ' ramQuota: %s' % bucket['quota']['ram']
+ print ' ramUsed: %s' % bucket['basicStats']['memUsed']
+ else:
+ if output == 'json':
+ print rest.jsonMessage(data)
+ else:
+ print data
class BucketStats:
def __init__(self, bucket_name):
@@ -102,4 +167,3 @@ def runCmd(self, cmd, server, port,
data = rest.restCmd(methods[cmd], self.rest_cmd,
user, password, opts)
return rest.getJson(data)
-
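
The bucket-create/bucket-edit branch above derives the authType from the bucket name, proxy port and password before setting any REST parameters. A minimal sketch of that decision as a hypothetical helper (the function name and the exception-based error handling are illustrative; the real code calls usage() instead):

def derive_auth_type(bucketname, bucketport, bucketpassword):
    # Mirrors the validation in runCmd() above: the default bucket must stay on
    # port 11211 with an empty password; any other bucket on port 11211 is a
    # sasl bucket; otherwise authType is 'none' and a password is rejected.
    if bucketname == "default":
        if bucketport and bucketport != "11211":
            raise ValueError("default bucket must be on port 11211.")
        if bucketpassword:
            raise ValueError("default bucket should only have an empty password.")
        return "sasl"
    if bucketport == "11211":
        return "sasl"
    if bucketpassword:
        raise ValueError("a sasl bucket is supported only on port 11211.")
    return "none"

For example, derive_auth_type("beer-sample", "11211", "") returns "sasl", while a password on any other port is rejected.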
319 cluster_stats.py
@@ -1,18 +1,22 @@
-import dbaccessor
import stats_buffer
-import util
+import util_cli as util
-class ExecSQL:
- def run(self, accessor, stmt):
- result = accessor.execute(stmt)
- return result[0]
+class BucketSummary:
+ def run(self, accessor):
+ return stats_buffer.bucket_info
class DGMRatio:
def run(self, accessor):
- hdd = accessor.execute("SELECT sum(usedbyData) FROM StorageInfo WHERE type='hdd'")
- ram = accessor.execute("SELECT sum(usedbyData) FROM StorageInfo WHERE type='ram'")
- if ram[0] > 0:
- ratio = hdd[0] / ram[0]
+ result = []
+ hdd_total = 0
+ ram_total = 0
+ for node, nodeinfo in stats_buffer.nodes.iteritems():
+ if nodeinfo["StorageInfo"].has_key("hdd"):
+ hdd_total += nodeinfo['StorageInfo']['hdd']['usedByData']
+ if nodeinfo["StorageInfo"].has_key("ram"):
+ ram_total += nodeinfo['StorageInfo']['ram']['usedByData']
+ if ram_total > 0:
+ ratio = hdd_total / ram_total
else:
ratio = 0
return ratio
@@ -26,6 +30,7 @@ def run(self, accessor):
"curr_items": [],
"vb_replica_curr_items": [],
}
+ num_error = []
for counter in accessor["counter"]:
values = stats_info[accessor["scale"]][counter]
nodeStats = values["nodeStats"]
@@ -39,8 +44,10 @@ def run(self, accessor):
if replica[1] == 0:
res.append((active[0], "No replica"))
else:
- ratio = 1.0 * active[1] / replica[1]
+ ratio = 1.0 * active[1] / replica[1]
res.append((active[0], util.pretty_float(ratio)))
+ if ratio < accessor["threshold"]:
+ num_error.append({"node":active[0], "value": ratio})
active_total += active[1]
replica_total += replica[1]
if replica_total == 0:
@@ -49,12 +56,16 @@ def run(self, accessor):
ratio = active_total * 1.0 / replica_total
cluster += ratio
res.append(("total", util.pretty_float(ratio)))
+ if ratio < accessor["threshold"]:
+ num_error.append({"node":"total", "value": ratio})
+ if len(num_error) > 0:
+ res.append(("error", num_error))
result[bucket] = res
result["cluster"] = util.pretty_float(cluster / len(stats_buffer.buckets))
return result
class OpsRatio:
- def run(self, accessor):
+ def run(self, accessor):
result = {}
for bucket, stats_info in stats_buffer.buckets.iteritems():
ops_avg = {
@@ -82,11 +93,11 @@ def run(self, accessor):
write_total += write_ratio
del_ratio = delete[1] * 100 / count
del_total += del_ratio
- res.append((read[0], "{0}:{1}:{2}".format(read_ratio, write_ratio, del_ratio)))
+ res.append((read[0], "{0}:{1}:{2}".format(int(read_ratio+.5), int(write_ratio+.5), int(del_ratio+.5))))
read_total /= len(ops_avg['cmd_get'])
write_total /= len(ops_avg['cmd_set'])
del_total /= len(ops_avg['delete_hits'])
- res.append(("total", "{0}:{1}:{2}".format(read_total, write_total, del_total)))
+ res.append(("total", "{0}:{1}:{2}".format(int(read_total+.5), int(write_total+.5), int(del_total+.5))))
result[bucket] = res
return result
@@ -104,18 +115,24 @@ def run(self, accessor):
trend = []
total = 0
data = []
+ num_error = []
for node, vals in nodeStats.iteritems():
- a, b = util.linreg(timestamps, vals)
- value = a * timestamps[-1] + b
+ #a, b = util.linreg(timestamps, vals)
+ value = sum(vals) / samplesCount
total += value
+ if value > accessor["threshold"]:
+ num_error.append({"node":node, "value":value})
trend.append((node, util.pretty_float(value)))
data.append(value)
total /= len(nodeStats)
trend.append(("total", util.pretty_float(total)))
trend.append(("variance", util.two_pass_variance(data)))
+ if len(num_error) > 0:
+ trend.append(("error", num_error))
cluster += total
result[bucket] = trend
- result["cluster"] = util.pretty_float(cluster / len(stats_buffer.buckets))
+ if len(stats_buffer.buckets) > 0:
+ result["cluster"] = util.pretty_float(cluster / len(stats_buffer.buckets))
return result
class MemUsed:
@@ -133,8 +150,9 @@ def run(self, accessor):
data = []
for node, vals in nodeStats.iteritems():
avg = sum(vals) / samplesCount
- trend.append((node, util.pretty_float(avg)))
+ trend.append((node, util.size_label(avg)))
data.append(avg)
+ #print data
trend.append(("variance", util.two_pass_variance(data)))
result[bucket] = trend
return result
@@ -142,6 +160,8 @@ def run(self, accessor):
class ItemGrowth:
def run(self, accessor):
result = {}
+ start_cluster = 0
+ end_cluster = 0
for bucket, stats_info in stats_buffer.buckets.iteritems():
trend = []
values = stats_info[accessor["scale"]][accessor["counter"]]
@@ -155,16 +175,17 @@ def run(self, accessor):
trend.append((node, 0))
else:
start_val = b
+ start_cluster += b
end_val = a * timestamps[-1] + b
+ end_cluster += end_val
rate = (end_val * 1.0 / b - 1.0) * 100
- trend.append((node, util.pretty_float(rate)))
+ trend.append((node, util.pretty_float(rate) + "%"))
result[bucket] = trend
+ if len(stats_buffer.buckets) > 0:
+ rate = (end_cluster * 1.0 / start_cluster - 1.0) * 100
+ result["cluster"] = util.pretty_float(rate) + "%"
return result
-class AvgItemSize:
- def run(self, accessor):
- return 0
-
class NumVbuckt:
def run(self, accessor):
result = {}
@@ -174,21 +195,98 @@ def run(self, accessor):
nodeStats = values["nodeStats"]
for node, vals in nodeStats.iteritems():
if vals[-1] < accessor["threshold"]:
- num_error.append({"node":node, "value":vals[-1]})
+ num_error.append({"node":node, "value": int(vals[-1])})
if len(num_error) > 0:
result[bucket] = {"error" : num_error}
return result
+class RebalanceStuck:
+ def run(self, accessor):
+ result = {}
+ for bucket, bucket_stats in stats_buffer.node_stats.iteritems():
+ num_error = []
+ for node, stats_info in bucket_stats.iteritems():
+ for key, value in stats_info.iteritems():
+ if key.find(accessor["counter"]) >= 0:
+ if accessor.has_key("threshold"):
+ if int(value) > accessor["threshold"]:
+ num_error.append({"node":node, "value": (key, value)})
+ else:
+ num_error.append({"node":node, "value": (key, value)})
+ if len(num_error) > 0:
+ result[bucket] = {"error" : num_error}
+ return result
+
+class MemoryFramentation:
+ def run(self, accessor):
+ result = {}
+ for bucket, bucket_stats in stats_buffer.node_stats.iteritems():
+ num_error = []
+ for node, stats_info in bucket_stats.iteritems():
+ for key, value in stats_info.iteritems():
+ if key.find(accessor["counter"]) >= 0:
+ if accessor.has_key("threshold"):
+ if int(value) > accessor["threshold"]:
+ if accessor.has_key("unit"):
+ if accessor["unit"] == "time":
+ num_error.append({"node":node, "value": (key, util.time_label(value))})
+ elif accessor["unit"] == "size":
+ num_error.append({"node":node, "value": (key, util.size_label(value))})
+ else:
+ num_error.append({"node":node, "value": (key, value)})
+ else:
+ num_error.append({"node":node, "value": (key, value)})
+ if len(num_error) > 0:
+ result[bucket] = {"error" : num_error}
+ return result
+
+class EPEnginePerformance:
+ def run(self, accessor):
+ result = {}
+ for bucket, bucket_stats in stats_buffer.node_stats.iteritems():
+ num_error = []
+ for node, stats_info in bucket_stats.iteritems():
+ for key, value in stats_info.iteritems():
+ if key.find(accessor["counter"]) >= 0:
+ if accessor.has_key("threshold"):
+ if accessor["counter"] == "flusherState" and value != accessor["threshold"]:
+ num_error.append({"node":node, "value": (key, value)})
+ elif accessor["counter"] == "flusherCompleted" and value == accessor["threshold"]:
+ num_error.append({"node":node, "value": (key, value)})
+ else:
+ if value > accessor["threshold"]:
+ num_error.append({"node":node, "value": (key, value)})
+ if len(num_error) > 0:
+ result[bucket] = {"error" : num_error}
+ return result
+
+class TotalDataSize:
+ def run(self, accessor):
+ result = []
+ total = 0
+ for node, nodeinfo in stats_buffer.nodes.iteritems():
+ if nodeinfo["StorageInfo"].has_key("hdd"):
+ total += nodeinfo['StorageInfo']['hdd']['usedByData']
+ result.append(util.size_label(total))
+ return result
+
+class AvailableDiskSpace:
+ def run(self, accessor):
+ result = []
+ total = 0
+ for node, nodeinfo in stats_buffer.nodes.iteritems():
+ if nodeinfo["StorageInfo"].has_key("hdd"):
+ total += nodeinfo['StorageInfo']['hdd']['free']
+ result.append(util.size_label(total))
+ return result
+
ClusterCapsule = [
{"name" : "TotalDataSize",
"ingredients" : [
{
"name" : "totalDataSize",
"description" : "Total Data Size across cluster",
- "type" : "SQL",
- "stmt" : "SELECT sum(usedbyData) FROM StorageInfo WHERE type='hdd'",
- "code" : "ExecSQL",
- "unit" : "GB",
+ "code" : "TotalDataSize",
}
],
"clusterwise" : True,
@@ -200,10 +298,7 @@ def run(self, accessor):
{
"name" : "availableDiskSpace",
"description" : "Available disk space",
- "type" : "SQL",
- "stmt" : "SELECT sum(free) FROM StorageInfo WHERE type='hdd'",
- "code" : "ExecSQL",
- "unit" : "GB",
+ "code" : "AvailableDiskSpace",
}
],
"clusterwise" : True,
@@ -216,17 +311,19 @@ def run(self, accessor):
"name" : "cacheMissRatio",
"description" : "Cache miss ratio",
"counter" : "ep_cache_miss_rate",
- "type" : "python",
"scale" : "hour",
"code" : "CacheMissRatio",
- "unit" : "percentage",
"threshold" : 2,
},
],
"clusterwise" : True,
"perNode" : True,
"perBucket" : True,
- "indicator" : False,
+ "indicator" : {
+ "cause" : "blah",
+ "impact" : "blah",
+ "action" : "blah",
+ },
"nodeDisparate" : True,
},
{"name" : "DGM",
@@ -234,7 +331,6 @@ def run(self, accessor):
{
"name" : "dgm",
"description" : "Disk to Memory Ratio",
- "type" : "pythonSQL",
"code" : "DGMRatio"
},
],
@@ -246,28 +342,33 @@ def run(self, accessor):
"ingredients" : [
{
"name" : "activeReplicaResidencyRatio",
- "description" : "Active and Replica Residentcy Ratio",
- "type" : "python",
+ "description" : "Active and Replica Resident Ratio",
"counter" : ["curr_items", "vb_replica_curr_items"],
"scale" : "minute",
"code" : "ARRatio",
+ "threshold" : 1,
},
],
"clusterwise" : True,
"perNode" : True,
"perBucket" : True,
+ "indicator" : {
+ "cause" : "blah",
+ "impact" : "blah",
+ "action" : "blah",
+ },
},
{"name" : "OPSPerformance",
"ingredients" : [
{
"name" : "opsPerformance",
"description" : "Read/Write/Delete ops ratio",
- "type" : "python",
"scale" : "minute",
"counter" : ["cmd_get", "cmd_set", "delete_hits"],
"code" : "OpsRatio",
},
- ]
+ ],
+ "perBucket" : True,
},
{"name" : "GrowthRate",
"ingredients" : [
@@ -275,23 +376,12 @@ def run(self, accessor):
"name" : "dataGrowthRateForItems",
"description" : "Data Growth rate for items",
"counter" : "curr_items",
- "type" : "python",
"scale" : "day",
"code" : "ItemGrowth",
"unit" : "percentage",
},
- ]
- },
- {"name" : "AverageDocumentSize",
- "ingredients" : [
- {
- "name" : "averageDocumentSize",
- "description" : "Average Document Size",
- "type" : "python",
- "code" : "AvgItemSize",
- "unit" : "KB",
- },
- ]
+ ],
+ "clusterwise" : True,
},
{"name" : "VBucketNumber",
"ingredients" : [
@@ -299,7 +389,6 @@ def run(self, accessor):
"name" : "activeVbucketNumber",
"description" : "Active VBucket number is less than expected",
"counter" : "vb_active_num",
- "type" : "python",
"scale" : "hour",
"code" : "NumVbuckt",
"threshold" : 1024,
@@ -308,28 +397,136 @@ def run(self, accessor):
"name" : "replicaVBucketNumber",
"description" : "Replica VBucket number is less than expected",
"counter" : "vb_replica_num",
- "type" : "python",
"scale" : "hour",
"code" : "NumVbuckt",
"threshold" : 1024,
},
],
- "indicator" : True,
+ "indicator" : {
+ "cause" : "blah",
+ "impact" : "blah",
+ "action" : "blah",
+ },
},
{"name" : "MemoryUsage",
"ingredients" : [
{
"name" : "memoryUsage",
- "description" : "Check if memory usage and/or fragmentaion",
- "type" : "python",
+ "description" : "Check memory usage",
"counter" : "mem_used",
"scale" : "hour",
"code" : "MemUsed",
},
],
- "perNode" : True,
"nodeDisparate" : True,
},
+ {"name" : "RebalancePerformance",
+ "ingredients" : [
+ {
+ "name" : "rebalanceStuck",
+ "description" : "Check if rebalance is stuck",
+ "counter" : "idle",
+ "code" : "RebalanceStuck",
+ },
+ {
+ "name" : "highBackfillRemaing",
+ "description" : "Tap queue backfilll remaining is too high",
+ "counter" : "ep_tap_queue_backfillremaining",
+ "code" : "RebalanceStuck",
+ "threshold" : 1000,
+ },
+ ],
+ "indicator" : {
+ "cause" : "blah",
+ "impact" : "blah",
+ "action" : "blah",
+ }
+ },
+ {"name" : "MemoryFragmentation",
+ "ingredients" : [
+ {
+ "name" : "totalFragmentation",
+ "description" : "Total memory fragmentation",
+ "counter" : "total_fragmentation_bytes",
+ "code" : "MemoryFramentation",
+ "unit" : "size",
+ "threshold" : 1073741824, # 1GB
+ },
+ {
+ "name" : "diskDelete",
+ "description" : "Averge disk delete time",
+ "counter" : "disk_del",
+ "code" : "MemoryFramentation",
+ "unit" : "time",
+ "threshold" : 1000 #1ms
+ },
+ {
+ "name" : "diskUpdate",
+ "description" : "Averge disk update time",
+ "counter" : "disk_update",
+ "code" : "MemoryFramentation",
+ "unit" : "time",
+ "threshold" : 1000 #1ms
+ },
+ {
+ "name" : "diskInsert",
+ "description" : "Averge disk insert time",
+ "type" : "python",
+ "counter" : "disk_insert",
+ "code" : "MemoryFramentation",
+ "unit" : "time",
+ "threshold" : 1000 #1ms
+ },
+ {
+ "name" : "diskCommit",
+ "description" : "Averge disk commit time",
+ "counter" : "disk_commit",
+ "code" : "MemoryFramentation",
+ "unit" : "time",
+ "threshold" : 5000000 #10s
+ },
+ ],
+ "indicator" : {
+ "cause" : "blah",
+ "impact" : "blah",
+ "action" : "blah",
+ },
+ },
+ {"name" : "EPEnginePerformance",
+ "ingredients" : [
+ {
+ "name" : "flusherState",
+ "description" : "Engine flusher state",
+ "counter" : "ep_flusher_state",
+ "code" : "EPEnginePerformance",
+ "threshold" : "running",
+ },
+ {
+ "name" : "flusherCompleted",
+ "description" : "Flusher completed",
+ "counter" : "ep_flusher_num_completed",
+ "code" : "EPEnginePerformance",
+ "threshold" : 0
+ },
+ {
+ "name" : "avgItemLoadTime",
+ "description" : "Average item loaded time",
+ "counter" : "ep_bg_load_avg",
+ "code" : "EPEnginePerformance",
+ "threshold" : 100,
+ },
+ {
+ "name" : "avgItemWaitTime",
+ "description" : "Averge item waited time",
+ "counter" : "ep_bg_wait_avg",
+ "code" : "EPEnginePerformance",
+ "threshold" : 100
+ },
+ ],
+ "indicator" : {
+ "cause" : "blah",
+ "impact" : "blah",
+ "action" : "blah",
+ },
+ },
]
-
-
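
DGMRatio, TotalDataSize and AvailableDiskSpace above all read per-node storage totals from stats_buffer.nodes. An illustrative sketch of the shape they expect; the field names are taken from the code, while the host key and byte values are invented:

import stats_buffer
from cluster_stats import TotalDataSize

stats_buffer.nodes = {
    "10.1.2.3:8091" : {
        "StorageInfo" : {
            # usedByData and free are byte counts; the numbers are placeholders.
            "hdd" : {"usedByData" : 52428800, "free" : 10737418240},
            "ram" : {"usedByData" : 31457280},
        },
    },
}

# TotalDataSize sums hdd usedByData across nodes and returns a size label.
print TotalDataSize().run({})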
6 configure.ac
@@ -1,10 +1,10 @@
-# workload-generator
-# Copyright (C) 2011 Couchbase, INC
+# health-checker
+# Copyright (C) 2012 Couchbase, INC
# All rights reserved.
#
AC_PREREQ(2.59)
m4_include([m4/version.m4])
-AC_INIT(workload-generator, VERSION_NUMBER, bin@couchbase.com)
+AC_INIT(healthChecker, VERSION_NUMBER, bin@couchbase.com)
AC_CONFIG_AUX_DIR(config)
AM_INIT_AUTOMAKE
AC_CONFIG_FILES(Makefile wrapper/wrapper)
10 dbaccessor.py
@@ -41,14 +41,6 @@ def create_databases(self):
self.cursor.execute(""" CREATE UNIQUE INDEX IF NOT EXISTS server_idx on
ServerNode(host, port, master) """)
- self.cursor.execute(""" CREATE TABLE IF NOT EXISTS DiskInfo (
- diskInfoId INTEGER PRIMARY KEY,
- path TEXT NOT NULL,
- sizeBytes INTEGER,
- usagePercent INTEGER,
- serverId INTEGER,
- FOREIGN KEY(serverId) REFERENCES ServerNode(serverId))""")
-
self.cursor.execute(""" CREATE TABLE IF NOT EXISTS MemoryInfo (
memoryInfoId INTEGER PRIMARY KEY,
allocated INTEGER,
@@ -163,7 +155,7 @@ def process_node_stats(self, nodeId, nodeInfo):
hdd['usedByData'],
nodeId));
ram = nodeInfo['storageTotals']['ram']
- if hdd is not None:
+ if ram is not None:
self.cursor.execute(sqlstmt.format('ram',
hdd['free'],
hdd['quotaTotal'],
31 diskqueue_stats.py
@@ -1,7 +1,5 @@
-import dbaccessor
import stats_buffer
-import util
-counter_name = 'disk_write_queue'
+import util_cli as util
class AvgDiskQueue:
def run(self, accessor):
@@ -102,20 +100,18 @@ def run(self, accessor):
"counter" : "disk_write_queue",
"pernode" : True,
"scale" : "minute",
- "type" : "python",
"code" : "AvgDiskQueue",
"threshold" : {
"low" : 50000000,
"high" : 1000000000
},
- },
+ },
{
"name" : "diskQueueTrend",
"description" : "Persistence severely behind - disk write queue continues growing",
"counter" : "disk_write_queue",
"pernode" : True,
"scale" : "hour",
- "type" : "python",
"code" : "DiskQueueTrend",
"threshold" : {
"low" : 0,
@@ -123,7 +119,11 @@ def run(self, accessor):
},
},
],
- "indicator" : True,
+ "indicator" : {
+ "cause" : "blah",
+ "impact" : "blah",
+ "action" : "blah",
+ },
},
{"name" : "ReplicationTrend",
"ingredients" : [
@@ -133,7 +133,6 @@ def run(self, accessor):
"counter" : "ep_tap_total_total_backlog_size",
"pernode" : True,
"scale" : "hour",
- "type" : "python",
"code" : "TapQueueTrend",
"threshold" : {
"low" : 0,
@@ -141,7 +140,11 @@ def run(self, accessor):
},
}
],
- "indicator" : True,
+ "indicator" : {
+ "cause" : "blah",
+ "impact" : "blah",
+ "action" : "blah",
+ },
},
{"name" : "DiskQueueDrainingAnalysis",
"description" : "",
@@ -152,20 +155,18 @@ def run(self, accessor):
"counter" : ["vb_active_queue_drain", "disk_write_queue"],
"pernode" : True,
"scale" : "minute",
- "type" : "python",
"code" : "DiskQueueDrainingRate",
"threshold" : {
"drainRate" : 0,
"diskLength" : 100000,
},
- },
+ },
{
"name" : "replicaDiskQueueDrainRate",
"description" : "Persistence severely behind - replica disk queue draining rate is below threshold",
"counter" : ["vb_replica_queue_drain", "disk_write_queue"],
"pernode" : True,
"scale" : "minute",
- "type" : "python",
"code" : "DiskQueueDrainingRate",
"threshold" : {
"drainRate" : 0,
@@ -173,6 +174,10 @@ def run(self, accessor):
},
},
],
- "indicator" : True,
+ "indicator" : {
+ "cause" : "blah",
+ "impact" : "blah",
+ "action" : "blah",
+ }
},
]
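
When one of the disk-queue pills above trips, run_analysis() in analyzer.py records an entry keyed by the ingredient name and carries along the pill's cause/impact/action text. An illustrative example of the resulting indicator_error entry for diskQueueTrend; the bucket name, node address and queue length are invented, and the per-node error list format is assumed to match the other stats classes:

indicator_error = {}
indicator_error["diskQueueTrend"] = {
    "description" : "Persistence severely behind - disk write queue continues growing",
    "bucket" : "default",                                          # invented
    "value" : [{"node" : "10.1.2.3:8091", "value" : 1200000000}],  # assumed format
    "cause" : "blah",   # placeholder text copied from the pill definition above
    "impact" : "blah",
    "action" : "blah",
}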
172 healthChecker.py
@@ -6,32 +6,32 @@
import os
import traceback
import copy
+import logging
-import dbaccessor
+import collector
import analyzer
import stats_buffer
-import util
-
-import listservers
-import buckets
-import node
-import info
import util_cli as util
-import mc_bin_client
-import simplejson
import node_map
+log = logging.getLogger('healthChecker')
+log.setLevel(logging.INFO)
+log.addHandler(logging.StreamHandler())
+
def parse_opt():
- (cluster, user, password) = ('', '','')
+ (cluster, user, password, txtfile, htmlfile, verbose) = ('', '', '', 'kpi_report.txt', 'health_report.html', True)
try:
(opts, _args) = getopt.getopt(sys.argv[1:],
- 'c:dp:u:', [
+ 'c:dvp:u:t:h:', [
'cluster=',
'debug',
+ 'verbose',
'password=',
- 'user='
+ 'user=',
+ 'txt=',
+ 'html=',
])
except getopt.GetoptError, err:
usage(err)
@@ -44,129 +44,47 @@ def parse_opt():
if opt in ('-p', '--password'):
password = arg
if opt in ('-d', '--debug'):
- debug = True
+ log.setLevel(logging.DEBUG)
+ if opt in ('-t', '--txt'):
+ txtfile = arg
+ if opt in ('-h', '--html'):
+ htmlfile = arg
+
if not cluster:
- usage("please provide a CLUSTER, or use -h for more help.")
- return (cluster, user, password, opts)
+ usage()
+ return (cluster, user, password, txtfile, htmlfile, verbose, opts)
-def get_stats(mc, stats):
- try:
- node_stats = mc.stats('')
- if node_stats:
- for key, val in node_stats.items():
- stats[key] = val
- except Exception, err:
- #print "ERROR: command: %s: %s:%d, %s" % ('stats all', server, port, err)
- traceback.print_exc()
- #sys.exit(1)
-
- try:
- node_stats = mc.stats('tap')
- if node_stats:
- for key, val in node_stats.items():
- stats[key] = val
- except Exception, err:
- #print "ERROR: command: %s: %s:%d, %s" % ('stats tap', server, port, err)
- traceback.print_exc()
- #sys.exit(1)
-
-def stats_formatter(stats, prefix=" ", cmp=None):
- if stats:
- longest = max((len(x) + 2) for x in stats.keys())
- for stat, val in sorted(stats.items(), cmp=cmp):
- s = stat + ":"
- print "%s%s%s" % (prefix, s.ljust(longest), val)
-
-def collect_data():
-
- (cluster, user, password, opts) = parse_opt()
- server, port = util.hostport(cluster)
-
- nodes = []
- commands = {
- 'host-list' : listservers.ListServers,
- 'server-info' : info.Info,
- 'bucket-list' : buckets.Buckets,
- 'bucket-stats' : buckets.BucketStats,
- 'bucket-node-stats' : buckets.BucketNodeStats,
- }
-
- accessor = dbaccessor.DbAccesor()
-
- accessor.connect_db()
- accessor.create_databases();
-
- #get node list and its status
- try:
- cmd = 'host-list'
- c = commands[cmd]()
- nodes = c.runCmd(cmd, server, port, user, password, opts)
- except Exception, err:
- print "ERROR: command: %s: %s:%d, %s" % (cmd, server, port, err)
- sys.exit(1)
-
- #get each node information
- try:
- cmd = 'server-info'
- c = commands[cmd]()
- for node in nodes:
- (node_server, node_port) = util.hostport(node['hostname'])
- if node_map.address_map.has_key(node_server):
- node_server = node_map.address_map[node_server]
- nodeid = accessor.create_or_update_node(node_server, node_port, node['status'], server)
- if node['status'] == 'healthy':
- node_info = c.runCmd(cmd, node_server, node_port, user, password, opts)
- accessor.process_node_stats(nodeid, node_info)
- #stats = {}
- #mc = mc_bin_client.MemcachedClient(node_server, node['ports']['direct'])
- #get_stats(mc, stats)
- else:
- print "Unhealthy node: %s:%s" %(node_server, node['status'])
- except Exception, err:
- traceback.print_exc()
- #print "ERROR: command: %s: %s:%d, %s" % (cmd, server, port, err)
- sys.exit(1)
-
- #get each bucket information
- try:
- cmd = 'bucket-list'
- c = commands[cmd]()
- json = c.runCmd(cmd, server, port, user, password, opts)
- for bucket in json:
- (bucket_name, bucket_id) = accessor.process_bucket(bucket, server)
-
- # get bucket related stats
- cmd = 'bucket-stats'
- c = buckets.BucketStats(bucket_name)
- json = c.runCmd(cmd, server, port, user, password, opts)
- stats_buffer.buckets_summary[bucket_name] = json
-
- #retrieve bucket stats per node
- stats_buffer.buckets[bucket_name] = copy.deepcopy(stats_buffer.stats)
- cmd = 'bucket-node-stats'
- for scale, stat_set in stats_buffer.buckets[bucket_name].iteritems():
- for stat in stat_set.iterkeys():
- print "retieving: ", stat, " scale:", scale
- c = buckets.BucketNodeStats(bucket_name, stat, scale)
- json = c.runCmd(cmd, server, port, user, password, opts)
- stats_buffer.buckets[bucket_name][scale][stat] = json
- #accessor.process_bucket_node_stats(bucket_id, server, stat, json)
- except Exception, err:
- traceback.print_exc()
- #print "ERROR: command: %s: %s:%d, %s" % (cmd, server, port, err)
- sys.exit(1)
-
- accessor.close()
+def usage(error_msg=''):
+ if error_msg:
+ print "ERROR: %s" % error_msg
+ sys.exit(2)
+
+ print """healthChecker - cluster key performance indicator stats
+
+usage: healthChecker CLUSTER OPTIONS
+
+CLUSTER:
+ --cluster=HOST[:PORT] or -c HOST[:PORT]
+
+OPTIONS:
+ -u USERNAME, --user=USERNAME admin username of the cluster
+ -p PASSWORD, --password=PASSWORD admin password of the cluster
+ -t FILENAME, --txt=FILENAME text report filename; default is 'kpi_report.txt'
+ -h FILENAME, --html=FILENAME HTML report filename; default is 'health_report.html'
+ -d --debug
+ -v --verbose Display detailed node level information
+"""
+ sys.exit(2)
def main():
-
+ (cluster, user, password, txtfile, htmlfile, verbose, opts) = parse_opt()
#make snapshot for the current cluster status
- collect_data()
+ retriever = collector.StatsCollector(log)
+ retriever.collect_data(cluster, user, password, opts)
#analyze the snapshot and historic data
- performer = analyzer.StatsAnalyzer()
+ performer = analyzer.StatsAnalyzer(log)
performer.run_analysis()
- performer.run_report()
+ performer.run_report(txtfile, htmlfile, verbose)
if __name__ == '__main__':
main()
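
main() above wires the collector, the analyzer and the report writer together. The same pipeline, sketched programmatically with a placeholder cluster address and credentials:

import logging

import collector
import analyzer

log = logging.getLogger('healthChecker')
log.setLevel(logging.INFO)
log.addHandler(logging.StreamHandler())

# Snapshot the cluster, then analyze the snapshot and write both report
# formats, mirroring main(). Host and credentials are placeholders.
retriever = collector.StatsCollector(log)
retriever.collect_data("127.0.0.1:8091", "Administrator", "password", [])

performer = analyzer.StatsAnalyzer(log)
performer.run_analysis()
performer.run_report("kpi_report.txt", "health_report.html", True)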
237 htmlreport.tmpl
@@ -1,237 +0,0 @@
-<HTML>
-<HEAD><TITLE>sample-report-v2</TITLE>
-<STYLE type="text/css">
-
-body {margin-top: 0px;margin-left: 0px;}
-
-#page_1 {position:relative; overflow: hidden;margin-top: 96px;margin-left: 94px;margin-bottom: 73px;width: 620px;background-image: url(sample-report-v2_images/sample-report-v21.jpg);background-position: 0px 48px;background-size: 604px 704px;background-repeat: no-repeat;}
-
-
-
-#page_2 {position:relative; overflow: hidden;margin-top: 128px;margin-left: 96px;margin-bottom: 107px;width: 607px;background-image: url(sample-report-v2_images/sample-report-v22.jpg);background-position: 60px 362px;background-size: 128px 362px;background-repeat: no-repeat;}
-
-
-
-#page_3 {position:relative; overflow: hidden;margin-top: 63px;margin-left: 120px;margin-bottom: 71px;width: 583px;background-image: url(sample-report-v2_images/sample-report-v23.jpg);background-position: 84px 148px;background-size: 43px 726px;background-repeat: no-repeat;}
-
-
-
-#page_4 {position:relative; overflow: hidden;margin-top: 63px;margin-left: 96px;margin-bottom: 335px;width: 607px;background-image: url(sample-report-v2_images/sample-report-v24.jpg);background-position: 108px 206px;background-size: 23px 19px;background-repeat: no-repeat;}
-
-
-
-.ft0{font: 35px 'Arial';color: #17365d;line-height: 40px;}
-.ft1{font: italic 19px 'Arial';color: #345a8a;line-height: 23px;}
-.ft2{font: bold 16px 'Arial';line-height: 19px;}
-.ft3{font: 16px 'Arial';line-height: 18px;}
-.ft4{font: bold 21px 'Arial';color: #345a8a;line-height: 24px;}
-.ft5{font: bold 17px 'Arial';color: #4f81bd;line-height: 19px;}
-.ft6{font: 16px 'Arial';line-height: 18px;white-space: nowrap;}
-.ft7{font: 13px 'Courier New';line-height: 16px;white-space: nowrap;position: relative; bottom: -18px;}
-.ft8{font: bold 16px 'Arial';line-height: 19px;white-space: nowrap;}
-.ft9{font: 15px 'Arial';line-height: 17px;white-space: nowrap;}
-.ft10{font: 13px 'Courier New';line-height: 15px;}
-.ft11{font: 16px 'Arial';margin-left: 16px;line-height: 17px;}
-.ft12{font: 10px 'Symbol';line-height: 12px;}
-.ft13{font: 16px 'Arial';margin-left: 18px;line-height: 18px;}
-.ft14{font: 16px 'Symbol';line-height: 20px;}
-.ft15{font: bold 16px 'Arial';line-height: 17px;white-space: nowrap;}
-.ft16{font: 16px 'Symbol';line-height: 29px;}
-.ft17{font: 16px 'Arial';line-height: 27px;}
-.ft18{font: 16px 'Courier New';line-height: 27px;}
-.ft19{font: 16px 'Courier New';line-height: 38px;}
-.ft20{font: 16px 'Arial';line-height: 38px;}
-.ft21{font: 15px 'Arial';line-height: 17px;}
-.ft22{font: 16px 'Arial';margin-left: 4px;line-height: 18px;}
-.ft23{font: 10px 'Symbol';line-height: 4px;}
-.ft24{font: 16px 'Arial';margin-left: 18px;line-height: 36px;}
-
-.p0{text-align: left;padding-left: 2px;margin-top: 0px;margin-bottom: 0px;}
-.p1{text-align: left;padding-left: 2px;margin-top: 40px;margin-bottom: 0px;}
-.p2{text-align: left;padding-left: 2px;margin-top: 3px;margin-bottom: 0px;}
-.p3{text-align: left;padding-left: 2px;margin-top: 85px;margin-bottom: 0px;}
-.p4{text-align: left;padding-left: 2px;margin-top: 34px;margin-bottom: 0px;}
-.p5{text-align: justify;padding-left: 74px;margin-top: 0px;margin-bottom: 0px;}
-.p6{text-align: justify;padding-left: 26px;margin-top: 0px;margin-bottom: 0px;}
-.p7{text-align: left;padding-left: 26px;margin-top: 6px;margin-bottom: 0px;}
-.p8{text-align: left;padding-left: 74px;padding-right: 306px;margin-top: 0px;margin-bottom: 0px;text-indent: -47px;}
-.p9{text-align: left;padding-left: 74px;padding-right: 302px;margin-top: 0px;margin-bottom: 0px;}
-.p10{text-align: left;margin-top: 0px;margin-bottom: 0px;}
-.p11{text-align: left;margin-top: 70px;margin-bottom: 0px;}
-.p12{text-align: left;margin-top: 38px;margin-bottom: 0px;}
-.p13{text-align: left;margin-top: 50px;margin-bottom: 0px;}
-.p14{text-align: left;margin-top: 34px;margin-bottom: 0px;}
-.p15{text-align: justify;margin-top: 0px;margin-bottom: 0px;}
-.p16{text-align: justify;padding-left: 24px;margin-top: 6px;margin-bottom: 0px;}
-.p17{text-align: justify;padding-left: 24px;margin-top: 0px;margin-bottom: 0px;}
-.p18{text-align: justify;padding-left: 48px;padding-right: 27px;margin-top: 0px;margin-bottom: 0px;text-indent: -24px;}
-
-.td0{text-align: left;padding-left: 0px;padding-right: 0px;width: 15px;vertical-align: bottom;}
-.td1{text-align: left;padding-left: 9px;padding-right: 0px;width: 251px;vertical-align: bottom;}
-.td2{text-align: left;padding-left: 2px;padding-right: 0px;width: 110px;vertical-align: bottom;}
-.td3{text-align: left;padding-left: 57px;padding-right: 0px;width: 203px;vertical-align: bottom;}
-.td4{text-align: left;padding-left: 0px;padding-right: 0px;width: 112px;vertical-align: bottom;}
-.td5{border-left: #000000 1pt solid;border-right: #000000 1pt solid;border-top: #000000 1pt solid;border-bottom: #000000 1pt solid;text-align: left;padding-left: 7px;padding-right: 0px;width: 140px;vertical-align: bottom;}
-.td6{border-right: #000000 1pt solid;border-top: #000000 1pt solid;border-bottom: #000000 1pt solid;text-align: left;padding-left: 7px;padding-right: 0px;width: 332px;vertical-align: bottom;}
-.td7{border-right: #000000 1pt solid;border-top: #000000 1pt solid;border-bottom: #000000 1pt solid;text-align: left;padding-left: 7px;padding-right: 0px;width: 89px;vertical-align: bottom;}
-.td8{border-left: #000000 1pt solid;border-right: #000000 1pt solid;border-bottom: #000000 1pt solid;text-align: left;padding-left: 7px;padding-right: 0px;width: 140px;vertical-align: bottom;}
-.td9{border-right: #000000 1pt solid;border-bottom: #000000 1pt solid;text-align: left;padding-left: 7px;padding-right: 0px;width: 332px;vertical-align: bottom;}
-.td10{border-right: #000000 1pt solid;border-bottom: #000000 1pt solid;text-align: left;padding-left: 7px;padding-right: 0px;width: 89px;vertical-align: bottom;}
-.td11{border-left: #000000 1pt solid;border-right: #000000 1pt solid;border-top: #000000 1pt solid;border-bottom: #000000 1pt solid;text-align: left;padding-left: 7px;padding-right: 0px;width: 311px;vertical-align: bottom;}
-.td12{border-right: #000000 1pt solid;border-top: #000000 1pt solid;border-bottom: #000000 1pt solid;text-align: left;padding-left: 7px;padding-right: 0px;width: 258px;vertical-align: bottom;}
-.td13{border-left: #000000 1pt solid;border-right: #000000 1pt solid;border-bottom: #000000 1pt solid;text-align: left;padding-left: 7px;padding-right: 0px;width: 311px;vertical-align: bottom;}
-.td14{border-right: #000000 1pt solid;border-bottom: #000000 1pt solid;text-align: left;padding-left: 7px;padding-right: 0px;width: 258px;vertical-align: bottom;}
-.td15{border-left: #000000 1pt solid;border-right: #000000 1pt solid;text-align: left;padding-left: 7px;padding-right: 0px;width: 311px;vertical-align: bottom;}
-.td16{border-right: #000000 1pt solid;text-align: left;padding-left: 7px;padding-right: 0px;width: 258px;vertical-align: bottom;}
-.td17{border-left: #000000 1pt solid;border-right: #000000 1pt solid;border-bottom: #000000 1pt solid;text-align: left;padding-left: 0px;padding-right: 0px;width: 318px;vertical-align: bottom;}
-.td18{border-right: #000000 1pt solid;text-align: left;padding-left: 0px;padding-right: 0px;width: 24px;vertical-align: bottom;}
-.td19{border-right: #000000 1pt solid;border-top: #000000 1pt solid;border-bottom: #000000 1pt solid;text-align: left;padding-left: 7px;padding-right: 0px;width: 311px;vertical-align: bottom;}
-.td20{border-right: #000000 1pt solid;border-bottom: #000000 1pt solid;text-align: left;padding-left: 7px;padding-right: 0px;width: 311px;vertical-align: bottom;}
-.td21{border-right: #000000 1pt solid;text-align: left;padding-left: 7px;padding-right: 0px;width: 311px;vertical-align: bottom;}
-.td22{border-right: #000000 1pt solid;border-bottom: #000000 1pt solid;text-align: left;padding-left: 0px;padding-right: 0px;width: 318px;vertical-align: bottom;}
-.td23{text-align: left;padding-left: 0px;padding-right: 0px;width: 342px;vertical-align: bottom;}
-.td24{text-align: left;padding-left: 0px;padding-right: 0px;width: 265px;vertical-align: bottom;}
-.td25{text-align: left;padding-left: 0px;padding-right: 0px;width: 24px;vertical-align: bottom;}
-.td26{text-align: left;padding-left: 24px;padding-right: 0px;width: 294px;vertical-align: bottom;}
-.td27{border-bottom: #000000 1pt solid;text-align: left;padding-left: 0px;padding-right: 0px;line-height: 17px;width: 318px;vertical-align: bottom;}
-.td28{border-bottom: #000000 1pt solid;text-align: left;padding-left: 0px;padding-right: 0px;line-height: 17px;width: 265px;vertical-align: bottom;}
-
-.tr0{height: 54px;}
-.tr1{height: 19px;}
-.tr2{height: 21px;}
-.tr3{height: 20px;}
-.tr4{height: 41px;}
-.tr5{height: 18px;}
-.tr6{height: 40px;}
-.tr7{height: 55px;}
-.tr8{height: 56px;}
-.tr9{height: 53px;}
-.tr10{height: 74px;}
-.tr11{height: 22px;}
-.tr12{height: 34px;}
-.tr13{height: 89px;}
-
-.t0{width: 387px;margin-left: 26px;margin-top: 5px;font: 13px 'Symbol';line-height: 16px;}
-.t1{width: 582px;margin-left: 26px;margin-top: 51px;font: 16px 'Arial';line-height: 18px;}
-.t2{width: 583px;margin-left: 24px;font: 16px 'Arial';line-height: 18px;}
-.t3{width: 607px;font: 16px 'Arial';line-height: 18px;}
-.t4{width: 583px;font: 16px 'Arial';line-height: 18px;}
-
-</STYLE>
-</HEAD>
-
-<BODY>
-<DIV id="page_1">
-
-<P class="p0"><FONT class="ft0">Couchbase Cluster Health Check Report</FONT></P>
-<P class="p1"><FONT class="ft1">Tool Version: $globals['versions']</FONT></P>
-<P class="p0"><FONT class="ft1">Execution Time: $globals['report_time']</FONT></P>
-<P class="p2"><FONT class="ft2">Overall cluster health: </FONT><FONT class="ft3">$globals.cluster_health</FONT></P>
-<P class="p3"><FONT class="ft4">Section 1 - Couchbase &#8211; Alerts</FONT></P>
-<P class="p4"><FONT class="ft5">Cluster-wide metrics</FONT></P>
-
-<P class="p0"><FONT class="ft2">1. </FONT><FONT class="ft3">Persistence severely behind - Immediate Action Needed</FONT></P>
-<TABLE cellpadding=0 cellspacing=0 class="t0">
-<TR class="tr0">
- <TD class="td0">&#8226;</TD>
- <TD class="td1"><FONT class="ft6">Symptom</FONT><FONT class="ft7">o</FONT></TD>
- <TD class="td2"><FONT class="ft8">1 million items</FONT></TD>
-</TR>
-<TR class="tr1">
- <TD class="td0">&nbsp;</TD>
- <TD class="td3"><FONT class="ft9">Disk write queue has reached</FONT></TD>
- <TD class="td4">&nbsp;</TD>
-</TR>
-</TABLE>
-<P class="p5"><FONT class="ft10">o</FONT><FONT class="ft11">Drain rate has slowed down to</FONT></P>
-<P class="p6"><FONT class="ft12">&#8226;</FONT><FONT class="ft13">Causes - Disk write queue is backed-up, I/O rates unable to sustain write rates</FONT></P>
-<P class="p6"><FONT class="ft12">&#8226;</FONT><FONT class="ft13">Impact - If the node goes down, data will be lost</FONT></P>
-<P class="p6"><FONT class="ft12">&#8226;</FONT><FONT class="ft13">Action -</FONT></P>
-<P class="p4"><FONT class="ft4">Section 2 - Couchbase Cluster Overview</FONT></P>
-<P class="p7"><FONT class="ft14">&#8226; </FONT><FONT class="ft3">Node list</FONT></P>
-<TABLE cellpadding=0 cellspacing=0 class="t1">
-<TR class="tr2">
- <TD class="td5"><FONT class="ft15">Node IP</FONT></TD>
- <TD class="td6"><FONT class="ft15">Couchbase Server Version</FONT></TD>
- <TD class="td7"><FONT class="ft15">Status</FONT></TD>
-</TR>
-#for $node in $node_list["nodeList"]["value"]
-<TR class="tr3">
- <TD class="td8">$node["ip"]</TD>
- <TD class="td9">$node["version"]</TD>
- <TD class="td10">$node["status"]</TD>
-</TR>
-#end for
-</TABLE>
-<P class="p8"><FONT class="ft16">&#8226; </FONT><FONT class="ft17">Total number of nodes in the cluster: $node_list["numNodes"]["value"] </FONT><FONT class="ft18">o </FONT><FONT class="ft17">Number of nodes down: $node_list["numDownNodes"]["value"]</FONT></P>
-<P class="p9"><FONT class="ft19">o </FONT><FONT class="ft20">Number of nodes warming up: $node_list["numWarmupNodes"]["value"] </FONT><FONT class="ft19">o </FONT><FONT class="ft20">Number of nodes failed over: $node_list["numFailedOverNodes"]["value"]</FONT></P>
-</DIV>
-<DIV id="page_2">
-
-<P class="p10"><FONT class="ft5">Cluster-wide metrics</FONT></P>
-<TABLE cellpadding=0 cellspacing=0 class="t2">
-#for $key, $value in $cluster_symptoms.iteritems()
-<TR class="tr4">
- <TD class="td11">$value["description"]</TD>
- <TD class="td12">$value["value"]</TD>
-</TR>
-#end for
-</TABLE>
-<P class="p11"><FONT class="ft5">Bucket metrics</FONT></P>
-#for $bucket in $bucket_list
-<P class="p10"><FONT class="ft2">Bucket name: </FONT><FONT class="ft3">$bucket</FONT></P>
-<P class="p12"><FONT class="ft2">Status &#8211; Attention needed</FONT></P>
-<TABLE cellpadding=0 cellspacing=0 class="t3">
-#for $symptom in $bucket_symptoms[$bucket]
-<TR class="tr6">
- <TD class="td18">&nbsp;</TD>
- <TD class="td19">$symptom["description"]</TD>
- <TD class="td12">$symptom["value"]</TD>
-</TR>
-#end for
-#for $node, $node_values in $bucket_node_symptoms[$bucket].iteritems()
-<TR class="tr1">
- <TD class="td25">&nbsp;</TD>
- <TD class="td26"><FONT class="ft8">Node-level information</FONT></TD>
- <TD class="td24">&nbsp;</TD>
-</TR>
-<TR class="tr8">
- <TD class="td25">&nbsp;</TD>
- <TD class="td26"><FONT class="ft8">IP address: </FONT>$node</TD>
- <TD class="td24">&nbsp;</TD>
-</TR>
-<TR class="tr2">
- <TD class="td25">&nbsp;</TD>
- <TD class="td26"><FONT class="ft8">Status &#8211; OK</FONT></TD>
- <TD class="td24">&nbsp;</TD>
-</TR>
-</DIV>
-<DIV id="page_3">
-<TR class="tr5">
- <TD class="td25">&nbsp;</TD>
- <TD class="td27">&nbsp;</TD>
- <TD class="td28">&nbsp;</TD>
-</TR>
-#for $node_value in $node_values
-<TR class="tr3">
- <TD class="td18">&nbsp;</TD>
- <TD class="td20">$node_value["description"]</TD>
- <TD class="td14">$node_value["value"]</TD>
-</TR>
-</DIV>
-</TABLE>
-#end for
-#end for
-<DIV id="page_4">
-#end for
-
-<P class="p13"><FONT class="ft4">Section 3 - Couchbase &#8211; Warning Indicators</FONT></P>
-<P class="p14"><FONT class="ft5">Cluster-wide metrics</FONT></P>
-<P class="p15"><FONT class="ft21">1.</FONT><FONT class="ft22">Replica Resident ratio approaching alert levels</FONT></P>
-<P class="p16"><FONT class="ft12">&#8226;</FONT><FONT class="ft13">Symptom - Replica Resident ratio decreased over 24 hours to </FONT><FONT class="ft2">0.50</FONT></P>
-<P class="p17"><FONT class="ft12">&#8226;</FONT><FONT class="ft13">Cause -</FONT></P>
-<P class="p18"><FONT class="ft23">&#8226;</FONT><FONT class="ft24">Impact - Failing over a node will slow down cluster severely because a backfill from disk will be required and will result in eviction of active items on node)</FONT></P>
-<P class="p17"><FONT class="ft12">&#8226;</FONT><FONT class="ft13">Action -</FONT></P>
-</DIV>
-</body>
-</HTML>
10 info.py
@@ -21,6 +21,7 @@ def runCmd(self, cmd, server, port,
for (o, a) in opts:
if o == '-d' or o == '--debug':
self.debug = True
+
rest = restclient.RestClient(server, port, {'debug':self.debug})
opts = {'error_msg': 'server-info error'}
@@ -32,5 +33,10 @@ def runCmd(self, cmd, server, port,
for x in ['license', 'licenseValid', 'licenseValidUntil']:
if x in json:
del(json[x])
- #print simplejson.dumps(json, sort_keys=True, indent=2)
- return json
+ if cmd == 'get-server-info':
+ return json
+ elif cmd == 'server-eshell':
+ p = subprocess.call(['erl','-name','ctl@127.0.0.1',
+ '-setcookie',json['otpCookie'],'-hidden','-remsh',json['otpNode']])
+ else:
+ print simplejson.dumps(json, sort_keys=True, indent=2)
25 listservers.py
@@ -23,6 +23,7 @@ def runCmd(self, cmd, server, port,
self.port = port
self.user = user
self.password = password
+
for (o, a) in opts:
if o in ('-o', '--output'):
self.output = a
@@ -33,9 +34,17 @@ def runCmd(self, cmd, server, port,
self.port,
self.user,
self.password)
-
- # obtain dict of nodes. If not dict, is error message
- return self.getNodes(data)
+ if (self.output == 'return'):
+ return self.getNodes(data)
+ elif (self.output == 'json'):
+ print data
+ else:
+ # obtain dict of nodes. If not dict, is error message
+ nodes = self.getNodes(data)
+ if type(nodes) == type(list()):
+ self.printNodes(nodes)
+ else:
+ print self.error
def getData(self, server, port, user, password):
"""
@@ -68,7 +77,11 @@ def printNodes(self, nodes):
if self.cmd == "host-list":
print node['hostname']
else:
+ if node.get('otpNode') is None:
+ raise Exception("could not access node;" +
+ " please check your username (-u) and password (-p)")
+
print '%s %s %s %s' % (node['otpNode'],
- node['hostname'],
- node['status'],
- node['clusterMembership'])
+ node['hostname'],
+ node['status'],
+ node['clusterMembership'])
453 node.py
@@ -1,453 +0,0 @@
-"""
- Implementation for rebalance, add, remove, stop rebalance.
-"""
-
-import time
-import os
-import sys
-import util_cli as util
-import socket
-
-from usage import usage
-from restclient import *
-from listservers import *
-
-# the rest commands and associated URIs for various node operations
-
-rest_cmds = {
- 'rebalance' :'/controller/rebalance',
- 'rebalance-stop' :'/controller/stopRebalance',
- 'rebalance-status' :'/pools/default/rebalanceProgress',
- 'server-add' :'/controller/addNode',
- 'server-readd' :'/controller/reAddNode',
- 'failover' :'/controller/failOver',
- 'cluster-init' :'/settings/web',
- 'node-init' :'/nodes/self/controller/settings',
-}
-
-server_no_remove = [
- 'rebalance-stop',
- 'rebalance-status',
- 'server-add',
- 'server-readd',
- 'failover'
-]
-server_no_add = [
- 'rebalance-stop',
- 'rebalance-status',
- 'failover',
-]
-
-# Map of operations and the HTTP methods used against the REST interface
-
-methods = {
- 'rebalance' :'POST',
- 'rebalance-stop' :'POST',
- 'rebalance-status' :'GET',
- 'eject-server' :'POST',
- 'server-add' :'POST',
- 'server-readd' :'POST',
- 'failover' :'POST',
- 'cluster-init' :'POST',
- 'node-init' :'POST',
-}
-
-# Map of HTTP success code, success message and error message for
-# handling HTTP response properly
-
-class Node:
- def __init__(self):
- self.rest_cmd = rest_cmds['rebalance-status']
- self.method = 'GET'
- self.debug = False
- self.server = ''
- self.port = ''
- self.user = ''
- self.password = ''
- self.params = {}
- self.output = 'standard'
- self.password_new = None
- self.username_new = None
- self.port_new = None
- self.per_node_quota = None
- self.data_path = None
-
- def runCmd(self, cmd, server, port,
- user, password, opts):
- self.rest_cmd = rest_cmds[cmd]
- self.method = methods[cmd]
- self.server = server
- self.port = int(port)
- self.user = user
- self.password = password
-
- servers = self.processOpts(cmd, opts)
-
- if self.debug:
- print "INFO: servers %s" % servers
-
- if cmd == 'server-add' and not servers['add']:
- usage("please list one or more --server-add=HOST[:PORT];"
- " or use -h for more help.")
-
- if cmd == 'server-readd' and not servers['add']:
- usage("please list one or more --server-add=HOST[:PORT];"
- " or use -h for more help.")
-
- if cmd in ('server-add', 'rebalance'):
- self.addServers(servers['add'])
- if cmd == 'rebalance':
- self.rebalance(servers)
-
- if cmd == 'server-readd':
- self.reAddServers(servers)
-
- if cmd == 'rebalance-status':
- output_result = self.rebalanceStatus()
- print output_result
-
- if cmd == 'rebalance-stop':
- output_result = self.rebalanceStop()
- print output_result
-
- if cmd == 'failover':
- if len(servers['failover']) <= 0:
- usage("please list one or more --server-failover=HOST[:PORT];"
- " or use -h for more help.")
-
- self.failover(servers)
-
- if cmd == 'cluster-init':
- self.clusterInit()
-
- if cmd == 'node-init':
- self.nodeInit()
-
-
- def clusterInit(self):
- rest = restclient.RestClient(self.server,
- self.port,
- {'debug':self.debug})
- if self.port_new:
- rest.setParam('port', self.port_new)
- else:
- rest.setParam('port', 'SAME')
- rest.setParam('initStatus', 'done')
- if self.username_new:
- rest.setParam('username', self.username_new)
- else:
- rest.setParam('username', self.user)
- if self.password_new:
- rest.setParam('password', self.password_new)
- else:
- rest.setParam('password', self.password)
-
- opts = {}
- opts['error_msg'] = "unable to init %s" % self.server
- opts['success_msg'] = "init %s" % self.server
-
- output_result = rest.restCmd(self.method,
- self.rest_cmd,
- self.user,
- self.password,
- opts)
- print output_result
-
- # per node quota unfortunately runs against a different location
- if not self.per_node_quota:
- return
-
- if self.port_new:
- self.port = int(self.port_new)
- if self.username_new:
- self.user = self.username_new
- if self.password_new:
- self.password = self.password_new
-
- rest = restclient.RestClient(self.server,
- self.port,
- {'debug':self.debug})
- if self.per_node_quota:
- rest.setParam('memoryQuota', self.per_node_quota)
-
- output_result = rest.restCmd(self.method,
- '/pools/default',
- self.user,
- self.password,
- opts)
- print output_result
-
-
- def nodeInit(self):
- rest = restclient.RestClient(self.server,
- self.port,
- {'debug':self.debug})
- if self.data_path:
- rest.setParam('path', self.data_path)
-
- opts = {}
- opts['error_msg'] = "unable to init %s" % self.server
- opts['success_msg'] = "init %s" % self.server
-
- output_result = rest.restCmd(self.method,
- self.rest_cmd,
- self.user,
- self.password,
- opts)
- print output_result
-
-
- def processOpts(self, cmd, opts):
- """ Set standard opts.
- note: use of a server key keeps optional
- args aligned with server.
- """
- servers = {
- 'add': {},
- 'remove': {},
- 'failover': {}
- }
-
- # don't allow options that don't correspond to given commands
-
- for o, a in opts:
- usage_msg = "option '%s' is not used with command '%s'" % (o, cmd)
-
- if o in ( "-r", "--server-remove"):
- if cmd in server_no_remove:
- usage(usage_msg)
- elif o in ( "-a", "--server-add",
- "--server-add-username",
- "--server-add-password"):
- if cmd in server_no_add:
- usage(usage_msg)
-
- server = None
-
- for o, a in opts:
- if o in ("-a", "--server-add"):
- if a == "self":
- a = socket.gethostbyname(socket.getfqdn())
- server = "%s:%d" % util.hostport(a)
- servers['add'][server] = { 'user':'', 'password':''}
- elif o == "--server-add-username":
- if server is None:
- usage("please specify --server-add"
- " before --server-add-username")
- servers['add'][server]['user'] = a
- elif o == "--server-add-password":
- if server is None:
- usage("please specify --server-add"
- " before --server-add-password")
- servers['add'][server]['password'] = a
- elif o in ( "-r", "--server-remove"):
- server = "%s:%d" % util.hostport(a)
- servers['remove'][server] = True
- server = None
- elif o in ( "--server-failover"):
- server = "%s:%d" % util.hostport(a)
- servers['failover'][server] = True
- server = None
- elif o in ('-o', '--output'):
- if a == 'json':
- self.output = a
- server = None
- elif o in ('-d', '--debug'):
- self.debug = True
- server = None
- elif o == '--cluster-init-password':
- self.password_new = a
- elif o == '--cluster-init-username':
- self.username_new = a
- elif o == '--cluster-init-port':
- self.port_new = a
- elif o == '--cluster-init-ramsize':
- self.per_node_quota = a
- elif o == '--node-init-data-path':
- self.data_path = a
-
- return servers
-
- def addServers(self, servers):
- for server in servers:
- user = servers[server]['user']
- password = servers[server]['password']
- output_result = self.serverAdd(server,
- user,
- password)
- print output_result
-
- def serverAdd(self, add_server, add_with_user, add_with_password):
- rest = restclient.RestClient(self.server,
- self.port,
- {'debug':self.debug})
- rest.setParam('hostname', add_server)
- if add_with_user and add_with_password:
- rest.setParam('user', add_with_user)
- rest.setParam('password', add_with_password)
-
- opts = {}
- opts['error_msg'] = "unable to server-add %s" % add_server
- opts['success_msg'] = "server-add %s" % add_server
-
- output_result = rest.restCmd('POST',
- rest_cmds['server-add'],
- self.user,
- self.password,
- opts)
- return output_result
-
- def reAddServers(self, servers):
- known_otps, eject_otps, failover_otps, readd_otps = \
- self.getNodeOtps(to_readd=servers['add'])
-
- for readd_otp in readd_otps:
- rest = restclient.RestClient(self.server,
- self.port,
- {'debug':self.debug})
- rest.setParam('otpNode', readd_otp)
-
- opts = {}
- opts['error_msg'] = "unable to re-add %s" % readd_otp
- opts['success_msg'] = "re-add %s" % readd_otp
-
- output_result = rest.restCmd('POST',
- rest_cmds['server-readd'],
- self.user,
- self.password,
- opts)
- print output_result
-
- def getNodeOtps(self, to_eject=[], to_failover=[], to_readd=[]):
- """ Convert known nodes into otp node id's.
- """
- listservers = ListServers()
- known_nodes_list = listservers.getNodes(
- listservers.getData(self.server,
- self.port,
- self.user,
- self.password))
- known_otps = []
- eject_otps = []
- failover_otps = []
- readd_otps = []
-
- for node in known_nodes_list:
- known_otps.append(node['otpNode'])
- if node['hostname'] in to_eject:
- eject_otps.append(node['otpNode'])
- if node['hostname'] in to_failover:
- failover_otps.append(node['otpNode'])
- if node['hostname'] in to_readd:
- readd_otps.append(node['otpNode'])
-
- return (known_otps, eject_otps, failover_otps, readd_otps)
-
- def rebalance(self, servers):
- known_otps, eject_otps, failover_otps, readd_otps = \
- self.getNodeOtps(to_eject=servers['remove'])
-
- rest = restclient.RestClient(self.server,
- self.port,
- {'debug':self.debug})
- rest.setParam('knownNodes', ','.join(known_otps))
- rest.setParam('ejectedNodes', ','.join(eject_otps))
-
- opts = {}
- opts['success_msg'] = 'rebalanced cluster'
- opts['error_msg'] = 'unable to rebalance cluster'
-
- output_result = rest.restCmd('POST',
- rest_cmds['rebalance'],
- self.user,
- self.password,
- opts)
- if self.debug:
- print "INFO: rebalance started: %s" % output_result
-
- sys.stdout = os.fdopen(sys.stdout.fileno(), 'w', 0)
-
- print "INFO: rebalancing",
-
- status, error = self.rebalanceStatus(prefix='\n')
- while status == 'running':
- print ".",
- time.sleep(0.5)
- try:
- status, error = self.rebalanceStatus(prefix='\n')
- except socket.error:
- time.sleep(2)
- status, error = self.rebalanceStatus(prefix='\n')
-
- if error:
- print '\n' + error
- sys.exit(1)
- else:
- print '\n' + output_result
-
- def rebalanceStatus(self, prefix=''):
- rest = restclient.RestClient(self.server,
- self.port,
- {'debug':self.debug})
- opts = { 'error_msg':'unable to obtain rebalance status'}
-
- output_result = rest.restCmd('GET',
- rest_cmds['rebalance-status'],
- self.user,
- self.password,
- opts)
-
- json = rest.getJson(output_result)
- if type(json) == type(list()):
- print prefix + ("ERROR: %s" % json[0])
- sys.exit(1)
-
- if 'errorMessage' in json:
- error_message = json['errorMessage']
- else:
- error_message = None
-
- return json['status'],error_message
-
- def rebalanceStop(self):
- rest = restclient.RestClient(self.server,
- self.port,
- {'debug':self.debug})
-
- opts = {}
- opts['success_msg'] = 'rebalance cluster stopped'
- opts['error_msg'] = 'unable to stop rebalance'
-
- output_result = rest.restCmd('POST',
- rest_cmds['rebalance-stop'],
- self.user,
- self.password,
- opts)
- return output_result
-
-
- def failover(self, servers):
- known_otps, eject_otps, failover_otps, readd_otps = \
- self.getNodeOtps(to_failover=servers['failover'])
-
- if len(failover_otps) <= 0:
- usage("specified servers are not part of the cluster: %s" %
- servers['failover'].keys())
-
- for failover_otp in failover_otps:
- rest = restclient.RestClient(self.server,
- self.port,
- {'debug':self.debug})
- rest.setParam('otpNode', failover_otp)
-
- opts = {}
- opts['error_msg'] = "unable to failover %s" % failover_otp
- opts['success_msg'] = "failover %s" % failover_otp
-
- output_result = rest.restCmd('POST',
- rest_cmds['failover'],
- self.user,
- self.password,
- opts)
- print output_result
-
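Note: the cluster-management plumbing removed above (cluster-init, node-init, server add/remove/re-add, rebalance, failover) is mostly straight REST calls; the one non-trivial part is the rebalance loop, which fires the POST and then polls the status endpoint every half second until it leaves the 'running' state, retrying once after a short back-off if the socket drops. A minimal sketch of that polling pattern, assuming a hypothetical get_status() callable that returns a (status, error) pair the way rebalanceStatus did:

    import sys
    import time
    import socket

    def wait_for_rebalance(get_status, poll_interval=0.5, retry_delay=2):
        """Poll get_status() until the rebalance leaves the 'running' state.

        get_status is a hypothetical callable returning (status, error),
        mirroring the (status, error_message) pair of rebalanceStatus above.
        """
        status, error = get_status()
        while status == 'running':
            sys.stdout.write('.')
            sys.stdout.flush()
            time.sleep(poll_interval)
            try:
                status, error = get_status()
            except socket.error:
                # transient connection drop: back off once, then ask again
                time.sleep(retry_delay)
                status, error = get_status()
        return status, error

The unbuffered-stdout trick in the removed code (os.fdopen(sys.stdout.fileno(), 'w', 0)) only exists so the progress dots appear immediately; the explicit flush above achieves the same effect without reopening the stream.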
View
8 node_map.py
@@ -0,0 +1,8 @@
+address_map = {
+"10.12.87.41" : "23.20.45.23",
+"10.12.95.171" : "107.22.84.123",
+"10.194.169.187" : "107.22.70.136",
+"10.12.98.26" : "23.20.50.242",
+"10.144.64.38" : "50.17.157.98",
+"10.12.97.189" : "107.22.11.161",
+}
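node_map.py is a static lookup table from internal (private) node addresses to their public counterparts. Nothing in this commit shows how address_map is consumed, so the helper below is only an illustration of the obvious use: translating a node's reported host before it lands in the report, falling back to the original address when no mapping exists.

    import node_map

    def public_address(hostport):
        """Hypothetical helper: map '10.x.x.x:8091' style addresses to public IPs."""
        host, _, port = hostport.partition(':')
        host = node_map.address_map.get(host, host)
        return '%s:%s' % (host, port) if port else host

    # e.g. public_address('10.12.87.41:8091') -> '23.20.45.23:8091'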
View
306 node_stats.py
@@ -1,30 +1,83 @@
import stats_buffer
-import util
-
-class ExecSQL:
- def run(self, accessor, stmt):
- result = accessor.execute(stmt)
- return result[0]
+import util_cli as util
class NodeList:
def run(self, accessor):
result = []
- nodelist = accessor.execute("SELECT host, port, version, os, status FROM ServerNode", True)
- for node in nodelist:
- result.append({"ip": node[0], "port": node[1], "version" :node[2], "os": node[3], "status" : node[4]})
-
+ for node, node_info in stats_buffer.nodes.iteritems():
+ result.append({"ip": node, "port": node_info['port'], "version" :node_info['version'], "os": node_info['os'], "status" :node_info['status']})
+ return result
+
+class NumNodes:
+ def run(self, accessor):
+ result = []
+ result.append(len(stats_buffer.nodes))
+ return result
+
+class NumDownNodes:
+ def run(self, accessor):
+ result = []
+ result.append(len(filter(lambda (a, b): b["status"]=="down", stats_buffer.nodes.items())))
+ return result
+
+class NumWarmupNodes:
+ def run(self, accessor):
+ result = []
+ result.append(len(filter(lambda (a, b): b["status"]=="warmup", stats_buffer.nodes.items())))
+ return result
+
+class NumFailOverNodes:
+ def run(self, accessor):
+ result = []
+ result.append(len(filter(lambda (a, b): b["clusterMembership"]!="active", stats_buffer.nodes.items())))
return result
class BucketList:
def run(self, accessor):
result = []
- bucketlist = accessor.execute("SELECT name FROM Bucket", True)
- for bucket in bucketlist:
- result.append({"name": bucket[0]})
-
+ for bucket in stats_buffer.bucket_info.keys():
+ result.append({"name": bucket})
+
return result
+class NodeStorageStats:
+ def run(self, accessor):
+ result = []
+ for node, values in stats_buffer.nodes.iteritems():
+ if values["StorageInfo"].has_key("hdd"):
+ result.append({"ip": values["host"],
+ "port": values["port"],
+ "type" : "hdd",
+ "free": util.size_label(values["StorageInfo"]["hdd"]["free"]),
+ "quotaTotal" : util.size_label(values["StorageInfo"]["hdd"]["quotaTotal"]),
+ "used" : util.size_label(values["StorageInfo"]["hdd"]["used"]),
+ "usedByData" : util.size_label(values["StorageInfo"]["hdd"]["usedByData"]),
+ "total" : util.size_label(values["StorageInfo"]["hdd"]["total"])})
+ if values["StorageInfo"].has_key("ram"):
+ result.append({"ip": values["host"],
+ "port": values["port"],
+ "type" : "ram",
+ "quotaTotal" : util.size_label(values["StorageInfo"]["ram"]["quotaTotal"]),
+ "used" : util.size_label(values["StorageInfo"]["ram"]["used"]),
+ "usedByData" : util.size_label(values["StorageInfo"]["ram"]["usedByData"]),
+ "total" : util.size_label(values["StorageInfo"]["ram"]["total"])})
+ return result
+
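NodeStorageStats formats every hdd/ram figure through util_cli.size_label, which is not part of this hunk; the sketch below is only an assumption of the kind of base-1024 formatter it stands for.

    def size_label(value):
        """Assumed shape of util_cli.size_label: humanize a raw byte count."""
        units = ['B', 'KB', 'MB', 'GB', 'TB', 'PB']
        size = float(value)
        for unit in units:
            if size < 1024.0 or unit == units[-1]:
                return '%.2f %s' % (size, unit)
            size /= 1024.0

    # e.g. size_label(5242880) -> '5.00 MB'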
+class NodeSystemStats:
+ def run(self, accessor):
+ result = []
+ for node, values in stats_buffer.nodes.iteritems():
+ result.append({"ip": values["host"],
+ "port": values["port"],
+ "cpuUtilization" :util.pretty_float(values["systemStats"]["cpu_utilization_rate"]),
+ "swapTotal": util.size_label(values["systemStats"]["swap_total"]),
+ "swapUsed" : util.size_label(values["systemStats"]["swap_used"]),