From 028faa87b815e7ccebf6223f5a14c8a625bf42dc Mon Sep 17 00:00:00 2001 From: Bin Cui Date: Fri, 15 Jun 2012 14:15:31 -0700 Subject: [PATCH] Able to specify a specific bucket or all bucket By default, all bucket stats will be analyzed. With -b option, a specific bucket can be used as a filter for generating report --- cbhealthchecker | 22 +++++---- collector.py | 129 +++++++++++++++++++++++++----------------------- 2 files changed, 79 insertions(+), 72 deletions(-) diff --git a/cbhealthchecker b/cbhealthchecker index 7657d23..6b22e21 100755 --- a/cbhealthchecker +++ b/cbhealthchecker @@ -20,11 +20,12 @@ log.setLevel(logging.INFO) log.addHandler(logging.StreamHandler()) def parse_opt(): - (cluster, user, password, txtfile, htmlfile, verbose) = ('', '', '', 'clusterstats.txt', 'health_report.html', False) + (bucket, cluster, user, password, txtfile, htmlfile, verbose) = ('all', '', '', '', 'clusterstats.txt', 'health_report.html', False) try: (opts, _args) = getopt.getopt(sys.argv[1:], - 'c:dvp:u:o:', [ + 'b:c:dvp:u:o:', [ + 'bucket=', 'cluster=', 'debug', 'verbose', @@ -36,6 +37,8 @@ def parse_opt(): usage(err) for (opt, arg) in opts: + if opt in ('-b', '--bucket'): + bucket = arg if opt in ('-c', '--cluster'): cluster = arg if opt in ('-u', '--user'): @@ -51,7 +54,7 @@ def parse_opt(): if not cluster: usage() - return (cluster, user, password, txtfile, htmlfile, verbose, opts) + return (bucket, cluster, user, password, txtfile, htmlfile, verbose, opts) def usage(error_msg=''): if error_msg: @@ -66,21 +69,22 @@ CLUSTER: --cluster=HOST[:PORT] or -c HOST[:PORT] Default port is 8091 USERNAME: - -u USERNAME, --user=USERNAME admin username of the cluster + -u USERNAME, --user=USERNAME admin username of the cluster PASSWORD: - -p PASSWORD, --password=PASSWORD admin password of the cluster + -p PASSWORD, --password=PASSWORD admin password of the cluster OPTIONS: - -o FILENAME, --output=FILENAME Default output filename is 'kpi_report.txt' + -b BUCKETNAME, --bucket=BUCKETNAME Specific bucket name. Default is all buckets + -o FILENAME, --output=FILENAME Default output filename is 'kpi_report.txt' -d --debug - -v --verbose Display detailed node level information + -v --verbose Display detailed node level information """ sys.exit(2) def main(): - (cluster, user, password, txtfile, htmlfile, verbose, opts) = parse_opt() + (bucket, cluster, user, password, txtfile, htmlfile, verbose, opts) = parse_opt() #make snapshot for the current cluster status retriever = collector.StatsCollector(log) - retriever.collect_data(cluster, user, password, opts) + retriever.collect_data(bucket, cluster, user, password, opts) #analyze the snapshot and historic data performer = analyzer.StatsAnalyzer(log) diff --git a/collector.py b/collector.py index 1585d11..3f6f12f 100755 --- a/collector.py +++ b/collector.py @@ -116,35 +116,36 @@ def get_hostlist(self, server, port, user, password, opts): traceback.print_exc() sys.exit(1) - def get_bucketlist(self, server, port, user, password, opts): + def get_bucketlist(self, server, port, user, password, bucketname, opts): try: bucketlist = buckets.Buckets().runCmd('bucket-get', server, port, user, password, opts) for bucket in bucketlist: bucket_name = bucket['name'] - bucketinfo = {} - bucketinfo['name'] = bucket_name - bucketinfo['bucketType'] = bucket['bucketType'] - bucketinfo['authType'] = bucket['authType'] - bucketinfo['saslPassword'] = bucket['saslPassword'] - bucketinfo['numReplica'] = bucket['replicaNumber'] - bucketinfo['ramQuota'] = bucket['quota']['ram'] - bucketinfo['master'] = server - - bucketStats = bucket['basicStats'] - bucketinfo['bucketStats'] = {} - bucketinfo['bucketStats']['diskUsed'] = bucketStats['diskUsed'] - bucketinfo['bucketStats']['memUsed'] = bucketStats['memUsed'] - bucketinfo['bucketStats']['diskFetches'] = bucketStats['diskFetches'] - bucketinfo['bucketStats']['quotaPercentUsed'] = bucketStats['quotaPercentUsed'] - bucketinfo['bucketStats']['opsPerSec'] = bucketStats['opsPerSec'] - bucketinfo['bucketStats']['itemCount'] = bucketStats['itemCount'] - - stats_buffer.bucket_info[bucket_name] = bucketinfo - - # get bucket related stats - c = buckets.BucketStats(bucket_name) - json = c.runCmd('bucket-stats', server, port, user, password, opts) - stats_buffer.buckets_summary[bucket_name] = json + if bucketname == 'all' or bucket_name == bucketname: + bucketinfo = {} + bucketinfo['name'] = bucket_name + bucketinfo['bucketType'] = bucket['bucketType'] + bucketinfo['authType'] = bucket['authType'] + bucketinfo['saslPassword'] = bucket['saslPassword'] + bucketinfo['numReplica'] = bucket['replicaNumber'] + bucketinfo['ramQuota'] = bucket['quota']['ram'] + bucketinfo['master'] = server + + bucketStats = bucket['basicStats'] + bucketinfo['bucketStats'] = {} + bucketinfo['bucketStats']['diskUsed'] = bucketStats['diskUsed'] + bucketinfo['bucketStats']['memUsed'] = bucketStats['memUsed'] + bucketinfo['bucketStats']['diskFetches'] = bucketStats['diskFetches'] + bucketinfo['bucketStats']['quotaPercentUsed'] = bucketStats['quotaPercentUsed'] + bucketinfo['bucketStats']['opsPerSec'] = bucketStats['opsPerSec'] + bucketinfo['bucketStats']['itemCount'] = bucketStats['itemCount'] + + stats_buffer.bucket_info[bucket_name] = bucketinfo + + # get bucket related stats + c = buckets.BucketStats(bucket_name) + json = c.runCmd('bucket-stats', server, port, user, password, opts) + stats_buffer.buckets_summary[bucket_name] = json return bucketlist except Exception, err: traceback.print_exc() @@ -181,45 +182,47 @@ def get_mc_stats_per_node(self, mc, stats): except Exception, err: traceback.print_exc() - def get_mc_stats(self, server, bucketlist, nodes): + def get_mc_stats(self, server, bucketlist, nodes, bucketname): for bucket in bucketlist: bucket_name = bucket['name'] - self.log.info("bucket: %s" % bucket_name) - stats_buffer.node_stats[bucket_name] = {} - for node in nodes: - (node_server, node_port) = util.hostport(node['hostname']) - self.log.info(" node: %s %s" % (node_server, node['ports']['direct'])) - if node['status'] == 'healthy': - try: - stats = {} - mc = mc_bin_client.MemcachedClient(node_server, node['ports']['direct']) - if bucket["name"] != "Default": - mc.sasl_auth_plain(bucket_name.encode("utf8"), bucket["saslPassword"].encode("utf8")) - self.get_mc_stats_per_node(mc, stats) - stats_buffer.node_stats[bucket_name][node['hostname']] = stats - except Exception, err: - stats_buffer.nodes[node['hostname']]['status'] = 'down' - traceback.print_exc() - - def get_ns_stats(self, bucketlist, server, port, user, password, opts): + if bucketname == 'all' or bucket_name == bucketname: + self.log.info("bucket: %s" % bucket_name) + stats_buffer.node_stats[bucket_name] = {} + for node in nodes: + (node_server, node_port) = util.hostport(node['hostname']) + self.log.info(" node: %s %s" % (node_server, node['ports']['direct'])) + if node['status'] == 'healthy': + try: + stats = {} + mc = mc_bin_client.MemcachedClient(node_server, node['ports']['direct']) + if bucket["name"] != "Default": + mc.sasl_auth_plain(bucket_name.encode("utf8"), bucket["saslPassword"].encode("utf8")) + self.get_mc_stats_per_node(mc, stats) + stats_buffer.node_stats[bucket_name][node['hostname']] = stats + except Exception, err: + stats_buffer.nodes[node['hostname']]['status'] = 'down' + traceback.print_exc() + + def get_ns_stats(self, bucketlist, server, port, user, password, bucketname, opts): for bucket in bucketlist: bucket_name = bucket['name'] - stats_buffer.buckets[bucket_name] = copy.deepcopy(stats_buffer.stats) - cmd = 'bucket-node-stats' - for scale, stat_set in stats_buffer.buckets[bucket_name].iteritems(): - for stat in stat_set.iterkeys(): - try : - sys.stderr.write('.') - self.log.debug("retrieve: %s" % stat) - c = buckets.BucketNodeStats(bucket_name, stat, scale) - - json = c.runCmd('bucket-node-stats', server, port, user, password, opts) - stats_buffer.buckets[bucket_name][scale][stat] = json - except Exception, err: - traceback.print_exc() - sys.stderr.write('\n') - - def collect_data(self,cluster, user, password, opts): + if bucketname == 'all' or bucket_name == bucketname: + stats_buffer.buckets[bucket_name] = copy.deepcopy(stats_buffer.stats) + cmd = 'bucket-node-stats' + for scale, stat_set in stats_buffer.buckets[bucket_name].iteritems(): + for stat in stat_set.iterkeys(): + try : + sys.stderr.write('.') + self.log.debug("retrieve: %s" % stat) + c = buckets.BucketNodeStats(bucket_name, stat, scale) + + json = c.runCmd('bucket-node-stats', server, port, user, password, opts) + stats_buffer.buckets[bucket_name][scale][stat] = json + except Exception, err: + traceback.print_exc() + sys.stderr.write('\n') + + def collect_data(self, bucketname, cluster, user, password, opts): server, port = util.hostport(cluster) #get node list info @@ -227,14 +230,14 @@ def collect_data(self,cluster, user, password, opts): self.log.debug(util.pretty_print(stats_buffer.nodes)) #get bucket list - bucketlist = self.get_bucketlist(server, port, user, password, opts) + bucketlist = self.get_bucketlist(server, port, user, password, bucketname, opts) self.log.debug(util.pretty_print(stats_buffer.bucket_info)) #get stats from ep-engine - self.get_mc_stats(server, bucketlist, nodes) + self.get_mc_stats(server, bucketlist, nodes, bucketname) self.log.debug(util.pretty_print(stats_buffer.node_stats)) #get stats from ns-server - self.get_ns_stats(bucketlist, server, port, user, password, opts) + self.get_ns_stats(bucketlist, server, port, user, password, bucketname, opts) self.log.debug(util.pretty_print(stats_buffer.buckets))