diff --git a/Makefile.am b/Makefile.am index f61f8c3..fdd7a1a 100755 --- a/Makefile.am +++ b/Makefile.am @@ -4,9 +4,9 @@ default: pythonlibdir=$(libdir)/python -pythonlib_SCRIPTS= cbworkloadgen +pythonlib_SCRIPTS= healthChecker -PYTHON_TOOLS= wrapper/cbworkloadgen +PYTHON_TOOLS= wrapper/healthChecker ${PYTHON_TOOLS}: wrapper/wrapper cp $< $@ diff --git a/README b/README deleted file mode 100644 index e69de29..0000000 diff --git a/analyzer.py b/analyzer.py index a90b5b7..3a15183 100755 --- a/analyzer.py +++ b/analyzer.py @@ -1,12 +1,11 @@ +import sys import datetime -import dbaccessor -import util +import logging +import util_cli as util import cluster_stats -import bucket_stats import diskqueue_stats import node_stats - import stats_buffer from Cheetah.Template import Template @@ -29,68 +28,45 @@ cluster_symptoms = {} bucket_symptoms = {} bucket_node_symptoms = {} +bucket_node_status = {} node_symptoms = {} indicator_error = {} indicator_warn = {} node_disparate = {} -def format_output(counter, result): - if len(result) == 1: - if counter.has_key("unit") and counter["unit"] == "GB": - return util.pretty_float(result[0]) - else: - return result[0] - else: - return result - class StatsAnalyzer: - def __init__(self): - self.accessor = dbaccessor.DbAccesor() + def __init__(self, log): + self.log = log def run_analysis(self): - self.accessor.connect_db() - self.accessor.browse_db() for bucket in stats_buffer.buckets.iterkeys(): bucket_list.append(bucket) bucket_symptoms[bucket] = [] bucket_node_symptoms[bucket] = {} + bucket_node_status[bucket] = {} for capsule, package_name in capsules: for pill in capsule: - #print pill['name'] + self.log.debug(pill['name']) for counter in pill['ingredients']: - if counter['type'] == 'SQL': - result = eval("{0}.{1}().run(self.accessor, \"{2}\")".format(package_name, counter['code'], counter['stmt'])) - elif counter['type'] == 'pythonSQL': - result = eval("{0}.{1}().run(self.accessor)".format(package_name, counter['code'])) - elif counter['type'] == 'python': - result = eval("{0}.{1}().run(counter)".format(package_name, counter['code'])) - - #if counter.has_key("unit") and counter["unit"] == "GB": - # util.pretty_print({counter["description"] : result}) - #else: - # util.pretty_print({counter["description"] : result}) + result = eval("{0}.{1}().run(counter)".format(package_name, counter['code'])) - #print counter + self.log.debug(counter) if pill.has_key("clusterwise") and pill["clusterwise"] : if isinstance(result, dict): if result.has_key("cluster"): - if counter.has_key("unit") and counter["unit"] == "GB": - cluster_symptoms[counter["name"]] = {"description" : counter["description"], "value": util.humanize_bytes(result["cluster"])} - else: - cluster_symptoms[counter["name"]] = {"description" : counter["description"], "value":result["cluster"]} + cluster_symptoms[counter["name"]] = {"description" : counter["description"], "value":result["cluster"]} else: cluster_symptoms[counter["name"]] = {"description" : counter["description"], "value":result} else: cluster_symptoms[counter["name"]] = {"description" : counter["description"], "value":result} if pill.has_key("perBucket") and pill["perBucket"] : - #bucket_symptoms[counter["name"]] = {"description" : counter["description"], "value":result} for bucket, values in result.iteritems(): if bucket == "cluster": continue for val in values: - if val[0] == "variance": + if val[0] == "variance" or val[0] == "error": continue elif val[0] == "total": bucket_symptoms[bucket].append({"description" : counter["description"], 
"value" : values[-1][1]}) @@ -104,14 +80,49 @@ def run_analysis(self): if pill.has_key("nodewise") and pill["nodewise"]: node_list[counter["name"]] = {"description" : counter["description"], "value":result} - if pill.has_key("indicator") and pill["indicator"] : + if pill.has_key("indicator"): if len(result) > 0: for bucket,values in result.iteritems(): - if values.has_key("error"): - indicator_error[counter["name"]] = {"description" : counter["description"], "bucket": bucket, "value":values["error"]} - if values.has_key("warn"): - indicator_warn[counter["name"]] = {"description" : counter["description"], "bucket": bucket, "value":values["warn"]} - + if type(values) is dict: + if values.has_key("error"): + indicator_error[counter["name"]] = {"description" : counter["description"], + "bucket": bucket, + "value":values["error"], + "cause" : pill["indicator"]["cause"], + "impact" : pill["indicator"]["impact"], + "action" : pill["indicator"]["action"], + } + for val in values["error"]: + bucket_node_status[bucket][val["node"]] = "error" + + if values.has_key("warn"): + indicator_warn[counter["name"]] = {"description" : counter["description"], + "bucket": bucket, + "value":values["warn"], + "cause" : pill["indicator"]["cause"], + "impact" : pill["indicator"]["impact"], + "action" : pill["indicator"]["action"], + } + elif type(values) is list: + for val in values: + if val[0] == "error": + indicator_error[counter["name"]] = {"description" : counter["description"], + "bucket": bucket, + "value":val[1], + "cause" : pill["indicator"]["cause"], + "impact" : pill["indicator"]["impact"], + "action" : pill["indicator"]["action"], + } + for val in values["error"]: + bucket_node_status[bucket][val["node"]] = "error" + elif val[0] == "warn": + indicator_warn[counter["name"]] = {"description" : counter["description"], + "bucket": bucket, + "value":val[1], + "cause" : pill["indicator"]["cause"], + "impact" : pill["indicator"]["impact"], + "action" : pill["indicator"]["action"], + } if pill.has_key("nodeDisparate") and pill["nodeDisparate"] : for bucket,values in result.iteritems(): if bucket == "cluster": @@ -121,42 +132,49 @@ def run_analysis(self): continue; if val[0] == "variance" and val[1] != 0: node_disparate[counter["name"]] = {"description" : counter["description"], "bucket": bucket, "value":values} - - self.accessor.close() - self.accessor.remove_db() - - def run_report(self): + + if len(indicator_error) > 0: + globals["cluster_health"] = "error" + elif len(indicator_warn) > 0: + globals["cluster_health"] = "warning" + + def run_report(self, txtfile, htmlfile, verbose): dict = { "globals" : globals, "cluster_symptoms" : cluster_symptoms, "bucket_symptoms" : bucket_symptoms, "bucket_node_symptoms" : bucket_node_symptoms, + "bucket_node_status" : bucket_node_status, "node_symptoms" : node_symptoms, "node_list" : node_list, "bucket_list" : bucket_list, "indicator_warn" : indicator_warn, "indicator_error" : indicator_error, + "verbose" : verbose, } - debug = True - if debug: - print "Nodelist Overview" - util.pretty_print(node_list) + f = open(txtfile, 'w') + report = {} + report["Report Time"] = globals["report_time"].strftime("%Y-%m-%d %H:%M:%S") + + report["Nodelist Overview"] = node_list - print "Cluster Overview" - util.pretty_print(cluster_symptoms) - - print "Bucket Metrics" - util.pretty_print(bucket_symptoms) - - print "Bucket Node Metrics" - util.pretty_print(bucket_node_symptoms) + report["Cluster Overview"] = cluster_symptoms + + report["Bucket Metrics"] = bucket_symptoms + + report["Bucket 
Node Metrics"] = bucket_node_symptoms - print "Key indicators" - util.pretty_print(indicator_error) - util.pretty_print(indicator_warn) + report["Key indicators"] = (indicator_error, indicator_warn) - print "Node disparate" - util.pretty_print(node_disparate) - #print Template(file="report-htm.tmpl", searchList=[dict]) \ No newline at end of file + report["Node disparate"] = node_disparate + + print >> f, util.pretty_print(report) + f.close() + + f = open(htmlfile, 'w') + print >> f, Template(file="report-htm.tmpl", searchList=[dict]) + f.close() + + sys.stderr.write("\nThis run finishes successfully. Please find output result at " + htmlfile) \ No newline at end of file diff --git a/buckets.py b/buckets.py index 7ab5c28..2bfa108 100755 --- a/buckets.py +++ b/buckets.py @@ -7,11 +7,21 @@ rest_cmds = { 'bucket-list': '/pools/default/buckets', + 'bucket-flush': '/pools/default/buckets/', + 'bucket-delete': '/pools/default/buckets/', + 'bucket-create': '/pools/default/buckets/', + 'bucket-edit': '/pools/default/buckets/', + 'bucket-get': '/pools/default/buckets', 'bucket-stats': '/pools/default/buckets/{0}/stats?zoom=hour', 'bucket-node-stats': '/pools/default/buckets/{0}/stats/{1}?zoom={2}' } methods = { 'bucket-list': 'GET', + 'bucket-delete': 'DELETE', + 'bucket-create': 'POST', + 'bucket-edit': 'POST', + 'bucket-flush': 'POST', + 'bucket-get': 'GET', 'bucket-stats': 'GET', 'bucket-node-stats': 'GET', } @@ -58,13 +68,68 @@ def runCmd(self, cmd, server, port, # get the parameters straight + if cmd in ('bucket-create', 'bucket-edit'): + if bucketname: + rest.setParam('name', bucketname) + if bucketname == "default": + if bucketport and bucketport != "11211": + usage("default bucket must be on port 11211.") + if bucketpassword: + usage("default bucket should only have empty password.") + authtype = 'sasl' + else: + if bucketport == "11211": + authtype = 'sasl' + else: + authtype = 'none' + if bucketpassword: + usage("a sasl bucket is supported only on port 11211.") + if buckettype: + rest.setParam('bucketType', buckettype) + if authtype: + rest.setParam('authType', authtype) + if bucketport: + rest.setParam('proxyPort', bucketport) + if bucketpassword: + rest.setParam('saslPassword', bucketpassword) + if bucketramsize: + rest.setParam('ramQuotaMB', bucketramsize) + if bucketreplication: + rest.setParam('replicaNumber', bucketreplication) + if cmd in ('bucket-delete', 'bucket-flush', 'bucket-edit'): + self.rest_cmd = self.rest_cmd + bucketname + if cmd == 'bucket-flush': + self.rest_cmd = self.rest_cmd + '/controller/doFlush' + opts = {} - opts['error_msg'] = "unable to %s" % cmd + opts['error_msg'] = "unable to %s; please check your username (-u) and password (-p);" % cmd opts['success_msg'] = "%s" % cmd data = rest.restCmd(methods[cmd], self.rest_cmd, self.user, self.password, opts) - return rest.getJson(data) + if cmd in("bucket-get", "bucket-stats", "bucket-node-stats"): + return rest.getJson(data) + elif cmd == "bucket-list": + if output == 'json': + print data + else: + json = rest.getJson(data) + for bucket in json: + print '%s' % bucket['name'] + print ' bucketType: %s' % bucket['bucketType'] + print ' authType: %s' % bucket['authType'] + if bucket['authType'] == "sasl": + print ' saslPassword: %s' % bucket['saslPassword'] + else: + print ' proxyPort: %s' % bucket['proxyPort'] + print ' numReplicas: %s' % bucket['replicaNumber'] + print ' ramQuota: %s' % bucket['quota']['ram'] + print ' ramUsed: %s' % bucket['basicStats']['memUsed'] + else: + if output == 'json': + print 
rest.jsonMessage(data) + else: + print data class BucketStats: def __init__(self, bucket_name): @@ -102,4 +167,3 @@ def runCmd(self, cmd, server, port, data = rest.restCmd(methods[cmd], self.rest_cmd, user, password, opts) return rest.getJson(data) - diff --git a/cluster_stats.py b/cluster_stats.py index e7ee4da..5d476c2 100755 --- a/cluster_stats.py +++ b/cluster_stats.py @@ -1,18 +1,22 @@ -import dbaccessor import stats_buffer -import util +import util_cli as util -class ExecSQL: - def run(self, accessor, stmt): - result = accessor.execute(stmt) - return result[0] +class BucketSummary: + def run(self, accessor): + return stats_buffer.bucket_info class DGMRatio: def run(self, accessor): - hdd = accessor.execute("SELECT sum(usedbyData) FROM StorageInfo WHERE type='hdd'") - ram = accessor.execute("SELECT sum(usedbyData) FROM StorageInfo WHERE type='ram'") - if ram[0] > 0: - ratio = hdd[0] / ram[0] + result = [] + hdd_total = 0 + ram_total = 0 + for node, nodeinfo in stats_buffer.nodes.iteritems(): + if nodeinfo["StorageInfo"].has_key("hdd"): + hdd_total += nodeinfo['StorageInfo']['hdd']['usedByData'] + if nodeinfo["StorageInfo"].has_key("ram"): + ram_total += nodeinfo['StorageInfo']['ram']['usedByData'] + if ram_total > 0: + ratio = hdd_total / ram_total else: ratio = 0 return ratio @@ -26,6 +30,7 @@ def run(self, accessor): "curr_items": [], "vb_replica_curr_items": [], } + num_error = [] for counter in accessor["counter"]: values = stats_info[accessor["scale"]][counter] nodeStats = values["nodeStats"] @@ -39,8 +44,10 @@ def run(self, accessor): if replica[1] == 0: res.append((active[0], "No replica")) else: - ratio = 1.0 * active[1] / replica[1] + ratio = 1.0 * active[1] / replica[1] res.append((active[0], util.pretty_float(ratio))) + if ratio < accessor["threshold"]: + num_error.append({"node":active[0], "value": ratio}) active_total += active[1] replica_total += replica[1] if replica_total == 0: @@ -49,12 +56,16 @@ def run(self, accessor): ratio = active_total * 1.0 / replica_total cluster += ratio res.append(("total", util.pretty_float(ratio))) + if ratio != accessor["threshold"]: + num_error.append({"node":"total", "value": ratio}) + if len(num_error) > 0: + res.append(("error", num_error)) result[bucket] = res result["cluster"] = util.pretty_float(cluster / len(stats_buffer.buckets)) return result class OpsRatio: - def run(self, accessor): + def run(self, accessor): result = {} for bucket, stats_info in stats_buffer.buckets.iteritems(): ops_avg = { @@ -82,11 +93,11 @@ def run(self, accessor): write_total += write_ratio del_ratio = delete[1] * 100 / count del_total += del_ratio - res.append((read[0], "{0}:{1}:{2}".format(read_ratio, write_ratio, del_ratio))) + res.append((read[0], "{0}:{1}:{2}".format(int(read_ratio+.5), int(write_ratio+.5), int(del_ratio+.5)))) read_total /= len(ops_avg['cmd_get']) write_total /= len(ops_avg['cmd_set']) del_total /= len(ops_avg['delete_hits']) - res.append(("total", "{0}:{1}:{2}".format(read_total, write_total, del_total))) + res.append(("total", "{0}:{1}:{2}".format(int(read_total+.5), int(write_total+.5), int(del_total+.5)))) result[bucket] = res return result @@ -104,18 +115,24 @@ def run(self, accessor): trend = [] total = 0 data = [] + num_error = [] for node, vals in nodeStats.iteritems(): - a, b = util.linreg(timestamps, vals) - value = a * timestamps[-1] + b + #a, b = util.linreg(timestamps, vals) + value = sum(vals) / samplesCount total += value + if value > accessor["threshold"]: + num_error.append({"node":node, "value":value}) 
trend.append((node, util.pretty_float(value))) data.append(value) total /= len(nodeStats) trend.append(("total", util.pretty_float(total))) trend.append(("variance", util.two_pass_variance(data))) + if len(num_error) > 0: + trend.append(("error", num_error)) cluster += total result[bucket] = trend - result["cluster"] = util.pretty_float(cluster / len(stats_buffer.buckets)) + if len(stats_buffer.buckets) > 0: + result["cluster"] = util.pretty_float(cluster / len(stats_buffer.buckets)) return result class MemUsed: @@ -133,8 +150,9 @@ def run(self, accessor): data = [] for node, vals in nodeStats.iteritems(): avg = sum(vals) / samplesCount - trend.append((node, util.pretty_float(avg))) + trend.append((node, util.size_label(avg))) data.append(avg) + #print data trend.append(("variance", util.two_pass_variance(data))) result[bucket] = trend return result @@ -142,6 +160,8 @@ def run(self, accessor): class ItemGrowth: def run(self, accessor): result = {} + start_cluster = 0 + end_cluster = 0 for bucket, stats_info in stats_buffer.buckets.iteritems(): trend = [] values = stats_info[accessor["scale"]][accessor["counter"]] @@ -155,16 +175,17 @@ def run(self, accessor): trend.append((node, 0)) else: start_val = b + start_cluster += b end_val = a * timestamps[-1] + b + end_cluster += end_val rate = (end_val * 1.0 / b - 1.0) * 100 - trend.append((node, util.pretty_float(rate))) + trend.append((node, util.pretty_float(rate) + "%")) result[bucket] = trend + if len(stats_buffer.buckets) > 0: + rate = (end_cluster * 1.0 / start_cluster - 1.0) * 100 + result["cluster"] = util.pretty_float(rate) + "%" return result -class AvgItemSize: - def run(self, accessor): - return 0 - class NumVbuckt: def run(self, accessor): result = {} @@ -174,21 +195,98 @@ def run(self, accessor): nodeStats = values["nodeStats"] for node, vals in nodeStats.iteritems(): if vals[-1] < accessor["threshold"]: - num_error.append({"node":node, "value":vals[-1]}) + num_error.append({"node":node, "value": int(vals[-1])}) if len(num_error) > 0: result[bucket] = {"error" : num_error} return result +class RebalanceStuck: + def run(self, accessor): + result = {} + for bucket, bucket_stats in stats_buffer.node_stats.iteritems(): + num_error = [] + for node, stats_info in bucket_stats.iteritems(): + for key, value in stats_info.iteritems(): + if key.find(accessor["counter"]) >= 0: + if accessor.has_key("threshold"): + if int(value) > accessor["threshold"]: + num_error.append({"node":node, "value": (key, value)}) + else: + num_error.append({"node":node, "value": (key, value)}) + if len(num_error) > 0: + result[bucket] = {"error" : num_error} + return result + +class MemoryFramentation: + def run(self, accessor): + result = {} + for bucket, bucket_stats in stats_buffer.node_stats.iteritems(): + num_error = [] + for node, stats_info in bucket_stats.iteritems(): + for key, value in stats_info.iteritems(): + if key.find(accessor["counter"]) >= 0: + if accessor.has_key("threshold"): + if int(value) > accessor["threshold"]: + if accessor.has_key("unit"): + if accessor["unit"] == "time": + num_error.append({"node":node, "value": (key, util.time_label(value))}) + elif accessor["unit"] == "size": + num_error.append({"node":node, "value": (key, util.size_label(value))}) + else: + num_error.append({"node":node, "value": (key, value)}) + else: + num_error.append({"node":node, "value": (key, value)}) + if len(num_error) > 0: + result[bucket] = {"error" : num_error} + return result + +class EPEnginePerformance: + def run(self, accessor): + result = {} + for 
bucket, bucket_stats in stats_buffer.node_stats.iteritems(): + num_error = [] + for node, stats_info in bucket_stats.iteritems(): + for key, value in stats_info.iteritems(): + if key.find(accessor["counter"]) >= 0: + if accessor.has_key("threshold"): + if accessor["counter"] == "flusherState" and value != accessor["threshold"]: + num_error.append({"node":node, "value": (key, value)}) + elif accessor["counter"] == "flusherCompleted" and value == accessor["threshold"]: + num_error.append({"node":node, "value": (key, value)}) + else: + if value > accessor["threshold"]: + num_error.append({"node":node, "value": (key, value)}) + if len(num_error) > 0: + result[bucket] = {"error" : num_error} + return result + +class TotalDataSize: + def run(self, accessor): + result = [] + total = 0 + for node, nodeinfo in stats_buffer.nodes.iteritems(): + if nodeinfo["StorageInfo"].has_key("hdd"): + total += nodeinfo['StorageInfo']['hdd']['usedByData'] + result.append(util.size_label(total)) + return result + +class AvailableDiskSpace: + def run(self, accessor): + result = [] + total = 0 + for node, nodeinfo in stats_buffer.nodes.iteritems(): + if nodeinfo["StorageInfo"].has_key("hdd"): + total += nodeinfo['StorageInfo']['hdd']['free'] + result.append(util.size_label(total)) + return result + ClusterCapsule = [ {"name" : "TotalDataSize", "ingredients" : [ { "name" : "totalDataSize", "description" : "Total Data Size across cluster", - "type" : "SQL", - "stmt" : "SELECT sum(usedbyData) FROM StorageInfo WHERE type='hdd'", - "code" : "ExecSQL", - "unit" : "GB", + "code" : "TotalDataSize", } ], "clusterwise" : True, @@ -200,10 +298,7 @@ def run(self, accessor): { "name" : "availableDiskSpace", "description" : "Available disk space", - "type" : "SQL", - "stmt" : "SELECT sum(free) FROM StorageInfo WHERE type='hdd'", - "code" : "ExecSQL", - "unit" : "GB", + "code" : "AvailableDiskSpace", } ], "clusterwise" : True, @@ -216,17 +311,19 @@ def run(self, accessor): "name" : "cacheMissRatio", "description" : "Cache miss ratio", "counter" : "ep_cache_miss_rate", - "type" : "python", "scale" : "hour", "code" : "CacheMissRatio", - "unit" : "percentage", "threshold" : 2, }, ], "clusterwise" : True, "perNode" : True, "perBucket" : True, - "indicator" : False, + "indicator" : { + "cause" : "blah", + "impact" : "blah", + "action" : "blah", + }, "nodeDisparate" : True, }, {"name" : "DGM", @@ -234,7 +331,6 @@ def run(self, accessor): { "name" : "dgm", "description" : "Disk to Memory Ratio", - "type" : "pythonSQL", "code" : "DGMRatio" }, ], @@ -246,28 +342,33 @@ def run(self, accessor): "ingredients" : [ { "name" : "activeReplicaResidencyRatio", - "description" : "Active and Replica Residentcy Ratio", - "type" : "python", + "description" : "Active and Replica Resident Ratio", "counter" : ["curr_items", "vb_replica_curr_items"], "scale" : "minute", "code" : "ARRatio", + "threshold" : 1, }, ], "clusterwise" : True, "perNode" : True, "perBucket" : True, + "indicator" : { + "cause" : "blah", + "impact" : "blah", + "action" : "blah", + }, }, {"name" : "OPSPerformance", "ingredients" : [ { "name" : "opsPerformance", "description" : "Read/Write/Delete ops ratio", - "type" : "python", "scale" : "minute", "counter" : ["cmd_get", "cmd_set", "delete_hits"], "code" : "OpsRatio", }, - ] + ], + "perBucket" : True, }, {"name" : "GrowthRate", "ingredients" : [ @@ -275,23 +376,12 @@ def run(self, accessor): "name" : "dataGrowthRateForItems", "description" : "Data Growth rate for items", "counter" : "curr_items", - "type" : "python", "scale" : "day", 
"code" : "ItemGrowth", "unit" : "percentage", }, - ] - }, - {"name" : "AverageDocumentSize", - "ingredients" : [ - { - "name" : "averageDocumentSize", - "description" : "Average Document Size", - "type" : "python", - "code" : "AvgItemSize", - "unit" : "KB", - }, - ] + ], + "clusterwise" : True, }, {"name" : "VBucketNumber", "ingredients" : [ @@ -299,7 +389,6 @@ def run(self, accessor): "name" : "activeVbucketNumber", "description" : "Active VBucket number is less than expected", "counter" : "vb_active_num", - "type" : "python", "scale" : "hour", "code" : "NumVbuckt", "threshold" : 1024, @@ -308,28 +397,136 @@ def run(self, accessor): "name" : "replicaVBucketNumber", "description" : "Replica VBucket number is less than expected", "counter" : "vb_replica_num", - "type" : "python", "scale" : "hour", "code" : "NumVbuckt", "threshold" : 1024, }, ], - "indicator" : True, + "indicator" : { + "cause" : "blah", + "impact" : "blah", + "action" : "blah", + }, }, {"name" : "MemoryUsage", "ingredients" : [ { "name" : "memoryUsage", - "description" : "Check if memory usage and/or fragmentaion", - "type" : "python", + "description" : "Check memory usage", "counter" : "mem_used", "scale" : "hour", "code" : "MemUsed", }, ], - "perNode" : True, "nodeDisparate" : True, }, + {"name" : "RebalancePerformance", + "ingredients" : [ + { + "name" : "rebalanceStuck", + "description" : "Check if rebalance is stuck", + "counter" : "idle", + "code" : "RebalanceStuck", + }, + { + "name" : "highBackfillRemaing", + "description" : "Tap queue backfilll remaining is too high", + "counter" : "ep_tap_queue_backfillremaining", + "code" : "RebalanceStuck", + "threshold" : 1000, + }, + ], + "indicator" : { + "cause" : "blah", + "impact" : "blah", + "action" : "blah", + } + }, + {"name" : "MemoryFragmentation", + "ingredients" : [ + { + "name" : "totalFragmentation", + "description" : "Total memory fragmentation", + "counter" : "total_fragmentation_bytes", + "code" : "MemoryFramentation", + "unit" : "size", + "threshold" : 1073741824, # 1GB + }, + { + "name" : "diskDelete", + "description" : "Averge disk delete time", + "counter" : "disk_del", + "code" : "MemoryFramentation", + "unit" : "time", + "threshold" : 1000 #1ms + }, + { + "name" : "diskUpdate", + "description" : "Averge disk update time", + "counter" : "disk_update", + "code" : "MemoryFramentation", + "unit" : "time", + "threshold" : 1000 #1ms + }, + { + "name" : "diskInsert", + "description" : "Averge disk insert time", + "type" : "python", + "counter" : "disk_insert", + "code" : "MemoryFramentation", + "unit" : "time", + "threshold" : 1000 #1ms + }, + { + "name" : "diskCommit", + "description" : "Averge disk commit time", + "counter" : "disk_commit", + "code" : "MemoryFramentation", + "unit" : "time", + "threshold" : 5000000 #10s + }, + ], + "indicator" : { + "cause" : "blah", + "impact" : "blah", + "action" : "blah", + }, + }, + {"name" : "EPEnginePerformance", + "ingredients" : [ + { + "name" : "flusherState", + "description" : "Engine flusher state", + "counter" : "ep_flusher_state", + "code" : "EPEnginePerformance", + "threshold" : "running", + }, + { + "name" : "flusherCompleted", + "description" : "Flusher completed", + "counter" : "ep_flusher_num_completed", + "code" : "EPEnginePerformance", + "threshold" : 0 + }, + { + "name" : "avgItemLoadTime", + "description" : "Average item loaded time", + "counter" : "ep_bg_load_avg", + "code" : "EPEnginePerformance", + "threshold" : 100, + }, + { + "name" : "avgItemWaitTime", + "description" : "Averge item waited time", + 
"counter" : "ep_bg_wait_avg", + "code" : "EPEnginePerformance", + "threshold" : 100 + }, + ], + "indicator" : { + "cause" : "blah", + "impact" : "blah", + "action" : "blah", + }, + }, ] - - diff --git a/configure.ac b/configure.ac index aeda7d6..7a97088 100755 --- a/configure.ac +++ b/configure.ac @@ -1,10 +1,10 @@ -# workload-generator -# Copyright (C) 2011 Couchbase, INC +# health-checker +# Copyright (C) 2012 Couchbase, INC # All rights reserved. # AC_PREREQ(2.59) m4_include([m4/version.m4]) -AC_INIT(workload-generator, VERSION_NUMBER, bin@couchbase.com) +AC_INIT(healthChecker, VERSION_NUMBER, bin@couchbase.com) AC_CONFIG_AUX_DIR(config) AM_INIT_AUTOMAKE AC_CONFIG_FILES(Makefile wrapper/wrapper) diff --git a/dbaccessor.py b/dbaccessor.py index c39fb83..4c134da 100755 --- a/dbaccessor.py +++ b/dbaccessor.py @@ -41,14 +41,6 @@ def create_databases(self): self.cursor.execute(""" CREATE UNIQUE INDEX IF NOT EXISTS server_idx on ServerNode(host, port, master) """) - self.cursor.execute(""" CREATE TABLE IF NOT EXISTS DiskInfo ( - diskInfoId INTEGER PRIMARY KEY, - path TEXT NOT NULL, - sizeBytes INTEGER, - usagePercent INTEGER, - serverId INTEGER, - FOREIGN KEY(serverId) REFERENCES ServerNode(serverId))""") - self.cursor.execute(""" CREATE TABLE IF NOT EXISTS MemoryInfo ( memoryInfoId INTEGER PRIMARY KEY, allocated INTEGER, @@ -163,7 +155,7 @@ def process_node_stats(self, nodeId, nodeInfo): hdd['usedByData'], nodeId)); ram = nodeInfo['storageTotals']['ram'] - if hdd is not None: + if ram is not None: self.cursor.execute(sqlstmt.format('ram', hdd['free'], hdd['quotaTotal'], diff --git a/diskqueue_stats.py b/diskqueue_stats.py index 9ca6839..38555c9 100755 --- a/diskqueue_stats.py +++ b/diskqueue_stats.py @@ -1,7 +1,5 @@ -import dbaccessor import stats_buffer -import util -counter_name = 'disk_write_queue' +import util_cli as util class AvgDiskQueue: def run(self, accessor): @@ -102,20 +100,18 @@ def run(self, accessor): "counter" : "disk_write_queue", "pernode" : True, "scale" : "minute", - "type" : "python", "code" : "AvgDiskQueue", "threshold" : { "low" : 50000000, "high" : 1000000000 }, - }, + }, { "name" : "diskQueueTrend", "description" : "Persistence severely behind - disk write queue continues growing", "counter" : "disk_write_queue", "pernode" : True, "scale" : "hour", - "type" : "python", "code" : "DiskQueueTrend", "threshold" : { "low" : 0, @@ -123,7 +119,11 @@ def run(self, accessor): }, }, ], - "indicator" : True, + "indicator" : { + "cause" : "blah", + "impact" : "blah", + "action" : "blah", + }, }, {"name" : "ReplicationTrend", "ingredients" : [ @@ -133,7 +133,6 @@ def run(self, accessor): "counter" : "ep_tap_total_total_backlog_size", "pernode" : True, "scale" : "hour", - "type" : "python", "code" : "TapQueueTrend", "threshold" : { "low" : 0, @@ -141,7 +140,11 @@ def run(self, accessor): }, } ], - "indicator" : True, + "indicator" : { + "cause" : "blah", + "impact" : "blah", + "action" : "blah", + }, }, {"name" : "DiskQueueDrainingAnalysis", "description" : "", @@ -152,20 +155,18 @@ def run(self, accessor): "counter" : ["vb_active_queue_drain", "disk_write_queue"], "pernode" : True, "scale" : "minute", - "type" : "python", "code" : "DiskQueueDrainingRate", "threshold" : { "drainRate" : 0, "diskLength" : 100000, }, - }, + }, { "name" : "replicaDiskQueueDrainRate", "description" : "Persistence severely behind - replica disk queue draining rate is below threshold", "counter" : ["vb_replica_queue_drain", "disk_write_queue"], "pernode" : True, "scale" : "minute", - "type" : "python", 
"code" : "DiskQueueDrainingRate", "threshold" : { "drainRate" : 0, @@ -173,6 +174,10 @@ def run(self, accessor): }, }, ], - "indicator" : True, + "indicator" : { + "cause" : "blah", + "impact" : "blah", + "action" : "blah", + } }, ] \ No newline at end of file diff --git a/healthChecker.py b/healthChecker.py index c1930b9..c99585d 100644 --- a/healthChecker.py +++ b/healthChecker.py @@ -6,32 +6,32 @@ import os import traceback import copy +import logging -import dbaccessor +import collector import analyzer import stats_buffer -import util - -import listservers -import buckets -import node -import info import util_cli as util -import mc_bin_client -import simplejson import node_map +log = logging.getLogger('healthChecker') +log.setLevel(logging.INFO) +log.addHandler(logging.StreamHandler()) + def parse_opt(): - (cluster, user, password) = ('', '','') + (cluster, user, password, txtfile, htmlfile, verbose) = ('', '', '', 'kpi_report.txt', 'health_report.html', True) try: (opts, _args) = getopt.getopt(sys.argv[1:], - 'c:dp:u:', [ + 'c:dvp:u:t:h:', [ 'cluster=', 'debug', + 'verbose', 'password=', - 'user=' + 'user=', + 'txt=', + 'html=', ]) except getopt.GetoptError, err: usage(err) @@ -44,129 +44,47 @@ def parse_opt(): if opt in ('-p', '--password'): password = arg if opt in ('-d', '--debug'): - debug = True + log.setLevel(logging.DEBUG) + if opt in ('-t', '--txt'): + txtfile = arg + if opt in ('-h', '--html'): + htmlfile = arg + if not cluster: - usage("please provide a CLUSTER, or use -h for more help.") - return (cluster, user, password, opts) + usage() + return (cluster, user, password, txtfile, htmlfile, verbose, opts) -def get_stats(mc, stats): - try: - node_stats = mc.stats('') - if node_stats: - for key, val in node_stats.items(): - stats[key] = val - except Exception, err: - #print "ERROR: command: %s: %s:%d, %s" % ('stats all', server, port, err) - traceback.print_exc() - #sys.exit(1) - - try: - node_stats = mc.stats('tap') - if node_stats: - for key, val in node_stats.items(): - stats[key] = val - except Exception, err: - #print "ERROR: command: %s: %s:%d, %s" % ('stats tap', server, port, err) - traceback.print_exc() - #sys.exit(1) - -def stats_formatter(stats, prefix=" ", cmp=None): - if stats: - longest = max((len(x) + 2) for x in stats.keys()) - for stat, val in sorted(stats.items(), cmp=cmp): - s = stat + ":" - print "%s%s%s" % (prefix, s.ljust(longest), val) - -def collect_data(): - - (cluster, user, password, opts) = parse_opt() - server, port = util.hostport(cluster) - - nodes = [] - commands = { - 'host-list' : listservers.ListServers, - 'server-info' : info.Info, - 'bucket-list' : buckets.Buckets, - 'bucket-stats' : buckets.BucketStats, - 'bucket-node-stats' : buckets.BucketNodeStats, - } - - accessor = dbaccessor.DbAccesor() - - accessor.connect_db() - accessor.create_databases(); - - #get node list and its status - try: - cmd = 'host-list' - c = commands[cmd]() - nodes = c.runCmd(cmd, server, port, user, password, opts) - except Exception, err: - print "ERROR: command: %s: %s:%d, %s" % (cmd, server, port, err) - sys.exit(1) - - #get each node information - try: - cmd = 'server-info' - c = commands[cmd]() - for node in nodes: - (node_server, node_port) = util.hostport(node['hostname']) - if node_map.address_map.has_key(node_server): - node_server = node_map.address_map[node_server] - nodeid = accessor.create_or_update_node(node_server, node_port, node['status'], server) - if node['status'] == 'healthy': - node_info = c.runCmd(cmd, node_server, node_port, user, password, 
opts) - accessor.process_node_stats(nodeid, node_info) - #stats = {} - #mc = mc_bin_client.MemcachedClient(node_server, node['ports']['direct']) - #get_stats(mc, stats) - else: - print "Unhealthy node: %s:%s" %(node_server, node['status']) - except Exception, err: - traceback.print_exc() - #print "ERROR: command: %s: %s:%d, %s" % (cmd, server, port, err) - sys.exit(1) - - #get each bucket information - try: - cmd = 'bucket-list' - c = commands[cmd]() - json = c.runCmd(cmd, server, port, user, password, opts) - for bucket in json: - (bucket_name, bucket_id) = accessor.process_bucket(bucket, server) - - # get bucket related stats - cmd = 'bucket-stats' - c = buckets.BucketStats(bucket_name) - json = c.runCmd(cmd, server, port, user, password, opts) - stats_buffer.buckets_summary[bucket_name] = json - - #retrieve bucket stats per node - stats_buffer.buckets[bucket_name] = copy.deepcopy(stats_buffer.stats) - cmd = 'bucket-node-stats' - for scale, stat_set in stats_buffer.buckets[bucket_name].iteritems(): - for stat in stat_set.iterkeys(): - print "retieving: ", stat, " scale:", scale - c = buckets.BucketNodeStats(bucket_name, stat, scale) - json = c.runCmd(cmd, server, port, user, password, opts) - stats_buffer.buckets[bucket_name][scale][stat] = json - #accessor.process_bucket_node_stats(bucket_id, server, stat, json) - except Exception, err: - traceback.print_exc() - #print "ERROR: command: %s: %s:%d, %s" % (cmd, server, port, err) - sys.exit(1) - - accessor.close() +def usage(error_msg=''): + if error_msg: + print "ERROR: %s" % error_msg + sys.exit(2) + + print """healthChecker - cluster key performance indicator stats + +usage: healthChecker CLUSTER OPTIONS + +CLUSTER: + --cluster=HOST[:PORT] or -c HOST[:PORT] + +OPTIONS: + -u USERNAME, --user=USERNAME admin username of the cluster + -p PASSWORD, --password=PASSWORD admin password of the cluster + -o FILENAME, --output=FILENAME Default output filename is 'kpi_report.txt' + -d --debug + -v --verbose Display detailed node level information +""" + sys.exit(2) def main(): - + (cluster, user, password, txtfile, htmlfile, verbose, opts) = parse_opt() #make snapshot for the current cluster status - collect_data() + retriever = collector.StatsCollector(log) + retriever.collect_data(cluster, user, password, opts) #analyze the snapshot and historic data - performer = analyzer.StatsAnalyzer() + performer = analyzer.StatsAnalyzer(log) performer.run_analysis() - performer.run_report() + performer.run_report(txtfile, htmlfile, verbose) if __name__ == '__main__': main() diff --git a/htmlreport.tmpl b/htmlreport.tmpl deleted file mode 100755 index 7f67a72..0000000 --- a/htmlreport.tmpl +++ /dev/null @@ -1,237 +0,0 @@ - -sample-report-v2 - - - - -
- -

Couchbase Cluster Health Check Report

-

Tool Version: $globals['versions']

-

Execution Time: $globals['report_time']

-

Overall cluster health: $globals.cluster_health

-

Section 1 - Couchbase – Alerts

-

Cluster-wide metrics

- -

1. Persistence severely behind - Immediate Action Needed

Symptomo1 million items
 Disk write queue has reached 
-

oDrain rate has slowed down to

-

Causes - Disk write queue is backed-up, I/O rates unable to sustain write rates

-

Impact - If the node goes down, data will be lost

-

Action -

-

Section 2 - Couchbase Cluster Overview

-

Node list

- - - - - - -#for $node in $node_list["nodeList"]["value"] - - - - - -#end for -
Node IPCouchbase Server VersionStatus
$node["ip"]$node["version"]$node["status"]
-

Total number of nodes in the cluster: $node_list["numNodes"]["value"] o Number of nodes down: $node_list["numDownNodes"]["value"]

-

o Number of nodes warming up: $node_list["numWarmupNodes"]["value"] o Number of nodes failed over: $node_list["numFailedOverNodes"]["value"]

-
-
- -

Cluster-wide metrics

- -#for $key, $value in $cluster_symptoms.iteritems() - - - - -#end for -
$value["description"]$value["value"]
-

Bucket metrics

-#for $bucket in $bucket_list -

Bucket name: $bucket

-

Status – Attention needed

- -#for $symptom in $bucket_symptoms[$bucket] - - - - - -#end for -#for $node, $node_values in $bucket_node_symptoms[$bucket].iteritems() - - - - - - - - - - - - - - - - -
-
- - - - -#for $node_value in $node_values - - - - - - -
 $symptom["description"]$symptom["value"]
 Node-level information 
 IP address: $node 
 Status – OK 
   
 $node_value["description"]$node_value["value"]
-#end for -#end for -
-#end for - -

Section 3 - Couchbase – Warning Indicators

-

Cluster-wide metrics

-

1.Replica Resident ratio approaching alert levels

-

Symptom - Replica Resident ratio decreased over 24 hours to 0.50

-

Cause -

-

Impact - Failing over a node will slow down cluster severely because a backfill from disk will be required and will result in eviction of active items on node)

-

Action -

-
- - diff --git a/info.py b/info.py index 0bb5d83..14d2062 100755 --- a/info.py +++ b/info.py @@ -21,6 +21,7 @@ def runCmd(self, cmd, server, port, for (o, a) in opts: if o == '-d' or o == '--debug': self.debug = True + rest = restclient.RestClient(server, port, {'debug':self.debug}) opts = {'error_msg': 'server-info error'} @@ -32,5 +33,10 @@ def runCmd(self, cmd, server, port, for x in ['license', 'licenseValid', 'licenseValidUntil']: if x in json: del(json[x]) - #print simplejson.dumps(json, sort_keys=True, indent=2) - return json + if cmd == 'get-server-info': + return json + elif cmd == 'server-eshell': + p = subprocess.call(['erl','-name','ctl@127.0.0.1', + '-setcookie',json['otpCookie'],'-hidden','-remsh',json['otpNode']]) + else: + print simplejson.dumps(json, sort_keys=True, indent=2) diff --git a/listservers.py b/listservers.py index a591041..51bc30d 100755 --- a/listservers.py +++ b/listservers.py @@ -23,6 +23,7 @@ def runCmd(self, cmd, server, port, self.port = port self.user = user self.password = password + for (o, a) in opts: if o in ('-o', '--output'): self.output = a @@ -33,9 +34,17 @@ def runCmd(self, cmd, server, port, self.port, self.user, self.password) - - # obtain dict of nodes. If not dict, is error message - return self.getNodes(data) + if (self.output == 'return'): + return self.getNodes(data) + elif (self.output == 'json'): + print data + else: + # obtain dict of nodes. If not dict, is error message + nodes = self.getNodes(data) + if type(nodes) == type(list()): + self.printNodes(nodes) + else: + print self.error def getData(self, server, port, user, password): """ @@ -68,7 +77,11 @@ def printNodes(self, nodes): if self.cmd == "host-list": print node['hostname'] else: + if node.get('otpNode') is None: + raise Exception("could not access node;" + + " please check your username (-u) and password (-p)") + print '%s %s %s %s' % (node['otpNode'], - node['hostname'], - node['status'], - node['clusterMembership']) + node['hostname'], + node['status'], + node['clusterMembership']) diff --git a/node.py b/node.py deleted file mode 100755 index 361836e..0000000 --- a/node.py +++ /dev/null @@ -1,453 +0,0 @@ -""" - Implementation for rebalance, add, remove, stop rebalance. 
-""" - -import time -import os -import sys -import util_cli as util -import socket - -from usage import usage -from restclient import * -from listservers import * - -# the rest commands and associated URIs for various node operations - -rest_cmds = { - 'rebalance' :'/controller/rebalance', - 'rebalance-stop' :'/controller/stopRebalance', - 'rebalance-status' :'/pools/default/rebalanceProgress', - 'server-add' :'/controller/addNode', - 'server-readd' :'/controller/reAddNode', - 'failover' :'/controller/failOver', - 'cluster-init' :'/settings/web', - 'node-init' :'/nodes/self/controller/settings', -} - -server_no_remove = [ - 'rebalance-stop', - 'rebalance-status', - 'server-add', - 'server-readd', - 'failover' -] -server_no_add = [ - 'rebalance-stop', - 'rebalance-status', - 'failover', -] - -# Map of operations and the HTTP methods used against the REST interface - -methods = { - 'rebalance' :'POST', - 'rebalance-stop' :'POST', - 'rebalance-status' :'GET', - 'eject-server' :'POST', - 'server-add' :'POST', - 'server-readd' :'POST', - 'failover' :'POST', - 'cluster-init' :'POST', - 'node-init' :'POST', -} - -# Map of HTTP success code, success message and error message for -# handling HTTP response properly - -class Node: - def __init__(self): - self.rest_cmd = rest_cmds['rebalance-status'] - self.method = 'GET' - self.debug = False - self.server = '' - self.port = '' - self.user = '' - self.password = '' - self.params = {} - self.output = 'standard' - self.password_new = None - self.username_new = None - self.port_new = None - self.per_node_quota = None - self.data_path = None - - def runCmd(self, cmd, server, port, - user, password, opts): - self.rest_cmd = rest_cmds[cmd] - self.method = methods[cmd] - self.server = server - self.port = int(port) - self.user = user - self.password = password - - servers = self.processOpts(cmd, opts) - - if self.debug: - print "INFO: servers %s" % servers - - if cmd == 'server-add' and not servers['add']: - usage("please list one or more --server-add=HOST[:PORT];" - " or use -h for more help.") - - if cmd == 'server-readd' and not servers['add']: - usage("please list one or more --server-add=HOST[:PORT];" - " or use -h for more help.") - - if cmd in ('server-add', 'rebalance'): - self.addServers(servers['add']) - if cmd == 'rebalance': - self.rebalance(servers) - - if cmd == 'server-readd': - self.reAddServers(servers) - - if cmd == 'rebalance-status': - output_result = self.rebalanceStatus() - print output_result - - if cmd == 'rebalance-stop': - output_result = self.rebalanceStop() - print output_result - - if cmd == 'failover': - if len(servers['failover']) <= 0: - usage("please list one or more --server-failover=HOST[:PORT];" - " or use -h for more help.") - - self.failover(servers) - - if cmd == 'cluster-init': - self.clusterInit() - - if cmd == 'node-init': - self.nodeInit() - - - def clusterInit(self): - rest = restclient.RestClient(self.server, - self.port, - {'debug':self.debug}) - if self.port_new: - rest.setParam('port', self.port_new) - else: - rest.setParam('port', 'SAME') - rest.setParam('initStatus', 'done') - if self.username_new: - rest.setParam('username', self.username_new) - else: - rest.setParam('username', self.user) - if self.password_new: - rest.setParam('password', self.password_new) - else: - rest.setParam('password', self.password) - - opts = {} - opts['error_msg'] = "unable to init %s" % self.server - opts['success_msg'] = "init %s" % self.server - - output_result = rest.restCmd(self.method, - self.rest_cmd, - self.user, - 
self.password, - opts) - print output_result - - # per node quota unfortunately runs against a different location - if not self.per_node_quota: - return - - if self.port_new: - self.port = int(self.port_new) - if self.username_new: - self.user = self.username_new - if self.password_new: - self.password = self.password_new - - rest = restclient.RestClient(self.server, - self.port, - {'debug':self.debug}) - if self.per_node_quota: - rest.setParam('memoryQuota', self.per_node_quota) - - output_result = rest.restCmd(self.method, - '/pools/default', - self.user, - self.password, - opts) - print output_result - - - def nodeInit(self): - rest = restclient.RestClient(self.server, - self.port, - {'debug':self.debug}) - if self.data_path: - rest.setParam('path', self.data_path) - - opts = {} - opts['error_msg'] = "unable to init %s" % self.server - opts['success_msg'] = "init %s" % self.server - - output_result = rest.restCmd(self.method, - self.rest_cmd, - self.user, - self.password, - opts) - print output_result - - - def processOpts(self, cmd, opts): - """ Set standard opts. - note: use of a server key keeps optional - args aligned with server. - """ - servers = { - 'add': {}, - 'remove': {}, - 'failover': {} - } - - # don't allow options that don't correspond to given commands - - for o, a in opts: - usage_msg = "option '%s' is not used with command '%s'" % (o, cmd) - - if o in ( "-r", "--server-remove"): - if cmd in server_no_remove: - usage(usage_msg) - elif o in ( "-a", "--server-add", - "--server-add-username", - "--server-add-password"): - if cmd in server_no_add: - usage(usage_msg) - - server = None - - for o, a in opts: - if o in ("-a", "--server-add"): - if a == "self": - a = socket.gethostbyname(socket.getfqdn()) - server = "%s:%d" % util.hostport(a) - servers['add'][server] = { 'user':'', 'password':''} - elif o == "--server-add-username": - if server is None: - usage("please specify --server-add" - " before --server-add-username") - servers['add'][server]['user'] = a - elif o == "--server-add-password": - if server is None: - usage("please specify --server-add" - " before --server-add-password") - servers['add'][server]['password'] = a - elif o in ( "-r", "--server-remove"): - server = "%s:%d" % util.hostport(a) - servers['remove'][server] = True - server = None - elif o in ( "--server-failover"): - server = "%s:%d" % util.hostport(a) - servers['failover'][server] = True - server = None - elif o in ('-o', '--output'): - if a == 'json': - self.output = a - server = None - elif o in ('-d', '--debug'): - self.debug = True - server = None - elif o == '--cluster-init-password': - self.password_new = a - elif o == '--cluster-init-username': - self.username_new = a - elif o == '--cluster-init-port': - self.port_new = a - elif o == '--cluster-init-ramsize': - self.per_node_quota = a - elif o == '--node-init-data-path': - self.data_path = a - - return servers - - def addServers(self, servers): - for server in servers: - user = servers[server]['user'] - password = servers[server]['password'] - output_result = self.serverAdd(server, - user, - password) - print output_result - - def serverAdd(self, add_server, add_with_user, add_with_password): - rest = restclient.RestClient(self.server, - self.port, - {'debug':self.debug}) - rest.setParam('hostname', add_server) - if add_with_user and add_with_password: - rest.setParam('user', add_with_user) - rest.setParam('password', add_with_password) - - opts = {} - opts['error_msg'] = "unable to server-add %s" % add_server - opts['success_msg'] = 
"server-add %s" % add_server - - output_result = rest.restCmd('POST', - rest_cmds['server-add'], - self.user, - self.password, - opts) - return output_result - - def reAddServers(self, servers): - known_otps, eject_otps, failover_otps, readd_otps = \ - self.getNodeOtps(to_readd=servers['add']) - - for readd_otp in readd_otps: - rest = restclient.RestClient(self.server, - self.port, - {'debug':self.debug}) - rest.setParam('otpNode', readd_otp) - - opts = {} - opts['error_msg'] = "unable to re-add %s" % readd_otp - opts['success_msg'] = "re-add %s" % readd_otp - - output_result = rest.restCmd('POST', - rest_cmds['server-readd'], - self.user, - self.password, - opts) - print output_result - - def getNodeOtps(self, to_eject=[], to_failover=[], to_readd=[]): - """ Convert known nodes into otp node id's. - """ - listservers = ListServers() - known_nodes_list = listservers.getNodes( - listservers.getData(self.server, - self.port, - self.user, - self.password)) - known_otps = [] - eject_otps = [] - failover_otps = [] - readd_otps = [] - - for node in known_nodes_list: - known_otps.append(node['otpNode']) - if node['hostname'] in to_eject: - eject_otps.append(node['otpNode']) - if node['hostname'] in to_failover: - failover_otps.append(node['otpNode']) - if node['hostname'] in to_readd: - readd_otps.append(node['otpNode']) - - return (known_otps, eject_otps, failover_otps, readd_otps) - - def rebalance(self, servers): - known_otps, eject_otps, failover_otps, readd_otps = \ - self.getNodeOtps(to_eject=servers['remove']) - - rest = restclient.RestClient(self.server, - self.port, - {'debug':self.debug}) - rest.setParam('knownNodes', ','.join(known_otps)) - rest.setParam('ejectedNodes', ','.join(eject_otps)) - - opts = {} - opts['success_msg'] = 'rebalanced cluster' - opts['error_msg'] = 'unable to rebalance cluster' - - output_result = rest.restCmd('POST', - rest_cmds['rebalance'], - self.user, - self.password, - opts) - if self.debug: - print "INFO: rebalance started: %s" % output_result - - sys.stdout = os.fdopen(sys.stdout.fileno(), 'w', 0) - - print "INFO: rebalancing", - - status, error = self.rebalanceStatus(prefix='\n') - while status == 'running': - print ".", - time.sleep(0.5) - try: - status, error = self.rebalanceStatus(prefix='\n') - except socket.error: - time.sleep(2) - status, error = self.rebalanceStatus(prefix='\n') - - if error: - print '\n' + error - sys.exit(1) - else: - print '\n' + output_result - - def rebalanceStatus(self, prefix=''): - rest = restclient.RestClient(self.server, - self.port, - {'debug':self.debug}) - opts = { 'error_msg':'unable to obtain rebalance status'} - - output_result = rest.restCmd('GET', - rest_cmds['rebalance-status'], - self.user, - self.password, - opts) - - json = rest.getJson(output_result) - if type(json) == type(list()): - print prefix + ("ERROR: %s" % json[0]) - sys.exit(1) - - if 'errorMessage' in json: - error_message = json['errorMessage'] - else: - error_message = None - - return json['status'],error_message - - def rebalanceStop(self): - rest = restclient.RestClient(self.server, - self.port, - {'debug':self.debug}) - - opts = {} - opts['success_msg'] = 'rebalance cluster stopped' - opts['error_msg'] = 'unable to stop rebalance' - - output_result = rest.restCmd('POST', - rest_cmds['rebalance-stop'], - self.user, - self.password, - opts) - return output_result - - - def failover(self, servers): - known_otps, eject_otps, failover_otps, readd_otps = \ - self.getNodeOtps(to_failover=servers['failover']) - - if len(failover_otps) <= 0: - 
usage("specified servers are not part of the cluster: %s" % - servers['failover'].keys()) - - for failover_otp in failover_otps: - rest = restclient.RestClient(self.server, - self.port, - {'debug':self.debug}) - rest.setParam('otpNode', failover_otp) - - opts = {} - opts['error_msg'] = "unable to failover %s" % failover_otp - opts['success_msg'] = "failover %s" % failover_otp - - output_result = rest.restCmd('POST', - rest_cmds['failover'], - self.user, - self.password, - opts) - print output_result - diff --git a/node_map.py b/node_map.py new file mode 100755 index 0000000..133aa93 --- /dev/null +++ b/node_map.py @@ -0,0 +1,8 @@ +address_map = { +"10.12.87.41" : "23.20.45.23", +"10.12.95.171" : "107.22.84.123", +"10.194.169.187" : "107.22.70.136", +"10.12.98.26" : "23.20.50.242", +"10.144.64.38" : "50.17.157.98", +"10.12.97.189" : "107.22.11.161", +} diff --git a/node_stats.py b/node_stats.py index 34fc02b..1df1758 100755 --- a/node_stats.py +++ b/node_stats.py @@ -1,30 +1,83 @@ import stats_buffer -import util - -class ExecSQL: - def run(self, accessor, stmt): - result = accessor.execute(stmt) - return result[0] +import util_cli as util class NodeList: def run(self, accessor): result = [] - nodelist = accessor.execute("SELECT host, port, version, os, status FROM ServerNode", True) - for node in nodelist: - result.append({"ip": node[0], "port": node[1], "version" :node[2], "os": node[3], "status" : node[4]}) - + for node, node_info in stats_buffer.nodes.iteritems(): + result.append({"ip": node, "port": node_info['port'], "version" :node_info['version'], "os": node_info['os'], "status" :node_info['status']}) + return result + +class NumNodes: + def run(self, accessor): + result = [] + result.append(len(stats_buffer.nodes)) + return result + +class NumDownNodes: + def run(self, accessor): + result = [] + result.append(len(filter(lambda (a, b): b["status"]=="down", stats_buffer.nodes.items()))) + return result + +class NumWarmupNodes: + def run(self, accessor): + result = [] + result.append(len(filter(lambda (a, b): b["status"]=="warmup", stats_buffer.nodes.items()))) + return result + +class NumFailOverNodes: + def run(self, accessor): + result = [] + result.append(len(filter(lambda (a, b): b["clusterMembership"]!="active", stats_buffer.nodes.items()))) return result class BucketList: def run(self, accessor): result = [] - bucketlist = accessor.execute("SELECT name FROM Bucket", True) - for bucket in bucketlist: - result.append({"name": bucket[0]}) - + for bucket in stats_buffer.bucket_info.keys(): + result.append({"name": bucket}) + return result +class NodeStorageStats: + def run(self, accessor): + result = [] + for node, values in stats_buffer.nodes.iteritems(): + if values["StorageInfo"].has_key("hdd"): + result.append({"ip": values["host"], + "port": values["port"], + "type" : "hdd", + "free": util.size_label(values["StorageInfo"]["hdd"]["free"]), + "quotaTotal" : util.size_label(values["StorageInfo"]["hdd"]["quotaTotal"]), + "used" : util.size_label(values["StorageInfo"]["hdd"]["used"]), + "usedByData" : util.size_label(values["StorageInfo"]["hdd"]["usedByData"]), + "total" : util.size_label(values["StorageInfo"]["hdd"]["total"])}) + if values["StorageInfo"].has_key("ram"): + result.append({"ip": values["host"], + "port": values["port"], + "type" : "ram", + "quotaTotal" : util.size_label(values["StorageInfo"]["ram"]["quotaTotal"]), + "used" : util.size_label(values["StorageInfo"]["ram"]["used"]), + "usedByData" : util.size_label(values["StorageInfo"]["ram"]["usedByData"]), + "total" : 
util.size_label(values["StorageInfo"]["ram"]["total"])}) + return result + +class NodeSystemStats: + def run(self, accessor): + result = [] + for node, values in stats_buffer.nodes.iteritems(): + result.append({"ip": values["host"], + "port": values["port"], + "cpuUtilization" :util.pretty_float(values["systemStats"]["cpu_utilization_rate"]), + "swapTotal": util.size_label(values["systemStats"]["swap_total"]), + "swapUsed" : util.size_label(values["systemStats"]["swap_used"]), + "currentItems" : values["systemStats"]["currentItems"], + "currentItemsTotal" : values["systemStats"]["currentItemsTotal"], + "replicaCurrentItems" : values["systemStats"]["replicaCurrentItems"]}) + + return result class ConnectionTrend: def run(self, accessor): result = {} @@ -57,42 +110,57 @@ def run(self, accessor): result[bucket] = trend return result +class NodePerformanceStats: + def run(self, accessor): + result = {} + for bucket, bucket_stats in stats_buffer.node_stats.iteritems(): + stats = [] + for node, stats_info in bucket_stats.iteritems(): + + for key, value in stats_info.iteritems(): + if key.find(accessor["counter"]) >= 0: + if accessor.has_key("threshold"): + if int(value) > accessor["threshold"]: + stats.append((node, (key, value))) + else: + if accessor.has_key("unit"): + if accessor["unit"] == "time": + stats.append((node, util.time_label(value))) + elif accessor["unit"] == "size": + stats.append((node, util.size_label(int(value)))) + else: + stats.append((node, (key,value))) + + result[bucket] = stats + return result + NodeCapsule = [ {"name" : "NodeStatus", "ingredients" : [ { "name" : "nodeList", "description" : "Node list", - "type" : "pythonSQL", "code" : "NodeList", }, { "name" : "numNodes", "description" : "Number of Nodes", - "type" : "SQL", - "stmt" : "SELECT count(*) FROM ServerNode", - "code" : "ExecSQL", + "code" : "NumNodes", }, { "name" : "numDownNodes", "description" : "Number of Down Nodes", - "type" : "SQL", - "stmt" : "SELECT count(*) FROM ServerNode WHERE status='down'", - "code" : "ExecSQL", + "code" : "NumDownNodes", }, { "name" : "numWarmupNodes", "description" : "Number of Warmup Nodes", - "type" : "SQL", - "stmt" : "SELECT count(*) FROM ServerNode WHERE status='warmup'", - "code" : "ExecSQL", + "code" : "NumWarmupNodes", }, { "name" : "numFailedOverNodes", "description" : "Number of Nodes failed over", - "type" : "SQL", - "stmt" : "SELECT count(*) FROM ServerNode WHERE clusterMembership != 'active'", - "code" : "ExecSQL", + "code" : "NumFailOverNodes", }, ], "clusterwise" : False, @@ -106,14 +174,15 @@ def run(self, accessor): "name" : "connectionTrend", "description" : "Connection Trend", "counter" : "curr_connections", - "type" : "python", "scale" : "minute", "code" : "ConnectionTrend", "threshold" : { "high" : 10000, }, }, - ] + ], + "nodewise" : True, + "perNode" : True, }, {"name" : "OOMError", "ingredients" : [ @@ -121,7 +190,6 @@ def run(self, accessor): "name" : "oomErrors", "description" : "OOM Errors", "counter" : "ep_oom_errors", - "type" : "python", "scale" : "hour", "code" : "CalcTrend", }, @@ -129,36 +197,186 @@ def run(self, accessor): "name" : "tempOomErrors", "description" : "Temporary OOM Errors", "counter" : "ep_tmp_oom_errors", - "type" : "python", "scale" : "hour", "code" : "CalcTrend", }, ] }, - {"name" : "Overhead", + {"name" : "bucketList", + "ingredients" : [ + { + "name" : "bucketList", + "description" : "Bucket list", + "code" : "BucketList", + }, + ], + "nodewise" : True, + }, + {"name" : "nodeStorageStats", + "ingredients" : [ + { + "name" : 
"nodeStorageStats", + "description" : "Node storage stats", + "code" : "NodeStorageStats", + }, + ], + "nodewise" : True, + }, + {"name" : "nodeSystemStats", + "ingredients" : [ + { + "name" : "nodeSystemStats", + "description" : "Node system stats", + "code" : "NodeSystemStats", + }, + ], + "nodewise" : True, + }, + {"name" : "tapPerformance", + "ingredients" : [ + { + "name" : "backfillRemaining", + "description" : "Number of backfill remaining", + "counter" : "ep_tap_queue_backfillremaining", + "code" : "NodePerformanceStats", + }, + { + "name" : "tapNack", + "description" : "Number of nacks", + "counter" : "num_tap_nack", + "code" : "NodePerformanceStats", + }, + { + "name" : "tapIdle", + "description" : "Idle tap streams", + "counter" : "idle", + "code" : "NodePerformanceStats", + }, + ], + "perBucket" : True, + }, + {"name" : "checkpointPerformance", + "ingredients" : [ + { + "name" : "openCheckPoint", + "description" : "Items for open checkpoints", + "counter" : "num_checkpoint_items", + "code" : "NodePerformanceStats", + "threshold" : 10000, + }, + ], + "perBucket" : True, + }, + {"name" : "diskPerformance", + "ingredients" : [ + { + "name" : "diskCommit", + "description" : "Averge disk commit time", + "counter" : "disk_commit", + "code" : "NodePerformanceStats", + "unit" : "time", + }, + { + "name" : "diskUpdate", + "description" : "Averge disk update time", + "counter" : "disk_update", + "code" : "NodePerformanceStats", + "unit" : "time", + }, + { + "name" : "diskInsert", + "description" : "Averge disk insert time", + "counter" : "disk_insert", + "code" : "NodePerformanceStats", + "unit" : "time", + }, + { + "name" : "diskDelete", + "description" : "Averge disk delete time", + "counter" : "disk_del", + "code" : "NodePerformanceStats", + "unit" : "time", + }, + ], + "perBucket" : True, + }, + {"name" : "AverageDocumentSize", "ingredients" : [ + { + "name" : "averageDocumentSize", + "description" : "Average Document Size", + "counter" : "item_alloc_sizes", + "code" : "NodePerformanceStats", + "unit" : "size", + }, + ], + "perBucket" : True, + }, + {"name" : "MemoryUsage", + "ingredients" : [ + { + "name" : "totalMemoryUsage", + "description" : "Total memory usage", + "counter" : "total_heap_bytes", + "code" : "NodePerformanceStats", + "unit" : "size", + }, + { + "name" : "totalFragmentation", + "description" : "Total memory fragmentation", + "counter" : "total_fragmentation_bytes", + "code" : "NodePerformanceStats", + "unit" : "size", + }, + { + "name" : "totalInternalMemory", + "description" : "Total internal memory usage", + "counter" : "mem_used", + "code" : "NodePerformanceStats", + "unit" : "size", + }, { "name" : "overhead", - "description" : "Overhead", + "description" : "Memory overhead", "counter" : "ep_overhead", - "type" : "python", "scale" : "hour", - "code" : "CalcTrend", + "code" : "NodePerformanceStats", + "unit" : "size", }, - ] + ], + "perBucket" : True, }, - {"name" : "bucketList", + {"name" : "EPEnginePerformance", "ingredients" : [ { - "name" : "bucketList", - "description" : "Bucket list", - "type" : "pythonSQL", - "code" : "BucketList", + "name" : "flusherState", + "description" : "Engine flusher state", + "counter" : "ep_flusher_state", + "code" : "NodePerformanceStats", + }, + { + "name" : "flusherCompleted", + "description" : "Flusher completed", + "counter" : "ep_flusher_num_completed", + "code" : "NodePerformanceStats", + }, + { + "name" : "avgItemLoadTime", + "description" : "Average item loaded time", + "counter" : "ep_bg_load_avg", + "code" : 
"NodePerformanceStats", + "unit" : "time" + }, + { + "name" : "avgItemWaitTime", + "description" : "Averge item waited time", + "counter" : "ep_bg_wait_avg", + "code" : "NodePerformanceStats", + "unit" : "time", }, ], - "nodewise" : True, - }, - + "perNode" : True, + }, ] diff --git a/report-htm.tmpl b/report-htm.tmpl index 1100cfd..1c1090e 100755 --- a/report-htm.tmpl +++ b/report-htm.tmpl @@ -130,27 +130,41 @@ body {margin-top: 0px;margin-left: 0px;}

Section 1 - Couchbase – Alerts

Cluster-wide metrics

#for $counter, $error_values in $indicator_error.iteritems(): -

1. $error_values["description"]

+

. $error_values["description"]

- + #for $err_val in $error_values["value"] -($err_val["node"], $err_val["value"]) -#end for - + + + + - - + + + +#end for + + + + + + + + + + + + + +
Symptomo +  
 Node:$err_val["node"]
 Disk write queue has reached Value:$err_val["value"]
Causes -$error_values["cause"]
Impact -$error_values["impact"]
Action -$error_values["action"]
-

oDrain rate has slowed down to

-

Causes -

-

Impact -

-

Action -

#end for

Section 2 - Couchbase Cluster Overview

Node list

@@ -168,8 +182,10 @@ body {margin-top: 0px;margin-left: 0px;} #end for -

Total number of nodes in the cluster: $node_list["numNodes"]["value"] o Number of nodes down: $node_list["numDownNodes"]["value"]

-

o Number of nodes warming up: $node_list["numWarmupNodes"]["value"] o Number of nodes failed over: $node_list["numFailedOverNodes"]["value"]

+

Total number of nodes in the cluster: $node_list["numNodes"]["value"]

+

Number of down nodes: $node_list["numDownNodes"]["value"]

+

Number of warming up nodes: $node_list["numWarmupNodes"]["value"]

+

Number of failed over nodes: $node_list["numFailedOverNodes"]["value"]

@@ -205,7 +221,11 @@ body {margin-top: 0px;margin-left: 0px;}   +#if $bucket_node_status[$bucket].has_key($node) + Status – Error +#else Status – OK +#end if   @@ -224,15 +244,41 @@ body {margin-top: 0px;margin-left: 0px;}

Section 3 - Couchbase – Warning Indicators

Cluster-wide metrics

#for $counter, $warn_values in $indicator_warn.iteritems(): -

1.$warn_values["description"]

-

Symptom - +

. $warn_values["description"]

+ + + + + + #for $warn_val in $warn_values["value"] -($warn_val["node"], $warn_val["value"]) + + + + + + + + + + #end for -

-

Cause -

-

Impact -

-

Action -

+ + + + + + + + + + + + + + + +
Symptomo 
 Node:$warn_val["node"]
 Value:$warn_val["value"]
Causes -$warn_values["cause"]
Impact -$warn_values["impact"]
Action -$warn_values["action"]
#end for
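For reference, the $indicator_error / $indicator_warn loops above expect one entry per counter, each carrying a description, the bucket name, a per-node value list, and the new cause/impact/action strings. Below is a minimal Cheetah rendering sketch; the sample data and the trimmed-down template source are illustrative only, not the real report-htm.tmpl markup.

# Hypothetical sketch of how one indicator entry is rendered (Python 2 / Cheetah).
from Cheetah.Template import Template

ctx = {"indicator_error": {
    "diskWriteQueue": {
        "description": "Disk write queue too large",
        "bucket": "default",
        "value": [{"node": "10.1.2.3:8091", "value": 1048576}],
        "cause": "Drain rate cannot keep up with the incoming write load",
        "impact": "Persistence and replication fall behind",
        "action": "Check disk throughput or rebalance in more nodes",
    },
}}

src = """#for $counter, $error_values in $indicator_error.iteritems()
$error_values["description"] (bucket: $error_values["bucket"])
#for $err_val in $error_values["value"]
  Node: $err_val["node"]  Value: $err_val["value"]
#end for
  Causes - $error_values["cause"]
  Impact - $error_values["impact"]
  Action - $error_values["action"]
#end for"""

print str(Template(src, searchList=[ctx]))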
diff --git a/simplejson/LICENSE.txt b/simplejson/LICENSE.txt new file mode 100755 index 0000000..ad95f29 --- /dev/null +++ b/simplejson/LICENSE.txt @@ -0,0 +1,19 @@ +Copyright (c) 2006 Bob Ippolito + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/simplejson/__init__.py b/simplejson/__init__.py new file mode 100755 index 0000000..d5b4d39 --- /dev/null +++ b/simplejson/__init__.py @@ -0,0 +1,318 @@ +r"""JSON (JavaScript Object Notation) is a subset of +JavaScript syntax (ECMA-262 3rd edition) used as a lightweight data +interchange format. + +:mod:`simplejson` exposes an API familiar to users of the standard library +:mod:`marshal` and :mod:`pickle` modules. It is the externally maintained +version of the :mod:`json` library contained in Python 2.6, but maintains +compatibility with Python 2.4 and Python 2.5 and (currently) has +significant performance advantages, even without using the optional C +extension for speedups. + +Encoding basic Python object hierarchies:: + + >>> import simplejson as json + >>> json.dumps(['foo', {'bar': ('baz', None, 1.0, 2)}]) + '["foo", {"bar": ["baz", null, 1.0, 2]}]' + >>> print json.dumps("\"foo\bar") + "\"foo\bar" + >>> print json.dumps(u'\u1234') + "\u1234" + >>> print json.dumps('\\') + "\\" + >>> print json.dumps({"c": 0, "b": 0, "a": 0}, sort_keys=True) + {"a": 0, "b": 0, "c": 0} + >>> from StringIO import StringIO + >>> io = StringIO() + >>> json.dump(['streaming API'], io) + >>> io.getvalue() + '["streaming API"]' + +Compact encoding:: + + >>> import simplejson as json + >>> json.dumps([1,2,3,{'4': 5, '6': 7}], separators=(',',':')) + '[1,2,3,{"4":5,"6":7}]' + +Pretty printing:: + + >>> import simplejson as json + >>> s = json.dumps({'4': 5, '6': 7}, sort_keys=True, indent=4) + >>> print '\n'.join([l.rstrip() for l in s.splitlines()]) + { + "4": 5, + "6": 7 + } + +Decoding JSON:: + + >>> import simplejson as json + >>> obj = [u'foo', {u'bar': [u'baz', None, 1.0, 2]}] + >>> json.loads('["foo", {"bar":["baz", null, 1.0, 2]}]') == obj + True + >>> json.loads('"\\"foo\\bar"') == u'"foo\x08ar' + True + >>> from StringIO import StringIO + >>> io = StringIO('["streaming API"]') + >>> json.load(io)[0] == 'streaming API' + True + +Specializing JSON object decoding:: + + >>> import simplejson as json + >>> def as_complex(dct): + ... if '__complex__' in dct: + ... return complex(dct['real'], dct['imag']) + ... return dct + ... + >>> json.loads('{"__complex__": true, "real": 1, "imag": 2}', + ... 
object_hook=as_complex) + (1+2j) + >>> import decimal + >>> json.loads('1.1', parse_float=decimal.Decimal) == decimal.Decimal('1.1') + True + +Specializing JSON object encoding:: + + >>> import simplejson as json + >>> def encode_complex(obj): + ... if isinstance(obj, complex): + ... return [obj.real, obj.imag] + ... raise TypeError(repr(o) + " is not JSON serializable") + ... + >>> json.dumps(2 + 1j, default=encode_complex) + '[2.0, 1.0]' + >>> json.JSONEncoder(default=encode_complex).encode(2 + 1j) + '[2.0, 1.0]' + >>> ''.join(json.JSONEncoder(default=encode_complex).iterencode(2 + 1j)) + '[2.0, 1.0]' + + +Using simplejson.tool from the shell to validate and pretty-print:: + + $ echo '{"json":"obj"}' | python -m simplejson.tool + { + "json": "obj" + } + $ echo '{ 1.2:3.4}' | python -m simplejson.tool + Expecting property name: line 1 column 2 (char 2) +""" +__version__ = '2.0.9' +__all__ = [ + 'dump', 'dumps', 'load', 'loads', + 'JSONDecoder', 'JSONEncoder', +] + +__author__ = 'Bob Ippolito ' + +from decoder import JSONDecoder +from encoder import JSONEncoder + +_default_encoder = JSONEncoder( + skipkeys=False, + ensure_ascii=True, + check_circular=True, + allow_nan=True, + indent=None, + separators=None, + encoding='utf-8', + default=None, +) + +def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True, + allow_nan=True, cls=None, indent=None, separators=None, + encoding='utf-8', default=None, **kw): + """Serialize ``obj`` as a JSON formatted stream to ``fp`` (a + ``.write()``-supporting file-like object). + + If ``skipkeys`` is true then ``dict`` keys that are not basic types + (``str``, ``unicode``, ``int``, ``long``, ``float``, ``bool``, ``None``) + will be skipped instead of raising a ``TypeError``. + + If ``ensure_ascii`` is false, then the some chunks written to ``fp`` + may be ``unicode`` instances, subject to normal Python ``str`` to + ``unicode`` coercion rules. Unless ``fp.write()`` explicitly + understands ``unicode`` (as in ``codecs.getwriter()``) this is likely + to cause an error. + + If ``check_circular`` is false, then the circular reference check + for container types will be skipped and a circular reference will + result in an ``OverflowError`` (or worse). + + If ``allow_nan`` is false, then it will be a ``ValueError`` to + serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``) + in strict compliance of the JSON specification, instead of using the + JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``). + + If ``indent`` is a non-negative integer, then JSON array elements and object + members will be pretty-printed with that indent level. An indent level + of 0 will only insert newlines. ``None`` is the most compact representation. + + If ``separators`` is an ``(item_separator, dict_separator)`` tuple + then it will be used instead of the default ``(', ', ': ')`` separators. + ``(',', ':')`` is the most compact JSON representation. + + ``encoding`` is the character encoding for str instances, default is UTF-8. + + ``default(obj)`` is a function that should return a serializable version + of obj or raise TypeError. The default simply raises TypeError. + + To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the + ``.default()`` method to serialize additional types), specify it with + the ``cls`` kwarg. 
+ + """ + # cached encoder + if (not skipkeys and ensure_ascii and + check_circular and allow_nan and + cls is None and indent is None and separators is None and + encoding == 'utf-8' and default is None and not kw): + iterable = _default_encoder.iterencode(obj) + else: + if cls is None: + cls = JSONEncoder + iterable = cls(skipkeys=skipkeys, ensure_ascii=ensure_ascii, + check_circular=check_circular, allow_nan=allow_nan, indent=indent, + separators=separators, encoding=encoding, + default=default, **kw).iterencode(obj) + # could accelerate with writelines in some versions of Python, at + # a debuggability cost + for chunk in iterable: + fp.write(chunk) + + +def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True, + allow_nan=True, cls=None, indent=None, separators=None, + encoding='utf-8', default=None, **kw): + """Serialize ``obj`` to a JSON formatted ``str``. + + If ``skipkeys`` is false then ``dict`` keys that are not basic types + (``str``, ``unicode``, ``int``, ``long``, ``float``, ``bool``, ``None``) + will be skipped instead of raising a ``TypeError``. + + If ``ensure_ascii`` is false, then the return value will be a + ``unicode`` instance subject to normal Python ``str`` to ``unicode`` + coercion rules instead of being escaped to an ASCII ``str``. + + If ``check_circular`` is false, then the circular reference check + for container types will be skipped and a circular reference will + result in an ``OverflowError`` (or worse). + + If ``allow_nan`` is false, then it will be a ``ValueError`` to + serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``) in + strict compliance of the JSON specification, instead of using the + JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``). + + If ``indent`` is a non-negative integer, then JSON array elements and + object members will be pretty-printed with that indent level. An indent + level of 0 will only insert newlines. ``None`` is the most compact + representation. + + If ``separators`` is an ``(item_separator, dict_separator)`` tuple + then it will be used instead of the default ``(', ', ': ')`` separators. + ``(',', ':')`` is the most compact JSON representation. + + ``encoding`` is the character encoding for str instances, default is UTF-8. + + ``default(obj)`` is a function that should return a serializable version + of obj or raise TypeError. The default simply raises TypeError. + + To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the + ``.default()`` method to serialize additional types), specify it with + the ``cls`` kwarg. + + """ + # cached encoder + if (not skipkeys and ensure_ascii and + check_circular and allow_nan and + cls is None and indent is None and separators is None and + encoding == 'utf-8' and default is None and not kw): + return _default_encoder.encode(obj) + if cls is None: + cls = JSONEncoder + return cls( + skipkeys=skipkeys, ensure_ascii=ensure_ascii, + check_circular=check_circular, allow_nan=allow_nan, indent=indent, + separators=separators, encoding=encoding, default=default, + **kw).encode(obj) + + +_default_decoder = JSONDecoder(encoding=None, object_hook=None) + + +def load(fp, encoding=None, cls=None, object_hook=None, parse_float=None, + parse_int=None, parse_constant=None, **kw): + """Deserialize ``fp`` (a ``.read()``-supporting file-like object containing + a JSON document) to a Python object. + + If the contents of ``fp`` is encoded with an ASCII based encoding other + than utf-8 (e.g. 
latin-1), then an appropriate ``encoding`` name must + be specified. Encodings that are not ASCII based (such as UCS-2) are + not allowed, and should be wrapped with + ``codecs.getreader(fp)(encoding)``, or simply decoded to a ``unicode`` + object and passed to ``loads()`` + + ``object_hook`` is an optional function that will be called with the + result of any object literal decode (a ``dict``). The return value of + ``object_hook`` will be used instead of the ``dict``. This feature + can be used to implement custom decoders (e.g. JSON-RPC class hinting). + + To use a custom ``JSONDecoder`` subclass, specify it with the ``cls`` + kwarg. + + """ + return loads(fp.read(), + encoding=encoding, cls=cls, object_hook=object_hook, + parse_float=parse_float, parse_int=parse_int, + parse_constant=parse_constant, **kw) + + +def loads(s, encoding=None, cls=None, object_hook=None, parse_float=None, + parse_int=None, parse_constant=None, **kw): + """Deserialize ``s`` (a ``str`` or ``unicode`` instance containing a JSON + document) to a Python object. + + If ``s`` is a ``str`` instance and is encoded with an ASCII based encoding + other than utf-8 (e.g. latin-1) then an appropriate ``encoding`` name + must be specified. Encodings that are not ASCII based (such as UCS-2) + are not allowed and should be decoded to ``unicode`` first. + + ``object_hook`` is an optional function that will be called with the + result of any object literal decode (a ``dict``). The return value of + ``object_hook`` will be used instead of the ``dict``. This feature + can be used to implement custom decoders (e.g. JSON-RPC class hinting). + + ``parse_float``, if specified, will be called with the string + of every JSON float to be decoded. By default this is equivalent to + float(num_str). This can be used to use another datatype or parser + for JSON floats (e.g. decimal.Decimal). + + ``parse_int``, if specified, will be called with the string + of every JSON int to be decoded. By default this is equivalent to + int(num_str). This can be used to use another datatype or parser + for JSON integers (e.g. float). + + ``parse_constant``, if specified, will be called with one of the + following strings: -Infinity, Infinity, NaN, null, true, false. + This can be used to raise an exception if invalid JSON numbers + are encountered. + + To use a custom ``JSONDecoder`` subclass, specify it with the ``cls`` + kwarg. 
+ + """ + if (cls is None and encoding is None and object_hook is None and + parse_int is None and parse_float is None and + parse_constant is None and not kw): + return _default_decoder.decode(s) + if cls is None: + cls = JSONDecoder + if object_hook is not None: + kw['object_hook'] = object_hook + if parse_float is not None: + kw['parse_float'] = parse_float + if parse_int is not None: + kw['parse_int'] = parse_int + if parse_constant is not None: + kw['parse_constant'] = parse_constant + return cls(encoding=encoding, **kw).decode(s) diff --git a/simplejson/__init__.pyc b/simplejson/__init__.pyc new file mode 100755 index 0000000..c6cccc8 Binary files /dev/null and b/simplejson/__init__.pyc differ diff --git a/simplejson/decoder.py b/simplejson/decoder.py new file mode 100755 index 0000000..d921ce0 --- /dev/null +++ b/simplejson/decoder.py @@ -0,0 +1,354 @@ +"""Implementation of JSONDecoder +""" +import re +import sys +import struct + +from simplejson.scanner import make_scanner +try: + from simplejson._speedups import scanstring as c_scanstring +except ImportError: + c_scanstring = None + +__all__ = ['JSONDecoder'] + +FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL + +def _floatconstants(): + _BYTES = '7FF80000000000007FF0000000000000'.decode('hex') + if sys.byteorder != 'big': + _BYTES = _BYTES[:8][::-1] + _BYTES[8:][::-1] + nan, inf = struct.unpack('dd', _BYTES) + return nan, inf, -inf + +NaN, PosInf, NegInf = _floatconstants() + + +def linecol(doc, pos): + lineno = doc.count('\n', 0, pos) + 1 + if lineno == 1: + colno = pos + else: + colno = pos - doc.rindex('\n', 0, pos) + return lineno, colno + + +def errmsg(msg, doc, pos, end=None): + # Note that this function is called from _speedups + lineno, colno = linecol(doc, pos) + if end is None: + #fmt = '{0}: line {1} column {2} (char {3})' + #return fmt.format(msg, lineno, colno, pos) + fmt = '%s: line %d column %d (char %d)' + return fmt % (msg, lineno, colno, pos) + endlineno, endcolno = linecol(doc, end) + #fmt = '{0}: line {1} column {2} - line {3} column {4} (char {5} - {6})' + #return fmt.format(msg, lineno, colno, endlineno, endcolno, pos, end) + fmt = '%s: line %d column %d - line %d column %d (char %d - %d)' + return fmt % (msg, lineno, colno, endlineno, endcolno, pos, end) + + +_CONSTANTS = { + '-Infinity': NegInf, + 'Infinity': PosInf, + 'NaN': NaN, +} + +STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS) +BACKSLASH = { + '"': u'"', '\\': u'\\', '/': u'/', + 'b': u'\b', 'f': u'\f', 'n': u'\n', 'r': u'\r', 't': u'\t', +} + +DEFAULT_ENCODING = "utf-8" + +def py_scanstring(s, end, encoding=None, strict=True, _b=BACKSLASH, _m=STRINGCHUNK.match): + """Scan the string s for a JSON string. End is the index of the + character in s after the quote that started the JSON string. + Unescapes all valid JSON string escape sequences and raises ValueError + on attempt to decode an invalid string. If strict is False then literal + control characters are allowed in the string. 
+ + Returns a tuple of the decoded string and the index of the character in s + after the end quote.""" + if encoding is None: + encoding = DEFAULT_ENCODING + chunks = [] + _append = chunks.append + begin = end - 1 + while 1: + chunk = _m(s, end) + if chunk is None: + raise ValueError( + errmsg("Unterminated string starting at", s, begin)) + end = chunk.end() + content, terminator = chunk.groups() + # Content is contains zero or more unescaped string characters + if content: + if not isinstance(content, unicode): + content = unicode(content, encoding) + _append(content) + # Terminator is the end of string, a literal control character, + # or a backslash denoting that an escape sequence follows + if terminator == '"': + break + elif terminator != '\\': + if strict: + msg = "Invalid control character %r at" % (terminator,) + #msg = "Invalid control character {0!r} at".format(terminator) + raise ValueError(errmsg(msg, s, end)) + else: + _append(terminator) + continue + try: + esc = s[end] + except IndexError: + raise ValueError( + errmsg("Unterminated string starting at", s, begin)) + # If not a unicode escape sequence, must be in the lookup table + if esc != 'u': + try: + char = _b[esc] + except KeyError: + msg = "Invalid \\escape: " + repr(esc) + raise ValueError(errmsg(msg, s, end)) + end += 1 + else: + # Unicode escape sequence + esc = s[end + 1:end + 5] + next_end = end + 5 + if len(esc) != 4: + msg = "Invalid \\uXXXX escape" + raise ValueError(errmsg(msg, s, end)) + uni = int(esc, 16) + # Check for surrogate pair on UCS-4 systems + if 0xd800 <= uni <= 0xdbff and sys.maxunicode > 65535: + msg = "Invalid \\uXXXX\\uXXXX surrogate pair" + if not s[end + 5:end + 7] == '\\u': + raise ValueError(errmsg(msg, s, end)) + esc2 = s[end + 7:end + 11] + if len(esc2) != 4: + raise ValueError(errmsg(msg, s, end)) + uni2 = int(esc2, 16) + uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00)) + next_end += 6 + char = unichr(uni) + end = next_end + # Append the unescaped character + _append(char) + return u''.join(chunks), end + + +# Use speedup if available +scanstring = c_scanstring or py_scanstring + +WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS) +WHITESPACE_STR = ' \t\n\r' + +def JSONObject((s, end), encoding, strict, scan_once, object_hook, _w=WHITESPACE.match, _ws=WHITESPACE_STR): + pairs = {} + # Use a slice to prevent IndexError from being raised, the following + # check will raise a more specific ValueError if the string is empty + nextchar = s[end:end + 1] + # Normally we expect nextchar == '"' + if nextchar != '"': + if nextchar in _ws: + end = _w(s, end).end() + nextchar = s[end:end + 1] + # Trivial empty object + if nextchar == '}': + return pairs, end + 1 + elif nextchar != '"': + raise ValueError(errmsg("Expecting property name", s, end)) + end += 1 + while True: + key, end = scanstring(s, end, encoding, strict) + + # To skip some function call overhead we optimize the fast paths where + # the JSON key separator is ": " or just ":". 
+ if s[end:end + 1] != ':': + end = _w(s, end).end() + if s[end:end + 1] != ':': + raise ValueError(errmsg("Expecting : delimiter", s, end)) + + end += 1 + + try: + if s[end] in _ws: + end += 1 + if s[end] in _ws: + end = _w(s, end + 1).end() + except IndexError: + pass + + try: + value, end = scan_once(s, end) + except StopIteration: + raise ValueError(errmsg("Expecting object", s, end)) + pairs[key] = value + + try: + nextchar = s[end] + if nextchar in _ws: + end = _w(s, end + 1).end() + nextchar = s[end] + except IndexError: + nextchar = '' + end += 1 + + if nextchar == '}': + break + elif nextchar != ',': + raise ValueError(errmsg("Expecting , delimiter", s, end - 1)) + + try: + nextchar = s[end] + if nextchar in _ws: + end += 1 + nextchar = s[end] + if nextchar in _ws: + end = _w(s, end + 1).end() + nextchar = s[end] + except IndexError: + nextchar = '' + + end += 1 + if nextchar != '"': + raise ValueError(errmsg("Expecting property name", s, end - 1)) + + if object_hook is not None: + pairs = object_hook(pairs) + return pairs, end + +def JSONArray((s, end), scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR): + values = [] + nextchar = s[end:end + 1] + if nextchar in _ws: + end = _w(s, end + 1).end() + nextchar = s[end:end + 1] + # Look-ahead for trivial empty array + if nextchar == ']': + return values, end + 1 + _append = values.append + while True: + try: + value, end = scan_once(s, end) + except StopIteration: + raise ValueError(errmsg("Expecting object", s, end)) + _append(value) + nextchar = s[end:end + 1] + if nextchar in _ws: + end = _w(s, end + 1).end() + nextchar = s[end:end + 1] + end += 1 + if nextchar == ']': + break + elif nextchar != ',': + raise ValueError(errmsg("Expecting , delimiter", s, end)) + + try: + if s[end] in _ws: + end += 1 + if s[end] in _ws: + end = _w(s, end + 1).end() + except IndexError: + pass + + return values, end + +class JSONDecoder(object): + """Simple JSON decoder + + Performs the following translations in decoding by default: + + +---------------+-------------------+ + | JSON | Python | + +===============+===================+ + | object | dict | + +---------------+-------------------+ + | array | list | + +---------------+-------------------+ + | string | unicode | + +---------------+-------------------+ + | number (int) | int, long | + +---------------+-------------------+ + | number (real) | float | + +---------------+-------------------+ + | true | True | + +---------------+-------------------+ + | false | False | + +---------------+-------------------+ + | null | None | + +---------------+-------------------+ + + It also understands ``NaN``, ``Infinity``, and ``-Infinity`` as + their corresponding ``float`` values, which is outside the JSON spec. + + """ + + def __init__(self, encoding=None, object_hook=None, parse_float=None, + parse_int=None, parse_constant=None, strict=True): + """``encoding`` determines the encoding used to interpret any ``str`` + objects decoded by this instance (utf-8 by default). It has no + effect when decoding ``unicode`` objects. + + Note that currently only encodings that are a superset of ASCII work, + strings of other encodings should be passed in as ``unicode``. + + ``object_hook``, if specified, will be called with the result + of every JSON object decoded and its return value will be used in + place of the given ``dict``. This can be used to provide custom + deserializations (e.g. to support JSON-RPC class hinting). 
+ + ``parse_float``, if specified, will be called with the string + of every JSON float to be decoded. By default this is equivalent to + float(num_str). This can be used to use another datatype or parser + for JSON floats (e.g. decimal.Decimal). + + ``parse_int``, if specified, will be called with the string + of every JSON int to be decoded. By default this is equivalent to + int(num_str). This can be used to use another datatype or parser + for JSON integers (e.g. float). + + ``parse_constant``, if specified, will be called with one of the + following strings: -Infinity, Infinity, NaN. + This can be used to raise an exception if invalid JSON numbers + are encountered. + + """ + self.encoding = encoding + self.object_hook = object_hook + self.parse_float = parse_float or float + self.parse_int = parse_int or int + self.parse_constant = parse_constant or _CONSTANTS.__getitem__ + self.strict = strict + self.parse_object = JSONObject + self.parse_array = JSONArray + self.parse_string = scanstring + self.scan_once = make_scanner(self) + + def decode(self, s, _w=WHITESPACE.match): + """Return the Python representation of ``s`` (a ``str`` or ``unicode`` + instance containing a JSON document) + + """ + obj, end = self.raw_decode(s, idx=_w(s, 0).end()) + end = _w(s, end).end() + if end != len(s): + raise ValueError(errmsg("Extra data", s, end, len(s))) + return obj + + def raw_decode(self, s, idx=0): + """Decode a JSON document from ``s`` (a ``str`` or ``unicode`` beginning + with a JSON document) and return a 2-tuple of the Python + representation and the index in ``s`` where the document ended. + + This can be used to decode a JSON document from a string that may + have extraneous data at the end. + + """ + try: + obj, end = self.scan_once(s, idx) + except StopIteration: + raise ValueError("No JSON object could be decoded") + return obj, end diff --git a/simplejson/decoder.pyc b/simplejson/decoder.pyc new file mode 100755 index 0000000..4937998 Binary files /dev/null and b/simplejson/decoder.pyc differ diff --git a/simplejson/encoder.py b/simplejson/encoder.py new file mode 100755 index 0000000..cf58290 --- /dev/null +++ b/simplejson/encoder.py @@ -0,0 +1,440 @@ +"""Implementation of JSONEncoder +""" +import re + +try: + from simplejson._speedups import encode_basestring_ascii as c_encode_basestring_ascii +except ImportError: + c_encode_basestring_ascii = None +try: + from simplejson._speedups import make_encoder as c_make_encoder +except ImportError: + c_make_encoder = None + +ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]') +ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])') +HAS_UTF8 = re.compile(r'[\x80-\xff]') +ESCAPE_DCT = { + '\\': '\\\\', + '"': '\\"', + '\b': '\\b', + '\f': '\\f', + '\n': '\\n', + '\r': '\\r', + '\t': '\\t', +} +for i in range(0x20): + #ESCAPE_DCT.setdefault(chr(i), '\\u{0:04x}'.format(i)) + ESCAPE_DCT.setdefault(chr(i), '\\u%04x' % (i,)) + +# Assume this produces an infinity on all machines (probably not guaranteed) +INFINITY = float('1e66666') +FLOAT_REPR = repr + +def encode_basestring(s): + """Return a JSON representation of a Python string + + """ + def replace(match): + return ESCAPE_DCT[match.group(0)] + return '"' + ESCAPE.sub(replace, s) + '"' + + +def py_encode_basestring_ascii(s): + """Return an ASCII-only JSON representation of a Python string + + """ + if isinstance(s, str) and HAS_UTF8.search(s) is not None: + s = s.decode('utf-8') + def replace(match): + s = match.group(0) + try: + return ESCAPE_DCT[s] + except KeyError: + n = ord(s) + if n < 0x10000: + 
#return '\\u{0:04x}'.format(n) + return '\\u%04x' % (n,) + else: + # surrogate pair + n -= 0x10000 + s1 = 0xd800 | ((n >> 10) & 0x3ff) + s2 = 0xdc00 | (n & 0x3ff) + #return '\\u{0:04x}\\u{1:04x}'.format(s1, s2) + return '\\u%04x\\u%04x' % (s1, s2) + return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"' + + +encode_basestring_ascii = c_encode_basestring_ascii or py_encode_basestring_ascii + +class JSONEncoder(object): + """Extensible JSON encoder for Python data structures. + + Supports the following objects and types by default: + + +-------------------+---------------+ + | Python | JSON | + +===================+===============+ + | dict | object | + +-------------------+---------------+ + | list, tuple | array | + +-------------------+---------------+ + | str, unicode | string | + +-------------------+---------------+ + | int, long, float | number | + +-------------------+---------------+ + | True | true | + +-------------------+---------------+ + | False | false | + +-------------------+---------------+ + | None | null | + +-------------------+---------------+ + + To extend this to recognize other objects, subclass and implement a + ``.default()`` method with another method that returns a serializable + object for ``o`` if possible, otherwise it should call the superclass + implementation (to raise ``TypeError``). + + """ + item_separator = ', ' + key_separator = ': ' + def __init__(self, skipkeys=False, ensure_ascii=True, + check_circular=True, allow_nan=True, sort_keys=False, + indent=None, separators=None, encoding='utf-8', default=None): + """Constructor for JSONEncoder, with sensible defaults. + + If skipkeys is false, then it is a TypeError to attempt + encoding of keys that are not str, int, long, float or None. If + skipkeys is True, such items are simply skipped. + + If ensure_ascii is true, the output is guaranteed to be str + objects with all incoming unicode characters escaped. If + ensure_ascii is false, the output will be unicode object. + + If check_circular is true, then lists, dicts, and custom encoded + objects will be checked for circular references during encoding to + prevent an infinite recursion (which would cause an OverflowError). + Otherwise, no such check takes place. + + If allow_nan is true, then NaN, Infinity, and -Infinity will be + encoded as such. This behavior is not JSON specification compliant, + but is consistent with most JavaScript based encoders and decoders. + Otherwise, it will be a ValueError to encode such floats. + + If sort_keys is true, then the output of dictionaries will be + sorted by key; this is useful for regression tests to ensure + that JSON serializations can be compared on a day-to-day basis. + + If indent is a non-negative integer, then JSON array + elements and object members will be pretty-printed with that + indent level. An indent level of 0 will only insert newlines. + None is the most compact representation. + + If specified, separators should be a (item_separator, key_separator) + tuple. The default is (', ', ': '). To get the most compact JSON + representation you should specify (',', ':') to eliminate whitespace. + + If specified, default is a function that gets called for objects + that can't otherwise be serialized. It should return a JSON encodable + version of the object or raise a ``TypeError``. + + If encoding is not None, then all input strings will be + transformed into unicode using that encoding prior to JSON-encoding. + The default is UTF-8. 
+ + """ + + self.skipkeys = skipkeys + self.ensure_ascii = ensure_ascii + self.check_circular = check_circular + self.allow_nan = allow_nan + self.sort_keys = sort_keys + self.indent = indent + if separators is not None: + self.item_separator, self.key_separator = separators + if default is not None: + self.default = default + self.encoding = encoding + + def default(self, o): + """Implement this method in a subclass such that it returns + a serializable object for ``o``, or calls the base implementation + (to raise a ``TypeError``). + + For example, to support arbitrary iterators, you could + implement default like this:: + + def default(self, o): + try: + iterable = iter(o) + except TypeError: + pass + else: + return list(iterable) + return JSONEncoder.default(self, o) + + """ + raise TypeError(repr(o) + " is not JSON serializable") + + def encode(self, o): + """Return a JSON string representation of a Python data structure. + + >>> JSONEncoder().encode({"foo": ["bar", "baz"]}) + '{"foo": ["bar", "baz"]}' + + """ + # This is for extremely simple cases and benchmarks. + if isinstance(o, basestring): + if isinstance(o, str): + _encoding = self.encoding + if (_encoding is not None + and not (_encoding == 'utf-8')): + o = o.decode(_encoding) + if self.ensure_ascii: + return encode_basestring_ascii(o) + else: + return encode_basestring(o) + # This doesn't pass the iterator directly to ''.join() because the + # exceptions aren't as detailed. The list call should be roughly + # equivalent to the PySequence_Fast that ''.join() would do. + chunks = self.iterencode(o, _one_shot=True) + if not isinstance(chunks, (list, tuple)): + chunks = list(chunks) + return ''.join(chunks) + + def iterencode(self, o, _one_shot=False): + """Encode the given object and yield each string + representation as available. + + For example:: + + for chunk in JSONEncoder().iterencode(bigobject): + mysocket.write(chunk) + + """ + if self.check_circular: + markers = {} + else: + markers = None + if self.ensure_ascii: + _encoder = encode_basestring_ascii + else: + _encoder = encode_basestring + if self.encoding != 'utf-8': + def _encoder(o, _orig_encoder=_encoder, _encoding=self.encoding): + if isinstance(o, str): + o = o.decode(_encoding) + return _orig_encoder(o) + + def floatstr(o, allow_nan=self.allow_nan, _repr=FLOAT_REPR, _inf=INFINITY, _neginf=-INFINITY): + # Check for specials. Note that this type of test is processor- and/or + # platform-specific, so do tests which don't depend on the internals. 
+ + if o != o: + text = 'NaN' + elif o == _inf: + text = 'Infinity' + elif o == _neginf: + text = '-Infinity' + else: + return _repr(o) + + if not allow_nan: + raise ValueError( + "Out of range float values are not JSON compliant: " + + repr(o)) + + return text + + + if _one_shot and c_make_encoder is not None and not self.indent and not self.sort_keys: + _iterencode = c_make_encoder( + markers, self.default, _encoder, self.indent, + self.key_separator, self.item_separator, self.sort_keys, + self.skipkeys, self.allow_nan) + else: + _iterencode = _make_iterencode( + markers, self.default, _encoder, self.indent, floatstr, + self.key_separator, self.item_separator, self.sort_keys, + self.skipkeys, _one_shot) + return _iterencode(o, 0) + +def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot, + ## HACK: hand-optimized bytecode; turn globals into locals + False=False, + True=True, + ValueError=ValueError, + basestring=basestring, + dict=dict, + float=float, + id=id, + int=int, + isinstance=isinstance, + list=list, + long=long, + str=str, + tuple=tuple, + ): + + def _iterencode_list(lst, _current_indent_level): + if not lst: + yield '[]' + return + if markers is not None: + markerid = id(lst) + if markerid in markers: + raise ValueError("Circular reference detected") + markers[markerid] = lst + buf = '[' + if _indent is not None: + _current_indent_level += 1 + newline_indent = '\n' + (' ' * (_indent * _current_indent_level)) + separator = _item_separator + newline_indent + buf += newline_indent + else: + newline_indent = None + separator = _item_separator + first = True + for value in lst: + if first: + first = False + else: + buf = separator + if isinstance(value, basestring): + yield buf + _encoder(value) + elif value is None: + yield buf + 'null' + elif value is True: + yield buf + 'true' + elif value is False: + yield buf + 'false' + elif isinstance(value, (int, long)): + yield buf + str(value) + elif isinstance(value, float): + yield buf + _floatstr(value) + else: + yield buf + if isinstance(value, (list, tuple)): + chunks = _iterencode_list(value, _current_indent_level) + elif isinstance(value, dict): + chunks = _iterencode_dict(value, _current_indent_level) + else: + chunks = _iterencode(value, _current_indent_level) + for chunk in chunks: + yield chunk + if newline_indent is not None: + _current_indent_level -= 1 + yield '\n' + (' ' * (_indent * _current_indent_level)) + yield ']' + if markers is not None: + del markers[markerid] + + def _iterencode_dict(dct, _current_indent_level): + if not dct: + yield '{}' + return + if markers is not None: + markerid = id(dct) + if markerid in markers: + raise ValueError("Circular reference detected") + markers[markerid] = dct + yield '{' + if _indent is not None: + _current_indent_level += 1 + newline_indent = '\n' + (' ' * (_indent * _current_indent_level)) + item_separator = _item_separator + newline_indent + yield newline_indent + else: + newline_indent = None + item_separator = _item_separator + first = True + if _sort_keys: + items = dct.items() + items.sort(key=lambda kv: kv[0]) + else: + items = dct.iteritems() + for key, value in items: + if isinstance(key, basestring): + pass + # JavaScript is weakly typed for these, so it makes sense to + # also allow them. Many encoders seem to do something like this. 
+ elif isinstance(key, float): + key = _floatstr(key) + elif key is True: + key = 'true' + elif key is False: + key = 'false' + elif key is None: + key = 'null' + elif isinstance(key, (int, long)): + key = str(key) + elif _skipkeys: + continue + else: + raise TypeError("key " + repr(key) + " is not a string") + if first: + first = False + else: + yield item_separator + yield _encoder(key) + yield _key_separator + if isinstance(value, basestring): + yield _encoder(value) + elif value is None: + yield 'null' + elif value is True: + yield 'true' + elif value is False: + yield 'false' + elif isinstance(value, (int, long)): + yield str(value) + elif isinstance(value, float): + yield _floatstr(value) + else: + if isinstance(value, (list, tuple)): + chunks = _iterencode_list(value, _current_indent_level) + elif isinstance(value, dict): + chunks = _iterencode_dict(value, _current_indent_level) + else: + chunks = _iterencode(value, _current_indent_level) + for chunk in chunks: + yield chunk + if newline_indent is not None: + _current_indent_level -= 1 + yield '\n' + (' ' * (_indent * _current_indent_level)) + yield '}' + if markers is not None: + del markers[markerid] + + def _iterencode(o, _current_indent_level): + if isinstance(o, basestring): + yield _encoder(o) + elif o is None: + yield 'null' + elif o is True: + yield 'true' + elif o is False: + yield 'false' + elif isinstance(o, (int, long)): + yield str(o) + elif isinstance(o, float): + yield _floatstr(o) + elif isinstance(o, (list, tuple)): + for chunk in _iterencode_list(o, _current_indent_level): + yield chunk + elif isinstance(o, dict): + for chunk in _iterencode_dict(o, _current_indent_level): + yield chunk + else: + if markers is not None: + markerid = id(o) + if markerid in markers: + raise ValueError("Circular reference detected") + markers[markerid] = o + o = _default(o) + for chunk in _iterencode(o, _current_indent_level): + yield chunk + if markers is not None: + del markers[markerid] + + return _iterencode diff --git a/simplejson/encoder.pyc b/simplejson/encoder.pyc new file mode 100755 index 0000000..c5eedc2 Binary files /dev/null and b/simplejson/encoder.pyc differ diff --git a/simplejson/scanner.py b/simplejson/scanner.py new file mode 100755 index 0000000..adbc6ec --- /dev/null +++ b/simplejson/scanner.py @@ -0,0 +1,65 @@ +"""JSON token scanner +""" +import re +try: + from simplejson._speedups import make_scanner as c_make_scanner +except ImportError: + c_make_scanner = None + +__all__ = ['make_scanner'] + +NUMBER_RE = re.compile( + r'(-?(?:0|[1-9]\d*))(\.\d+)?([eE][-+]?\d+)?', + (re.VERBOSE | re.MULTILINE | re.DOTALL)) + +def py_make_scanner(context): + parse_object = context.parse_object + parse_array = context.parse_array + parse_string = context.parse_string + match_number = NUMBER_RE.match + encoding = context.encoding + strict = context.strict + parse_float = context.parse_float + parse_int = context.parse_int + parse_constant = context.parse_constant + object_hook = context.object_hook + + def _scan_once(string, idx): + try: + nextchar = string[idx] + except IndexError: + raise StopIteration + + if nextchar == '"': + return parse_string(string, idx + 1, encoding, strict) + elif nextchar == '{': + return parse_object((string, idx + 1), encoding, strict, _scan_once, object_hook) + elif nextchar == '[': + return parse_array((string, idx + 1), _scan_once) + elif nextchar == 'n' and string[idx:idx + 4] == 'null': + return None, idx + 4 + elif nextchar == 't' and string[idx:idx + 4] == 'true': + return True, idx + 4 + elif 
nextchar == 'f' and string[idx:idx + 5] == 'false': + return False, idx + 5 + + m = match_number(string, idx) + if m is not None: + integer, frac, exp = m.groups() + if frac or exp: + res = parse_float(integer + (frac or '') + (exp or '')) + else: + res = parse_int(integer) + return res, m.end() + elif nextchar == 'N' and string[idx:idx + 3] == 'NaN': + return parse_constant('NaN'), idx + 3 + elif nextchar == 'I' and string[idx:idx + 8] == 'Infinity': + return parse_constant('Infinity'), idx + 8 + elif nextchar == '-' and string[idx:idx + 9] == '-Infinity': + return parse_constant('-Infinity'), idx + 9 + else: + raise StopIteration + + return _scan_once + +make_scanner = c_make_scanner or py_make_scanner diff --git a/simplejson/scanner.pyc b/simplejson/scanner.pyc new file mode 100755 index 0000000..013931e Binary files /dev/null and b/simplejson/scanner.pyc differ diff --git a/stats_buffer.py b/stats_buffer.py index 08dc11d..ee3f1a7 100755 --- a/stats_buffer.py +++ b/stats_buffer.py @@ -1,10 +1,10 @@ +nodes = {} +node_stats = {} buckets_summary = {} -stats_summary = { - #'vb_active_num' : {}, - #'vb_replica_num' : {}, -} +stats_summary = {} +bucket_info = {} buckets = {} stats = { "minute" : { @@ -18,28 +18,18 @@ 'vb_active_queue_drain' : {}, 'vb_replica_queue_drain' : {}, 'disk_write_queue' : {}, - }, + }, "hour" : { 'disk_write_queue' : {}, 'ep_cache_miss_rate' : {}, 'ep_tap_total_total_backlog_size' : { }, 'ep_oom_errors' : {}, 'ep_tmp_oom_errors' : {}, - 'ep_overhead' : {}, 'vb_active_num' : {}, 'vb_replica_num' : {}, "mem_used" : {}, - }, + }, "day" : { 'curr_items' : {}, }, -} - -def retrieveSummaryStats(bucket, cmd): - samples = buckets_summary[bucket]["op"]["samples"] - for sample_key in samples.keys(): - if cmd == sample_key: - total_samples = buckets_summary[bucket]["op"]["samplesCount"] - return (total_samples, samples[cmd]) - print "Unknown stats:", cmd - return (0, []) +} \ No newline at end of file diff --git a/usage.py b/usage.py index b2dd8dd..73d2c8b 100755 --- a/usage.py +++ b/usage.py @@ -83,7 +83,7 @@ def usage(error_msg=''): bucket-* OPTIONS: --bucket=BUCKETNAME bucket to act on - --bucket-type=TYPE memcached or membase + --bucket-type=TYPE memcached or couchbase --bucket-port=PORT supports ASCII protocol and is auth-less --bucket-password=PASSWORD standard port, exclusive with bucket-port --bucket-ramsize=RAMSIZEMB ram quota in MB @@ -132,10 +132,10 @@ def usage(error_msg=''): List buckets in a cluster: couchbase-cli bucket-list -c 192.168.0.1:8091 - Create a new dedicated port membase bucket: + Create a new dedicated port couchbase bucket: couchbase-cli bucket-create -c 192.168.0.1:8091 \\ --bucket=test_bucket \\ - --bucket-type=membase \\ + --bucket-type=couchbase \\ --bucket-port=11222 \\ --bucket-ramsize=200 \\ --bucket-replica=1 diff --git a/util.py b/util.py deleted file mode 100755 index 9553ba2..0000000 --- a/util.py +++ /dev/null @@ -1,88 +0,0 @@ -import json -from math import sqrt - -def linreg(X, Y): - """ - Summary - Linear regression of y = ax + b - Usage - real, real, real = linreg(list, list) - Returns coefficients to the regression line "y=ax+b" from x[] and y[], and R^2 Value - """ - if len(X) != len(Y): raise ValueError, 'unequal length' - N = len(X) - Sx = Sy = Sxx = Syy = Sxy = 0.0 - for x, y in map(None, X, Y): - Sx = Sx + x - Sy = Sy + y - Sxx = Sxx + x*x - Syy = Syy + y*y - Sxy = Sxy + x*y - det = Sxx * N - Sx * Sx - if det == 0: - return 0, 0 - else: - a, b = (Sxy * N - Sy * Sx)/det, (Sxx * Sy - Sx * Sxy)/det - return a, b - -def 
two_pass_variance(data): - n = 0 - sum1 = 0 - sum2 = 0 - - for x in data: - n = n + 1 - sum1 = sum1 + x - - mean = sum1/n - - for x in data: - sum2 = sum2 + (x - mean)*(x - mean) - if n <= 1: - return 0 - - variance = sum2/(n - 1) - return variance - -def pretty_float(number, precision=2): - return '%.*f' % (precision, number) - -def pretty_print(obj): - print json.dumps(obj, indent=4) - -def humanize_bytes(bytes, precision=1): - """Return a humanized string representation of a number of bytes. - - Assumes `from __future__ import division`. - - >>> humanize_bytes(1) - '1 byte' - >>> humanize_bytes(1024) - '1.0 kB' - >>> humanize_bytes(1024*123) - '123.0 kB' - >>> humanize_bytes(1024*12342) - '12.1 MB' - >>> humanize_bytes(1024*12342,2) - '12.05 MB' - >>> humanize_bytes(1024*1234,2) - '1.21 MB' - >>> humanize_bytes(1024*1234*1111,2) - '1.31 GB' - >>> humanize_bytes(1024*1234*1111,1) - '1.3 GB' - """ - abbrevs = ( - (1<<50L, 'PB'), - (1<<40L, 'TB'), - (1<<30L, 'GB'), - (1<<20L, 'MB'), - (1<<10L, 'kB'), - (1, 'bytes') - ) - if bytes == 1: - return '1 byte' - for factor, suffix in abbrevs: - if bytes >= factor: - break - return '%.*f %s' % (precision, bytes / factor, suffix) diff --git a/util_cli.py b/util_cli.py index 2680945..e47455b 100755 --- a/util_cli.py +++ b/util_cli.py @@ -1,6 +1,13 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- +import json +import math +import itertools + +BIG_VALUE = 2 ** 60 +SMALL_VALUE = - (2 ** 60) + def hostport(hoststring, default_port=8091): """ finds the host and port given a host:port string """ try: @@ -11,3 +18,83 @@ def hostport(hoststring, default_port=8091): port = default_port return (host, port) + +def time_label(s): + # -(2**64) -> '-inf' + # 2**64 -> 'inf' + # 0 -> '0' + # 4 -> '4us' + # 838384 -> '838ms' + # 8283852 -> '8s' + if s > BIG_VALUE: + return 'inf' + elif s < SMALL_VALUE: + return '-inf' + elif s == 0: + return '0' + product = 1 + sizes = (('us', 1), ('ms', 1000), ('s', 1000), ('m', 60)) + sizeMap = [] + for l,sz in sizes: + product = sz * product + sizeMap.insert(0, (l, product)) + lbl, factor = itertools.dropwhile(lambda x: x[1] > s, sizeMap).next() + return "%d %s" % (s / factor, lbl) + +def size_label(s): + if type(s) in (int, long, float, complex) : + if s == 0: + return "0" + sizes=['', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB'] + e = math.floor(math.log(abs(s), 1024)) + suffix = sizes[int(e)] + return "%d %s" % (s/(1024 ** math.floor(e)), suffix) + else: + return s + +def linreg(X, Y): + """ + Summary + Linear regression of y = ax + b + Usage + real, real, real = linreg(list, list) + Returns coefficients to the regression line "y=ax+b" from x[] and y[], and R^2 Value + """ + if len(X) != len(Y): raise ValueError, 'unequal length' + N = len(X) + Sx = Sy = Sxx = Syy = Sxy = 0.0 + for x, y in map(None, X, Y): + Sx = Sx + x + Sy = Sy + y + Sxx = Sxx + x*x + Syy = Syy + y*y + Sxy = Sxy + x*y + det = Sxx * N - Sx * Sx + if det == 0: + return 0, 0 + else: + a, b = (Sxy * N - Sy * Sx)/det, (Sxx * Sy - Sx * Sxy)/det + return a, b + +def two_pass_variance(data): + n = 0 + sum1 = 0 + sum2 = 0 + + for x in data: + n = n + 1 + sum1 = sum1 + x + + mean = sum1/n + for x in data: + sum2 = sum2 + (x - mean)*(x - mean) + if n <= 1: + return 0 + variance = sum2/(n - 1) + return variance + +def pretty_float(number, precision=2): + return '%.*f' % (precision, number) + +def pretty_print(obj): + return json.dumps(obj, indent=4, sort_keys=True)
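A quick, illustrative check of the formatting helpers now living in util_cli.py; the outputs shown in the comments are what the code above is expected to produce, not captured from an actual run.

# Hypothetical usage of the util_cli helpers added/moved by this patch (Python 2).
import util_cli as util

print util.size_label(3000000)    # "2 MB"   -- bytes scaled to the nearest power-of-1024 unit
print util.time_label(838384)     # "838 ms" -- microseconds scaled to us/ms/s/m
print util.pretty_float(3.14159)  # "3.14"   -- default precision of 2
print util.pretty_print({"b": 1, "a": 2})   # indented, key-sorted JSON text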