From f312ee316acd20201edd5690c4d896105efefc86 Mon Sep 17 00:00:00 2001 From: "Matthew F. Dennis" Date: Sun, 1 Jul 2012 22:02:18 -0500 Subject: [PATCH 1/2] weblauncher init --- README.md | 10 ++ cassandralauncher/cassandralauncher.py | 67 ++++---- ...cher.conf => clusterlauncher.conf.example} | 0 cassandralauncher/common.py | 15 +- cassandralauncher/demoservice.py | 42 +++++ cassandralauncher/ec2.py | 3 +- demoservice/README.md | 13 -- demoservice/demoservice.py | 74 --------- scripts/demoservice | 4 + weblauncher/css/weblauncher.css | 72 +++++++++ weblauncher/genshi/index.html | 42 +++++ weblauncher/genshi/ping.html | 30 ++++ weblauncher/test.py | 14 ++ weblauncher/weblauncher.py | 148 ++++++++++++++++++ 14 files changed, 406 insertions(+), 128 deletions(-) rename cassandralauncher/{clusterlauncher.conf => clusterlauncher.conf.example} (100%) create mode 100755 cassandralauncher/demoservice.py delete mode 100644 demoservice/README.md delete mode 100755 demoservice/demoservice.py create mode 100755 scripts/demoservice create mode 100644 weblauncher/css/weblauncher.css create mode 100644 weblauncher/genshi/index.html create mode 100644 weblauncher/genshi/ping.html create mode 100755 weblauncher/test.py create mode 100755 weblauncher/weblauncher.py diff --git a/README.md b/README.md index 9208b39..2bb2c51 100644 --- a/README.md +++ b/README.md @@ -72,3 +72,13 @@ My cluster is not done launching one (or several) of my nodes. What did I do wro Nothing. EC2 and Rackspace do this from time to time. You can either continue on to do basic testing, or terminate this cluster and try again. Using EC2 and Rackspace off it's peak hours helps in this scenario, in general. + +## weblauncher + +the weblauncher directory contains a trivial web front end for the launcher. weblauncher/weblauncher.py --help will give details. If a results_directory is specified, all cluster launches are recorded in a format that the demoservice can pick up to automatically terminate the cluster after a specified TTL. + +## demoservice + +the scripts/demoservice can be copied to a crond job to be run every few minutes. If you specifiy the same results directory (defaults to /tmp/wl) used by the weblauncher (or from the command line) the demoservice script will terminate clusters whose TTL has expired. + + diff --git a/cassandralauncher/cassandralauncher.py b/cassandralauncher/cassandralauncher.py index 9b1356d..37a666e 100755 --- a/cassandralauncher/cassandralauncher.py +++ b/cassandralauncher/cassandralauncher.py @@ -9,6 +9,7 @@ import tempfile import time import urllib2 +import uuid import ec2 import common @@ -359,26 +360,6 @@ def install_opsc_agents(user): ################################# -################################# -# Log code for private stats - -def running_log(reservation, demotime): - """Logs usage data for personal stats.""" - - loginfo = [ - 'Running' if config.get('Cassandra', 'demo') == 'True' else 'Ignore', - config.get('Shared', 'handle'), - str(demotime), - str(time.time()), - str(reservation.id) - ] - logline = ",".join(loginfo) + '\n' - - with open('running.log', 'a') as f: - f.write(logline) - -################################# - ################################# # Argument parsing @@ -466,8 +447,8 @@ def running_log(reservation, demotime): }, 'demotime': { 'Section': 'Cassandra', - 'Prompt': 'Time (in hours) for the cluster to live', - 'Help': 'For use with DemoService' + 'Prompt': 'Time (in seconds) for the cluster to live', + 'Help': 'For use with demoservice' }, 'instance_type': { 'Section': 'EC2', @@ -490,7 +471,18 @@ def running_log(reservation, demotime): 'Prompt': 'QA', 'Action': 'store_true', 'Help': 'Upload QA scripts.' + }, + 'result_directory':{ + 'Section': 'CLI', + 'Prompt': 'NoPrompts', + 'Help': 'log results of launch to a file in the specified directory' + }, + 'launch_id':{ + 'Section': 'CLI', + 'Prompt': 'NoPrompts', + 'Help': 'use specified id in combination with result_directory' } + } def type_checker(option, read_option, type_check, passive=False): @@ -670,11 +662,11 @@ def main(): print # Included for the experimental DemoService that requires demoservice.py to always be running - demotime = -1 + demotime = 0 if config.get('Cassandra', 'demo') == 'True': print "Your configuration file is set to launch a demo cluster for a specified time." demotime = check_cascading_options('demotime', float) - print "If the demo service is running, this cluster will live for %s hour(s)." % demotime + print "If demosercie is running, this cluster will live for %s seconds(s)." % demotime print if check_cascading_options('installopscenter', optional=True) == 'False': @@ -709,12 +701,33 @@ def main(): private_ips, public_ips, reservation = clusterinfo # Log clusterinfo - running_log(reservation, demotime) + if check_cascading_options('result_directory', optional=True): + result_directory = check_cascading_options('result_directory') + user_data += ' --result_directory %s' % result_directory + + launch_id = str(uuid.uuid4()) + if check_cascading_options('launch_id', optional=True): + launch_id = check_cascading_options('launch_id') + user_data += ' --launch_id %s' % launch_id + + if not os.path.exists(result_directory): + os.mkdir(result_directory) + + tmpfile = os.path.join(result_directory, "%s.tmp" % launch_id) + dstfile = os.path.join(result_directory, "%s.results" % launch_id) + with open(tmpfile, 'w') as f: + f.write("reservation_id=%s\n" % reservation.id) + f.write("ttl_seconds=%s\n" % demotime) + f.write("launch_time=%s\n" % time.time()) + if check_cascading_options('installopscenter', optional=True) != 'False': + f.write("opsc_ip=%s\n" % public_ips[0]) + f.write("opsc_port=%s\n" % opscenterinterface) + os.rename(tmpfile, dstfile) if check_cascading_options('installopscenter', optional=True) != 'False': # Print OpsCenter url - print "OpsCenter Address:" - print "http://%s:%s" % (public_ips[0], opscenterinterface) + url = "http://%s:%s" % (public_ips[0], opscenterinterface) + print "OpsCenter URL: %s" % url print "Note: You must wait 60 seconds after Cassandra becomes active to access OpsCenter." print diff --git a/cassandralauncher/clusterlauncher.conf b/cassandralauncher/clusterlauncher.conf.example similarity index 100% rename from cassandralauncher/clusterlauncher.conf rename to cassandralauncher/clusterlauncher.conf.example diff --git a/cassandralauncher/common.py b/cassandralauncher/common.py index 7433040..8e5fbba 100644 --- a/cassandralauncher/common.py +++ b/cassandralauncher/common.py @@ -28,19 +28,8 @@ def header(): configfile = os.path.join(os.path.expanduser('~'), '.clusterlauncher.conf') if not os.path.exists(configfile): # Look for the configuration file in /etc/clusterlauncher - defaultfile = os.path.join('/etc', 'cassandralauncher', 'clusterlauncher.conf') - configfile = os.path.join(os.path.expanduser('~'), '.clusterlauncher.conf') - shutil.copyfile(defaultfile, configfile) - - # Exit the program to alert the user that the conf file must be properly set with authentications - # before continuing - sys.stderr.write("A copy of the default configuration file located at:\n") - sys.stderr.write(' %s\n' % defaultfile) - sys.stderr.write("was now copied to:\n") - sys.stderr.write(' %s\n' % configfile) - sys.stderr.write("Please ensure that all default settings are correct and filled in before continuing.\n") - sys.exit(1) - + configfile = os.path.join('/etc', 'cassandralauncher', 'clusterlauncher.conf') + sys.stdout.write("currently using %s for config, copy to ~/.clusterlauncher.conf to override\n") if not os.path.exists(configfile): # Exit since we still have not found the configuration file sys.stderr.write("Please setup your authentication configurations. Order of importance:\n") diff --git a/cassandralauncher/demoservice.py b/cassandralauncher/demoservice.py new file mode 100755 index 0000000..74fffa2 --- /dev/null +++ b/cassandralauncher/demoservice.py @@ -0,0 +1,42 @@ +#!/usr/bin/env python + +import os +import sys +import imp +import boto +import time +from glob import glob + +def checkAndCloseExpiredInstances(results_directory): + #common = imp.load_module('common', *imp.find_module('common', [os.path.join(os.path.dirname(__file__), '..', 'cassandralauncher')])) + import common + config, KEY_PAIR, PEM_HOME, HOST_FILE, PEM_FILE = common.header() + conn = boto.connect_ec2(config.get('EC2', 'aws_access_key_id'), config.get('EC2', 'aws_secret_access_key')) + reservations = dict(map(lambda x: (x.id, x), conn.get_all_instances())) + result_files = glob(os.path.join(results_directory, '*.results')) + + print 'checking %s' % result_files + for result_file in result_files: + results = None + with open(result_file, 'r') as f: + results = dict(map(lambda l: l.strip().split('='), f.readlines())) + + if time.time() - float(results['launch_time']) > float(results['ttl_seconds']): + res = reservations.get(results['reservation_id']) + if res != None: + print 'killing %s' % res.id + instances = [i.id for i in res.instances if i.state != 'terminated'] + if len(instances) > 0: + conn.terminate_instances(instances) + else: + os.rename(result_file, '%s.%s' % (result_file, 'done')) + +def cli_main(): + from optparse import OptionParser + parser = OptionParser() + parser.add_option('-r', '--results_directory', dest='results_directory', help='direcotry to search for running clusters', metavar='RESULTS_DIRECTORY', default='/tmp/wl') + (options, args) = parser.parse_args() + checkAndCloseExpiredInstances(options.results_directory) + +if __name__ == '__main__': + cli_main() diff --git a/cassandralauncher/ec2.py b/cassandralauncher/ec2.py index 0837031..4358787 100644 --- a/cassandralauncher/ec2.py +++ b/cassandralauncher/ec2.py @@ -170,7 +170,7 @@ def create_cluster(aws_access_key_id, aws_secret_access_key, reservation_size, i try: # Create the EC2 cluster - print 'Launching cluster...' + print 'Launching cluster ...' start_time = time.time() try: reservation = conn.run_instances(image, @@ -180,6 +180,7 @@ def create_cluster(aws_access_key_id, aws_secret_access_key, reservation_size, i key_name=key_pair, placement=placement, security_groups=['DataStax'], user_data=user_data) + except boto.exception.EC2ResponseError: print_boto_error() diff --git a/demoservice/README.md b/demoservice/README.md deleted file mode 100644 index f0ba525..0000000 --- a/demoservice/README.md +++ /dev/null @@ -1,13 +0,0 @@ -# Cassandra Launcher Demo Service - -Allows for a service to run over ./running.log from the instantiation directory to ensure that clusters running too long are terminated. - -For use in conjuncture with demo=True in clusterlauncher.conf under [Cassandra]. - -We do _not_ guarantee any successful operations with this program and should not be held liable for over-running clusters. This is merely an experimental tool which works well in our enviornment. Nothing more. - -## Setup - -Run: - - nohup ./demoservice.py & diff --git a/demoservice/demoservice.py b/demoservice/demoservice.py deleted file mode 100755 index 2f4bfda..0000000 --- a/demoservice/demoservice.py +++ /dev/null @@ -1,74 +0,0 @@ -#!/usr/bin/env python - -import boto -import common -import time -from decimal import Decimal, ROUND_UP - -config, KEY_PAIR, PEM_HOME, HOST_FILE, PEM_FILE = common.header() -sleepTime = 1 * 60 # 1 minute -conn = None -runningFile = 'running.log' - - -def updateLog(oldline, newline): - with open(runningFile, 'r') as f: - runninglog = f.read() - - runninglog = runninglog.replace(oldline, newline) - - with open(runningFile, 'w') as f: - f.write(runninglog) - -def getReservationByID(aws_access_key_id, aws_secret_access_key, reservationId): - global conn - if not conn: - conn = boto.connect_ec2(aws_access_key_id, aws_secret_access_key) - reservations = conn.get_all_instances() - - for reservation in reservations: - if reservationId == reservation.id: - return reservation - -def checkAndCloseExpiredInstances(): - try: - with open(runningFile, 'r') as f: - runninglog = f.read().strip().split('\n') - except: - # Wait until runninglog is created - return - - # Wait until the log is populated - if not len(runninglog): - return - - for line in runninglog: - status, user, ttl, birthstamp, reservationId = line.split(',') - ttl = float(Decimal(ttl).quantize(Decimal('1.'), rounding=ROUND_UP)) - ttl = ttl * 60 * 60 - (3 * 60) - birthstamp = float(birthstamp) - - if status == 'Running': - if time.time() > birthstamp + ttl: - # Find and create a list for all instances under this reservation - reservation = getReservationByID(config.get('EC2', 'aws_access_key_id'), - config.get('EC2', 'aws_secret_access_key'), reservationId) - instanceList = [] - for instance in reservation.instances: - instanceList.append(instance.id) - - # Terminate these instances - conn.terminate_instances(instanceList) - - # Update log from running a termination on these instances - updateLog(line, line.replace('Running', 'Stopped')) -try: - while True: - checkAndCloseExpiredInstances() - time.sleep(sleepTime) -except: - print "DataStax Cluster Launcher Demo Service exiting..." - import traceback - traceback.print_exc() - with open('error.log', 'w') as f: - f.write(traceback.format_exc()) diff --git a/scripts/demoservice b/scripts/demoservice new file mode 100755 index 0000000..ad00dd6 --- /dev/null +++ b/scripts/demoservice @@ -0,0 +1,4 @@ +#!/usr/bin/env bash + +set -e +/path/to/demoservice.py -r /tmp/wl diff --git a/weblauncher/css/weblauncher.css b/weblauncher/css/weblauncher.css new file mode 100644 index 0000000..477e1e4 --- /dev/null +++ b/weblauncher/css/weblauncher.css @@ -0,0 +1,72 @@ +body{ + font-family:"Lucida Grande", "Lucida Sans Unicode", Verdana, Arial, Helvetica, sans-serif; + font-size:12px; +} + +p, h1, form, button{border:0; margin:0; padding:0;} + +.spacer{clear:both; height:1px;} + +.form{ + margin:0 auto; + width:400px; + padding:14px; +} + +#stylized{ + border:solid 2px #b7ddf2; + background:#ebf4fb; +} + +#stylized h1 { + font-size:14px; + font-weight:bold; + margin-bottom:8px; +} + +#stylized p{ + font-size:11px; + color:#666666; + margin-bottom:20px; + border-bottom:solid 1px #b7ddf2; + padding-bottom:10px; +} + +#stylized label{ + display:block; + font-weight:bold; + text-align:right; + width:140px; + float:left; +} + +#stylized .small{ + color:#666666; + display:block; + font-size:11px; + font-weight:normal; + text-align:right; + width:140px; +} + +#stylized input{ + float:left; + font-size:12px; + padding:4px 2px; + border:solid 1px #aacfe4; + width:200px; + margin:2px 0 20px 10px; +} + +#stylized button{ + clear:both; + margin-left:150px; + width:125px; + height:31px; + background:#666666; + text-align:center; + line-height:31px; + color:#FFFFFF; + font-size:11px; + font-weight:bold; +} diff --git a/weblauncher/genshi/index.html b/weblauncher/genshi/index.html new file mode 100644 index 0000000..b785e49 --- /dev/null +++ b/weblauncher/genshi/index.html @@ -0,0 +1,42 @@ + + + + $title + + + + +
+
+

DSE Launcher

+ + + + + + + + + + + + + + +
+ +
+
+ + + + diff --git a/weblauncher/genshi/ping.html b/weblauncher/genshi/ping.html new file mode 100644 index 0000000..db7ffaa --- /dev/null +++ b/weblauncher/genshi/ping.html @@ -0,0 +1,30 @@ + + + $title + + + + + +
+
+

stdout ($asof)

+
+
+$stdout +
+
+ +
+
+

stderr ($asof)

+
+
+$stderr +
+
+ + + diff --git a/weblauncher/test.py b/weblauncher/test.py new file mode 100755 index 0000000..d7a8608 --- /dev/null +++ b/weblauncher/test.py @@ -0,0 +1,14 @@ +#!/usr/bin/env python + +import sys +import time + +print "starting ..." + +for i in range(20): + print "sleeping %s ... " % i + #sys.stdout.flush() + #sys.stderr.flush() + time.sleep(1) + +print "done" diff --git a/weblauncher/weblauncher.py b/weblauncher/weblauncher.py new file mode 100755 index 0000000..1bccb6c --- /dev/null +++ b/weblauncher/weblauncher.py @@ -0,0 +1,148 @@ +#!/usr/bin/env python + +import os +import uuid +import subprocess +import cherrypy +from datetime import datetime +from genshi.template import TemplateLoader + +loader = TemplateLoader( + os.path.join(os.path.dirname(__file__), 'genshi'), + auto_reload=True +) + +class Launcher(object): + def __init__(self, result_directory): + self.result_directory = result_directory + + def index(self): + return loader.load('index.html').generate(title='weblauncher').render('html', doctype='html') + + def launch(self, c_nodes, h_nodes, s_nodes, ttl): + req_uuid = str(uuid.uuid4()) + cherrypy.log('launching with c_nodes=%s, h_nodes=%s, s_nodes=%s' % (c_nodes, h_nodes, s_nodes)) + stdout = open(os.path.join(self.result_directory, '%s.stdout' % req_uuid), 'w') + stderr = open(os.path.join(self.result_directory, '%s.stderr' % req_uuid), 'w') + cmd = [ + 'python', + '-u', + os.path.join(os.path.abspath(os.path.join(os.path.dirname(__file__))), '..', 'cassandralauncher', 'cassandralauncher.py'), + '--analyticsnodes=%s' % h_nodes, + '--searchnodes=%s' % s_nodes, + '--totalnodes=%s' % (int(s_nodes) + int(h_nodes) + int(c_nodes)), + '--clustername=temporary_cluster', + '--demotime=%s' % ttl, + '--noprompts', + '--opscenterinterface=80', + '--result_directory=%s' % self.result_directory, + '--launch_id=%s' % req_uuid, + '--handle=weblauncher' + ] + cherrypy.log('running %s' % ' '.join(cmd)) + #child = subprocess.Popen(['python', '-u', '/home/mdennis/mdev/cassandralauncher/weblauncher/test.py'], stdout=stdout, stderr=stderr, bufsize=0) + child = subprocess.Popen(cmd, stderr=stderr, stdout=stdout, bufsize=0) + raise cherrypy.HTTPRedirect("/ping/%s" % req_uuid) + + def ping(self, uuid): + #obviously not secure + result_path = os.path.join(self.result_directory, '%s.results' % uuid) + with open(os.path.join(self.result_directory, '%s.stdout' % uuid), 'r') as f: + stdout = f.readlines()[-10:] + with open(os.path.join(self.result_directory, '%s.stderr' % uuid), 'r') as f: + stderr = f.readlines()[-10:] + if os.path.exists(result_path): + with open(result_path, 'r') as f: + results = dict(map(lambda l: l.strip().split('='), f)) + opsc_ip = results['opsc_ip'] + opsc_port = results['opsc_port'] + if _opsc_up(opsc_ip, opsc_port): + raise cherrypy.HTTPRedirect("http://%s:%s" % (opsc_ip, opsc_port)) + + asof = datetime.now().ctime() + return loader.load('ping.html').generate(title=uuid, stdout=stdout, stderr=stderr, asof=asof).render('html', doctype='html') + +def _opsc_up(ip, port): + import socket + import httplib + try: + c = httplib.HTTPConnection(ip, port, timeout=3) + c.request("HEAD","/opscenter/index.html") + r = c.getresponse() + return r.status == httplib.OK + except socket.error, se: + #cherrypy.log('error (%s, %s) connecting to opsc at %s on port %s' % (type(se), se.message, ip, port)) + return False + +def init_logging(): + from logging import handlers + + log = cherrypy.log + log.error_file = "" + log.access_file = "" + maxBytes = 10*1024*1024 + backupCount = 1 + + h = handlers.RotatingFileHandler('/var/log/wl/error.log', 'a', maxBytes, backupCount) + log.error_log.addHandler(h) + + h = handlers.RotatingFileHandler('/var/log/wl/access.log', 'a', maxBytes, backupCount) + log.access_log.addHandler(h) + + cherrypy.config.update({ + 'log.screen' : False, + }) + +def load_users(password_file): + with open(password_file) as f: + return dict(map(lambda l: l.strip().split(':'), f)) + +def run(result_directory, password_file): + if not os.path.exists(result_directory): + os.mkdir(result_directory) + + launcher = Launcher(result_directory) + + d = cherrypy.dispatch.RoutesDispatcher() + d.connect(name='root', route='/', controller=launcher, action='index') + d.connect(name='launch', route='/launch', controller=launcher, action='launch') + d.connect(name='ping', route='/ping/:uuid', controller=launcher, action='ping') + + conf = { + '/' : { + 'request.dispatch' : d, + 'tools.staticdir.root' : os.getcwd(), + 'tools.encode.on': True, + 'tools.encode.encoding': 'utf-8', + 'tools.decode.on': True, + 'tools.trailing_slash.on': True, + 'tools.basic_auth.on' : True if password_file != None else False, + 'tools.basic_auth.realm' : 'riptano' if password_file != None else False, + 'tools.basic_auth.users' : load_users(password_file) if password_file != None else None + }, + '/css' : { + 'tools.staticdir.on' : True, + 'tools.staticdir.dir' : os.path.join(os.path.dirname(__file__), 'css') + }, + '/js' : { + 'tools.staticdir.on' : True, + 'tools.staticdir.dir' : os.path.join(os.path.dirname(__file__), 'js') + }, + + } + + init_logging() + cherrypy.tree.mount(None, "/", config=conf) + cherrypy.quickstart(None, config=conf) + +def main(): + from optparse import OptionParser + parser = OptionParser() + parser.add_option('-r', '--result_directory', dest='result_directory', help='direcotry to search for running clusters', metavar='RESULT_DIRECTORY', default='/tmp/wl') + parser.add_option('-p', '--password_file', dest='password_file', help='list of users, one per line, in the form: username:md5(password)', metavar='PASSWORD_FILE', default=None) + (options, args) = parser.parse_args() + run(options.result_directory, options.password_file) + +if __name__ == '__main__': + main() + From 9ddd1f4c3ed023ec28890159c46af77299c7dd62 Mon Sep 17 00:00:00 2001 From: "Matthew F. Dennis" Date: Mon, 2 Jul 2012 19:10:42 -0500 Subject: [PATCH 2/2] socket host --- weblauncher/weblauncher.py | 1 + 1 file changed, 1 insertion(+) diff --git a/weblauncher/weblauncher.py b/weblauncher/weblauncher.py index 1bccb6c..525a64a 100755 --- a/weblauncher/weblauncher.py +++ b/weblauncher/weblauncher.py @@ -132,6 +132,7 @@ def run(result_directory, password_file): } init_logging() + cherrypy.server.socket_host = '0.0.0.0' cherrypy.tree.mount(None, "/", config=conf) cherrypy.quickstart(None, config=conf)