Skip to content
Permalink
Browse files

New config: store config on master

Rather than storing the config in security group tags, the config is stored
on master:/etc/starcluster. This makes the config easier to update.
  • Loading branch information...
FinchPowers committed Jul 17, 2014
1 parent 681cb9e commit 4bc1938e6d7829b78295f065300e0cfbe04503f0
@@ -202,7 +202,6 @@ This branch intends to be a mirror of https://github.com/jtriley/StarCluster dev

* Added commands
- printconfig - To print your existing cluster configuration
- reloadconfig - To reload the core and plugins configuration of a ''running'' cluster.
- cleancluster
+ Will clean Open Grid Engine from dead nodes. (Eg.: Dead spot instances)
+ Manages "impaired" nodes. (Reboots reserved instances, kills spot instances.)
@@ -224,6 +223,14 @@ This branch intends to be a mirror of https://github.com/jtriley/StarCluster dev
be stopped. Defaults to false.
* Improved node cleanup - Merged `robbyt`_ `pull request`_ which makes node cleanup faster.
* Improved node addition - Removed some remote read/writes (very slow) and replaced them with get/edit/push.
* Added a mode where the cluster configuration is written to master:/etc/starcluster. To activate it, simply add the
"--config-on-master" flag to the start command. Clusters in this mode have the following pros and cons.

- Pros
+ Makes it easy to update the config by editing the file.
+ No more obscure config compressed/hashed in metadata/tags and other "obscure" places.
- Cons
+ No longer possible to start a stopped cluster via StarCluster. (This is technically fixable, but not planned at the moment.)

.. _robbyt: https://github.com/robbyt
.. _pull request: https://github.com/jtriley/StarCluster/pull/123
@@ -22,6 +22,7 @@
import pprint
import warnings
import datetime
import json

import iptools

@@ -63,6 +64,11 @@ def get_cluster(self, cluster_name, group=None, load_receipt=True,
group = self.ec2.get_security_group(clname)
cl = Cluster(ec2_conn=self.ec2, cluster_tag=cltag,
cluster_group=group)

# Useful when config is on master node
cl.key_location = \
self.cfg.get_key(cl.master_node.key_name).get('key_location')

if load_receipt:
cl.load_receipt(load_plugins=load_plugins,
load_volumes=load_volumes)
@@ -420,6 +426,7 @@ def __init__(self,
subnet_id=None,
public_ips=None,
plugins_order=[],
config_on_master=False,
**kwargs):
# update class vars with given vars
_vars = locals().copy()
@@ -455,7 +462,6 @@ def __init__(self,
self._nodes = []
self._pool = None
self._progress_bar = None
self._config_fields = None
self.__default_plugin = None
self.__sge_plugin = None

@@ -588,10 +594,12 @@ def __str__(self):
return pprint.pformat(cfg)

def print_config(self):
config = {}
for key in self._config_fields:
config[key] = getattr(self, key)
pprint.pprint(config)
core_settings, user_settings = self._get_settings()
print "Core settings"
print json.dumps(core_settings, indent=1, sort_keys=True)
print
print "User settings"
print json.dumps(user_settings, indent=1, sort_keys=True)

def load_receipt(self, load_plugins=True, load_volumes=True):
"""
@@ -608,8 +616,9 @@ def load_receipt(self, load_plugins=True, load_volumes=True):
msg = user_msgs.version_mismatch % d
sep = '*' * 60
log.warn('\n'.join([sep, msg, sep]), extra={'__textwrap__': 1})
self._config_fields = self._get_settings_from_tags()
self.update(self._config_fields)
self.update(self._get_settings_from_tags())
if self.config_on_master:
self._load_config_from_master()
if not (load_plugins or load_volumes):
return True
try:
@@ -656,26 +665,6 @@ def __getstate__(self):
def _security_group(self):
return static.SECURITY_GROUP_TEMPLATE % self.cluster_tag

def save_core_settings(self, sg):
    """
    Store the core cluster settings on the security group.

    The settings are collected from this cluster's attributes, passed
    through utils.dump_compress_encode (with use_json=True) and attached
    to ``sg`` as a single tag named static.CORE_TAG.
    """
    settings = {
        'cluster_size': self.cluster_size,
        'master_image_id': self.master_image_id,
        'master_instance_type': self.master_instance_type,
        'node_image_id': self.node_image_id,
        'node_instance_type': self.node_instance_type,
        'disable_queue': self.disable_queue,
        'disable_cloudinit': self.disable_cloudinit,
        'plugins_order': self.plugins_order,
    }
    encoded = utils.dump_compress_encode(settings, use_json=True)
    sg.add_tag(static.CORE_TAG, encoded)

def save_user_settings(self, sg):
    """
    Store the user-related cluster settings on the security group.

    The cluster user, shell, key name and spot bid are passed through
    utils.dump_compress_encode (with use_json=True) and attached to ``sg``
    as a single tag named static.USER_TAG.
    """
    settings = {
        'cluster_user': self.cluster_user,
        'cluster_shell': self.cluster_shell,
        'keyname': self.keyname,
        'spot_bid': self.spot_bid,
    }
    encoded = utils.dump_compress_encode(settings, use_json=True)
    sg.add_tag(static.USER_TAG, encoded)

@property
def subnet(self):
if not self._subnet and self.subnet_id:
@@ -730,9 +719,10 @@ def _add_chunked_tags(self, sg, chunks, base_tag_name):
if tag not in sg.tags:
sg.add_tag(tag, chunk)

def _add_tags_to_sg(self, sg):
if static.VERSION_TAG not in sg.tags:
sg.add_tag(static.VERSION_TAG, str(static.VERSION))
def _get_settings(self):
"""
The settings to save
"""
core_settings = dict(cluster_size=self.cluster_size,
master_image_id=self.master_image_id,
master_instance_type=self.master_instance_type,
@@ -743,16 +733,30 @@ def _add_tags_to_sg(self, sg):
subnet_id=self.subnet_id,
public_ips=self.public_ips,
disable_queue=self.disable_queue,
disable_cloudinit=self.disable_cloudinit)
disable_cloudinit=self.disable_cloudinit,
plugins_order=self.plugins_order)
user_settings = dict(cluster_user=self.cluster_user,
cluster_shell=self.cluster_shell,
keyname=self.keyname, spot_bid=self.spot_bid)
core = utils.dump_compress_encode(core_settings, use_json=True,
chunk_size=static.MAX_TAG_LEN)
self._add_chunked_tags(sg, core, static.CORE_TAG)
user = utils.dump_compress_encode(user_settings, use_json=True,
chunk_size=static.MAX_TAG_LEN)
self._add_chunked_tags(sg, user, static.USER_TAG)
return core_settings, user_settings

def _add_tags_to_sg(self, sg):
    """
    Tag the security group ``sg`` with the version and cluster settings.

    The version tag is added only when it is not already present. When
    ``self.config_on_master`` is set, the only setting written to the tags
    is the ``config_on_master`` flag itself (under static.CORE_TAG).
    Otherwise the core and user settings returned by ``_get_settings`` are
    written under static.CORE_TAG and static.USER_TAG respectively.
    """
    if static.VERSION_TAG not in sg.tags:
        sg.add_tag(static.VERSION_TAG, str(static.VERSION))

    if self.config_on_master:
        # the only info we store is the fact that config is on master
        to_store = [
            (static.CORE_TAG,
             dict(config_on_master=self.config_on_master)),
        ]
    else:
        core_settings, user_settings = self._get_settings()
        to_store = [
            (static.CORE_TAG, core_settings),
            (static.USER_TAG, user_settings),
        ]

    # Encode each settings dict into tag-sized chunks and attach them.
    for base_tag, settings in to_store:
        chunks = utils.dump_compress_encode(
            settings, use_json=True, chunk_size=static.MAX_TAG_LEN)
        self._add_chunked_tags(sg, chunks, base_tag)

def _load_chunked_tags(self, sg, base_tag_name):
tags = [i for i in sg.tags if i.startswith(base_tag_name)]
@@ -769,6 +773,34 @@ def _get_settings_from_tags(self, sg=None):
cluster.update(self._load_chunked_tags(sg, static.USER_TAG))
return cluster

def save_config_on_master(self):
    """
    Vanilla Improvements function - save the config on the master node.

    For clusters saving their config on the master node rather than in
    the security group tags. No more chunk/hashing/splitting headaches.

    The core and user settings returned by ``_get_settings`` are merged,
    ``self._plugins`` is stored under the "plugins" key, and the result is
    written as JSON to static.MASTER_CFG_FILE on the master node.
    """
    settings, user_settings = self._get_settings()
    settings.update(user_settings)
    settings["plugins"] = self._plugins
    # Serialize before opening the remote file: opening in write mode
    # truncates it, so a serialization error must not be able to destroy
    # a previously saved config. This also sends the data in one write.
    serialized = json.dumps(settings, indent=4, separators=(',', ': '),
                            sort_keys=True)
    # The context manager guarantees the remote handle is closed even if
    # the write fails.
    with self.master_node.ssh.remote_file(static.MASTER_CFG_FILE,
                                          'wt') as config:
        config.write(serialized)

def _load_config_from_master(self):
    """
    Vanilla Improvements function - loads the config on the master node.

    Reads the JSON document stored in static.MASTER_CFG_FILE on the master
    node, applies it to this cluster via ``self.update``, reloads the
    plugins and finally calls ``self.validate()``.

    Raises KeyError if the stored config has no "plugins" entry.
    """
    # Read inside a context manager so the remote handle is closed even
    # when the file content is not valid JSON.
    with self.master_node.ssh.remote_file(static.MASTER_CFG_FILE,
                                          'rt') as config:
        loaded_config = json.load(config)
    self.plugins_order = loaded_config["plugins"]
    self.update(loaded_config)
    master = self.master_node
    self.plugins = self.load_plugins(
        master.get_plugins(self.plugins_order))
    self.validate()

@property
def placement_group(self):
if self._placement_group is None:
@@ -53,7 +53,6 @@
from put import CmdPut
from get import CmdGet
from help import CmdHelp
from reloadconfig import CmdReloadConfig
from printconfig import CmdPrintConfig
from recover import CmdRecover
from cleancluster import CmdCleanCluster

This file was deleted.

@@ -180,6 +180,10 @@ def addopts(self, parser):
parser.add_option("-N", "--subnet-id", dest="subnet_id",
action="store", type="string",
help=("Launch cluster into a VPC subnet"))
parser.add_option("--config-on-master", default=False,
action='store_true', help="Store the config on the "
"master node rather than into the security group "
"tags")

def execute(self, args):
if len(args) != 1:
@@ -203,9 +207,16 @@ def execute(self, args):
validate = self.opts.validate
validate_running = self.opts.no_create
validate_only = self.opts.validate_only
config_on_master = self.opts.config_on_master

if scluster:
scluster = self.cm.get_cluster(tag, group=scluster)
validate_running = True
if config_on_master:
scluster = self.cm.get_cluster(tag, group=scluster,
load_receipt=False)
validate_running = False
else:
scluster = self.cm.get_cluster(tag, group=scluster)
validate_running = True
else:
template = self.opts.cluster_template
if not template:
@@ -238,10 +249,22 @@ def execute(self, args):
self.warn_experimental(msg, num_secs=5)
if self.opts.dns_prefix:
scluster.dns_prefix = tag
if config_on_master:
scluster.config_on_master = True
if self.opts.no_create:
validate = False
log.warning("Cannot start a cluster when its config is "
"stored on the master node using StarCluster. "
"You should start it manually and then use "
"the recovery options.")
return
try:
scluster.start(create=create, create_only=create_only,
validate=validate, validate_only=validate_only,
validate_running=validate_running)
if self.opts.config_on_master and create:
log.info("Saving config on master node")
scluster.save_config_on_master()
except KeyboardInterrupt:
if validate_only:
raise
@@ -158,7 +158,7 @@ def configure_sc_logging(use_syslog=False):
rfh.setLevel(logging.DEBUG)
rfh.setFormatter(formatter)
log.addHandler(rfh)
console.setLevel(logging.INFO)
console.setLevel(logging.DEBUG)
log.addHandler(console)
syslog_device = '/dev/log'
if use_syslog and os.path.exists(syslog_device):
@@ -764,7 +764,8 @@ def start_nfs_server(self):
self.ssh.execute("mkdir -p %s" % DUMMY_EXPORT_DIR)
with self.ssh.remote_file(DUMMY_EXPORT_FILE, 'w') as dummyf:
dummyf.write(DUMMY_EXPORT_LINE)
self.ssh.execute('/etc/init.d/nfs start')
# TEMP DISABLED
# self.ssh.execute('/etc/init.d/nfs start')
self.ssh.execute('rm -f %s' % DUMMY_EXPORT_FILE)
self.ssh.execute('rm -rf %s' % DUMMY_EXPORT_DIR)
self.ssh.execute('exportfs -fra')
@@ -285,3 +285,5 @@ def create_sc_config_dirs():
'disable_cloudinit': (bool, False, False, None, None),
'dns_prefix': (bool, False, False, None, None),
}

# Path on the master node where the cluster configuration is saved as JSON
# when the cluster is started with --config-on-master.
MASTER_CFG_FILE = '/etc/starcluster' # vanilla improvements

0 comments on commit 4bc1938

Please sign in to comment.
You can’t perform that action at this time.