Skip to content

Commit

Permalink
New config: store config on master
Browse files Browse the repository at this point in the history
Rather than storing the config in security group tags, the config is stored on
master:/etc/starcluster. This makes the config easier to update.
  • Loading branch information
Mich committed Jul 17, 2014
1 parent 681cb9e commit 4bc1938
Show file tree
Hide file tree
Showing 8 changed files with 107 additions and 124 deletions.
9 changes: 8 additions & 1 deletion README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,6 @@ This branch intends to be a mirror of https://github.com/jtriley/StarCluster dev

* Added commands
- printconfig - To print your existing cluster configuration
- reloadconfig - To reload the core and plugins configuration of a ''running'' cluster.
- cleancluster
+ Will clean Open Grid Engine from dead nodes. (Eg.: Dead spot instances)
+ Manages "impaired" nodes. (Reboots reserved instances, kills spot instances.)
Expand All @@ -224,6 +223,14 @@ This branch intends to be a mirror of https://github.com/jtriley/StarCluster dev
be stopped. Defaults to false.
* Improved node cleanup - Merged `robbyt`_ `pull request`_ which makes node cleanup faster.
* Improved node addition - Removed some remote read/writes (very slow) and replaced them with get/edit/push.
* Adds a mode where the cluster configuration is written to master:/etc/starcluster. To activate it, simply add the
"--config-on-master" flag to the start command. Clusters in this mode have the following pros and cons.

- Pros
+ Allows the config to be easily updated by editing the file.
+ No more obscure config compressed/hashed in metadata/tags and other "obscure" places.
- Cons
+ No longer possible to start a stopped cluster via StarCluster. (This is technically fixable, but not planned at the moment.)

.. _robbyt: https://github.com/robbyt
.. _pull request: https://github.com/jtriley/StarCluster/pull/123
106 changes: 69 additions & 37 deletions starcluster/cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import pprint
import warnings
import datetime
import json

import iptools

Expand Down Expand Up @@ -63,6 +64,11 @@ def get_cluster(self, cluster_name, group=None, load_receipt=True,
group = self.ec2.get_security_group(clname)
cl = Cluster(ec2_conn=self.ec2, cluster_tag=cltag,
cluster_group=group)

# Useful when config is on master node
cl.key_location = \
self.cfg.get_key(cl.master_node.key_name).get('key_location')

if load_receipt:
cl.load_receipt(load_plugins=load_plugins,
load_volumes=load_volumes)
Expand Down Expand Up @@ -420,6 +426,7 @@ def __init__(self,
subnet_id=None,
public_ips=None,
plugins_order=[],
config_on_master=False,
**kwargs):
# update class vars with given vars
_vars = locals().copy()
Expand Down Expand Up @@ -455,7 +462,6 @@ def __init__(self,
self._nodes = []
self._pool = None
self._progress_bar = None
self._config_fields = None
self.__default_plugin = None
self.__sge_plugin = None

Expand Down Expand Up @@ -588,10 +594,12 @@ def __str__(self):
return pprint.pformat(cfg)

def print_config(self):
config = {}
for key in self._config_fields:
config[key] = getattr(self, key)
pprint.pprint(config)
core_settings, user_settings = self._get_settings()
print "Core settings"
print json.dumps(core_settings, indent=1, sort_keys=True)
print
print "User settings"
print json.dumps(user_settings, indent=1, sort_keys=True)

def load_receipt(self, load_plugins=True, load_volumes=True):
"""
Expand All @@ -608,8 +616,9 @@ def load_receipt(self, load_plugins=True, load_volumes=True):
msg = user_msgs.version_mismatch % d
sep = '*' * 60
log.warn('\n'.join([sep, msg, sep]), extra={'__textwrap__': 1})
self._config_fields = self._get_settings_from_tags()
self.update(self._config_fields)
self.update(self._get_settings_from_tags())
if self.config_on_master:
self._load_config_from_master()
if not (load_plugins or load_volumes):
return True
try:
Expand Down Expand Up @@ -656,26 +665,6 @@ def __getstate__(self):
def _security_group(self):
return static.SECURITY_GROUP_TEMPLATE % self.cluster_tag

def save_core_settings(self, sg):
    """
    Encode the cluster's core settings with utils.dump_compress_encode
    (JSON mode) and attach the result to `sg` under static.CORE_TAG.
    """
    settings = {
        'cluster_size': self.cluster_size,
        'master_image_id': self.master_image_id,
        'master_instance_type': self.master_instance_type,
        'node_image_id': self.node_image_id,
        'node_instance_type': self.node_instance_type,
        'disable_queue': self.disable_queue,
        'disable_cloudinit': self.disable_cloudinit,
        'plugins_order': self.plugins_order,
    }
    encoded = utils.dump_compress_encode(settings, use_json=True)
    sg.add_tag(static.CORE_TAG, encoded)

def save_user_settings(self, sg):
    """
    Encode the cluster's user settings with utils.dump_compress_encode
    (JSON mode) and attach the result to `sg` under static.USER_TAG.
    """
    settings = {
        'cluster_user': self.cluster_user,
        'cluster_shell': self.cluster_shell,
        'keyname': self.keyname,
        'spot_bid': self.spot_bid,
    }
    encoded = utils.dump_compress_encode(settings, use_json=True)
    sg.add_tag(static.USER_TAG, encoded)

@property
def subnet(self):
if not self._subnet and self.subnet_id:
Expand Down Expand Up @@ -730,9 +719,10 @@ def _add_chunked_tags(self, sg, chunks, base_tag_name):
if tag not in sg.tags:
sg.add_tag(tag, chunk)

def _add_tags_to_sg(self, sg):
if static.VERSION_TAG not in sg.tags:
sg.add_tag(static.VERSION_TAG, str(static.VERSION))
def _get_settings(self):
"""
The settings to save
"""
core_settings = dict(cluster_size=self.cluster_size,
master_image_id=self.master_image_id,
master_instance_type=self.master_instance_type,
Expand All @@ -743,16 +733,30 @@ def _add_tags_to_sg(self, sg):
subnet_id=self.subnet_id,
public_ips=self.public_ips,
disable_queue=self.disable_queue,
disable_cloudinit=self.disable_cloudinit)
disable_cloudinit=self.disable_cloudinit,
plugins_order=self.plugins_order)
user_settings = dict(cluster_user=self.cluster_user,
cluster_shell=self.cluster_shell,
keyname=self.keyname, spot_bid=self.spot_bid)
core = utils.dump_compress_encode(core_settings, use_json=True,
chunk_size=static.MAX_TAG_LEN)
self._add_chunked_tags(sg, core, static.CORE_TAG)
user = utils.dump_compress_encode(user_settings, use_json=True,
chunk_size=static.MAX_TAG_LEN)
self._add_chunked_tags(sg, user, static.USER_TAG)
return core_settings, user_settings

def _add_tags_to_sg(self, sg):
    """
    Tag the security group `sg` with the StarCluster version and the
    cluster settings.

    The version tag is only added when it is not already present. When
    config_on_master is set, the only setting written to the tags is the
    config_on_master flag itself; otherwise the core and user settings
    returned by _get_settings() are written under static.CORE_TAG and
    static.USER_TAG respectively.
    """
    if static.VERSION_TAG not in sg.tags:
        sg.add_tag(static.VERSION_TAG, str(static.VERSION))

    if self.config_on_master:
        # the only info we store is the fact that config is on master
        marker = dict(config_on_master=self.config_on_master)
        payloads = [(static.CORE_TAG, marker)]
    else:
        core_settings, user_settings = self._get_settings()
        payloads = [(static.CORE_TAG, core_settings),
                    (static.USER_TAG, user_settings)]

    # Encode and attach each payload in turn (core first, then user).
    for base_tag, settings in payloads:
        chunks = utils.dump_compress_encode(settings, use_json=True,
                                            chunk_size=static.MAX_TAG_LEN)
        self._add_chunked_tags(sg, chunks, base_tag)

def _load_chunked_tags(self, sg, base_tag_name):
tags = [i for i in sg.tags if i.startswith(base_tag_name)]
Expand All @@ -769,6 +773,34 @@ def _get_settings_from_tags(self, sg=None):
cluster.update(self._load_chunked_tags(sg, static.USER_TAG))
return cluster

def save_config_on_master(self):
    """
    Vanilla Improvements function - save the config on the master node.

    For clusters that save their config on the master node rather than in
    the security group tags. No more chunk/hashing/splitting headaches.

    The core and user settings returned by _get_settings() are merged,
    self._plugins is added under the "plugins" key, and the result is
    written as indented, key-sorted JSON to static.MASTER_CFG_FILE on the
    master node.
    """
    settings, user_settings = self._get_settings()
    settings.update(user_settings)
    settings["plugins"] = self._plugins
    # Use the remote file as a context manager so the handle is released
    # even if serialization fails part-way through.
    with self.master_node.ssh.remote_file(static.MASTER_CFG_FILE,
                                          'wt') as config:
        json.dump(settings, config, indent=4, separators=(',', ': '),
                  sort_keys=True)

def _load_config_from_master(self):
    """
    Vanilla Improvements function - loads the config from the master node.

    Reads the JSON document stored at static.MASTER_CFG_FILE on the master
    node, applies it to this cluster via update(), reloads the plugins and
    finally re-validates the cluster.
    """
    # Parse inside a context manager so the remote handle is closed even
    # if the file does not contain valid JSON.
    with self.master_node.ssh.remote_file(static.MASTER_CFG_FILE,
                                          'rt') as config:
        loaded_config = json.load(config)
    self.plugins_order = loaded_config["plugins"]
    # NOTE(review): the saved config appears to also carry a
    # "plugins_order" key (from _get_settings), so update() may overwrite
    # the assignment above - confirm which value is intended to win.
    self.update(loaded_config)
    master = self.master_node
    self.plugins = self.load_plugins(
        master.get_plugins(self.plugins_order))
    self.validate()

@property
def placement_group(self):
if self._placement_group is None:
Expand Down
1 change: 0 additions & 1 deletion starcluster/commands/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,6 @@
from put import CmdPut
from get import CmdGet
from help import CmdHelp
from reloadconfig import CmdReloadConfig
from printconfig import CmdPrintConfig
from recover import CmdRecover
from cleancluster import CmdCleanCluster
Expand Down
81 changes: 0 additions & 81 deletions starcluster/commands/reloadconfig.py

This file was deleted.

27 changes: 25 additions & 2 deletions starcluster/commands/start.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,10 @@ def addopts(self, parser):
parser.add_option("-N", "--subnet-id", dest="subnet_id",
action="store", type="string",
help=("Launch cluster into a VPC subnet"))
parser.add_option("--config-on-master", default=False,
action='store_true', help="Store the config on the "
"master node rather than into the security group "
"tags")

def execute(self, args):
if len(args) != 1:
Expand All @@ -203,9 +207,16 @@ def execute(self, args):
validate = self.opts.validate
validate_running = self.opts.no_create
validate_only = self.opts.validate_only
config_on_master = self.opts.config_on_master

if scluster:
scluster = self.cm.get_cluster(tag, group=scluster)
validate_running = True
if config_on_master:
scluster = self.cm.get_cluster(tag, group=scluster,
load_receipt=False)
validate_running = False
else:
scluster = self.cm.get_cluster(tag, group=scluster)
validate_running = True
else:
template = self.opts.cluster_template
if not template:
Expand Down Expand Up @@ -238,10 +249,22 @@ def execute(self, args):
self.warn_experimental(msg, num_secs=5)
if self.opts.dns_prefix:
scluster.dns_prefix = tag
if config_on_master:
scluster.config_on_master = True
if self.opts.no_create:
validate = False
log.warning("Cannot start a cluster when its config is "
"stored on the master node using StarCluster. "
"You should start it manually and then use "
"the recovery options.")
return
try:
scluster.start(create=create, create_only=create_only,
validate=validate, validate_only=validate_only,
validate_running=validate_running)
if self.opts.config_on_master and create:
log.info("Saving config on master node")
scluster.save_config_on_master()
except KeyboardInterrupt:
if validate_only:
raise
Expand Down
2 changes: 1 addition & 1 deletion starcluster/logger.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ def configure_sc_logging(use_syslog=False):
rfh.setLevel(logging.DEBUG)
rfh.setFormatter(formatter)
log.addHandler(rfh)
console.setLevel(logging.INFO)
console.setLevel(logging.DEBUG)
log.addHandler(console)
syslog_device = '/dev/log'
if use_syslog and os.path.exists(syslog_device):
Expand Down
3 changes: 2 additions & 1 deletion starcluster/node.py
Original file line number Diff line number Diff line change
Expand Up @@ -764,7 +764,8 @@ def start_nfs_server(self):
self.ssh.execute("mkdir -p %s" % DUMMY_EXPORT_DIR)
with self.ssh.remote_file(DUMMY_EXPORT_FILE, 'w') as dummyf:
dummyf.write(DUMMY_EXPORT_LINE)
self.ssh.execute('/etc/init.d/nfs start')
# TEMP DISABLED
# self.ssh.execute('/etc/init.d/nfs start')
self.ssh.execute('rm -f %s' % DUMMY_EXPORT_FILE)
self.ssh.execute('rm -rf %s' % DUMMY_EXPORT_DIR)
self.ssh.execute('exportfs -fra')
Expand Down
2 changes: 2 additions & 0 deletions starcluster/static.py
Original file line number Diff line number Diff line change
Expand Up @@ -285,3 +285,5 @@ def create_sc_config_dirs():
'disable_cloudinit': (bool, False, False, None, None),
'dns_prefix': (bool, False, False, None, None),
}

MASTER_CFG_FILE = '/etc/starcluster' # vanilla improvements

0 comments on commit 4bc1938

Please sign in to comment.