From eb8526ce01be362032ca345f845f0042e9785cdd Mon Sep 17 00:00:00 2001
From: Tom Prince
Date: Wed, 5 Jun 2013 13:24:22 -0600
Subject: [PATCH 1/7] Reintroduce deprecated `workdir` property.

It is unfortunately named, but dropping it now doesn't provide a clear
migration path. Supporting both for at least a version allows a smoother
migration.
---
 master/buildbot/process/build.py | 10 +++++-----
 master/docs/relnotes/index.rst   |  1 +
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/master/buildbot/process/build.py b/master/buildbot/process/build.py
index 160631adb10..38a1a46ecf8 100644
--- a/master/buildbot/process/build.py
+++ b/master/buildbot/process/build.py
@@ -197,11 +197,11 @@ def setupSlaveBuilder(self, slavebuilder):
         buildslave_properties = slavebuilder.slave.properties
         self.getProperties().updateFromProperties(buildslave_properties)
         if slavebuilder.slave.slave_basedir:
-            self.setProperty("builddir",
-                self.path_module.join(
-                    slavebuilder.slave.slave_basedir,
-                    self.builder.config.slavebuilddir),
-                "Slave")
+            builddir = self.path_module.join(
+                slavebuilder.slave.slave_basedir,
+                self.builder.config.slavebuilddir)
+            self.setProperty("builddir", builddir, "slave")
+            self.setProperty("workdir", builddir, "slave (deprecated)")
 
         self.slavename = slavebuilder.slave.slavename
         self.build_status.setSlavename(self.slavename)

diff --git a/master/docs/relnotes/index.rst b/master/docs/relnotes/index.rst
index 623f5ec73b4..c25ff175a68 100644
--- a/master/docs/relnotes/index.rst
+++ b/master/docs/relnotes/index.rst
@@ -85,6 +85,7 @@ Deprecations, Removals, and Non-Compatible Changes
 
 * The ``workdir`` build property has been renamed to ``builddir``.
   This change accurately reflects its content; the term "workdir" means something different.
+  ``workdir`` is still supported for backward compatibility, but will be removed eventually.
 
 * The ``Blocker`` step has been removed.

From 4ea0e5fd6c579d6b85421a774ea00efa731c2e1a Mon Sep 17 00:00:00 2001
From: Tom Prince
Date: Thu, 6 Jun 2013 10:34:46 -0600
Subject: [PATCH 2/7] Rework locating config file for checkconfig and
 upgrade-master.

This changes the logic of checkconfig so that:

1. If a file is passed, that file is used.
2. If a directory containing `buildbot.tac` is passed, that file is loaded
   and, if it defines `configfile`, that config file is used.
3. If `buildbot.tac` doesn't exist, or doesn't define `configfile`,
   `master.cfg` from that directory is used.

The logic for upgrade-master is similar, except that only directories are
supported, so (1) is skipped.
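To make the lookup order concrete, here is a minimal sketch condensing the
three steps into one function. It is illustrative only: `resolve_config_file`
is a hypothetical helper, not part of this patch (the actual logic is split
between `getConfigFileFromTac` in `buildbot/scripts/base.py` and `checkconfig`
in the diff below), and it assumes the same Python 2 idioms (`execfile`) the
patch uses.

    import os

    def resolve_config_file(path):
        # Hypothetical helper condensing the lookup order described above.
        # Returns (basedir, configFile), mirroring _loadConfig's parameters.
        if not os.path.isdir(path):
            # (1) an explicit file is used as-is
            return os.getcwd(), path
        basedir = path
        tacFile = os.path.join(basedir, 'buildbot.tac')
        if os.path.exists(tacFile):
            # (2) execute buildbot.tac; honor `configfile` if it defines one
            tacGlobals = {}
            execfile(tacFile, tacGlobals)
            return basedir, tacGlobals.get('configfile', 'master.cfg')
        # (3) no buildbot.tac: fall back to master.cfg in that directory
        return basedir, 'master.cfg'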
--- master/buildbot/scripts/base.py | 11 ++- master/buildbot/scripts/checkconfig.py | 48 ++++++------- master/buildbot/scripts/upgrade_master.py | 2 +- .../buildbot/test/unit/test_scripts_base.py | 70 ++++++++++++++----- .../test/unit/test_scripts_checkconfig.py | 25 +++---- 5 files changed, 90 insertions(+), 66 deletions(-) diff --git a/master/buildbot/scripts/base.py b/master/buildbot/scripts/base.py index 7d68ab76588..cdf0c3bcdfa 100644 --- a/master/buildbot/scripts/base.py +++ b/master/buildbot/scripts/base.py @@ -38,19 +38,16 @@ def print_error(error_message): return True -def getConfigFileWithFallback(basedir, defaultName='master.cfg'): - configFile = os.path.abspath(os.path.join(basedir, defaultName)) - if os.path.exists(configFile): - return configFile +def getConfigFileFromTac(basedir): # execute the .tac file to see if its configfile location exists tacFile = os.path.join(basedir, 'buildbot.tac') if os.path.exists(tacFile): # don't mess with the global namespace tacGlobals = {} execfile(tacFile, tacGlobals) - return tacGlobals["configfile"] - # No config file found; return default location and fail elsewhere - return configFile + return tacGlobals.get("configfile", "master.cfg") + else: + return "master.cfg" class SubcommandOptions(usage.Options): # subclasses should set this to a list-of-lists in order to source the diff --git a/master/buildbot/scripts/checkconfig.py b/master/buildbot/scripts/checkconfig.py index 951247cfb9c..2cd476e6755 100644 --- a/master/buildbot/scripts/checkconfig.py +++ b/master/buildbot/scripts/checkconfig.py @@ -16,35 +16,35 @@ import sys import os from buildbot import config -from buildbot.scripts.base import getConfigFileWithFallback - -class ConfigLoader(object): - def __init__(self, basedir=os.getcwd(), configFileName='master.cfg'): - self.basedir = os.path.abspath(basedir) - self.configFileName = getConfigFileWithFallback(basedir, configFileName) - - def load(self, quiet=False): - try: - config.MasterConfig.loadConfig( - self.basedir, self.configFileName) - except config.ConfigErrors, e: - if not quiet: - print >> sys.stderr, "Configuration Errors:" - for e in e.errors: - print >> sys.stderr, " " + e - return 1 +from buildbot.scripts.base import getConfigFileFromTac +def _loadConfig(basedir, configFile, quiet): + try: + config.MasterConfig.loadConfig( + basedir, configFile) + except config.ConfigErrors, e: if not quiet: - print "Config file is good!" - return 0 + print >> sys.stderr, "Configuration Errors:" + for e in e.errors: + print >> sys.stderr, " " + e + return 1 + + if not quiet: + print "Config file is good!" 
+ return 0 + def checkconfig(config): quiet = config.get('quiet') - configFileName = config.get('configFile') + configFile = config.get('configFile') - if os.path.isdir(configFileName): - cl = ConfigLoader(basedir=configFileName) + if os.path.isdir(configFile): + basedir = configFile + configFile = getConfigFileFromTac(basedir) else: - cl = ConfigLoader(configFileName=configFileName) + basedir = os.getcwd() + + return _loadConfig(basedir=basedir, configFile=configFile, quiet=quiet) + - return cl.load(quiet=quiet) +__all__ = ['checkconfig'] diff --git a/master/buildbot/scripts/upgrade_master.py b/master/buildbot/scripts/upgrade_master.py index b7e1cae90dd..cc783c72936 100644 --- a/master/buildbot/scripts/upgrade_master.py +++ b/master/buildbot/scripts/upgrade_master.py @@ -159,7 +159,7 @@ def upgradeMaster(config, _noMonkey=False): os.chdir(config['basedir']) - configFile = base.getConfigFileWithFallback(config['basedir']) + configFile = base.getConfigFileFromTac(config['basedir']) master_cfg = loadConfig(config, configFile) if not master_cfg: defer.returnValue(1) diff --git a/master/buildbot/test/unit/test_scripts_base.py b/master/buildbot/test/unit/test_scripts_base.py index 3647b87e45a..c682f8b1319 100644 --- a/master/buildbot/test/unit/test_scripts_base.py +++ b/master/buildbot/test/unit/test_scripts_base.py @@ -54,30 +54,66 @@ def test_isBuildmasterDir_matches(self): self.assertWasQuiet() class TestTacFallback(dirs.DirsMixin, unittest.TestCase): + """ + Tests for L{base.getConfigFileFromTac}. + """ def setUp(self): + """ + Create a base directory. + """ self.basedir = os.path.abspath('basedir') - self.stdout = cStringIO.StringIO() - self.filename = 'master.cfg' return self.setUpDirs('basedir') - def test_tacFallback_location_from_tac(self): + def _createBuildbotTac(self, configfile=None): + """ + Create a C{buildbot.tac} that points to a given C{configfile} + and create that file. + + @param configfile: Config file to point at and create. + @type configfile: L{str} + """ tacfile = os.path.join(self.basedir, "buildbot.tac") - otherConfigFile = os.path.join(self.basedir, "other.cfg") with open(tacfile, "wt") as f: - f.write("configfile = '%s'" % otherConfigFile) - with open(otherConfigFile, "wt") as f: - f.write("#dummy") - self.filename = base.getConfigFileWithFallback(self.basedir) - self.assertEqual(self.filename, otherConfigFile) - - def test_tacFallback_noFallback(self): - defaultFilename = self.filename - with open(self.filename, "wt") as f: - f.write("#dummy") - self.filename = base.getConfigFileWithFallback(self.basedir) - self.assertEqual(self.filename, - os.path.join(self.basedir, defaultFilename)) + if configfile is not None: + f.write("configfile = %r" % configfile) + else: + f.write("#dummy") + + + def test_getConfigFileFromTac(self): + """ + When L{getConfigFileFromTac} is passed a C{basedir} + containing a C{buildbot.tac}, it reads the location + of the config file from there. 
+ """ + self._createBuildbotTac("other.cfg") + foundConfigFile = base.getConfigFileFromTac( + basedir=self.basedir) + self.assertEqual(foundConfigFile, "other.cfg") + + def test_getConfigFileFromTac_fallback(self): + """ + When L{getConfigFileFromTac} is passed a C{basedir} + which doesn't contain a C{buildbot.tac}, + it returns C{master.cfg} + """ + foundConfigFile = base.getConfigFileFromTac( + basedir=self.basedir) + self.assertEqual(foundConfigFile, 'master.cfg') + + + def test_getConfigFileFromTac_tacWithoutConfigFile(self): + """ + When L{getConfigFileFromTac} is passed a C{basedir} + containing a C{buildbot.tac}, but C{buildbot.tac} doesn't + define C{configfile}, L{getConfigFileFromTac} returns C{master.cfg} + """ + self._createBuildbotTac() + foundConfigFile = base.getConfigFileFromTac( + basedir=self.basedir) + self.assertEqual(foundConfigFile, 'master.cfg') + class TestSubcommandOptions(unittest.TestCase): diff --git a/master/buildbot/test/unit/test_scripts_checkconfig.py b/master/buildbot/test/unit/test_scripts_checkconfig.py index 08ec9c8917b..0466844edf1 100644 --- a/master/buildbot/test/unit/test_scripts_checkconfig.py +++ b/master/buildbot/test/unit/test_scripts_checkconfig.py @@ -35,7 +35,7 @@ def tearDown(self): # tests - def do_test_load(self, by_name=False, config='', other_files={}, + def do_test_load(self, config='', other_files={}, stdout_re=None, stderr_re=None): configFile = os.path.join('configdir', 'master.cfg') with open(configFile, "w") as f: @@ -51,16 +51,12 @@ def do_test_load(self, by_name=False, config='', other_files={}, with open(fn, "w") as f: f.write(contents) - if by_name: - cl = checkconfig.ConfigLoader(configFileName=configFile) - else: - cl = checkconfig.ConfigLoader(basedir='configdir') - old_stdout, old_stderr = sys.stdout, sys.stderr stdout = sys.stdout = cStringIO.StringIO() stderr = sys.stderr = cStringIO.StringIO() try: - cl.load() + checkconfig._loadConfig( + basedir='configdir', configFile="master.cfg", quiet=False) finally: sys.stdout, sys.stderr = old_stdout, old_stderr if stdout_re: @@ -144,25 +140,20 @@ def test_success_import_package(self): class TestCheckconfig(unittest.TestCase): def setUp(self): - self.ConfigLoader = mock.Mock(name='ConfigLoader') - self.instance = mock.Mock(name='ConfigLoader()') - self.ConfigLoader.return_value = self.instance - self.instance.load.return_value = 3 - self.patch(checkconfig, 'ConfigLoader', self.ConfigLoader) + self.loadConfig = mock.Mock(spec=checkconfig._loadConfig, return_value=3) + self.patch(checkconfig, '_loadConfig', self.loadConfig) def test_checkconfig_given_dir(self): self.assertEqual(checkconfig.checkconfig(dict(configFile='.')), 3) - self.ConfigLoader.assert_called_with(basedir='.') - self.instance.load.assert_called_with(quiet=None) + self.loadConfig.assert_called_with(basedir='.', configFile='master.cfg', quiet=None) def test_checkconfig_given_file(self): config = dict(configFile='master.cfg') self.assertEqual(checkconfig.checkconfig(config), 3) - self.ConfigLoader.assert_called_with(configFileName='master.cfg') - self.instance.load.assert_called_with(quiet=None) + self.loadConfig.assert_called_with(basedir=os.getcwd(), configFile='master.cfg', quiet=None) def test_checkconfig_quiet(self): config = dict(configFile='master.cfg', quiet=True) self.assertEqual(checkconfig.checkconfig(config), 3) - self.instance.load.assert_called_with(quiet=True) + self.loadConfig.assert_called_with(basedir=os.getcwd(), configFile='master.cfg', quiet=True) From 5fd5273241de3e1e3a93cfe259d6cc882bf796a4 Mon 
Sep 17 00:00:00 2001
From: Tom Prince
Date: Thu, 6 Jun 2013 10:59:11 -0600
Subject: [PATCH 3/7] Use twisted.python.deprecate.deprecatedModuleAttribute to
 deprecate SetProperty.
---
 master/buildbot/steps/shell.py                | 16 ++++++++--------
 master/buildbot/test/unit/test_steps_shell.py | 19 +++++++++++++++++++
 2 files changed, 27 insertions(+), 8 deletions(-)

diff --git a/master/buildbot/steps/shell.py b/master/buildbot/steps/shell.py
index b54c19b624d..b469579511a 100644
--- a/master/buildbot/steps/shell.py
+++ b/master/buildbot/steps/shell.py
@@ -18,6 +18,8 @@
 import inspect
 from twisted.python import log, failure
 from twisted.spread import pb
+from twisted.python.deprecate import deprecatedModuleAttribute
+from twisted.python.versions import Version
 from buildbot.process import buildstep
 from buildbot.status.results import SUCCESS, WARNINGS, FAILURE
 from buildbot.status.logfile import STDOUT, STDERR
@@ -364,14 +366,12 @@ def getText(self, cmd, results):
             # let ShellCommand describe
             return ShellCommand.getText(self, cmd, results)
 
-class SetProperty(SetPropertyFromCommand):
-    "alias for SetPropertyFromCommand"
-    def __init__(self, *args, **kwargs):
-        log.msg("WARNING: the name 'SetProperty' has been renamed to SetPropertyFromCommand; use " +
-                "buildbot.steps.slave.SetPropertyFromCommand instead " +
-                "(note that this may require you to change your import " +
-                "statement)")
-        SetPropertyFromCommand.__init__(self, *args, **kwargs)
+
+SetProperty = SetPropertyFromCommand
+deprecatedModuleAttribute(Version("Buildbot", 0, 8, 8),
+        "It has been renamed to SetPropertyFromCommand",
+        "buildbot.steps.shell", "SetProperty")
+
 
 class Configure(ShellCommand):
 
diff --git a/master/buildbot/test/unit/test_steps_shell.py b/master/buildbot/test/unit/test_steps_shell.py
index 64c9582be7d..9f914600838 100644
--- a/master/buildbot/test/unit/test_steps_shell.py
+++ b/master/buildbot/test/unit/test_steps_shell.py
@@ -480,6 +480,25 @@ def extract_fn(rc, stdout, stderr):
         self.assertEqual(len(self.flushLoggedErrors(RuntimeError)), 1)
         return d
 
+class SetPropertyDeprecation(unittest.TestCase):
+    """
+    Tests for L{shell.SetProperty}
+    """
+
+    def test_deprecated(self):
+        """
+        Accessing L{shell.SetProperty} reports a deprecation error.
+        """
+        shell.SetProperty
+        warnings = self.flushWarnings([self.test_deprecated])
+        self.assertEqual(len(warnings), 1)
+        self.assertIdentical(warnings[0]['category'], DeprecationWarning)
+        self.assertEqual(warnings[0]['message'],
+                         "buildbot.steps.shell.SetProperty was deprecated in Buildbot 0.8.8: "
+                         "It has been renamed to SetPropertyFromCommand"
+                         )
+
+
 class Configure(unittest.TestCase):
 
     def test_class_attrs(self):

From 6b06d23eb425b00fe96c8e98633ff77d98b58d54 Mon Sep 17 00:00:00 2001
From: Tom Prince
Date: Wed, 19 Jun 2013 12:17:49 -0600
Subject: [PATCH 4/7] Move buildslave code around to match organization of
 openstack latent slave.
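Judging from the diffstat below, the implementations move into the new
`buildbot.buildslave` package while the old top-level modules shrink to thin
compatibility shims. A hedged sketch of what this presumably means for a
`master.cfg` (the class name is as it exists in 0.8.x; the shim behavior is
inferred from the diffstat, not guaranteed):

    # Old import path, apparently still working via the compatibility shim:
    from buildbot.ec2buildslave import EC2LatentBuildSlave

    # New import path after this reorganization:
    from buildbot.buildslave.ec2 import EC2LatentBuildSlave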
--- master/buildbot/buildslave/__init__.py | 1039 +--------------- master/buildbot/buildslave/base.py | 1049 +++++++++++++++++ master/buildbot/buildslave/ec2.py | 319 +++++ master/buildbot/buildslave/libvirt.py | 301 +++++ master/buildbot/buildslave/openstack.py | 2 +- master/buildbot/ec2buildslave.py | 312 +---- master/buildbot/libvirtbuildslave.py | 296 +---- ..._buildslave.py => test_buildslave_base.py} | 5 +- ...ildslave.py => test_buildslave_libvirt.py} | 3 +- master/docs/manual/cfg-buildslaves.rst | 16 +- master/docs/relnotes/index.rst | 2 + 11 files changed, 1712 insertions(+), 1632 deletions(-) create mode 100644 master/buildbot/buildslave/base.py create mode 100644 master/buildbot/buildslave/ec2.py create mode 100644 master/buildbot/buildslave/libvirt.py rename master/buildbot/test/unit/{test_buildslave.py => test_buildslave_base.py} (98%) rename master/buildbot/test/unit/{test_libvirtbuildslave.py => test_buildslave_libvirt.py} (99%) diff --git a/master/buildbot/buildslave/__init__.py b/master/buildbot/buildslave/__init__.py index 272b323d765..c216e0c7c67 100644 --- a/master/buildbot/buildslave/__init__.py +++ b/master/buildbot/buildslave/__init__.py @@ -11,1039 +11,10 @@ # this program; if not, write to the Free Software Foundation, Inc., 51 # Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. # -# Portions Copyright Buildbot Team Members -# Portions Copyright Canonical Ltd. 2009 +# Copyright Buildbot Team Members -import time -from email.Message import Message -from email.Utils import formatdate -from zope.interface import implements -from twisted.python import log -from twisted.internet import defer, reactor -from twisted.application import service -from twisted.spread import pb -from twisted.python.reflect import namedModule +from buildbot.buildslave.base import ( + AbstractBuildSlave, BuildSlave, AbstractLatentBuildSlave) -from buildbot.status.slave import SlaveStatus -from buildbot.status.mail import MailNotifier -from buildbot.process import metrics, botmaster -from buildbot.interfaces import IBuildSlave, ILatentBuildSlave -from buildbot.process.properties import Properties -from buildbot.util import subscription -from buildbot.util.eventual import eventually -from buildbot import config - -class AbstractBuildSlave(config.ReconfigurableServiceMixin, pb.Avatar, - service.MultiService): - """This is the master-side representative for a remote buildbot slave. - There is exactly one for each slave described in the config file (the - c['slaves'] list). When buildbots connect in (.attach), they get a - reference to this instance. The BotMaster object is stashed as the - .botmaster attribute. The BotMaster is also our '.parent' Service. - - I represent a build slave -- a remote machine capable of - running builds. 
I am instantiated by the configuration file, and can be - subclassed to add extra functionality.""" - - implements(IBuildSlave) - keepalive_timer = None - keepalive_interval = None - - # reconfig slaves after builders - reconfig_priority = 64 - - def __init__(self, name, password, max_builds=None, - notify_on_missing=[], missing_timeout=3600, - properties={}, locks=None, keepalive_interval=3600): - """ - @param name: botname this machine will supply when it connects - @param password: password this machine will supply when - it connects - @param max_builds: maximum number of simultaneous builds that will - be run concurrently on this buildslave (the - default is None for no limit) - @param properties: properties that will be applied to builds run on - this slave - @type properties: dictionary - @param locks: A list of locks that must be acquired before this slave - can be used - @type locks: dictionary - """ - service.MultiService.__init__(self) - self.slavename = name - self.password = password - - # PB registration - self.registration = None - self.registered_port = None - - # these are set when the service is started, and unset when it is - # stopped - self.botmaster = None - self.master = None - - self.slave_status = SlaveStatus(name) - self.slave = None # a RemoteReference to the Bot, when connected - self.slave_commands = None - self.slavebuilders = {} - self.max_builds = max_builds - self.access = [] - if locks: - self.access = locks - self.lock_subscriptions = [] - - self.properties = Properties() - self.properties.update(properties, "BuildSlave") - self.properties.setProperty("slavename", name, "BuildSlave") - - self.lastMessageReceived = 0 - if isinstance(notify_on_missing, str): - notify_on_missing = [notify_on_missing] - self.notify_on_missing = notify_on_missing - for i in notify_on_missing: - if not isinstance(i, str): - config.error( - 'notify_on_missing arg %r is not a string' % (i,)) - self.missing_timeout = missing_timeout - self.missing_timer = None - self.keepalive_interval = keepalive_interval - - self.detached_subs = None - - self._old_builder_list = None - - def __repr__(self): - return "<%s %r>" % (self.__class__.__name__, self.slavename) - - def updateLocks(self): - """Convert the L{LockAccess} objects in C{self.locks} into real lock - objects, while also maintaining the subscriptions to lock releases.""" - # unsubscribe from any old locks - for s in self.lock_subscriptions: - s.unsubscribe() - - # convert locks into their real form - locks = [ (self.botmaster.getLockFromLockAccess(a), a) - for a in self.access ] - self.locks = [(l.getLock(self), la) for l, la in locks] - self.lock_subscriptions = [ l.subscribeToReleases(self._lockReleased) - for l, la in self.locks ] - - def locksAvailable(self): - """ - I am called to see if all the locks I depend on are available, - in which I return True, otherwise I return False - """ - if not self.locks: - return True - for lock, access in self.locks: - if not lock.isAvailable(self, access): - return False - return True - - def acquireLocks(self): - """ - I am called when a build is preparing to run. I try to claim all - the locks that are needed for a build to happen. If I can't, then - my caller should give up the build and try to get another slave - to look at it. 
- """ - log.msg("acquireLocks(slave %s, locks %s)" % (self, self.locks)) - if not self.locksAvailable(): - log.msg("slave %s can't lock, giving up" % (self, )) - return False - # all locks are available, claim them all - for lock, access in self.locks: - lock.claim(self, access) - return True - - def releaseLocks(self): - """ - I am called to release any locks after a build has finished - """ - log.msg("releaseLocks(%s): %s" % (self, self.locks)) - for lock, access in self.locks: - lock.release(self, access) - - def _lockReleased(self): - """One of the locks for this slave was released; try scheduling - builds.""" - if not self.botmaster: - return # oh well.. - self.botmaster.maybeStartBuildsForSlave(self.slavename) - - def setServiceParent(self, parent): - # botmaster needs to set before setServiceParent which calls startService - self.botmaster = parent - self.master = parent.master - service.MultiService.setServiceParent(self, parent) - - def startService(self): - self.updateLocks() - self.startMissingTimer() - return service.MultiService.startService(self) - - @defer.inlineCallbacks - def reconfigService(self, new_config): - # Given a new BuildSlave, configure this one identically. Because - # BuildSlave objects are remotely referenced, we can't replace them - # without disconnecting the slave, yet there's no reason to do that. - new = self.findNewSlaveInstance(new_config) - - assert self.slavename == new.slavename - - # do we need to re-register? - if (not self.registration or - self.password != new.password or - new_config.slavePortnum != self.registered_port): - if self.registration: - yield self.registration.unregister() - self.registration = None - self.password = new.password - self.registered_port = new_config.slavePortnum - self.registration = self.master.pbmanager.register( - self.registered_port, self.slavename, - self.password, self.getPerspective) - - # adopt new instance's configuration parameters - self.max_builds = new.max_builds - self.access = new.access - self.notify_on_missing = new.notify_on_missing - self.keepalive_interval = new.keepalive_interval - - if self.missing_timeout != new.missing_timeout: - running_missing_timer = self.missing_timer - self.stopMissingTimer() - self.missing_timeout = new.missing_timeout - if running_missing_timer: - self.startMissingTimer() - - properties = Properties() - properties.updateFromProperties(new.properties) - self.properties = properties - - self.updateLocks() - - # update the attached slave's notion of which builders are attached. - # This assumes that the relevant builders have already been configured, - # which is why the reconfig_priority is set low in this class. - yield self.updateSlave() - - yield config.ReconfigurableServiceMixin.reconfigService(self, - new_config) - - def stopService(self): - if self.registration: - self.registration.unregister() - self.registration = None - self.stopMissingTimer() - return service.MultiService.stopService(self) - - def findNewSlaveInstance(self, new_config): - # TODO: called multiple times per reconfig; use 1-element cache? 
- for sl in new_config.slaves: - if sl.slavename == self.slavename: - return sl - assert 0, "no new slave named '%s'" % self.slavename - - def startMissingTimer(self): - if self.notify_on_missing and self.missing_timeout and self.parent: - self.stopMissingTimer() # in case it's already running - self.missing_timer = reactor.callLater(self.missing_timeout, - self._missing_timer_fired) - - def stopMissingTimer(self): - if self.missing_timer: - self.missing_timer.cancel() - self.missing_timer = None - - def getPerspective(self, mind, slavename): - assert slavename == self.slavename - metrics.MetricCountEvent.log("attached_slaves", 1) - - # record when this connection attempt occurred - if self.slave_status: - self.slave_status.recordConnectTime() - - # try to use TCP keepalives - try: - mind.broker.transport.setTcpKeepAlive(1) - except: - pass - - if self.isConnected(): - # duplicate slave - send it to arbitration - arb = botmaster.DuplicateSlaveArbitrator(self) - return arb.getPerspective(mind, slavename) - else: - log.msg("slave '%s' attaching from %s" % (slavename, mind.broker.transport.getPeer())) - return self - - def doKeepalive(self): - self.keepalive_timer = reactor.callLater(self.keepalive_interval, - self.doKeepalive) - if not self.slave: - return - d = self.slave.callRemote("print", "Received keepalive from master") - d.addErrback(log.msg, "Keepalive failed for '%s'" % (self.slavename, )) - - def stopKeepaliveTimer(self): - if self.keepalive_timer: - self.keepalive_timer.cancel() - - def startKeepaliveTimer(self): - assert self.keepalive_interval - log.msg("Starting buildslave keepalive timer for '%s'" % \ - (self.slavename, )) - self.doKeepalive() - - def isConnected(self): - return self.slave - - def _missing_timer_fired(self): - self.missing_timer = None - # notify people, but only if we're still in the config - if not self.parent: - return - - buildmaster = self.botmaster.master - status = buildmaster.getStatus() - text = "The Buildbot working for '%s'\n" % status.getTitle() - text += ("has noticed that the buildslave named %s went away\n" % - self.slavename) - text += "\n" - text += ("It last disconnected at %s (buildmaster-local time)\n" % - time.ctime(time.time() - self.missing_timeout)) # approx - text += "\n" - text += "The admin on record (as reported by BUILDSLAVE:info/admin)\n" - text += "was '%s'.\n" % self.slave_status.getAdmin() - text += "\n" - text += "Sincerely,\n" - text += " The Buildbot\n" - text += " %s\n" % status.getTitleURL() - text += "\n" - text += "%s\n" % status.getURLForThing(self.slave_status) - subject = "Buildbot: buildslave %s was lost" % self.slavename - return self._mail_missing_message(subject, text) - - - def updateSlave(self): - """Called to add or remove builders after the slave has connected. - - @return: a Deferred that indicates when an attached slave has - accepted the new builders and/or released the old ones.""" - if self.slave: - return self.sendBuilderList() - else: - return defer.succeed(None) - - def updateSlaveStatus(self, buildStarted=None, buildFinished=None): - if buildStarted: - self.slave_status.buildStarted(buildStarted) - if buildFinished: - self.slave_status.buildFinished(buildFinished) - - @metrics.countMethod('AbstractBuildSlave.attached()') - def attached(self, bot): - """This is called when the slave connects. - - @return: a Deferred that fires when the attachment is complete - """ - - # the botmaster should ensure this. 
- assert not self.isConnected() - - metrics.MetricCountEvent.log("AbstractBuildSlave.attached_slaves", 1) - - # set up the subscription point for eventual detachment - self.detached_subs = subscription.SubscriptionPoint("detached") - - # now we go through a sequence of calls, gathering information, then - # tell the Botmaster that it can finally give this slave to all the - # Builders that care about it. - - # we accumulate slave information in this 'state' dictionary, then - # set it atomically if we make it far enough through the process - state = {} - - # Reset graceful shutdown status - self.slave_status.setGraceful(False) - # We want to know when the graceful shutdown flag changes - self.slave_status.addGracefulWatcher(self._gracefulChanged) - - d = defer.succeed(None) - def _log_attachment_on_slave(res): - d1 = bot.callRemote("print", "attached") - d1.addErrback(lambda why: None) - return d1 - d.addCallback(_log_attachment_on_slave) - - def _get_info(res): - d1 = bot.callRemote("getSlaveInfo") - def _got_info(info): - log.msg("Got slaveinfo from '%s'" % self.slavename) - # TODO: info{} might have other keys - state["admin"] = info.get("admin") - state["host"] = info.get("host") - state["access_uri"] = info.get("access_uri", None) - state["slave_environ"] = info.get("environ", {}) - state["slave_basedir"] = info.get("basedir", None) - state["slave_system"] = info.get("system", None) - def _info_unavailable(why): - why.trap(pb.NoSuchMethod) - # maybe an old slave, doesn't implement remote_getSlaveInfo - log.msg("BuildSlave.info_unavailable") - log.err(why) - d1.addCallbacks(_got_info, _info_unavailable) - return d1 - d.addCallback(_get_info) - self.startKeepaliveTimer() - - def _get_version(res): - d = bot.callRemote("getVersion") - def _got_version(version): - state["version"] = version - def _version_unavailable(why): - why.trap(pb.NoSuchMethod) - # probably an old slave - state["version"] = '(unknown)' - d.addCallbacks(_got_version, _version_unavailable) - return d - d.addCallback(_get_version) - - def _get_commands(res): - d1 = bot.callRemote("getCommands") - def _got_commands(commands): - state["slave_commands"] = commands - def _commands_unavailable(why): - # probably an old slave - if why.check(AttributeError): - return - log.msg("BuildSlave.getCommands is unavailable - ignoring") - log.err(why) - d1.addCallbacks(_got_commands, _commands_unavailable) - return d1 - d.addCallback(_get_commands) - - def _accept_slave(res): - self.slave_status.setAdmin(state.get("admin")) - self.slave_status.setHost(state.get("host")) - self.slave_status.setAccessURI(state.get("access_uri")) - self.slave_status.setVersion(state.get("version")) - self.slave_status.setConnected(True) - self.slave_commands = state.get("slave_commands") - self.slave_environ = state.get("slave_environ") - self.slave_basedir = state.get("slave_basedir") - self.slave_system = state.get("slave_system") - self.slave = bot - if self.slave_system == "nt": - self.path_module = namedModule("ntpath") - else: - # most eveything accepts / as separator, so posix should be a - # reasonable fallback - self.path_module = namedModule("posixpath") - log.msg("bot attached") - self.messageReceivedFromSlave() - self.stopMissingTimer() - self.botmaster.master.status.slaveConnected(self.slavename) - - return self.updateSlave() - d.addCallback(_accept_slave) - d.addCallback(lambda _: - self.botmaster.maybeStartBuildsForSlave(self.slavename)) - - # Finally, the slave gets a reference to this BuildSlave. 
They - # receive this later, after we've started using them. - d.addCallback(lambda _: self) - return d - - def messageReceivedFromSlave(self): - now = time.time() - self.lastMessageReceived = now - self.slave_status.setLastMessageReceived(now) - - def detached(self, mind): - metrics.MetricCountEvent.log("AbstractBuildSlave.attached_slaves", -1) - self.slave = None - self._old_builder_list = [] - self.slave_status.removeGracefulWatcher(self._gracefulChanged) - self.slave_status.setConnected(False) - log.msg("BuildSlave.detached(%s)" % self.slavename) - self.botmaster.master.status.slaveDisconnected(self.slavename) - self.stopKeepaliveTimer() - self.releaseLocks() - - # notify watchers, but do so in the next reactor iteration so that - # any further detached() action by subclasses happens first - def notif(): - subs = self.detached_subs - self.detached_subs = None - subs.deliver() - eventually(notif) - - def subscribeToDetach(self, callback): - """ - Request that C{callable} be invoked with no arguments when the - L{detached} method is invoked. - - @returns: L{Subscription} - """ - assert self.detached_subs, "detached_subs is only set if attached" - return self.detached_subs.subscribe(callback) - - def disconnect(self): - """Forcibly disconnect the slave. - - This severs the TCP connection and returns a Deferred that will fire - (with None) when the connection is probably gone. - - If the slave is still alive, they will probably try to reconnect - again in a moment. - - This is called in two circumstances. The first is when a slave is - removed from the config file. In this case, when they try to - reconnect, they will be rejected as an unknown slave. The second is - when we wind up with two connections for the same slave, in which - case we disconnect the older connection. - """ - - if not self.slave: - return defer.succeed(None) - log.msg("disconnecting old slave %s now" % self.slavename) - # When this Deferred fires, we'll be ready to accept the new slave - return self._disconnect(self.slave) - - def _disconnect(self, slave): - # all kinds of teardown will happen as a result of - # loseConnection(), but it happens after a reactor iteration or - # two. Hook the actual disconnect so we can know when it is safe - # to connect the new slave. We have to wait one additional - # iteration (with callLater(0)) to make sure the *other* - # notifyOnDisconnect handlers have had a chance to run. - d = defer.Deferred() - - # notifyOnDisconnect runs the callback with one argument, the - # RemoteReference being disconnected. - def _disconnected(rref): - eventually(d.callback, None) - slave.notifyOnDisconnect(_disconnected) - tport = slave.broker.transport - # this is the polite way to request that a socket be closed - tport.loseConnection() - try: - # but really we don't want to wait for the transmit queue to - # drain. The remote end is unlikely to ACK the data, so we'd - # probably have to wait for a (20-minute) TCP timeout. - #tport._closeSocket() - # however, doing _closeSocket (whether before or after - # loseConnection) somehow prevents the notifyOnDisconnect - # handlers from being run. Bummer. 
- tport.offset = 0 - tport.dataBuffer = "" - except: - # however, these hacks are pretty internal, so don't blow up if - # they fail or are unavailable - log.msg("failed to accelerate the shutdown process") - log.msg("waiting for slave to finish disconnecting") - - return d - - def sendBuilderList(self): - our_builders = self.botmaster.getBuildersForSlave(self.slavename) - blist = [(b.name, b.config.slavebuilddir) for b in our_builders] - if blist == self._old_builder_list: - return defer.succeed(None) - - d = self.slave.callRemote("setBuilderList", blist) - def sentBuilderList(ign): - self._old_builder_list = blist - return ign - d.addCallback(sentBuilderList) - return d - - def perspective_keepalive(self): - self.messageReceivedFromSlave() - - def perspective_shutdown(self): - log.msg("slave %s wants to shut down" % self.slavename) - self.slave_status.setGraceful(True) - - def addSlaveBuilder(self, sb): - self.slavebuilders[sb.builder_name] = sb - - def removeSlaveBuilder(self, sb): - try: - del self.slavebuilders[sb.builder_name] - except KeyError: - pass - - def buildFinished(self, sb): - """This is called when a build on this slave is finished.""" - self.botmaster.maybeStartBuildsForSlave(self.slavename) - - def canStartBuild(self): - """ - I am called when a build is requested to see if this buildslave - can start a build. This function can be used to limit overall - concurrency on the buildslave. - - Note for subclassers: if a slave can become willing to start a build - without any action on that slave (for example, by a resource in use on - another slave becoming available), then you must arrange for - L{maybeStartBuildsForSlave} to be called at that time, or builds on - this slave will not start. - """ - - if self.slave_status.isPaused(): - return False - - # If we're waiting to shutdown gracefully, then we shouldn't - # accept any new jobs. - if self.slave_status.getGraceful(): - return False - - if self.max_builds: - active_builders = [sb for sb in self.slavebuilders.values() - if sb.isBusy()] - if len(active_builders) >= self.max_builds: - return False - - if not self.locksAvailable(): - return False - - return True - - def _mail_missing_message(self, subject, text): - # first, see if we have a MailNotifier we can use. This gives us a - # fromaddr and a relayhost. - buildmaster = self.botmaster.master - for st in buildmaster.status: - if isinstance(st, MailNotifier): - break - else: - # if not, they get a default MailNotifier, which always uses SMTP - # to localhost and uses a dummy fromaddr of "buildbot". - log.msg("buildslave-missing msg using default MailNotifier") - st = MailNotifier("buildbot") - # now construct the mail - - m = Message() - m.set_payload(text) - m['Date'] = formatdate(localtime=True) - m['Subject'] = subject - m['From'] = st.fromaddr - recipients = self.notify_on_missing - m['To'] = ", ".join(recipients) - d = st.sendMessage(m, recipients) - # return the Deferred for testing purposes - return d - - def _gracefulChanged(self, graceful): - """This is called when our graceful shutdown setting changes""" - self.maybeShutdown() - - @defer.inlineCallbacks - def shutdown(self): - """Shutdown the slave""" - if not self.slave: - log.msg("no remote; slave is already shut down") - return - - # First, try the "new" way - calling our own remote's shutdown - # method. The method was only added in 0.8.3, so ignore NoSuchMethod - # failures. 
- def new_way(): - d = self.slave.callRemote('shutdown') - d.addCallback(lambda _ : True) # successful shutdown request - def check_nsm(f): - f.trap(pb.NoSuchMethod) - return False # fall through to the old way - d.addErrback(check_nsm) - def check_connlost(f): - f.trap(pb.PBConnectionLost) - return True # the slave is gone, so call it finished - d.addErrback(check_connlost) - return d - - if (yield new_way()): - return # done! - - # Now, the old way. Look for a builder with a remote reference to the - # client side slave. If we can find one, then call "shutdown" on the - # remote builder, which will cause the slave buildbot process to exit. - def old_way(): - d = None - for b in self.slavebuilders.values(): - if b.remote: - d = b.remote.callRemote("shutdown") - break - - if d: - log.msg("Shutting down (old) slave: %s" % self.slavename) - # The remote shutdown call will not complete successfully since the - # buildbot process exits almost immediately after getting the - # shutdown request. - # Here we look at the reason why the remote call failed, and if - # it's because the connection was lost, that means the slave - # shutdown as expected. - def _errback(why): - if why.check(pb.PBConnectionLost): - log.msg("Lost connection to %s" % self.slavename) - else: - log.err("Unexpected error when trying to shutdown %s" % self.slavename) - d.addErrback(_errback) - return d - log.err("Couldn't find remote builder to shut down slave") - return defer.succeed(None) - yield old_way() - - def maybeShutdown(self): - """Shut down this slave if it has been asked to shut down gracefully, - and has no active builders.""" - if not self.slave_status.getGraceful(): - return - active_builders = [sb for sb in self.slavebuilders.values() - if sb.isBusy()] - if active_builders: - return - d = self.shutdown() - d.addErrback(log.err, 'error while shutting down slave') - - def pause(self): - """Stop running new builds on the slave.""" - self.slave_status.setPaused(True) - - def unpause(self): - """Restart running new builds on the slave.""" - self.slave_status.setPaused(False) - self.botmaster.maybeStartBuildsForSlave(self.slavename) - - def isPaused(self): - return self.paused - -class BuildSlave(AbstractBuildSlave): - - def sendBuilderList(self): - d = AbstractBuildSlave.sendBuilderList(self) - def _sent(slist): - # Nothing has changed, so don't need to re-attach to everything - if not slist: - return - dl = [] - for name, remote in slist.items(): - # use get() since we might have changed our mind since then - b = self.botmaster.builders.get(name) - if b: - d1 = b.attached(self, remote, self.slave_commands) - dl.append(d1) - return defer.DeferredList(dl) - def _set_failed(why): - log.msg("BuildSlave.sendBuilderList (%s) failed" % self) - log.err(why) - # TODO: hang up on them?, without setBuilderList we can't use - # them - d.addCallbacks(_sent, _set_failed) - return d - - def detached(self, mind): - AbstractBuildSlave.detached(self, mind) - self.botmaster.slaveLost(self) - self.startMissingTimer() - - def buildFinished(self, sb): - """This is called when a build on this slave is finished.""" - AbstractBuildSlave.buildFinished(self, sb) - - # If we're gracefully shutting down, and we have no more active - # builders, then it's safe to disconnect - self.maybeShutdown() - -class AbstractLatentBuildSlave(AbstractBuildSlave): - """A build slave that will start up a slave instance when needed. - - To use, subclass and implement start_instance and stop_instance. - - See ec2buildslave.py for a concrete example. 
Also see the stub example in - test/test_slaves.py. - """ - - implements(ILatentBuildSlave) - - substantiated = False - substantiation_deferred = None - substantiation_build = None - insubstantiating = False - build_wait_timer = None - _shutdown_callback_handle = None - - def __init__(self, name, password, max_builds=None, - notify_on_missing=[], missing_timeout=60*20, - build_wait_timeout=60*10, - properties={}, locks=None): - AbstractBuildSlave.__init__( - self, name, password, max_builds, notify_on_missing, - missing_timeout, properties, locks) - self.building = set() - self.build_wait_timeout = build_wait_timeout - - def start_instance(self, build): - # responsible for starting instance that will try to connect with this - # master. Should return deferred with either True (instance started) - # or False (instance not started, so don't run a build here). Problems - # should use an errback. - raise NotImplementedError - - def stop_instance(self, fast=False): - # responsible for shutting down instance. - raise NotImplementedError - - def substantiate(self, sb, build): - if self.substantiated: - self._clearBuildWaitTimer() - self._setBuildWaitTimer() - return defer.succeed(True) - if self.substantiation_deferred is None: - if self.parent and not self.missing_timer: - # start timer. if timer times out, fail deferred - self.missing_timer = reactor.callLater( - self.missing_timeout, - self._substantiation_failed, defer.TimeoutError()) - self.substantiation_deferred = defer.Deferred() - self.substantiation_build = build - if self.slave is None: - d = self._substantiate(build) # start up instance - d.addErrback(log.err, "while substantiating") - # else: we're waiting for an old one to detach. the _substantiate - # will be done in ``detached`` below. - return self.substantiation_deferred - - def _substantiate(self, build): - # register event trigger - d = self.start_instance(build) - self._shutdown_callback_handle = reactor.addSystemEventTrigger( - 'before', 'shutdown', self._soft_disconnect, fast=True) - def start_instance_result(result): - # If we don't report success, then preparation failed. - if not result: - log.msg("Slave '%s' doesn not want to substantiate at this time" % (self.slavename,)) - d = self.substantiation_deferred - self.substantiation_deferred = None - d.callback(False) - return result - def clean_up(failure): - if self.missing_timer is not None: - self.missing_timer.cancel() - self._substantiation_failed(failure) - if self._shutdown_callback_handle is not None: - handle = self._shutdown_callback_handle - del self._shutdown_callback_handle - reactor.removeSystemEventTrigger(handle) - return failure - d.addCallbacks(start_instance_result, clean_up) - return d - - def attached(self, bot): - if self.substantiation_deferred is None and self.build_wait_timeout >= 0: - msg = 'Slave %s received connection while not trying to ' \ - 'substantiate. Disconnecting.' 
% (self.slavename,) - log.msg(msg) - self._disconnect(bot) - return defer.fail(RuntimeError(msg)) - return AbstractBuildSlave.attached(self, bot) - - def detached(self, mind): - AbstractBuildSlave.detached(self, mind) - if self.substantiation_deferred is not None: - d = self._substantiate(self.substantiation_build) - d.addErrback(log.err, 'while re-substantiating') - - def _substantiation_failed(self, failure): - self.missing_timer = None - if self.substantiation_deferred: - d = self.substantiation_deferred - self.substantiation_deferred = None - self.substantiation_build = None - d.errback(failure) - self.insubstantiate() - # notify people, but only if we're still in the config - if not self.parent or not self.notify_on_missing: - return - - buildmaster = self.botmaster.master - status = buildmaster.getStatus() - text = "The Buildbot working for '%s'\n" % status.getTitle() - text += ("has noticed that the latent buildslave named %s \n" % - self.slavename) - text += "never substantiated after a request\n" - text += "\n" - text += ("The request was made at %s (buildmaster-local time)\n" % - time.ctime(time.time() - self.missing_timeout)) # approx - text += "\n" - text += "Sincerely,\n" - text += " The Buildbot\n" - text += " %s\n" % status.getTitleURL() - subject = "Buildbot: buildslave %s never substantiated" % self.slavename - return self._mail_missing_message(subject, text) - - def canStartBuild(self): - if self.insubstantiating: - return False - return AbstractBuildSlave.canStartBuild(self) - - def buildStarted(self, sb): - assert self.substantiated - self._clearBuildWaitTimer() - self.building.add(sb.builder_name) - - def buildFinished(self, sb): - AbstractBuildSlave.buildFinished(self, sb) - - self.building.remove(sb.builder_name) - if not self.building: - if self.build_wait_timeout == 0: - self.insubstantiate() - else: - self._setBuildWaitTimer() - - def _clearBuildWaitTimer(self): - if self.build_wait_timer is not None: - if self.build_wait_timer.active(): - self.build_wait_timer.cancel() - self.build_wait_timer = None - - def _setBuildWaitTimer(self): - self._clearBuildWaitTimer() - if self.build_wait_timeout <= 0: - return - self.build_wait_timer = reactor.callLater( - self.build_wait_timeout, self._soft_disconnect) - - @defer.inlineCallbacks - def insubstantiate(self, fast=False): - self.insubstantiating = True - self._clearBuildWaitTimer() - d = self.stop_instance(fast) - if self._shutdown_callback_handle is not None: - handle = self._shutdown_callback_handle - del self._shutdown_callback_handle - reactor.removeSystemEventTrigger(handle) - self.substantiated = False - self.building.clear() # just to be sure - yield d - self.insubstantiating = False - - @defer.inlineCallbacks - def _soft_disconnect(self, fast=False): - # a negative build_wait_timeout means the slave should never be shut - # down, so just disconnect. - if self.build_wait_timeout < 0: - yield AbstractBuildSlave.disconnect(self) - return - - if self.missing_timer: - self.missing_timer.cancel() - self.missing_timer = None - - if self.substantiation_deferred is not None: - log.msg("Weird: Got request to stop before started. 
Allowing " - "slave to start cleanly to avoid inconsistent state") - yield self.substantiation_deferred - self.substantiation_deferred = None - self.substantiation_build = None - log.msg("Substantiation complete, immediately terminating.") - - if self.slave is not None: - # this could be called when the slave needs to shut down, such as - # in BotMaster.removeSlave, *or* when a new slave requests a - # connection when we already have a slave. It's not clear what to - # do in the second case: this shouldn't happen, and if it - # does...if it's a latent slave, shutting down will probably kill - # something we want...but we can't know what the status is. So, - # here, we just do what should be appropriate for the first case, - # and put our heads in the sand for the second, at least for now. - # The best solution to the odd situation is removing it as a - # possibility: make the master in charge of connecting to the - # slave, rather than vice versa. TODO. - yield defer.DeferredList([ - AbstractBuildSlave.disconnect(self), - self.insubstantiate(fast) - ], consumeErrors=True, fireOnOneErrback=True) - else: - yield AbstractBuildSlave.disconnect(self) - yield self.stop_instance(fast) - - def disconnect(self): - # This returns a Deferred but we don't use it - self._soft_disconnect() - # this removes the slave from all builders. It won't come back - # without a restart (or maybe a sighup) - self.botmaster.slaveLost(self) - - def stopService(self): - res = defer.maybeDeferred(AbstractBuildSlave.stopService, self) - if self.slave is not None: - d = self._soft_disconnect() - res = defer.DeferredList([res, d]) - return res - - def updateSlave(self): - """Called to add or remove builders after the slave has connected. - - Also called after botmaster's builders are initially set. - - @return: a Deferred that indicates when an attached slave has - accepted the new builders and/or released the old ones.""" - for b in self.botmaster.getBuildersForSlave(self.slavename): - if b.name not in self.slavebuilders: - b.addLatentSlave(self) - return AbstractBuildSlave.updateSlave(self) - - def sendBuilderList(self): - d = AbstractBuildSlave.sendBuilderList(self) - def _sent(slist): - if not slist: - return - dl = [] - for name, remote in slist.items(): - # use get() since we might have changed our mind since then. - # we're checking on the builder in addition to the - # slavebuilders out of a bit of paranoia. - b = self.botmaster.builders.get(name) - sb = self.slavebuilders.get(name) - if b and sb: - d1 = sb.attached(self, remote, self.slave_commands) - dl.append(d1) - return defer.DeferredList(dl) - def _set_failed(why): - log.msg("BuildSlave.sendBuilderList (%s) failed" % self) - log.err(why) - # TODO: hang up on them?, without setBuilderList we can't use - # them - if self.substantiation_deferred: - d = self.substantiation_deferred - self.substantiation_deferred = None - self.substantiation_build = None - d.errback(why) - if self.missing_timer: - self.missing_timer.cancel() - self.missing_timer = None - # TODO: maybe log? send an email? 
- return why - d.addCallbacks(_sent, _set_failed) - def _substantiated(res): - log.msg("Slave %s substantiated \o/" % self.slavename) - self.substantiated = True - if not self.substantiation_deferred: - log.msg("No substantiation deferred for %s" % self.slavename) - if self.substantiation_deferred: - log.msg("Firing %s substantiation deferred with success" % self.slavename) - d = self.substantiation_deferred - self.substantiation_deferred = None - self.substantiation_build = None - d.callback(True) - # note that the missing_timer is already handled within - # ``attached`` - if not self.building: - self._setBuildWaitTimer() - d.addCallback(_substantiated) - return d +_hush_pyflakes = [ + AbstractBuildSlave, BuildSlave, AbstractLatentBuildSlave ] diff --git a/master/buildbot/buildslave/base.py b/master/buildbot/buildslave/base.py new file mode 100644 index 00000000000..272b323d765 --- /dev/null +++ b/master/buildbot/buildslave/base.py @@ -0,0 +1,1049 @@ +# This file is part of Buildbot. Buildbot is free software: you can +# redistribute it and/or modify it under the terms of the GNU General Public +# License as published by the Free Software Foundation, version 2. +# +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +# details. +# +# You should have received a copy of the GNU General Public License along with +# this program; if not, write to the Free Software Foundation, Inc., 51 +# Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +# +# Portions Copyright Buildbot Team Members +# Portions Copyright Canonical Ltd. 2009 + +import time +from email.Message import Message +from email.Utils import formatdate +from zope.interface import implements +from twisted.python import log +from twisted.internet import defer, reactor +from twisted.application import service +from twisted.spread import pb +from twisted.python.reflect import namedModule + +from buildbot.status.slave import SlaveStatus +from buildbot.status.mail import MailNotifier +from buildbot.process import metrics, botmaster +from buildbot.interfaces import IBuildSlave, ILatentBuildSlave +from buildbot.process.properties import Properties +from buildbot.util import subscription +from buildbot.util.eventual import eventually +from buildbot import config + +class AbstractBuildSlave(config.ReconfigurableServiceMixin, pb.Avatar, + service.MultiService): + """This is the master-side representative for a remote buildbot slave. + There is exactly one for each slave described in the config file (the + c['slaves'] list). When buildbots connect in (.attach), they get a + reference to this instance. The BotMaster object is stashed as the + .botmaster attribute. The BotMaster is also our '.parent' Service. + + I represent a build slave -- a remote machine capable of + running builds. 
I am instantiated by the configuration file, and can be + subclassed to add extra functionality.""" + + implements(IBuildSlave) + keepalive_timer = None + keepalive_interval = None + + # reconfig slaves after builders + reconfig_priority = 64 + + def __init__(self, name, password, max_builds=None, + notify_on_missing=[], missing_timeout=3600, + properties={}, locks=None, keepalive_interval=3600): + """ + @param name: botname this machine will supply when it connects + @param password: password this machine will supply when + it connects + @param max_builds: maximum number of simultaneous builds that will + be run concurrently on this buildslave (the + default is None for no limit) + @param properties: properties that will be applied to builds run on + this slave + @type properties: dictionary + @param locks: A list of locks that must be acquired before this slave + can be used + @type locks: dictionary + """ + service.MultiService.__init__(self) + self.slavename = name + self.password = password + + # PB registration + self.registration = None + self.registered_port = None + + # these are set when the service is started, and unset when it is + # stopped + self.botmaster = None + self.master = None + + self.slave_status = SlaveStatus(name) + self.slave = None # a RemoteReference to the Bot, when connected + self.slave_commands = None + self.slavebuilders = {} + self.max_builds = max_builds + self.access = [] + if locks: + self.access = locks + self.lock_subscriptions = [] + + self.properties = Properties() + self.properties.update(properties, "BuildSlave") + self.properties.setProperty("slavename", name, "BuildSlave") + + self.lastMessageReceived = 0 + if isinstance(notify_on_missing, str): + notify_on_missing = [notify_on_missing] + self.notify_on_missing = notify_on_missing + for i in notify_on_missing: + if not isinstance(i, str): + config.error( + 'notify_on_missing arg %r is not a string' % (i,)) + self.missing_timeout = missing_timeout + self.missing_timer = None + self.keepalive_interval = keepalive_interval + + self.detached_subs = None + + self._old_builder_list = None + + def __repr__(self): + return "<%s %r>" % (self.__class__.__name__, self.slavename) + + def updateLocks(self): + """Convert the L{LockAccess} objects in C{self.locks} into real lock + objects, while also maintaining the subscriptions to lock releases.""" + # unsubscribe from any old locks + for s in self.lock_subscriptions: + s.unsubscribe() + + # convert locks into their real form + locks = [ (self.botmaster.getLockFromLockAccess(a), a) + for a in self.access ] + self.locks = [(l.getLock(self), la) for l, la in locks] + self.lock_subscriptions = [ l.subscribeToReleases(self._lockReleased) + for l, la in self.locks ] + + def locksAvailable(self): + """ + I am called to see if all the locks I depend on are available, + in which I return True, otherwise I return False + """ + if not self.locks: + return True + for lock, access in self.locks: + if not lock.isAvailable(self, access): + return False + return True + + def acquireLocks(self): + """ + I am called when a build is preparing to run. I try to claim all + the locks that are needed for a build to happen. If I can't, then + my caller should give up the build and try to get another slave + to look at it. 
+ """ + log.msg("acquireLocks(slave %s, locks %s)" % (self, self.locks)) + if not self.locksAvailable(): + log.msg("slave %s can't lock, giving up" % (self, )) + return False + # all locks are available, claim them all + for lock, access in self.locks: + lock.claim(self, access) + return True + + def releaseLocks(self): + """ + I am called to release any locks after a build has finished + """ + log.msg("releaseLocks(%s): %s" % (self, self.locks)) + for lock, access in self.locks: + lock.release(self, access) + + def _lockReleased(self): + """One of the locks for this slave was released; try scheduling + builds.""" + if not self.botmaster: + return # oh well.. + self.botmaster.maybeStartBuildsForSlave(self.slavename) + + def setServiceParent(self, parent): + # botmaster needs to set before setServiceParent which calls startService + self.botmaster = parent + self.master = parent.master + service.MultiService.setServiceParent(self, parent) + + def startService(self): + self.updateLocks() + self.startMissingTimer() + return service.MultiService.startService(self) + + @defer.inlineCallbacks + def reconfigService(self, new_config): + # Given a new BuildSlave, configure this one identically. Because + # BuildSlave objects are remotely referenced, we can't replace them + # without disconnecting the slave, yet there's no reason to do that. + new = self.findNewSlaveInstance(new_config) + + assert self.slavename == new.slavename + + # do we need to re-register? + if (not self.registration or + self.password != new.password or + new_config.slavePortnum != self.registered_port): + if self.registration: + yield self.registration.unregister() + self.registration = None + self.password = new.password + self.registered_port = new_config.slavePortnum + self.registration = self.master.pbmanager.register( + self.registered_port, self.slavename, + self.password, self.getPerspective) + + # adopt new instance's configuration parameters + self.max_builds = new.max_builds + self.access = new.access + self.notify_on_missing = new.notify_on_missing + self.keepalive_interval = new.keepalive_interval + + if self.missing_timeout != new.missing_timeout: + running_missing_timer = self.missing_timer + self.stopMissingTimer() + self.missing_timeout = new.missing_timeout + if running_missing_timer: + self.startMissingTimer() + + properties = Properties() + properties.updateFromProperties(new.properties) + self.properties = properties + + self.updateLocks() + + # update the attached slave's notion of which builders are attached. + # This assumes that the relevant builders have already been configured, + # which is why the reconfig_priority is set low in this class. + yield self.updateSlave() + + yield config.ReconfigurableServiceMixin.reconfigService(self, + new_config) + + def stopService(self): + if self.registration: + self.registration.unregister() + self.registration = None + self.stopMissingTimer() + return service.MultiService.stopService(self) + + def findNewSlaveInstance(self, new_config): + # TODO: called multiple times per reconfig; use 1-element cache? 
+ for sl in new_config.slaves: + if sl.slavename == self.slavename: + return sl + assert 0, "no new slave named '%s'" % self.slavename + + def startMissingTimer(self): + if self.notify_on_missing and self.missing_timeout and self.parent: + self.stopMissingTimer() # in case it's already running + self.missing_timer = reactor.callLater(self.missing_timeout, + self._missing_timer_fired) + + def stopMissingTimer(self): + if self.missing_timer: + self.missing_timer.cancel() + self.missing_timer = None + + def getPerspective(self, mind, slavename): + assert slavename == self.slavename + metrics.MetricCountEvent.log("attached_slaves", 1) + + # record when this connection attempt occurred + if self.slave_status: + self.slave_status.recordConnectTime() + + # try to use TCP keepalives + try: + mind.broker.transport.setTcpKeepAlive(1) + except: + pass + + if self.isConnected(): + # duplicate slave - send it to arbitration + arb = botmaster.DuplicateSlaveArbitrator(self) + return arb.getPerspective(mind, slavename) + else: + log.msg("slave '%s' attaching from %s" % (slavename, mind.broker.transport.getPeer())) + return self + + def doKeepalive(self): + self.keepalive_timer = reactor.callLater(self.keepalive_interval, + self.doKeepalive) + if not self.slave: + return + d = self.slave.callRemote("print", "Received keepalive from master") + d.addErrback(log.msg, "Keepalive failed for '%s'" % (self.slavename, )) + + def stopKeepaliveTimer(self): + if self.keepalive_timer: + self.keepalive_timer.cancel() + + def startKeepaliveTimer(self): + assert self.keepalive_interval + log.msg("Starting buildslave keepalive timer for '%s'" % \ + (self.slavename, )) + self.doKeepalive() + + def isConnected(self): + return self.slave + + def _missing_timer_fired(self): + self.missing_timer = None + # notify people, but only if we're still in the config + if not self.parent: + return + + buildmaster = self.botmaster.master + status = buildmaster.getStatus() + text = "The Buildbot working for '%s'\n" % status.getTitle() + text += ("has noticed that the buildslave named %s went away\n" % + self.slavename) + text += "\n" + text += ("It last disconnected at %s (buildmaster-local time)\n" % + time.ctime(time.time() - self.missing_timeout)) # approx + text += "\n" + text += "The admin on record (as reported by BUILDSLAVE:info/admin)\n" + text += "was '%s'.\n" % self.slave_status.getAdmin() + text += "\n" + text += "Sincerely,\n" + text += " The Buildbot\n" + text += " %s\n" % status.getTitleURL() + text += "\n" + text += "%s\n" % status.getURLForThing(self.slave_status) + subject = "Buildbot: buildslave %s was lost" % self.slavename + return self._mail_missing_message(subject, text) + + + def updateSlave(self): + """Called to add or remove builders after the slave has connected. + + @return: a Deferred that indicates when an attached slave has + accepted the new builders and/or released the old ones.""" + if self.slave: + return self.sendBuilderList() + else: + return defer.succeed(None) + + def updateSlaveStatus(self, buildStarted=None, buildFinished=None): + if buildStarted: + self.slave_status.buildStarted(buildStarted) + if buildFinished: + self.slave_status.buildFinished(buildFinished) + + @metrics.countMethod('AbstractBuildSlave.attached()') + def attached(self, bot): + """This is called when the slave connects. + + @return: a Deferred that fires when the attachment is complete + """ + + # the botmaster should ensure this. 
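+        # (getPerspective() routes duplicate connections through
+        # DuplicateSlaveArbitrator before they ever reach attached())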
+ assert not self.isConnected() + + metrics.MetricCountEvent.log("AbstractBuildSlave.attached_slaves", 1) + + # set up the subscription point for eventual detachment + self.detached_subs = subscription.SubscriptionPoint("detached") + + # now we go through a sequence of calls, gathering information, then + # tell the Botmaster that it can finally give this slave to all the + # Builders that care about it. + + # we accumulate slave information in this 'state' dictionary, then + # set it atomically if we make it far enough through the process + state = {} + + # Reset graceful shutdown status + self.slave_status.setGraceful(False) + # We want to know when the graceful shutdown flag changes + self.slave_status.addGracefulWatcher(self._gracefulChanged) + + d = defer.succeed(None) + def _log_attachment_on_slave(res): + d1 = bot.callRemote("print", "attached") + d1.addErrback(lambda why: None) + return d1 + d.addCallback(_log_attachment_on_slave) + + def _get_info(res): + d1 = bot.callRemote("getSlaveInfo") + def _got_info(info): + log.msg("Got slaveinfo from '%s'" % self.slavename) + # TODO: info{} might have other keys + state["admin"] = info.get("admin") + state["host"] = info.get("host") + state["access_uri"] = info.get("access_uri", None) + state["slave_environ"] = info.get("environ", {}) + state["slave_basedir"] = info.get("basedir", None) + state["slave_system"] = info.get("system", None) + def _info_unavailable(why): + why.trap(pb.NoSuchMethod) + # maybe an old slave, doesn't implement remote_getSlaveInfo + log.msg("BuildSlave.info_unavailable") + log.err(why) + d1.addCallbacks(_got_info, _info_unavailable) + return d1 + d.addCallback(_get_info) + self.startKeepaliveTimer() + + def _get_version(res): + d = bot.callRemote("getVersion") + def _got_version(version): + state["version"] = version + def _version_unavailable(why): + why.trap(pb.NoSuchMethod) + # probably an old slave + state["version"] = '(unknown)' + d.addCallbacks(_got_version, _version_unavailable) + return d + d.addCallback(_get_version) + + def _get_commands(res): + d1 = bot.callRemote("getCommands") + def _got_commands(commands): + state["slave_commands"] = commands + def _commands_unavailable(why): + # probably an old slave + if why.check(AttributeError): + return + log.msg("BuildSlave.getCommands is unavailable - ignoring") + log.err(why) + d1.addCallbacks(_got_commands, _commands_unavailable) + return d1 + d.addCallback(_get_commands) + + def _accept_slave(res): + self.slave_status.setAdmin(state.get("admin")) + self.slave_status.setHost(state.get("host")) + self.slave_status.setAccessURI(state.get("access_uri")) + self.slave_status.setVersion(state.get("version")) + self.slave_status.setConnected(True) + self.slave_commands = state.get("slave_commands") + self.slave_environ = state.get("slave_environ") + self.slave_basedir = state.get("slave_basedir") + self.slave_system = state.get("slave_system") + self.slave = bot + if self.slave_system == "nt": + self.path_module = namedModule("ntpath") + else: + # most eveything accepts / as separator, so posix should be a + # reasonable fallback + self.path_module = namedModule("posixpath") + log.msg("bot attached") + self.messageReceivedFromSlave() + self.stopMissingTimer() + self.botmaster.master.status.slaveConnected(self.slavename) + + return self.updateSlave() + d.addCallback(_accept_slave) + d.addCallback(lambda _: + self.botmaster.maybeStartBuildsForSlave(self.slavename)) + + # Finally, the slave gets a reference to this BuildSlave. 
They + # receive this later, after we've started using them. + d.addCallback(lambda _: self) + return d + + def messageReceivedFromSlave(self): + now = time.time() + self.lastMessageReceived = now + self.slave_status.setLastMessageReceived(now) + + def detached(self, mind): + metrics.MetricCountEvent.log("AbstractBuildSlave.attached_slaves", -1) + self.slave = None + self._old_builder_list = [] + self.slave_status.removeGracefulWatcher(self._gracefulChanged) + self.slave_status.setConnected(False) + log.msg("BuildSlave.detached(%s)" % self.slavename) + self.botmaster.master.status.slaveDisconnected(self.slavename) + self.stopKeepaliveTimer() + self.releaseLocks() + + # notify watchers, but do so in the next reactor iteration so that + # any further detached() action by subclasses happens first + def notif(): + subs = self.detached_subs + self.detached_subs = None + subs.deliver() + eventually(notif) + + def subscribeToDetach(self, callback): + """ + Request that C{callable} be invoked with no arguments when the + L{detached} method is invoked. + + @returns: L{Subscription} + """ + assert self.detached_subs, "detached_subs is only set if attached" + return self.detached_subs.subscribe(callback) + + def disconnect(self): + """Forcibly disconnect the slave. + + This severs the TCP connection and returns a Deferred that will fire + (with None) when the connection is probably gone. + + If the slave is still alive, they will probably try to reconnect + again in a moment. + + This is called in two circumstances. The first is when a slave is + removed from the config file. In this case, when they try to + reconnect, they will be rejected as an unknown slave. The second is + when we wind up with two connections for the same slave, in which + case we disconnect the older connection. + """ + + if not self.slave: + return defer.succeed(None) + log.msg("disconnecting old slave %s now" % self.slavename) + # When this Deferred fires, we'll be ready to accept the new slave + return self._disconnect(self.slave) + + def _disconnect(self, slave): + # all kinds of teardown will happen as a result of + # loseConnection(), but it happens after a reactor iteration or + # two. Hook the actual disconnect so we can know when it is safe + # to connect the new slave. We have to wait one additional + # iteration (with callLater(0)) to make sure the *other* + # notifyOnDisconnect handlers have had a chance to run. + d = defer.Deferred() + + # notifyOnDisconnect runs the callback with one argument, the + # RemoteReference being disconnected. + def _disconnected(rref): + eventually(d.callback, None) + slave.notifyOnDisconnect(_disconnected) + tport = slave.broker.transport + # this is the polite way to request that a socket be closed + tport.loseConnection() + try: + # but really we don't want to wait for the transmit queue to + # drain. The remote end is unlikely to ACK the data, so we'd + # probably have to wait for a (20-minute) TCP timeout. + #tport._closeSocket() + # however, doing _closeSocket (whether before or after + # loseConnection) somehow prevents the notifyOnDisconnect + # handlers from being run. Bummer. 
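+            # instead, clear the transport's write buffer directly.  'offset'
+            # and 'dataBuffer' are private attributes of Twisted's TCP
+            # transport, which is why the except clause below is so broad.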
+ tport.offset = 0 + tport.dataBuffer = "" + except: + # however, these hacks are pretty internal, so don't blow up if + # they fail or are unavailable + log.msg("failed to accelerate the shutdown process") + log.msg("waiting for slave to finish disconnecting") + + return d + + def sendBuilderList(self): + our_builders = self.botmaster.getBuildersForSlave(self.slavename) + blist = [(b.name, b.config.slavebuilddir) for b in our_builders] + if blist == self._old_builder_list: + return defer.succeed(None) + + d = self.slave.callRemote("setBuilderList", blist) + def sentBuilderList(ign): + self._old_builder_list = blist + return ign + d.addCallback(sentBuilderList) + return d + + def perspective_keepalive(self): + self.messageReceivedFromSlave() + + def perspective_shutdown(self): + log.msg("slave %s wants to shut down" % self.slavename) + self.slave_status.setGraceful(True) + + def addSlaveBuilder(self, sb): + self.slavebuilders[sb.builder_name] = sb + + def removeSlaveBuilder(self, sb): + try: + del self.slavebuilders[sb.builder_name] + except KeyError: + pass + + def buildFinished(self, sb): + """This is called when a build on this slave is finished.""" + self.botmaster.maybeStartBuildsForSlave(self.slavename) + + def canStartBuild(self): + """ + I am called when a build is requested to see if this buildslave + can start a build. This function can be used to limit overall + concurrency on the buildslave. + + Note for subclassers: if a slave can become willing to start a build + without any action on that slave (for example, by a resource in use on + another slave becoming available), then you must arrange for + L{maybeStartBuildsForSlave} to be called at that time, or builds on + this slave will not start. + """ + + if self.slave_status.isPaused(): + return False + + # If we're waiting to shutdown gracefully, then we shouldn't + # accept any new jobs. + if self.slave_status.getGraceful(): + return False + + if self.max_builds: + active_builders = [sb for sb in self.slavebuilders.values() + if sb.isBusy()] + if len(active_builders) >= self.max_builds: + return False + + if not self.locksAvailable(): + return False + + return True + + def _mail_missing_message(self, subject, text): + # first, see if we have a MailNotifier we can use. This gives us a + # fromaddr and a relayhost. + buildmaster = self.botmaster.master + for st in buildmaster.status: + if isinstance(st, MailNotifier): + break + else: + # if not, they get a default MailNotifier, which always uses SMTP + # to localhost and uses a dummy fromaddr of "buildbot". + log.msg("buildslave-missing msg using default MailNotifier") + st = MailNotifier("buildbot") + # now construct the mail + + m = Message() + m.set_payload(text) + m['Date'] = formatdate(localtime=True) + m['Subject'] = subject + m['From'] = st.fromaddr + recipients = self.notify_on_missing + m['To'] = ", ".join(recipients) + d = st.sendMessage(m, recipients) + # return the Deferred for testing purposes + return d + + def _gracefulChanged(self, graceful): + """This is called when our graceful shutdown setting changes""" + self.maybeShutdown() + + @defer.inlineCallbacks + def shutdown(self): + """Shutdown the slave""" + if not self.slave: + log.msg("no remote; slave is already shut down") + return + + # First, try the "new" way - calling our own remote's shutdown + # method. The method was only added in 0.8.3, so ignore NoSuchMethod + # failures. 
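+        # Both attempts below return a Deferred.  new_way() fires with a
+        # boolean indicating whether the remote 'shutdown' method was
+        # usable; we fall back to old_way() only when it was not.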
+        def new_way():
+            d = self.slave.callRemote('shutdown')
+            d.addCallback(lambda _: True) # successful shutdown request
+            def check_nsm(f):
+                f.trap(pb.NoSuchMethod)
+                return False # fall through to the old way
+            d.addErrback(check_nsm)
+            def check_connlost(f):
+                f.trap(pb.PBConnectionLost)
+                return True # the slave is gone, so call it finished
+            d.addErrback(check_connlost)
+            return d
+
+        if (yield new_way()):
+            return # done!
+
+        # Now, the old way. Look for a builder with a remote reference to the
+        # client side slave. If we can find one, then call "shutdown" on the
+        # remote builder, which will cause the slave buildbot process to exit.
+        def old_way():
+            d = None
+            for b in self.slavebuilders.values():
+                if b.remote:
+                    d = b.remote.callRemote("shutdown")
+                    break
+
+            if d:
+                log.msg("Shutting down (old) slave: %s" % self.slavename)
+                # The remote shutdown call will not complete successfully since
+                # the buildbot process exits almost immediately after getting
+                # the shutdown request.
+                # Here we look at the reason why the remote call failed, and if
+                # it's because the connection was lost, that means the slave
+                # shut down as expected.
+                def _errback(why):
+                    if why.check(pb.PBConnectionLost):
+                        log.msg("Lost connection to %s" % self.slavename)
+                    else:
+                        log.err("Unexpected error when trying to shut down %s" % self.slavename)
+                d.addErrback(_errback)
+                return d
+            log.err("Couldn't find remote builder to shut down slave")
+            return defer.succeed(None)
+        yield old_way()
+
+    def maybeShutdown(self):
+        """Shut down this slave if it has been asked to shut down gracefully,
+        and has no active builders."""
+        if not self.slave_status.getGraceful():
+            return
+        active_builders = [sb for sb in self.slavebuilders.values()
+                           if sb.isBusy()]
+        if active_builders:
+            return
+        d = self.shutdown()
+        d.addErrback(log.err, 'error while shutting down slave')
+
+    def pause(self):
+        """Stop running new builds on the slave."""
+        self.slave_status.setPaused(True)
+
+    def unpause(self):
+        """Restart running new builds on the slave."""
+        self.slave_status.setPaused(False)
+        self.botmaster.maybeStartBuildsForSlave(self.slavename)
+
+    def isPaused(self):
+        return self.slave_status.isPaused()
+
+class BuildSlave(AbstractBuildSlave):
+
+    def sendBuilderList(self):
+        d = AbstractBuildSlave.sendBuilderList(self)
+        def _sent(slist):
+            # Nothing has changed, so don't need to re-attach to everything
+            if not slist:
+                return
+            dl = []
+            for name, remote in slist.items():
+                # use get() since we might have changed our mind since then
+                b = self.botmaster.builders.get(name)
+                if b:
+                    d1 = b.attached(self, remote, self.slave_commands)
+                    dl.append(d1)
+            return defer.DeferredList(dl)
+        def _set_failed(why):
+            log.msg("BuildSlave.sendBuilderList (%s) failed" % self)
+            log.err(why)
+            # TODO: hang up on them? without setBuilderList we can't use
+            # them
+        d.addCallbacks(_sent, _set_failed)
+        return d
+
+    def detached(self, mind):
+        AbstractBuildSlave.detached(self, mind)
+        self.botmaster.slaveLost(self)
+        self.startMissingTimer()
+
+    def buildFinished(self, sb):
+        """This is called when a build on this slave is finished."""
+        AbstractBuildSlave.buildFinished(self, sb)
+
+        # If we're gracefully shutting down, and we have no more active
+        # builders, then it's safe to disconnect
+        self.maybeShutdown()
+
+class AbstractLatentBuildSlave(AbstractBuildSlave):
+    """A build slave that will start up a slave instance when needed.
+
+    To use, subclass and implement start_instance and stop_instance.
+
+    See buildbot/buildslave/ec2.py for a concrete example.  Also see the
+    stub example in test/test_slaves.py.
+    """
+
+    implements(ILatentBuildSlave)
+
+    substantiated = False
+    substantiation_deferred = None
+    substantiation_build = None
+    insubstantiating = False
+    build_wait_timer = None
+    _shutdown_callback_handle = None
+
+    def __init__(self, name, password, max_builds=None,
+                 notify_on_missing=[], missing_timeout=60*20,
+                 build_wait_timeout=60*10,
+                 properties={}, locks=None):
+        AbstractBuildSlave.__init__(
+            self, name, password, max_builds, notify_on_missing,
+            missing_timeout, properties, locks)
+        self.building = set()
+        self.build_wait_timeout = build_wait_timeout
+
+    def start_instance(self, build):
+        # responsible for starting instance that will try to connect with this
+        # master. Should return deferred with either True (instance started)
+        # or False (instance not started, so don't run a build here). Problems
+        # should use an errback.
+        raise NotImplementedError
+
+    def stop_instance(self, fast=False):
+        # responsible for shutting down instance.
+        raise NotImplementedError
+
+    def substantiate(self, sb, build):
+        if self.substantiated:
+            self._clearBuildWaitTimer()
+            self._setBuildWaitTimer()
+            return defer.succeed(True)
+        if self.substantiation_deferred is None:
+            if self.parent and not self.missing_timer:
+                # start timer. if timer times out, fail deferred
+                self.missing_timer = reactor.callLater(
+                    self.missing_timeout,
+                    self._substantiation_failed, defer.TimeoutError())
+            self.substantiation_deferred = defer.Deferred()
+            self.substantiation_build = build
+            if self.slave is None:
+                d = self._substantiate(build) # start up instance
+                d.addErrback(log.err, "while substantiating")
+            # else: we're waiting for an old one to detach.  the _substantiate
+            # will be done in ``detached`` below.
+        return self.substantiation_deferred
+
+    def _substantiate(self, build):
+        # register event trigger
+        d = self.start_instance(build)
+        self._shutdown_callback_handle = reactor.addSystemEventTrigger(
+            'before', 'shutdown', self._soft_disconnect, fast=True)
+        def start_instance_result(result):
+            # If we don't report success, then preparation failed.
+            if not result:
+                log.msg("Slave '%s' does not want to substantiate at this time" % (self.slavename,))
+                d = self.substantiation_deferred
+                self.substantiation_deferred = None
+                d.callback(False)
+            return result
+        def clean_up(failure):
+            if self.missing_timer is not None:
+                self.missing_timer.cancel()
+                self._substantiation_failed(failure)
+            if self._shutdown_callback_handle is not None:
+                handle = self._shutdown_callback_handle
+                del self._shutdown_callback_handle
+                reactor.removeSystemEventTrigger(handle)
+            return failure
+        d.addCallbacks(start_instance_result, clean_up)
+        return d
+
+    def attached(self, bot):
+        if self.substantiation_deferred is None and self.build_wait_timeout >= 0:
+            msg = 'Slave %s received connection while not trying to ' \
+                    'substantiate.  Disconnecting.'
% (self.slavename,) + log.msg(msg) + self._disconnect(bot) + return defer.fail(RuntimeError(msg)) + return AbstractBuildSlave.attached(self, bot) + + def detached(self, mind): + AbstractBuildSlave.detached(self, mind) + if self.substantiation_deferred is not None: + d = self._substantiate(self.substantiation_build) + d.addErrback(log.err, 'while re-substantiating') + + def _substantiation_failed(self, failure): + self.missing_timer = None + if self.substantiation_deferred: + d = self.substantiation_deferred + self.substantiation_deferred = None + self.substantiation_build = None + d.errback(failure) + self.insubstantiate() + # notify people, but only if we're still in the config + if not self.parent or not self.notify_on_missing: + return + + buildmaster = self.botmaster.master + status = buildmaster.getStatus() + text = "The Buildbot working for '%s'\n" % status.getTitle() + text += ("has noticed that the latent buildslave named %s \n" % + self.slavename) + text += "never substantiated after a request\n" + text += "\n" + text += ("The request was made at %s (buildmaster-local time)\n" % + time.ctime(time.time() - self.missing_timeout)) # approx + text += "\n" + text += "Sincerely,\n" + text += " The Buildbot\n" + text += " %s\n" % status.getTitleURL() + subject = "Buildbot: buildslave %s never substantiated" % self.slavename + return self._mail_missing_message(subject, text) + + def canStartBuild(self): + if self.insubstantiating: + return False + return AbstractBuildSlave.canStartBuild(self) + + def buildStarted(self, sb): + assert self.substantiated + self._clearBuildWaitTimer() + self.building.add(sb.builder_name) + + def buildFinished(self, sb): + AbstractBuildSlave.buildFinished(self, sb) + + self.building.remove(sb.builder_name) + if not self.building: + if self.build_wait_timeout == 0: + self.insubstantiate() + else: + self._setBuildWaitTimer() + + def _clearBuildWaitTimer(self): + if self.build_wait_timer is not None: + if self.build_wait_timer.active(): + self.build_wait_timer.cancel() + self.build_wait_timer = None + + def _setBuildWaitTimer(self): + self._clearBuildWaitTimer() + if self.build_wait_timeout <= 0: + return + self.build_wait_timer = reactor.callLater( + self.build_wait_timeout, self._soft_disconnect) + + @defer.inlineCallbacks + def insubstantiate(self, fast=False): + self.insubstantiating = True + self._clearBuildWaitTimer() + d = self.stop_instance(fast) + if self._shutdown_callback_handle is not None: + handle = self._shutdown_callback_handle + del self._shutdown_callback_handle + reactor.removeSystemEventTrigger(handle) + self.substantiated = False + self.building.clear() # just to be sure + yield d + self.insubstantiating = False + + @defer.inlineCallbacks + def _soft_disconnect(self, fast=False): + # a negative build_wait_timeout means the slave should never be shut + # down, so just disconnect. + if self.build_wait_timeout < 0: + yield AbstractBuildSlave.disconnect(self) + return + + if self.missing_timer: + self.missing_timer.cancel() + self.missing_timer = None + + if self.substantiation_deferred is not None: + log.msg("Weird: Got request to stop before started. 
Allowing " + "slave to start cleanly to avoid inconsistent state") + yield self.substantiation_deferred + self.substantiation_deferred = None + self.substantiation_build = None + log.msg("Substantiation complete, immediately terminating.") + + if self.slave is not None: + # this could be called when the slave needs to shut down, such as + # in BotMaster.removeSlave, *or* when a new slave requests a + # connection when we already have a slave. It's not clear what to + # do in the second case: this shouldn't happen, and if it + # does...if it's a latent slave, shutting down will probably kill + # something we want...but we can't know what the status is. So, + # here, we just do what should be appropriate for the first case, + # and put our heads in the sand for the second, at least for now. + # The best solution to the odd situation is removing it as a + # possibility: make the master in charge of connecting to the + # slave, rather than vice versa. TODO. + yield defer.DeferredList([ + AbstractBuildSlave.disconnect(self), + self.insubstantiate(fast) + ], consumeErrors=True, fireOnOneErrback=True) + else: + yield AbstractBuildSlave.disconnect(self) + yield self.stop_instance(fast) + + def disconnect(self): + # This returns a Deferred but we don't use it + self._soft_disconnect() + # this removes the slave from all builders. It won't come back + # without a restart (or maybe a sighup) + self.botmaster.slaveLost(self) + + def stopService(self): + res = defer.maybeDeferred(AbstractBuildSlave.stopService, self) + if self.slave is not None: + d = self._soft_disconnect() + res = defer.DeferredList([res, d]) + return res + + def updateSlave(self): + """Called to add or remove builders after the slave has connected. + + Also called after botmaster's builders are initially set. + + @return: a Deferred that indicates when an attached slave has + accepted the new builders and/or released the old ones.""" + for b in self.botmaster.getBuildersForSlave(self.slavename): + if b.name not in self.slavebuilders: + b.addLatentSlave(self) + return AbstractBuildSlave.updateSlave(self) + + def sendBuilderList(self): + d = AbstractBuildSlave.sendBuilderList(self) + def _sent(slist): + if not slist: + return + dl = [] + for name, remote in slist.items(): + # use get() since we might have changed our mind since then. + # we're checking on the builder in addition to the + # slavebuilders out of a bit of paranoia. + b = self.botmaster.builders.get(name) + sb = self.slavebuilders.get(name) + if b and sb: + d1 = sb.attached(self, remote, self.slave_commands) + dl.append(d1) + return defer.DeferredList(dl) + def _set_failed(why): + log.msg("BuildSlave.sendBuilderList (%s) failed" % self) + log.err(why) + # TODO: hang up on them?, without setBuilderList we can't use + # them + if self.substantiation_deferred: + d = self.substantiation_deferred + self.substantiation_deferred = None + self.substantiation_build = None + d.errback(why) + if self.missing_timer: + self.missing_timer.cancel() + self.missing_timer = None + # TODO: maybe log? send an email? 
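+            # return the failure so that the errback chain stays intact and
+            # callers of sendBuilderList still see the error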
+ return why + d.addCallbacks(_sent, _set_failed) + def _substantiated(res): + log.msg("Slave %s substantiated \o/" % self.slavename) + self.substantiated = True + if not self.substantiation_deferred: + log.msg("No substantiation deferred for %s" % self.slavename) + if self.substantiation_deferred: + log.msg("Firing %s substantiation deferred with success" % self.slavename) + d = self.substantiation_deferred + self.substantiation_deferred = None + self.substantiation_build = None + d.callback(True) + # note that the missing_timer is already handled within + # ``attached`` + if not self.building: + self._setBuildWaitTimer() + d.addCallback(_substantiated) + return d diff --git a/master/buildbot/buildslave/ec2.py b/master/buildbot/buildslave/ec2.py new file mode 100644 index 00000000000..f144321b190 --- /dev/null +++ b/master/buildbot/buildslave/ec2.py @@ -0,0 +1,319 @@ +# This file is part of Buildbot. Buildbot is free software: you can +# redistribute it and/or modify it under the terms of the GNU General Public +# License as published by the Free Software Foundation, version 2. +# +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +# details. +# +# You should have received a copy of the GNU General Public License along with +# this program; if not, write to the Free Software Foundation, Inc., 51 +# Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +# +# Portions Copyright Buildbot Team Members + +from __future__ import with_statement +# Portions Copyright Canonical Ltd. 2009 + +"""A LatentSlave that uses EC2 to instantiate the slaves on demand. + +Tested with Python boto 1.5c +""" + +import os +import re +import time + +import boto +import boto.ec2 +import boto.exception +from twisted.internet import defer, threads +from twisted.python import log + +from buildbot.buildslave.base import AbstractLatentBuildSlave +from buildbot import interfaces + +PENDING = 'pending' +RUNNING = 'running' +SHUTTINGDOWN = 'shutting-down' +TERMINATED = 'terminated' + +class EC2LatentBuildSlave(AbstractLatentBuildSlave): + + instance = image = None + _poll_resolution = 5 # hook point for tests + + def __init__(self, name, password, instance_type, ami=None, + valid_ami_owners=None, valid_ami_location_regex=None, + elastic_ip=None, identifier=None, secret_identifier=None, + aws_id_file_path=None, user_data=None, region=None, + keypair_name='latent_buildbot_slave', + security_name='latent_buildbot_slave', + max_builds=None, notify_on_missing=[], missing_timeout=60*20, + build_wait_timeout=60*10, properties={}, locks=None): + + AbstractLatentBuildSlave.__init__( + self, name, password, max_builds, notify_on_missing, + missing_timeout, build_wait_timeout, properties, locks) + if not ((ami is not None) ^ + (valid_ami_owners is not None or + valid_ami_location_regex is not None)): + raise ValueError( + 'You must provide either a specific ami, or one or both of ' + 'valid_ami_location_regex and valid_ami_owners') + self.ami = ami + if valid_ami_owners is not None: + if isinstance(valid_ami_owners, (int, long)): + valid_ami_owners = (valid_ami_owners,) + else: + for element in valid_ami_owners: + if not isinstance(element, (int, long)): + raise ValueError( + 'valid_ami_owners should be int or iterable ' + 'of ints', element) + if valid_ami_location_regex is not None: + if not isinstance(valid_ami_location_regex, basestring): + raise 
ValueError( + 'valid_ami_location_regex should be a string') + else: + # verify that regex will compile + re.compile(valid_ami_location_regex) + self.valid_ami_owners = valid_ami_owners + self.valid_ami_location_regex = valid_ami_location_regex + self.instance_type = instance_type + self.keypair_name = keypair_name + self.security_name = security_name + self.user_data = user_data + if identifier is None: + assert secret_identifier is None, ( + 'supply both or neither of identifier, secret_identifier') + if aws_id_file_path is None: + home = os.environ['HOME'] + aws_id_file_path = os.path.join(home, '.ec2', 'aws_id') + if not os.path.exists(aws_id_file_path): + raise ValueError( + "Please supply your AWS access key identifier and secret " + "access key identifier either when instantiating this %s " + "or in the %s file (on two lines).\n" % + (self.__class__.__name__, aws_id_file_path)) + with open(aws_id_file_path, 'r') as aws_file: + identifier = aws_file.readline().strip() + secret_identifier = aws_file.readline().strip() + else: + assert aws_id_file_path is None, \ + 'if you supply the identifier and secret_identifier, ' \ + 'do not specify the aws_id_file_path' + assert secret_identifier is not None, \ + 'supply both or neither of identifier, secret_identifier' + + region_found = None + + # Make the EC2 connection. + if region is not None: + for r in boto.ec2.regions(aws_access_key_id=identifier, + aws_secret_access_key=secret_identifier): + + if r.name == region: + region_found = r + + + if region_found is not None: + self.conn = boto.ec2.connect_to_region(region, + aws_access_key_id=identifier, + aws_secret_access_key=secret_identifier) + else: + raise ValueError('The specified region does not exist: {0}'.format(region)) + + else: + self.conn = boto.connect_ec2(identifier, secret_identifier) + + # Make a keypair + # + # We currently discard the keypair data because we don't need it. + # If we do need it in the future, we will always recreate the keypairs + # because there is no way to + # programmatically retrieve the private key component, unless we + # generate it and store it on the filesystem, which is an unnecessary + # usage requirement. + try: + key_pair = self.conn.get_all_key_pairs(keypair_name)[0] + assert key_pair + # key_pair.delete() # would be used to recreate + except boto.exception.EC2ResponseError, e: + if 'InvalidKeyPair.NotFound' not in e.body: + if 'AuthFailure' in e.body: + print ('POSSIBLE CAUSES OF ERROR:\n' + ' Did you sign up for EC2?\n' + ' Did you put a credit card number in your AWS ' + 'account?\n' + 'Please doublecheck before reporting a problem.\n') + raise + # make one; we would always do this, and stash the result, if we + # needed the key (for instance, to SSH to the box). We'd then + # use paramiko to use the key to connect. 
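+            # create_key_pair() registers a fresh keypair with EC2; the
+            # private key material it returns is discarded, per the comment
+            # above.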
+ self.conn.create_key_pair(keypair_name) + + # create security group + try: + group = self.conn.get_all_security_groups(security_name)[0] + assert group + except boto.exception.EC2ResponseError, e: + if 'InvalidGroup.NotFound' in e.body: + self.security_group = self.conn.create_security_group( + security_name, + 'Authorization to access the buildbot instance.') + # Authorize the master as necessary + # TODO this is where we'd open the hole to do the reverse pb + # connect to the buildbot + # ip = urllib.urlopen( + # 'http://checkip.amazonaws.com').read().strip() + # self.security_group.authorize('tcp', 22, 22, '%s/32' % ip) + # self.security_group.authorize('tcp', 80, 80, '%s/32' % ip) + else: + raise + + # get the image + if self.ami is not None: + self.image = self.conn.get_image(self.ami) + else: + # verify we have access to at least one acceptable image + discard = self.get_image() + assert discard + + # get the specified elastic IP, if any + if elastic_ip is not None: + elastic_ip = self.conn.get_all_addresses([elastic_ip])[0] + self.elastic_ip = elastic_ip + + def get_image(self): + if self.image is not None: + return self.image + if self.valid_ami_location_regex: + level = 0 + options = [] + get_match = re.compile(self.valid_ami_location_regex).match + for image in self.conn.get_all_images( + owners=self.valid_ami_owners): + # gather sorting data + match = get_match(image.location) + if match: + alpha_sort = int_sort = None + if level < 2: + try: + alpha_sort = match.group(1) + except IndexError: + level = 2 + else: + if level == 0: + try: + int_sort = int(alpha_sort) + except ValueError: + level = 1 + options.append([int_sort, alpha_sort, + image.location, image.id, image]) + if level: + log.msg('sorting images at level %d' % level) + options = [candidate[level:] for candidate in options] + else: + options = [(image.location, image.id, image) for image + in self.conn.get_all_images( + owners=self.valid_ami_owners)] + options.sort() + log.msg('sorted images (last is chosen): %s' % + (', '.join( + ['%s (%s)' % (candidate[-1].id, candidate[-1].location) + for candidate in options]))) + if not options: + raise ValueError('no available images match constraints') + return options[-1][-1] + + def dns(self): + if self.instance is None: + return None + return self.instance.public_dns_name + dns = property(dns) + + def start_instance(self, build): + if self.instance is not None: + raise ValueError('instance active') + return threads.deferToThread(self._start_instance) + + def _start_instance(self): + image = self.get_image() + reservation = image.run( + key_name=self.keypair_name, security_groups=[self.security_name], + instance_type=self.instance_type, user_data=self.user_data) + self.instance = reservation.instances[0] + log.msg('%s %s starting instance %s' % + (self.__class__.__name__, self.slavename, self.instance.id)) + duration = 0 + interval = self._poll_resolution + while self.instance.state == PENDING: + time.sleep(interval) + duration += interval + if duration % 60 == 0: + log.msg('%s %s has waited %d minutes for instance %s' % + (self.__class__.__name__, self.slavename, duration//60, + self.instance.id)) + self.instance.update() + if self.instance.state == RUNNING: + self.output = self.instance.get_console_output() + minutes = duration//60 + seconds = duration%60 + log.msg('%s %s instance %s started on %s ' + 'in about %d minutes %d seconds (%s)' % + (self.__class__.__name__, self.slavename, + self.instance.id, self.dns, minutes, seconds, + self.output.output)) + if 
self.elastic_ip is not None: + self.instance.use_ip(self.elastic_ip) + return [self.instance.id, + image.id, + '%02d:%02d:%02d' % (minutes//60, minutes%60, seconds)] + else: + log.msg('%s %s failed to start instance %s (%s)' % + (self.__class__.__name__, self.slavename, + self.instance.id, self.instance.state)) + raise interfaces.LatentBuildSlaveFailedToSubstantiate( + self.instance.id, self.instance.state) + + def stop_instance(self, fast=False): + if self.instance is None: + # be gentle. Something may just be trying to alert us that an + # instance never attached, and it's because, somehow, we never + # started. + return defer.succeed(None) + instance = self.instance + self.output = self.instance = None + return threads.deferToThread( + self._stop_instance, instance, fast) + + def _stop_instance(self, instance, fast): + if self.elastic_ip is not None: + self.conn.disassociate_address(self.elastic_ip.public_ip) + instance.update() + if instance.state not in (SHUTTINGDOWN, TERMINATED): + instance.terminate() + log.msg('%s %s terminating instance %s' % + (self.__class__.__name__, self.slavename, instance.id)) + duration = 0 + interval = self._poll_resolution + if fast: + goal = (SHUTTINGDOWN, TERMINATED) + instance.update() + else: + goal = (TERMINATED,) + while instance.state not in goal: + time.sleep(interval) + duration += interval + if duration % 60 == 0: + log.msg( + '%s %s has waited %d minutes for instance %s to end' % + (self.__class__.__name__, self.slavename, duration//60, + instance.id)) + instance.update() + log.msg('%s %s instance %s %s ' + 'after about %d minutes %d seconds' % + (self.__class__.__name__, self.slavename, + instance.id, goal, duration//60, duration%60)) diff --git a/master/buildbot/buildslave/libvirt.py b/master/buildbot/buildslave/libvirt.py new file mode 100644 index 00000000000..e4133147575 --- /dev/null +++ b/master/buildbot/buildslave/libvirt.py @@ -0,0 +1,301 @@ +# This file is part of Buildbot. Buildbot is free software: you can +# redistribute it and/or modify it under the terms of the GNU General Public +# License as published by the Free Software Foundation, version 2. +# +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +# details. +# +# You should have received a copy of the GNU General Public License along with +# this program; if not, write to the Free Software Foundation, Inc., 51 +# Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +# +# Portions Copyright Buildbot Team Members +# Portions Copyright 2010 Isotoma Limited + +import os + +from twisted.internet import defer, utils, threads +from twisted.python import log, failure +from buildbot.buildslave.base import AbstractBuildSlave, AbstractLatentBuildSlave +from buildbot.util.eventual import eventually +from buildbot import config + +try: + import libvirt + libvirt = libvirt +except ImportError: + libvirt = None + + +class WorkQueue(object): + """ + I am a class that turns parallel access into serial access. + + I exist because we want to run libvirt access in threads as we don't + trust calls not to block, but under load libvirt doesn't seem to like + this kind of threaded use. + """ + + def __init__(self): + self.queue = [] + + def _process(self): + log.msg("Looking to start a piece of work now...") + + # Is there anything to do? 
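+        # (execute() only kicks off _process when the queue was empty, and
+        # _work_done() reschedules while items remain, so an empty queue here
+        # is a stray wake-up rather than lost work)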
+ if not self.queue: + log.msg("_process called when there is no work") + return + + # Peek at the top of the stack - get a function to call and + # a deferred to fire when its all over + d, next_operation, args, kwargs = self.queue[0] + + # Start doing some work - expects a deferred + try: + d2 = next_operation(*args, **kwargs) + except: + d2 = defer.fail() + + # Whenever a piece of work is done, whether it worked or not + # call this to schedule the next piece of work + def _work_done(res): + log.msg("Completed a piece of work") + self.queue.pop(0) + if self.queue: + log.msg("Preparing next piece of work") + eventually(self._process) + return res + d2.addBoth(_work_done) + + # When the work is done, trigger d + d2.chainDeferred(d) + + def execute(self, cb, *args, **kwargs): + kickstart_processing = not self.queue + d = defer.Deferred() + self.queue.append((d, cb, args, kwargs)) + if kickstart_processing: + self._process() + return d + + def executeInThread(self, cb, *args, **kwargs): + return self.execute(threads.deferToThread, cb, *args, **kwargs) + + +# A module is effectively a singleton class, so this is OK +queue = WorkQueue() + + +class Domain(object): + + """ + I am a wrapper around a libvirt Domain object + """ + + def __init__(self, connection, domain): + self.connection = connection + self.domain = domain + + def name(self): + return queue.executeInThread(self.domain.name) + + def create(self): + return queue.executeInThread(self.domain.create) + + def shutdown(self): + return queue.executeInThread(self.domain.shutdown) + + def destroy(self): + return queue.executeInThread(self.domain.destroy) + + +class Connection(object): + + """ + I am a wrapper around a libvirt Connection object. + """ + + DomainClass = Domain + + def __init__(self, uri): + self.uri = uri + self.connection = libvirt.open(uri) + + @defer.inlineCallbacks + def lookupByName(self, name): + """ I lookup an existing predefined domain """ + res = yield queue.executeInThread(self.connection.lookupByName, name) + defer.returnValue(self.DomainClass(self, res)) + + @defer.inlineCallbacks + def create(self, xml): + """ I take libvirt XML and start a new VM """ + res = yield queue.executeInThread(self.connection.createXML, xml, 0) + defer.returnValue(self.DomainClass(self, res)) + + @defer.inlineCallbacks + def all(self): + domains = [] + domain_ids = yield queue.executeInThread(self.connection.listDomainsID) + + for did in domain_ids: + domain = yield queue.executeInThread(self.connection.lookupByID, did) + domains.append(self.DomainClass(self, domain)) + + defer.returnValue(domains) + + +class LibVirtSlave(AbstractLatentBuildSlave): + + def __init__(self, name, password, connection, hd_image, base_image = None, xml=None, max_builds=None, notify_on_missing=[], + missing_timeout=60*20, build_wait_timeout=60*10, properties={}, locks=None): + AbstractLatentBuildSlave.__init__(self, name, password, max_builds, notify_on_missing, + missing_timeout, build_wait_timeout, properties, locks) + + if not libvirt: + config.error("The python module 'libvirt' is needed to use a LibVirtSlave") + + self.name = name + self.connection = connection + self.image = hd_image + self.base_image = base_image + self.xml = xml + + self.cheap_copy = True + self.graceful_shutdown = False + + self.domain = None + + self.ready = False + self._find_existing_deferred = self._find_existing_instance() + + @defer.inlineCallbacks + def _find_existing_instance(self): + """ + I find existing VMs that are already running that might be orphaned instances of 
this slave.
+        """
+        if not self.connection:
+            defer.returnValue(None)
+
+        domains = yield self.connection.all()
+        for d in domains:
+            name = yield d.name()
+            if name.startswith(self.name):
+                self.domain = d
+                self.substantiated = True
+                break
+
+        self.ready = True
+
+    def canStartBuild(self):
+        if not self.ready:
+            log.msg("Not accepting builds as existing domains not iterated")
+            return False
+
+        if self.domain and not self.isConnected():
+            log.msg("Not accepting builds as existing domain but slave not connected")
+            return False
+
+        return AbstractLatentBuildSlave.canStartBuild(self)
+
+    def _prepare_base_image(self):
+        """
+        I am a private method for creating (possibly cheap) copies of a
+        base_image for start_instance to boot.
+        """
+        if not self.base_image:
+            return defer.succeed(True)
+
+        if self.cheap_copy:
+            clone_cmd = "qemu-img"
+            clone_args = "create -b %(base)s -f qcow2 %(image)s"
+        else:
+            clone_cmd = "cp"
+            clone_args = "%(base)s %(image)s"
+
+        clone_args = clone_args % {
+            "base": self.base_image,
+            "image": self.image,
+        }
+
+        log.msg("Cloning base image: %s %s" % (clone_cmd, clone_args))
+
+        def _log_result(res):
+            log.msg("Cloning exit code was: %d" % res)
+            return res
+
+        d = utils.getProcessValue(clone_cmd, clone_args.split())
+        d.addBoth(_log_result)
+        return d
+
+    @defer.inlineCallbacks
+    def start_instance(self, build):
+        """
+        I start a new instance of a VM.
+
+        If a base_image is specified, I will make a clone of that; otherwise
+        I will use the image directly.
+
+        If I'm not given a libvirt domain definition XML, I will look for my
+        name in the list of defined virtual machines and start that.
+        """
+        if self.domain is not None:
+            log.msg("Cannot start_instance '%s' as already active" % self.name)
+            defer.returnValue(False)
+
+        yield self._prepare_base_image()
+
+        try:
+            if self.xml:
+                self.domain = yield self.connection.create(self.xml)
+            else:
+                self.domain = yield self.connection.lookupByName(self.name)
+                yield self.domain.create()
+        except:
+            log.err(failure.Failure(),
+                    "Cannot start a VM (%s), failing gracefully and triggering "
+                    "a new build check" % self.name)
+            self.domain = None
+            defer.returnValue(False)
+
+        defer.returnValue(True)
+
+    def stop_instance(self, fast=False):
+        """
+        I attempt to stop a running VM.
+        I make sure any connection to the slave is removed.
+        If the VM was using a cloned image, I remove the clone.
+        When everything is tidied up, I ask the master to look for work to do.
+        """
+        log.msg("Attempting to stop '%s'" % self.name)
+        if self.domain is None:
+            log.msg("I don't think that domain is even running, aborting")
+            return defer.succeed(None)
+
+        domain = self.domain
+        self.domain = None
+
+        if self.graceful_shutdown and not fast:
+            log.msg("Graceful shutdown chosen for %s" % self.name)
+            d = domain.shutdown()
+        else:
+            d = domain.destroy()
+
+        def _disconnect(res):
+            log.msg("VM destroyed (%s): Forcing its connection closed."
+                    % self.name)
+            return AbstractBuildSlave.disconnect(self)
+        d.addCallback(_disconnect)
+
+        def _disconnected(res):
+            log.msg("We forced disconnection (%s), cleaning up and triggering new build" % self.name)
+            if self.base_image:
+                os.remove(self.image)
+            self.botmaster.maybeStartBuildsForSlave(self.name)
+            return res
+        d.addBoth(_disconnected)
+
+        return d
+
diff --git a/master/buildbot/buildslave/openstack.py b/master/buildbot/buildslave/openstack.py
index 39beed2a29a..ec5c4861654 100644
--- a/master/buildbot/buildslave/openstack.py
+++ b/master/buildbot/buildslave/openstack.py
@@ -19,7 +19,7 @@
 from twisted.internet import defer, threads
 from twisted.python import log
 
-from buildbot.buildslave import AbstractLatentBuildSlave
+from buildbot.buildslave.base import AbstractLatentBuildSlave
 from buildbot import config, interfaces
 
 try:
diff --git a/master/buildbot/ec2buildslave.py b/master/buildbot/ec2buildslave.py
index 5d520f02397..085cb3dd3da 100644
--- a/master/buildbot/ec2buildslave.py
+++ b/master/buildbot/ec2buildslave.py
@@ -11,309 +11,17 @@
 # this program; if not, write to the Free Software Foundation, Inc., 51
 # Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 #
-# Portions Copyright Buildbot Team Members
+# Copyright Buildbot Team Members
 
-from __future__ import with_statement
-# Portions Copyright Canonical Ltd. 2009
+from twisted.python.deprecate import deprecatedModuleAttribute
+from twisted.python.versions import Version
 
-"""A LatentSlave that uses EC2 to instantiate the slaves on demand.
+from buildbot.buildslave.ec2 import (
+    EC2LatentBuildSlave)
 
-Tested with Python boto 1.5c
-"""
+deprecatedModuleAttribute(Version("Buildbot", 0, 8, 8),
+        "It has been moved to buildbot.buildslave.ec2",
+        "buildbot.ec2buildslave", "EC2LatentBuildSlave")
 
-import os
-import re
-import time
-
-import boto
-import boto.ec2
-import boto.exception
-from twisted.internet import defer, threads
-from twisted.python import log
-
-from buildbot.buildslave import AbstractLatentBuildSlave
-from buildbot import interfaces
-
-PENDING = 'pending'
-RUNNING = 'running'
-SHUTTINGDOWN = 'shutting-down'
-TERMINATED = 'terminated'
-
-class EC2LatentBuildSlave(AbstractLatentBuildSlave):
-
-    instance = image = None
-    _poll_resolution = 5 # hook point for tests
-
-    def __init__(self, name, password, instance_type, ami=None,
-                 valid_ami_owners=None, valid_ami_location_regex=None,
-                 elastic_ip=None, identifier=None, secret_identifier=None,
-                 aws_id_file_path=None, user_data=None, region=None,
-                 keypair_name='latent_buildbot_slave',
-                 security_name='latent_buildbot_slave',
-                 max_builds=None, notify_on_missing=[], missing_timeout=60*20,
-                 build_wait_timeout=60*10, properties={}, locks=None):
-
-        AbstractLatentBuildSlave.__init__(
-            self, name, password, max_builds, notify_on_missing,
-            missing_timeout, build_wait_timeout, properties, locks)
-        if not ((ami is not None) ^
-                (valid_ami_owners is not None or
-                 valid_ami_location_regex is not None)):
-            raise ValueError(
-                'You must provide either a specific ami, or one or both of '
-                'valid_ami_location_regex and valid_ami_owners')
-        self.ami = ami
-        if valid_ami_owners is not None:
-            if isinstance(valid_ami_owners, (int, long)):
-                valid_ami_owners = (valid_ami_owners,)
-            else:
-                for element in valid_ami_owners:
-                    if not isinstance(element, (int, long)):
-                        raise ValueError(
-                            'valid_ami_owners should be int or iterable '
-                            'of ints', element)
-        if valid_ami_location_regex is not None:
-            if not isinstance(valid_ami_location_regex, basestring):
-                raise
ValueError( - 'valid_ami_location_regex should be a string') - else: - # verify that regex will compile - re.compile(valid_ami_location_regex) - self.valid_ami_owners = valid_ami_owners - self.valid_ami_location_regex = valid_ami_location_regex - self.instance_type = instance_type - self.keypair_name = keypair_name - self.security_name = security_name - self.user_data = user_data - if identifier is None: - assert secret_identifier is None, ( - 'supply both or neither of identifier, secret_identifier') - if aws_id_file_path is None: - home = os.environ['HOME'] - aws_id_file_path = os.path.join(home, '.ec2', 'aws_id') - if not os.path.exists(aws_id_file_path): - raise ValueError( - "Please supply your AWS access key identifier and secret " - "access key identifier either when instantiating this %s " - "or in the %s file (on two lines).\n" % - (self.__class__.__name__, aws_id_file_path)) - with open(aws_id_file_path, 'r') as aws_file: - identifier = aws_file.readline().strip() - secret_identifier = aws_file.readline().strip() - else: - assert aws_id_file_path is None, \ - 'if you supply the identifier and secret_identifier, ' \ - 'do not specify the aws_id_file_path' - assert secret_identifier is not None, \ - 'supply both or neither of identifier, secret_identifier' - - region_found = None - - # Make the EC2 connection. - if region is not None: - for r in boto.ec2.regions(aws_access_key_id=identifier, - aws_secret_access_key=secret_identifier): - - if r.name == region: - region_found = r - - - if region_found is not None: - self.conn = boto.ec2.connect_to_region(region, - aws_access_key_id=identifier, - aws_secret_access_key=secret_identifier) - else: - raise ValueError('The specified region does not exist: {0}'.format(region)) - - else: - self.conn = boto.connect_ec2(identifier, secret_identifier) - - # Make a keypair - # - # We currently discard the keypair data because we don't need it. - # If we do need it in the future, we will always recreate the keypairs - # because there is no way to - # programmatically retrieve the private key component, unless we - # generate it and store it on the filesystem, which is an unnecessary - # usage requirement. - try: - key_pair = self.conn.get_all_key_pairs(keypair_name)[0] - assert key_pair - # key_pair.delete() # would be used to recreate - except boto.exception.EC2ResponseError, e: - if 'InvalidKeyPair.NotFound' not in e.body: - if 'AuthFailure' in e.body: - print ('POSSIBLE CAUSES OF ERROR:\n' - ' Did you sign up for EC2?\n' - ' Did you put a credit card number in your AWS ' - 'account?\n' - 'Please doublecheck before reporting a problem.\n') - raise - # make one; we would always do this, and stash the result, if we - # needed the key (for instance, to SSH to the box). We'd then - # use paramiko to use the key to connect. 
- self.conn.create_key_pair(keypair_name) - - # create security group - try: - group = self.conn.get_all_security_groups(security_name)[0] - assert group - except boto.exception.EC2ResponseError, e: - if 'InvalidGroup.NotFound' in e.body: - self.security_group = self.conn.create_security_group( - security_name, - 'Authorization to access the buildbot instance.') - # Authorize the master as necessary - # TODO this is where we'd open the hole to do the reverse pb - # connect to the buildbot - # ip = urllib.urlopen( - # 'http://checkip.amazonaws.com').read().strip() - # self.security_group.authorize('tcp', 22, 22, '%s/32' % ip) - # self.security_group.authorize('tcp', 80, 80, '%s/32' % ip) - else: - raise - - # get the image - if self.ami is not None: - self.image = self.conn.get_image(self.ami) - else: - # verify we have access to at least one acceptable image - discard = self.get_image() - assert discard - - # get the specified elastic IP, if any - if elastic_ip is not None: - elastic_ip = self.conn.get_all_addresses([elastic_ip])[0] - self.elastic_ip = elastic_ip - - def get_image(self): - if self.image is not None: - return self.image - if self.valid_ami_location_regex: - level = 0 - options = [] - get_match = re.compile(self.valid_ami_location_regex).match - for image in self.conn.get_all_images( - owners=self.valid_ami_owners): - # gather sorting data - match = get_match(image.location) - if match: - alpha_sort = int_sort = None - if level < 2: - try: - alpha_sort = match.group(1) - except IndexError: - level = 2 - else: - if level == 0: - try: - int_sort = int(alpha_sort) - except ValueError: - level = 1 - options.append([int_sort, alpha_sort, - image.location, image.id, image]) - if level: - log.msg('sorting images at level %d' % level) - options = [candidate[level:] for candidate in options] - else: - options = [(image.location, image.id, image) for image - in self.conn.get_all_images( - owners=self.valid_ami_owners)] - options.sort() - log.msg('sorted images (last is chosen): %s' % - (', '.join( - ['%s (%s)' % (candidate[-1].id, candidate[-1].location) - for candidate in options]))) - if not options: - raise ValueError('no available images match constraints') - return options[-1][-1] - - def dns(self): - if self.instance is None: - return None - return self.instance.public_dns_name - dns = property(dns) - - def start_instance(self, build): - if self.instance is not None: - raise ValueError('instance active') - return threads.deferToThread(self._start_instance) - - def _start_instance(self): - image = self.get_image() - reservation = image.run( - key_name=self.keypair_name, security_groups=[self.security_name], - instance_type=self.instance_type, user_data=self.user_data) - self.instance = reservation.instances[0] - log.msg('%s %s starting instance %s' % - (self.__class__.__name__, self.slavename, self.instance.id)) - duration = 0 - interval = self._poll_resolution - while self.instance.state == PENDING: - time.sleep(interval) - duration += interval - if duration % 60 == 0: - log.msg('%s %s has waited %d minutes for instance %s' % - (self.__class__.__name__, self.slavename, duration//60, - self.instance.id)) - self.instance.update() - if self.instance.state == RUNNING: - self.output = self.instance.get_console_output() - minutes = duration//60 - seconds = duration%60 - log.msg('%s %s instance %s started on %s ' - 'in about %d minutes %d seconds (%s)' % - (self.__class__.__name__, self.slavename, - self.instance.id, self.dns, minutes, seconds, - self.output.output)) - if 
self.elastic_ip is not None: - self.instance.use_ip(self.elastic_ip) - return [self.instance.id, - image.id, - '%02d:%02d:%02d' % (minutes//60, minutes%60, seconds)] - else: - log.msg('%s %s failed to start instance %s (%s)' % - (self.__class__.__name__, self.slavename, - self.instance.id, self.instance.state)) - raise interfaces.LatentBuildSlaveFailedToSubstantiate( - self.instance.id, self.instance.state) - - def stop_instance(self, fast=False): - if self.instance is None: - # be gentle. Something may just be trying to alert us that an - # instance never attached, and it's because, somehow, we never - # started. - return defer.succeed(None) - instance = self.instance - self.output = self.instance = None - return threads.deferToThread( - self._stop_instance, instance, fast) - - def _stop_instance(self, instance, fast): - if self.elastic_ip is not None: - self.conn.disassociate_address(self.elastic_ip.public_ip) - instance.update() - if instance.state not in (SHUTTINGDOWN, TERMINATED): - instance.terminate() - log.msg('%s %s terminating instance %s' % - (self.__class__.__name__, self.slavename, instance.id)) - duration = 0 - interval = self._poll_resolution - if fast: - goal = (SHUTTINGDOWN, TERMINATED) - instance.update() - else: - goal = (TERMINATED,) - while instance.state not in goal: - time.sleep(interval) - duration += interval - if duration % 60 == 0: - log.msg( - '%s %s has waited %d minutes for instance %s to end' % - (self.__class__.__name__, self.slavename, duration//60, - instance.id)) - instance.update() - log.msg('%s %s instance %s %s ' - 'after about %d minutes %d seconds' % - (self.__class__.__name__, self.slavename, - instance.id, goal, duration//60, duration%60)) +_hush_pyflakes = [ + EC2LatentBuildSlave] diff --git a/master/buildbot/libvirtbuildslave.py b/master/buildbot/libvirtbuildslave.py index 7345901cbb9..ab9b2730f43 100644 --- a/master/buildbot/libvirtbuildslave.py +++ b/master/buildbot/libvirtbuildslave.py @@ -1,3 +1,4 @@ + # This file is part of Buildbot. Buildbot is free software: you can # redistribute it and/or modify it under the terms of the GNU General Public # License as published by the Free Software Foundation, version 2. @@ -11,291 +12,18 @@ # this program; if not, write to the Free Software Foundation, Inc., 51 # Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. # -# Portions Copyright Buildbot Team Members -# Portions Copyright 2010 Isotoma Limited - -import os - -from twisted.internet import defer, utils, threads -from twisted.python import log, failure -from buildbot.buildslave import AbstractBuildSlave, AbstractLatentBuildSlave -from buildbot.util.eventual import eventually -from buildbot import config - -try: - import libvirt - libvirt = libvirt -except ImportError: - libvirt = None - - -class WorkQueue(object): - """ - I am a class that turns parallel access into serial access. - - I exist because we want to run libvirt access in threads as we don't - trust calls not to block, but under load libvirt doesn't seem to like - this kind of threaded use. - """ - - def __init__(self): - self.queue = [] - - def _process(self): - log.msg("Looking to start a piece of work now...") - - # Is there anything to do? 
-        if not self.queue:
-            log.msg("_process called when there is no work")
-            return
-
-        # Peek at the top of the stack - get a function to call and
-        # a deferred to fire when its all over
-        d, next_operation, args, kwargs = self.queue[0]
-
-        # Start doing some work - expects a deferred
-        try:
-            d2 = next_operation(*args, **kwargs)
-        except:
-            d2 = defer.fail()
-
-        # Whenever a piece of work is done, whether it worked or not
-        # call this to schedule the next piece of work
-        def _work_done(res):
-            log.msg("Completed a piece of work")
-            self.queue.pop(0)
-            if self.queue:
-                log.msg("Preparing next piece of work")
-                eventually(self._process)
-            return res
-        d2.addBoth(_work_done)
-
-        # When the work is done, trigger d
-        d2.chainDeferred(d)
-
-    def execute(self, cb, *args, **kwargs):
-        kickstart_processing = not self.queue
-        d = defer.Deferred()
-        self.queue.append((d, cb, args, kwargs))
-        if kickstart_processing:
-            self._process()
-        return d
-
-    def executeInThread(self, cb, *args, **kwargs):
-        return self.execute(threads.deferToThread, cb, *args, **kwargs)
-
-
-# A module is effectively a singleton class, so this is OK
-queue = WorkQueue()
-
-
-class Domain(object):
-
-    """
-    I am a wrapper around a libvirt Domain object
-    """
-
-    def __init__(self, connection, domain):
-        self.connection = connection
-        self.domain = domain
-
-    def name(self):
-        return queue.executeInThread(self.domain.name)
-
-    def create(self):
-        return queue.executeInThread(self.domain.create)
-
-    def shutdown(self):
-        return queue.executeInThread(self.domain.shutdown)
-
-    def destroy(self):
-        return queue.executeInThread(self.domain.destroy)
-
-
-class Connection(object):
-
-    """
-    I am a wrapper around a libvirt Connection object.
-    """
-
-    DomainClass = Domain
-
-    def __init__(self, uri):
-        self.uri = uri
-        self.connection = libvirt.open(uri)
-
-    @defer.inlineCallbacks
-    def lookupByName(self, name):
-        """ I lookup an existing predefined domain """
-        res = yield queue.executeInThread(self.connection.lookupByName, name)
-        defer.returnValue(self.DomainClass(self, res))
-
-    @defer.inlineCallbacks
-    def create(self, xml):
-        """ I take libvirt XML and start a new VM """
-        res = yield queue.executeInThread(self.connection.createXML, xml, 0)
-        defer.returnValue(self.DomainClass(self, res))
-
-    @defer.inlineCallbacks
-    def all(self):
-        domains = []
-        domain_ids = yield queue.executeInThread(self.connection.listDomainsID)
-
-        for did in domain_ids:
-            domain = yield queue.executeInThread(self.connection.lookupByID, did)
-            domains.append(self.DomainClass(self, domain))
-
-        defer.returnValue(domains)
-
-
-class LibVirtSlave(AbstractLatentBuildSlave):
-
-    def __init__(self, name, password, connection, hd_image, base_image = None, xml=None, max_builds=None, notify_on_missing=[],
-                 missing_timeout=60*20, build_wait_timeout=60*10, properties={}, locks=None):
-        AbstractLatentBuildSlave.__init__(self, name, password, max_builds, notify_on_missing,
-                                          missing_timeout, build_wait_timeout, properties, locks)
-
-        if not libvirt:
-            config.error("The python module 'libvirt' is needed to use a LibVirtSlave")
-
-        self.name = name
-        self.connection = connection
-        self.image = hd_image
-        self.base_image = base_image
-        self.xml = xml
-
-        self.cheap_copy = True
-        self.graceful_shutdown = False
-
-        self.domain = None
-
-        self.ready = False
-        self._find_existing_deferred = self._find_existing_instance()
-
-    @defer.inlineCallbacks
-    def _find_existing_instance(self):
-        """
-        I find existing VMs that are already running that might be orphaned instances of this slave.
-        """
-        if not self.connection:
-            defer.returnValue(None)
-
-        domains = yield self.connection.all()
-        for d in domains:
-            name = yield d.name()
-            if name.startswith(self.name):
-                self.domain = d
-                self.substantiated = True
-                break
-
-        self.ready = True
-
-    def canStartBuild(self):
-        if not self.ready:
-            log.msg("Not accepting builds as existing domains not iterated")
-            return False
-
-        if self.domain and not self.isConnected():
-            log.msg("Not accepting builds as existing domain but slave not connected")
-            return False
-
-        return AbstractLatentBuildSlave.canStartBuild(self)
-
-    def _prepare_base_image(self):
-        """
-        I am a private method for creating (possibly cheap) copies of a
-        base_image for start_instance to boot.
-        """
-        if not self.base_image:
-            return defer.succeed(True)
-
-        if self.cheap_copy:
-            clone_cmd = "qemu-img"
-            clone_args = "create -b %(base)s -f qcow2 %(image)s"
-        else:
-            clone_cmd = "cp"
-            clone_args = "%(base)s %(image)s"
-
-        clone_args = clone_args % {
-            "base": self.base_image,
-            "image": self.image,
-        }
-
-        log.msg("Cloning base image: %s %s'" % (clone_cmd, clone_args))
-
-        def _log_result(res):
-            log.msg("Cloning exit code was: %d" % res)
-            return res
-
-        d = utils.getProcessValue(clone_cmd, clone_args.split())
-        d.addBoth(_log_result)
-        return d
-
-    @defer.inlineCallbacks
-    def start_instance(self, build):
-        """
-        I start a new instance of a VM.
-
-        If a base_image is specified, I will make a clone of that otherwise i will
-        use image directly.
-
-        If i'm not given libvirt domain definition XML, I will look for my name
-        in the list of defined virtual machines and start that.
-        """
-        if self.domain is not None:
-            log.msg("Cannot start_instance '%s' as already active" % self.name)
-            defer.returnValue(False)
-
-        yield self._prepare_base_image()
-
-        try:
-            if self.xml:
-                self.domain = yield self.connection.create(self.xml)
-            else:
-                self.domain = yield self.connection.lookupByName(self.name)
-                yield self.domain.create()
-        except:
-            log.err(failure.Failure(),
-                    "Cannot start a VM (%s), failing gracefully and triggering"
-                    "a new build check" % self.name)
-            self.domain = None
-            defer.returnValue(False)
-
-        defer.returnValue(True)
-
-    def stop_instance(self, fast=False):
-        """
-        I attempt to stop a running VM.
-        I make sure any connection to the slave is removed.
-        If the VM was using a cloned image, I remove the clone
-        When everything is tidied up, I ask that bbot looks for work to do
-        """
-        log.msg("Attempting to stop '%s'" % self.name)
-        if self.domain is None:
-            log.msg("I don't think that domain is even running, aborting")
-            return defer.succeed(None)
-
-        domain = self.domain
-        self.domain = None
-
-        if self.graceful_shutdown and not fast:
-            log.msg("Graceful shutdown chosen for %s" % self.name)
-            d = domain.shutdown()
-        else:
-            d = domain.destroy()
+# Copyright Buildbot Team Members
 
-        def _disconnect(res):
-            log.msg("VM destroyed (%s): Forcing its connection closed." % self.name)
-            return AbstractBuildSlave.disconnect(self)
-        d.addCallback(_disconnect)
+from twisted.python.deprecate import deprecatedModuleAttribute
+from twisted.python.versions import Version
 
-        def _disconnected(res):
-            log.msg("We forced disconnection (%s), cleaning up and triggering new build" % self.name)
-            if self.base_image:
-                os.remove(self.image)
-            self.botmaster.maybeStartBuildsForSlave(self.name)
-            return res
-        d.addBoth(_disconnected)
+from buildbot.buildslave.libvirt import (
+    LibVirtSlave, Domain, Connection)
 
-        return d
+for _attr in ["LibVirtSlave", "Connection", "Domain"]:
+    deprecatedModuleAttribute(Version("Buildbot", 0, 8, 8),
+            "It has been moved to buildbot.buildslave.libvirt",
+            "buildbot.libvirtbuildslave", _attr)
+
+_hush_pyflakes = [
+    LibVirtSlave, Domain, Connection]
diff --git a/master/buildbot/test/unit/test_buildslave.py b/master/buildbot/test/unit/test_buildslave_base.py
similarity index 98%
rename from master/buildbot/test/unit/test_buildslave.py
rename to master/buildbot/test/unit/test_buildslave_base.py
index 92bc1e6c90f..250115aa0a2 100644
--- a/master/buildbot/test/unit/test_buildslave.py
+++ b/master/buildbot/test/unit/test_buildslave_base.py
@@ -16,13 +16,14 @@
 import mock
 from twisted.trial import unittest
 from twisted.internet import defer
-from buildbot import buildslave, config, locks
+from buildbot import config, locks
+from buildbot.buildslave import base
 from buildbot.test.fake import fakemaster, pbmanager
 from buildbot.test.fake.botmaster import FakeBotMaster
 
 class TestAbstractBuildSlave(unittest.TestCase):
 
-    class ConcreteBuildSlave(buildslave.AbstractBuildSlave):
+    class ConcreteBuildSlave(base.AbstractBuildSlave):
         pass
 
     def test_constructor_minimal(self):
diff --git a/master/buildbot/test/unit/test_libvirtbuildslave.py b/master/buildbot/test/unit/test_buildslave_libvirt.py
similarity index 99%
rename from master/buildbot/test/unit/test_libvirtbuildslave.py
rename to master/buildbot/test/unit/test_buildslave_libvirt.py
index fa04413e32a..2478e46c3dc 100644
--- a/master/buildbot/test/unit/test_libvirtbuildslave.py
+++ b/master/buildbot/test/unit/test_buildslave_libvirt.py
@@ -17,9 +17,10 @@
 from twisted.trial import unittest
 from twisted.internet import defer, reactor, utils
 from twisted.python import failure
-from buildbot import libvirtbuildslave, config
+from buildbot import config
 from buildbot.test.fake import libvirt
 from buildbot.test.util import compat
+from buildbot.buildslave import libvirt as libvirtbuildslave
 
 class TestLibVirtSlave(unittest.TestCase):
 
diff --git a/master/docs/manual/cfg-buildslaves.rst b/master/docs/manual/cfg-buildslaves.rst
index f14ee06cb5c..6ee7d35628b 100644
--- a/master/docs/manual/cfg-buildslaves.rst
+++ b/master/docs/manual/cfg-buildslaves.rst
@@ -257,7 +257,7 @@ machines).
 Here is the simplest example of configuring an EC2 latent buildslave. It
 specifies all necessary remaining values explicitly in the instantiation. ::
 
-    from buildbot.ec2buildslave import EC2LatentBuildSlave
+    from buildbot.buildslave.ec2 import EC2LatentBuildSlave
     c['slaves'] = [EC2LatentBuildSlave('bot1', 'sekrit', 'm1.large',
                                        ami='ami-12345',
                                        identifier='publickey',
@@ -282,7 +282,7 @@ The first line of that file should be your access key id; the second line
 should be your secret access key id. Then you can instantiate the build slave
 as follows. ::
 
-    from buildbot.ec2buildslave import EC2LatentBuildSlave
+    from buildbot.buildslave.ec2 import EC2LatentBuildSlave
     c['slaves'] = [EC2LatentBuildSlave('bot1', 'sekrit', 'm1.large',
                                        ami='ami-12345')]
@@ -301,7 +301,7 @@ One available filter is to specify the acceptable AMI owners, by AWS account
 number (the 12 digit number, usually rendered in AWS with hyphens like
 "1234-5678-9012", should be entered as an integer). ::
 
-    from buildbot.ec2buildslave import EC2LatentBuildSlave
+    from buildbot.buildslave.ec2 import EC2LatentBuildSlave
     bot1 = EC2LatentBuildSlave('bot1', 'sekrit', 'm1.large',
                                valid_ami_owners=[11111111111,
                                                  22222222222],
@@ -312,7 +312,7 @@ number (the 12 digit number, usually rendered in AWS with hyphens like
 The other available filter is to provide a regular expression string that
 will be matched against each AMI's location (the S3 bucket and manifest name). ::
 
-    from buildbot.ec2buildslave import EC2LatentBuildSlave
+    from buildbot.buildslave.ec2 import EC2LatentBuildSlave
     bot1 = EC2LatentBuildSlave(
         'bot1', 'sekrit', 'm1.large',
         valid_ami_location_regex=r'buildbot\-.*/image.manifest.xml',
@@ -321,7 +321,7 @@ will be matched against each AMI's location (the S3 bucket and manifest name). ::
 The regular expression can specify a group, which will be preferred for the
 sorting. Only the first group is used; subsequent groups are ignored. ::
 
-    from buildbot.ec2buildslave import EC2LatentBuildSlave
+    from buildbot.buildslave.ec2 import EC2LatentBuildSlave
     bot1 = EC2LatentBuildSlave(
         'bot1', 'sekrit', 'm1.large',
         valid_ami_location_regex=r'buildbot\-.*\-(.*)/image.manifest.xml',
@@ -330,7 +330,7 @@ sorting. Only the first group is used; subsequent groups are ignored. ::
 If the group can be cast to an integer, it will be. This allows 10 to sort
 after 1, for instance. ::
 
-    from buildbot.ec2buildslave import EC2LatentBuildSlave
+    from buildbot.buildslave.ec2 import EC2LatentBuildSlave
     bot1 = EC2LatentBuildSlave(
         'bot1', 'sekrit', 'm1.large',
         valid_ami_location_regex=r'buildbot\-.*\-(\d+)/image.manifest.xml',
@@ -342,7 +342,7 @@ specific IP can connect as slaves. This is possible with AWS EC2 by using
 the Elastic IP feature. To configure, generate an Elastic IP in AWS, and then
 specify it in your configuration using the ``elastic_ip`` argument. ::
 
-    from buildbot.ec2buildslave import EC2LatentBuildSlave
+    from buildbot.buildslave.ec2 import EC2LatentBuildSlave
     c['slaves'] = [EC2LatentBuildSlave('bot1', 'sekrit', 'm1.large',
                                        'ami-12345',
                                        identifier='publickey',
@@ -435,7 +435,7 @@ set the username to ``minion1``, the password to ``sekrit``. The base image is c
 and a copy of it will be made for the duration of the VM's life. That copy will be
 thrown away every time a build is complete. ::
 
-    from buildbot.libvirtbuildslave import LibVirtSlave, Connection
+    from buildbot.buildslave.libvirt import LibVirtSlave, Connection
     c['slaves'] = [LibVirtSlave('minion1', 'sekrit', Connection("qemu:///session"),
                                 '/home/buildbot/images/minion1', '/home/buildbot/images/base_image')]
diff --git a/master/docs/relnotes/index.rst b/master/docs/relnotes/index.rst
index c25ff175a68..4ffdd1eb923 100644
--- a/master/docs/relnotes/index.rst
+++ b/master/docs/relnotes/index.rst
@@ -103,6 +103,8 @@ Deprecations, Removals, and Non-Compatible Changes
 
 * The ``SetProperty`` step in ``buildbot.steps.shell`` has been renamed to :bb:step:`SetPropertyFromCommand`.
 
+* The EC2 and libvirt latent slaves have been moved to ``buildbot.buildslave.ec2`` and ``buildbot.buildslave.libvirt`` respectively.
+
 Changes for Developers
 ~~~~~~~~~~~~~~~~~~~~~~
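Since the old module paths survive as deprecated shims for at least one release, a configuration that has to work on both sides of this move can guard the import. A minimal sketch; the two module paths are the ones from the patch above, and the slave arguments are just the documented example values::

    # Prefer the new location (Buildbot 0.8.8 and later); fall back to the
    # old module, which 0.8.8 still provides as a deprecated shim.
    try:
        from buildbot.buildslave.ec2 import EC2LatentBuildSlave
    except ImportError:
        from buildbot.ec2buildslave import EC2LatentBuildSlave

    c['slaves'] = [EC2LatentBuildSlave('bot1', 'sekrit', 'm1.large',
                                       ami='ami-12345')]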
From 18533b22faba8e69246e65e54e7cee0e0466621d Mon Sep 17 00:00:00 2001
From: Tom Prince
Date: Wed, 19 Jun 2013 13:20:42 -0600
Subject: [PATCH 5/7] Report `SyntaxError` or `ImportError` from `checkconfig`
 and `upgrade-master`.

---
 master/buildbot/scripts/checkconfig.py             |  8 +++++++-
 master/buildbot/scripts/upgrade_master.py          |  8 +++++++-
 .../buildbot/test/unit/test_scripts_checkconfig.py | 12 +++++++++++-
 3 files changed, 25 insertions(+), 3 deletions(-)

diff --git a/master/buildbot/scripts/checkconfig.py b/master/buildbot/scripts/checkconfig.py
index 2cd476e6755..92a26a0a525 100644
--- a/master/buildbot/scripts/checkconfig.py
+++ b/master/buildbot/scripts/checkconfig.py
@@ -40,7 +40,13 @@ def checkconfig(config):
 
     if os.path.isdir(configFile):
         basedir = configFile
-        configFile = getConfigFileFromTac(basedir)
+        try:
+            configFile = getConfigFileFromTac(basedir)
+        except (SyntaxError, ImportError) as e:
+            if not quiet:
+                print "Unable to load 'buildbot.tac' from '%s':" % basedir
+                print e
+            return 1
     else:
         basedir = os.getcwd()
 
diff --git a/master/buildbot/scripts/upgrade_master.py b/master/buildbot/scripts/upgrade_master.py
index cc783c72936..29813466683 100644
--- a/master/buildbot/scripts/upgrade_master.py
+++ b/master/buildbot/scripts/upgrade_master.py
@@ -159,7 +159,13 @@ def upgradeMaster(config, _noMonkey=False):
 
     os.chdir(config['basedir'])
 
-    configFile = base.getConfigFileFromTac(config['basedir'])
+    try:
+        configFile = base.getConfigFileFromTac(config['basedir'])
+    except (SyntaxError, ImportError) as e:
+        print "Unable to load 'buildbot.tac' from '%s':" % config['basedir']
+        print e
+        defer.returnValue(1)
+        return
     master_cfg = loadConfig(config, configFile)
     if not master_cfg:
         defer.returnValue(1)
diff --git a/master/buildbot/test/unit/test_scripts_checkconfig.py b/master/buildbot/test/unit/test_scripts_checkconfig.py
index 0466844edf1..c6bf3bde113 100644
--- a/master/buildbot/test/unit/test_scripts_checkconfig.py
+++ b/master/buildbot/test/unit/test_scripts_checkconfig.py
@@ -23,7 +23,7 @@ import cStringIO
 from twisted.trial import unittest
 
 from buildbot.test.util import dirs, compat
-from buildbot.scripts import checkconfig
+from buildbot.scripts import base, checkconfig
 
 class TestConfigLoader(dirs.DirsMixin, unittest.TestCase):
 
@@ -157,3 +157,13 @@ def test_checkconfig_quiet(self):
         self.assertEqual(checkconfig.checkconfig(config), 3)
         self.loadConfig.assert_called_with(basedir=os.getcwd(),
                                            configFile='master.cfg', quiet=True)
+    def test_checkconfig_syntaxError_quiet(self):
+        """
+        When C{base.getConfigFileFromTac} raises L{SyntaxError},
+        C{checkconfig.checkconfig} returns an error.
+        """
+        mockGetConfig = mock.Mock(spec=base.getConfigFileFromTac,
+                                  side_effect=SyntaxError)
+        self.patch(checkconfig, 'getConfigFileFromTac', mockGetConfig)
+        config = dict(configFile='.', quiet=True)
+        self.assertEqual(checkconfig.checkconfig(config), 1)
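With this patch, a `buildbot.tac` that cannot be executed is reported instead of surfacing as an unhandled traceback. A sketch of the failure mode, using a hypothetical basedir and tac file; only the "Unable to load ..." message text comes from the patch::

    # /path/to/basedir/buildbot.tac -- hypothetical broken tac file; the
    # unterminated string below raises SyntaxError when the file is executed.
    configfile = 'master.cfg

    # `buildbot checkconfig /path/to/basedir` (and likewise
    # `buildbot upgrade-master`) would then print something like
    #   Unable to load 'buildbot.tac' from '/path/to/basedir':
    # followed by the SyntaxError details, and return 1 instead of crashing.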
From 1465f9d26e4dd2edd75703706ed57eb6f75d012e Mon Sep 17 00:00:00 2001
From: Tom Prince
Date: Wed, 19 Jun 2013 13:28:03 -0600
Subject: [PATCH 6/7] Fix python2.5 syntax errors.

---
 master/buildbot/scripts/checkconfig.py    | 2 +-
 master/buildbot/scripts/upgrade_master.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/master/buildbot/scripts/checkconfig.py b/master/buildbot/scripts/checkconfig.py
index 92a26a0a525..8e05b0bb4b8 100644
--- a/master/buildbot/scripts/checkconfig.py
+++ b/master/buildbot/scripts/checkconfig.py
@@ -42,7 +42,7 @@ def checkconfig(config):
         basedir = configFile
         try:
             configFile = getConfigFileFromTac(basedir)
-        except (SyntaxError, ImportError) as e:
+        except (SyntaxError, ImportError), e:
             if not quiet:
                 print "Unable to load 'buildbot.tac' from '%s':" % basedir
                 print e
diff --git a/master/buildbot/scripts/upgrade_master.py b/master/buildbot/scripts/upgrade_master.py
index 29813466683..c471965d17f 100644
--- a/master/buildbot/scripts/upgrade_master.py
+++ b/master/buildbot/scripts/upgrade_master.py
@@ -161,7 +161,7 @@ def upgradeMaster(config, _noMonkey=False):
 
     try:
         configFile = base.getConfigFileFromTac(config['basedir'])
-    except (SyntaxError, ImportError) as e:
+    except (SyntaxError, ImportError), e:
         print "Unable to load 'buildbot.tac' from '%s':" % config['basedir']
         print e
         defer.returnValue(1)

From c7fc9806c3361267c37dcf896be4ce701fd60301 Mon Sep 17 00:00:00 2001
From: "Dustin J. Mitchell"
Date: Fri, 5 Jul 2013 17:47:36 -0400
Subject: [PATCH 7/7] Don't check that builders are defined in multi-master mode

This fixes an error identified by Yassert LEBON.
---
 master/buildbot/config.py                | 4 ++++
 master/buildbot/test/unit/test_config.py | 7 +++++++
 2 files changed, 11 insertions(+)

diff --git a/master/buildbot/config.py b/master/buildbot/config.py
index 5b5138426d7..e8632b2ea01 100644
--- a/master/buildbot/config.py
+++ b/master/buildbot/config.py
@@ -526,6 +526,10 @@ def check_single_master(self):
 
     def check_schedulers(self):
+        # don't perform this check in multiMaster mode
+        if self.multiMaster:
+            return
+
         all_buildernames = set([ b.name for b in self.builders ])
 
         for s in self.schedulers.itervalues():
diff --git a/master/buildbot/test/unit/test_config.py b/master/buildbot/test/unit/test_config.py
index c08c29614b6..9b6f6c1dde9 100644
--- a/master/buildbot/test/unit/test_config.py
+++ b/master/buildbot/test/unit/test_config.py
@@ -832,6 +832,13 @@ def test_check_schedulers_unknown_builder(self):
         self.cfg.check_schedulers()
         self.assertConfigError(self.errors, "Unknown builder 'b2'")
 
+    def test_check_schedulers_ignored_in_multiMaster(self):
+        self.setup_basic_attrs()
+        del self.cfg.builders[1] # remove b2, leaving b1
+        self.cfg.multiMaster = True
+        self.cfg.check_schedulers()
+        self.assertNoConfigErrors(self.errors)
+
     def test_check_schedulers(self):
         self.setup_basic_attrs()
         self.cfg.check_schedulers()
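For context on the last patch: in a multi-master installation, the builders a scheduler refers to may be configured on a different master attached to the same database, so the unknown-builder check only makes sense on a single master. A minimal sketch of a scheduler-only master that the relaxed check accepts; the database URL and all names are illustrative, and only the ``multiMaster`` flag and the skipped check come from the patch::

    # master.cfg for a scheduler-only master in a multi-master setup.
    from buildbot.schedulers.forcesched import ForceScheduler

    c = BuildmasterConfig = {}
    c['multiMaster'] = True
    c['db_url'] = 'mysql://buildbot:secret@dbhost/buildbot'  # shared by all masters
    c['slaves'] = []
    c['builders'] = []   # the builders live on other masters
    c['schedulers'] = [
        # 'b1' is defined on another master sharing the database; with
        # multiMaster set, check_schedulers() no longer rejects this.
        ForceScheduler(name='force', builderNames=['b1']),
    ]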