Skip to content

Commit

Permalink
More thorough testing of DuplicateSlaveArbitrator
Browse files Browse the repository at this point in the history
This adds a subscribeToDetach method to build slaves, which the
arbitrator can use to wait for the old slave to detach, so there's no
longer any need to poll slave.isConnected.  Fixes #2022.
  • Loading branch information
djmitche committed Jul 31, 2011
1 parent 0cb6f1e commit 995a6a7
Show file tree
Hide file tree
Showing 5 changed files with 355 additions and 80 deletions.
34 changes: 29 additions & 5 deletions master/buildbot/buildslave.py
Expand Up @@ -30,6 +30,7 @@
from buildbot.interfaces import IBuildSlave, ILatentBuildSlave
from buildbot.process.properties import Properties
from buildbot.locks import LockAccess
from buildbot.util import subscription

class AbstractBuildSlave(pb.Avatar, service.MultiService):
"""This is the master-side representative for a remote buildbot slave.
Expand Down Expand Up @@ -90,6 +91,8 @@ def __init__(self, name, password, max_builds=None,
self.missing_timer = None
self.keepalive_interval = keepalive_interval

self.detached_subs = None

self._old_builder_list = None

def identity(self):
Expand Down Expand Up @@ -229,7 +232,7 @@ def _missing_timer_fired(self):
if not self.parent:
return

buildmaster = self.botmaster.parent
buildmaster = self.botmaster.master
status = buildmaster.getStatus()
text = "The Buildbot working for '%s'\n" % status.getTitle()
text += ("has noticed that the buildslave named %s went away\n" %
Expand Down Expand Up @@ -276,6 +279,9 @@ def attached(self, bot):

metrics.MetricCountEvent.log("AbstractBuildSlave.attached_slaves", 1)

# set up the subscription point for eventual detachment
self.detached_subs = subscription.SubscriptionPoint("detached")

# now we go through a sequence of calls, gathering information, then
# tell the Botmaster that it can finally give this slave to all the
# Builders that care about it.
Expand Down Expand Up @@ -363,7 +369,7 @@ def _accept_slave(res):
log.msg("bot attached")
self.messageReceivedFromSlave()
self.stopMissingTimer()
self.botmaster.parent.status.slaveConnected(self.slavename)
self.botmaster.master.status.slaveConnected(self.slavename)

return self.updateSlave()
d.addCallback(_accept_slave)
Expand All @@ -387,9 +393,27 @@ def detached(self, mind):
self.slave_status.removeGracefulWatcher(self._gracefulChanged)
self.slave_status.setConnected(False)
log.msg("BuildSlave.detached(%s)" % self.slavename)
self.botmaster.parent.status.slaveDisconnected(self.slavename)
self.botmaster.master.status.slaveDisconnected(self.slavename)
self.stopKeepaliveTimer()

# notify watchers, but do so in the next reactor iteration so that
# any further detached() action by subclasses happens first
def notif():
subs = self.detached_subs
self.detached_subs = None
subs.deliver()
reactor.callLater(0, notif)

def subscribeToDetach(self, callback):
"""
Request that C{callable} be invoked with no arguments when the
L{detached} method is invoked.
@returns: L{Subscription}
"""
assert self.detached_subs, "detached_subs is only set if attached"
return self.detached_subs.subscribe(callback)

def disconnect(self):
"""Forcibly disconnect the slave.
Expand Down Expand Up @@ -512,7 +536,7 @@ def canStartBuild(self):
def _mail_missing_message(self, subject, text):
# first, see if we have a MailNotifier we can use. This gives us a
# fromaddr and a relayhost.
buildmaster = self.botmaster.parent
buildmaster = self.botmaster.master
for st in buildmaster.statusTargets:
if isinstance(st, MailNotifier):
break
Expand Down Expand Up @@ -756,7 +780,7 @@ def _substantiation_failed(self, failure):
if not self.parent or not self.notify_on_missing:
return

buildmaster = self.botmaster.parent
buildmaster = self.botmaster.master
status = buildmaster.getStatus()
text = "The Buildbot working for '%s'\n" % status.getTitle()
text += ("has noticed that the latent buildslave named %s \n" %
Expand Down
29 changes: 16 additions & 13 deletions master/buildbot/pbmanager.py
Expand Up @@ -82,35 +82,38 @@ def unregister(self):
"""
return self.pbmanager._unregister(self)

def getPort(self):
"""
Helper method for testing; returns the TCP port used for this
registration, even if it was specified as 0 and thus allocated by the
OS.
"""
disp = self.pbmanager.dispatchers[self.portstr]
return disp.port.getHost().port

class Dispatcher(service.MultiService):

class Dispatcher(service.Service):
implements(portal.IRealm, checkers.ICredentialsChecker)

credentialInterfaces = [ credentials.IUsernamePassword,
credentials.IUsernameHashedPassword ]

def __init__(self, portstr):
service.MultiService.__init__(self)
self.portstr = portstr
self.users = {}

# there's lots of stuff to set up for a PB connection!
self.portal = portal.Portal(self)
self.portal.registerChecker(self)
self.serverFactory = pb.PBServerFactory(self.portal)
self.serverFactory.unsafeTraceback = True
self.serverPort = strports.service(portstr, self.serverFactory)
self.serverPort.setServiceParent(self)

def getPort(self):
# helper method for testing
return self.serverPort.getHost().port

def startService(self):
return service.MultiService.startService(self)
self.serverFactory.unsafeTracebacks = True
self.port = strports.listen(portstr, self.serverFactory)

def stopService(self):
return service.MultiService.stopService(self)
# stop listening on the port when shut down
d = defer.maybeDeferred(self.port.stopListening)
d.addCallback(lambda _ : service.Service.stopService(self))
return d

def register(self, username, password, pfactory):
if debug:
Expand Down
47 changes: 17 additions & 30 deletions master/buildbot/process/botmaster.py
Expand Up @@ -574,12 +574,6 @@ class DuplicateSlaveArbitrator(object):
return a ping.
"""

DISCONNECT_TRIES = 20
"""Number of times to try to disconnect an old slave"""

DISCONNECT_TRY_TIME = 0.1
"""Time to wait beteween each attempt to disconnect an old slave"""

def __init__(self, buildslave):
self.buildslave = buildslave
self.old_remote = self.buildslave.slave
Expand Down Expand Up @@ -680,38 +674,31 @@ def maybe_done(self):
else:
self.start_new_slave()

def start_new_slave(self, count=DISCONNECT_TRIES-1):
def start_new_slave(self):
# just in case
if not self.new_slave_d:
return # pragma: ignore

# we need to wait until the old slave has actually disconnected, which
# can take a little while -- but don't wait forever! This polls
# until buildslave.slave becomes None.

# TODO: test this in Twisted to see if it's necessary - it's mostly
# important for AbstractBuildSlave's accounting, right? can we just
# call detach directly?
if self.buildslave.isConnected():
if self.buildslave.slave:
self.buildslave.slave.broker.transport.loseConnection()
if count < 0:
log.msg("WEIRD: want to start new slave, but the old slave "
"will not disconnect")
self.disconnect_new_slave()
else:
self._reactor.callLater(self.DISCONNECT_TRY_TIME,
self.start_new_slave, count-1)
if not self.new_slave_d: # pragma: ignore
return

d = self.new_slave_d
self.new_slave_d = None
d.callback(self.buildslave)

if self.buildslave.isConnected():
# we need to wait until the old slave has fully detached, which can
# take a little while as buffers drain, etc.
def detached():
d.callback(self.buildslave)
self.buildslave.subscribeToDetach(detached)
self.old_remote.broker.transport.loseConnection()
else: # pragma: ignore
# by some unusual timing, it's quite possible that the old slave
# has disconnected while the arbitration was going on. In that
# case, we're already done!
d.callback(self.buildslave)

def disconnect_new_slave(self):
# just in case
if not self.new_slave_d:
return # pragma: ignore
if not self.new_slave_d: # pragma: ignore
return

d = self.new_slave_d
self.new_slave_d = None
Expand Down

0 comments on commit 995a6a7

Please sign in to comment.