diff --git a/master/buildbot/buildslave/base.py b/master/buildbot/buildslave/base.py index c4b6e5638fb..d0bbf8af90d 100644 --- a/master/buildbot/buildslave/base.py +++ b/master/buildbot/buildslave/base.py @@ -355,6 +355,7 @@ def attached(self, conn): # We want to know when the graceful shutdown flag changes self.slave_status.addGracefulWatcher(self._gracefulChanged) self.conn = conn + self._old_builder_list = None # clear builder list before proceed d = defer.succeed(None) diff --git a/master/buildbot/buildslave/manager.py b/master/buildbot/buildslave/manager.py index 00e6d58e770..a0a440c1bdf 100644 --- a/master/buildbot/buildslave/manager.py +++ b/master/buildbot/buildslave/manager.py @@ -97,19 +97,28 @@ def _unregister(self, registration): @defer.inlineCallbacks def newConnection(self, conn, buildslaveName): if buildslaveName in self.connections: + log.msg("Got duplication connection from '%s'" + " starting arbitration procedure" % buildslaveName) old_conn = self.connections[buildslaveName] # returns: # (None, 0) if ping was successfull, that means old connection stil alive # (None, 1) if timeout expired and old slave didn't respond - res, pos = yield defer.DeferredList( - [old_conn.remotePrint("master got a duplicate connection"), - task.deferLater(reactor, self.PING_TIMEOUT, lambda : None)], - fireOnOneCallback=True - ) - if pos == 0: - # if we get here then old connection still alives and new should - # be rejected - defer.returnValue(Failure(RuntimeError("rejecting duplicate slave"))) + try: + res, pos = yield defer.DeferredList( + [old_conn.remotePrint("master got a duplicate connection"), + task.deferLater(reactor, self.PING_TIMEOUT, lambda : None)], + fireOnOneCallback=True + ) + if pos == 0: + # if we get here then old connection still alives and new should + # be rejected + defer.returnValue( + Failure(RuntimeError("rejecting duplicate slave")) + ) + except Exception, e: + log.msg("Got error while trying to ping connected slave %s:" + "%s" % (buildslaveName, e)) + log.msg("Old connection for '%s' was lost, accepting new" % buildslaveName) self.connections[buildslaveName] = conn def remove(): diff --git a/master/buildbot/buildslave/protocols/pb.py b/master/buildbot/buildslave/protocols/pb.py index 3c754be8e35..1f12410359f 100644 --- a/master/buildbot/buildslave/protocols/pb.py +++ b/master/buildbot/buildslave/protocols/pb.py @@ -207,8 +207,9 @@ def _errback(why): return defer.succeed(None) yield old_way() - def remoteStartBuild(self): - return self.mind.callRemote('startBuild') + def remoteStartBuild(self, builder_name): + slavebuilder = self.builders.get(builder_name) + return slavebuilder.callRemote('startBuild') def stopKeepaliveTimer(self): if self.keepalive_timer and self.keepalive_timer.active(): diff --git a/master/buildbot/process/build.py b/master/buildbot/process/build.py index 277d9c11b5a..fca27d1b644 100644 --- a/master/buildbot/process/build.py +++ b/master/buildbot/process/build.py @@ -226,7 +226,7 @@ def startBuild(self, build_status, expectations, slavebuilder): # then narrow SlaveLocks down to the right slave self.locks = [(l.getLock(self.slavebuilder.slave), a) for l, a in self.locks ] - self.conn = slavebuilder.conn + self.conn = slavebuilder.slave.conn self.conn.notifyOnDisconnect(self.lostRemote) # TODO: save subscription metrics.MetricCountEvent.log('active_builds', 1) diff --git a/master/buildbot/process/builder.py b/master/buildbot/process/builder.py index d8b6de01af3..7d3cd632400 100644 --- a/master/buildbot/process/builder.py +++ b/master/buildbot/process/builder.py @@ -364,7 +364,7 @@ def run_cleanups(): # tell the remote that it's starting a build, too try: - yield slavebuilder.conn.remoteStartBuild() + yield slavebuilder.slave.conn.remoteStartBuild(build.builder.name) except: log.err(failure.Failure(), 'while calling remote startBuild:') run_cleanups() @@ -392,7 +392,7 @@ def run_cleanups(): # and now. If so, bail out. The build.startBuild call below transfers # responsibility for monitoring this connection to the Build instance, # so this check ensures we hand off a working connection. - if not slavebuilder.conn: # TODO: replace with isConnected() + if not slavebuilder.slave.conn: # TODO: replace with isConnected() log.msg("slave disappeared before build could start") run_cleanups() defer.returnValue(False) diff --git a/master/buildbot/process/slavebuilder.py b/master/buildbot/process/slavebuilder.py index b82497da348..ea59b1272af 100644 --- a/master/buildbot/process/slavebuilder.py +++ b/master/buildbot/process/slavebuilder.py @@ -29,7 +29,6 @@ class AbstractSlaveBuilder(pb.Referenceable): def __init__(self): self.ping_watchers = [] self.state = None # set in subclass - self.conn = None self.slave = None self.builder_name = None self.locks = None @@ -85,7 +84,6 @@ def attached(self, slave, commands): @param commands: provides the slave's version of each RemoteCommand """ self.state = ATTACHING - self.conn = slave.conn self.remoteCommands = commands # maps command name to version if self.slave is None: self.slave = slave @@ -97,7 +95,7 @@ def attached(self, slave, commands): d = defer.succeed(None) d.addCallback(lambda _: - self.conn.remotePrint(message="attached")) + self.slave.conn.remotePrint(message="attached")) def setIdle(res): self.state = IDLE @@ -131,7 +129,7 @@ def ping(self, status=None): self.ping_watchers.insert(0, d2) # I think it will make the tests run smoother if the status # is updated before the ping completes - Ping().ping(self.conn).addCallback(self._pong) + Ping().ping(self.slave.conn).addCallback(self._pong) def reset_state(res): if self.state == PINGING: @@ -158,7 +156,6 @@ def detached(self): if self.slave: self.slave.removeSlaveBuilder(self) self.slave = None - self.conn = None self.remoteCommands = None