Skip to content

Commit

Permalink
Merge pull request #7206 from p12tic/docker-improvements
Browse files Browse the repository at this point in the history
Docker improvements
  • Loading branch information
p12tic committed Nov 6, 2023
2 parents 0d6b6be + ac241a3 commit c4c7bf4
Show file tree
Hide file tree
Showing 11 changed files with 89 additions and 12 deletions.
9 changes: 6 additions & 3 deletions master/buildbot/interfaces.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,15 +49,18 @@ class WorkerSetupError(Exception):


class LatentWorkerFailedToSubstantiate(Exception):
pass
def __str__(self):
return " ".join(str(arg) for arg in self.args)


class LatentWorkerCannotSubstantiate(Exception):
pass
def __str__(self):
return " ".join(str(arg) for arg in self.args)


class LatentWorkerSubstantiatiationCancelled(Exception):
pass
def __str__(self):
return " ".join(str(arg) for arg in self.args)


class IPlugin(Interface):
Expand Down
2 changes: 2 additions & 0 deletions master/buildbot/process/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -458,6 +458,8 @@ def buildPreparationFailure(self, why, state_string):
self.workerforbuilder.worker.putInQuarantine()
if isinstance(why, failure.Failure):
yield self.preparation_step.addLogWithFailure(why)
elif isinstance(why, Exception):
yield self.preparation_step.addLogWithException(why)
yield self.master.data.updates.setStepStateString(self.preparation_step.stepid,
"error while " + state_string)
yield self.master.data.updates.finishStep(self.preparation_step.stepid,
Expand Down
6 changes: 5 additions & 1 deletion master/buildbot/test/fake/docker.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,13 +105,17 @@ def create_container(self, image, *args, **kwargs):
'image': image,
'Id': ret['Id'],
'name': name, # docker does not return this
'Names': ["/" + name] # this what docker returns
'Names': ["/" + name], # this what docker returns
"State": "running",
}
return ret

def remove_container(self, id, **kwargs):
del self._containers[id]

def logs(self, id, tail=None):
return f"log for {id}\n1\n2\n3\nend\n".encode("utf-8")

def close(self):
# dummy close, no connection to cleanup
pass
Expand Down
2 changes: 1 addition & 1 deletion master/buildbot/test/fake/latent.py
Original file line number Diff line number Diff line change
Expand Up @@ -229,4 +229,4 @@ def stop_instance(self, fast):
return (yield self._controller._stop_deferred)

def check_instance(self):
return not self._controller.has_crashed
return (not self._controller.has_crashed, "")
23 changes: 23 additions & 0 deletions master/buildbot/test/unit/worker/test_docker.py
Original file line number Diff line number Diff line change
Expand Up @@ -421,6 +421,29 @@ def test_constructor_hostname(self):
image="myworker_image", hostname="myworker_hostname")
self.assertEqual(bs.hostname, 'myworker_hostname')

@defer.inlineCallbacks
def test_check_instance_running(self):
bs = yield self.setupWorker('bot', 'pass', 'tcp://1234:2375', 'worker')
yield bs.start_instance(self.build)
self.assertEqual((yield bs.check_instance()), (True, ""))

@defer.inlineCallbacks
def test_check_instance_exited(self):
bs = yield self.setupWorker('bot', 'pass', 'tcp://1234:2375', 'worker')
yield bs.start_instance(self.build)
for c in self._client._containers.values():
c["State"] = "exited"

expected_logs = (
"logs: \n"
"log for 8a61192da2b3bb2d922875585e29b74ec0dc4e0117fcbf84c962204e97564cd7\n"
"1\n"
"2\n"
"3\n"
"end\n"
)
self.assertEqual((yield bs.check_instance()), (False, expected_logs))


class testDockerPyStreamLogs(unittest.TestCase):

Expand Down
21 changes: 21 additions & 0 deletions master/buildbot/worker/docker.py
Original file line number Diff line number Diff line change
Expand Up @@ -348,6 +348,27 @@ def _thd_start_instance(self, docker_host, image, dockerfile, volumes, host_conf
docker_client.close()
return [instance['Id'], image]

def check_instance(self):
if self.instance is None:
return defer.succeed((True, ""))
return threads.deferToThread(
self._thd_check_instance,
self._curr_client_args
)

def _thd_check_instance(self, curr_client_args):
docker_client = self._getDockerClient(curr_client_args)
container_name = self.getContainerName()
instances = docker_client.containers(all=1, filters={"name": container_name})
container_name = f"/{container_name}"
for instance in instances:
if container_name not in instance["Names"]:
continue
if instance["State"] == "exited":
logs = docker_client.logs(instance['Id'], tail=100).decode("utf-8")
return (False, "logs: \n" + logs)
return (True, "")

def stop_instance(self, fast=False):
if self.instance is None:
# be gentle. Something may just be trying to alert us that an
Expand Down
23 changes: 18 additions & 5 deletions master/buildbot/worker/latent.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
from buildbot.interfaces import LatentWorkerFailedToSubstantiate
from buildbot.interfaces import LatentWorkerSubstantiatiationCancelled
from buildbot.util import Notifier
from buildbot.warnings import warn_deprecated
from buildbot.worker.base import AbstractWorker


Expand Down Expand Up @@ -227,7 +228,7 @@ def stop_instance(self, fast=False):
raise NotImplementedError

def check_instance(self):
return True
return (True, "")

@property
def substantiated(self):
Expand Down Expand Up @@ -460,12 +461,24 @@ def _check_instance_timer_fired_impl(self):

try:
yield self._start_stop_lock.acquire()
is_good = yield self.check_instance()
message = "latent worker crashed before connecting"
try:
value = yield self.check_instance()
if isinstance(value, bool):
is_good = value
warn_deprecated("3.10.0", "check_instance() must return a two element tuple "
"with second element containing error message, if any")

else:
is_good, message_append = value
message += ": " + message_append
except Exception as e:
message += ": " + str(e)
is_good = False

if not is_good:
yield self._substantiation_failed(
LatentWorkerFailedToSubstantiate(
self.name, 'latent worker crashed before connecting'
)
LatentWorkerFailedToSubstantiate(self.name, message)
)
return
finally:
Expand Down
10 changes: 8 additions & 2 deletions master/docs/manual/customization.rst
Original file line number Diff line number Diff line change
Expand Up @@ -675,8 +675,14 @@ Overriding these members ensures that builds aren't ran on incompatible workers
.. py:method:: check_instance(self)
This method determines the health of an instance.
The method should return ``False`` if it determines that a serious error has occurred and worker will not connect to the master.
Otherwise, the method should return ``True``.
The method is expected to return a tuple with two members: ``is_good`` and ``message``.
The first member identifies whether the instance is still valid.
It should be ``False`` if the method determined that a serious error has occurred and worker will not connect to the master.
In such case, ``message`` should identify any additional error message that should be displayed to Buildbot user.

In case there is no additional messages, ``message`` should be an empty string.

Any exceptions raised from this method are interpreted as if the method returned ``False``.


Custom Build Classes
Expand Down
2 changes: 2 additions & 0 deletions newsfragments/docker-latent-worker-early-fail.bugfix
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Improved handling of Docker containers that fail before worker attaches to master.
In such case build will be restarted immediately instead of waiting for a timeout to expire.
1 change: 1 addition & 0 deletions newsfragments/docker-startup-failure-logs.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Docker Latent workers will now show last logs in Buildbot UI when their startup fails.
2 changes: 2 additions & 0 deletions newsfragments/latest-check-instance-message.change
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
``AbstractLatentWorker.check_instance()`` now accepts error message being supplied in case instance is not good.
The previous API has been deprecated.

0 comments on commit c4c7bf4

Please sign in to comment.