In [None]:
import fcntl
import time

class FileLock:
    """Advisory inter-process lock built on ``fcntl.flock`` over a lock file.

    ``self.fd`` is the open lock file while this instance holds the lock and
    ``None`` otherwise. The instance can also be used as a context manager.
    """

    def __init__(self, lockfile):
        """Store the lock file path; the file is not opened until ``acquire``."""
        self.lockfile = lockfile
        self.fd = None

    def acquire(self, timeout=10):
        """Poll for an exclusive lock on the lock file.

        Args:
            timeout: Seconds to keep retrying before giving up.

        Returns:
            True if the lock was obtained, False if ``timeout`` elapsed first.

        Raises:
            OSError: If opening or locking the file fails for a reason other
                than the lock being held elsewhere.
        """
        fd = open(self.lockfile, 'w')
        # Monotonic clock: the deadline must not move if the wall clock is adjusted.
        deadline = time.monotonic() + timeout
        acquired = False
        try:
            while True:
                try:
                    fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
                except BlockingIOError:
                    # Lock is currently held through another open file handle.
                    if time.monotonic() >= deadline:
                        return False
                    time.sleep(0.1)
                else:
                    # Publish the handle only once the lock is really held, so
                    # ``self.fd`` never refers to an unlocked or closed file.
                    acquired = True
                    self.fd = fd
                    return True
        finally:
            # Timeout, OSError or interruption: do not leak the descriptor.
            if not acquired:
                fd.close()

    def release(self):
        """Unlock and close the lock file; a no-op if the lock is not held."""
        if self.fd is None:
            return
        fd, self.fd = self.fd, None
        try:
            fcntl.flock(fd, fcntl.LOCK_UN)
        finally:
            # Always close the handle, even if the explicit unlock fails.
            fd.close()

    def __enter__(self):
        """Acquire with the default timeout; raise TimeoutError on failure."""
        if not self.acquire():
            raise TimeoutError(f"could not acquire lock on {self.lockfile!r}")
        return self

    def __exit__(self, exc_type, exc, tb):
        """Release the lock; exceptions from the ``with`` body propagate."""
        self.release()
        return False

In [None]:
import os

# Set before the dask imports below and before any worker process is started.
# NOTE(review): presumably intended to make worker subprocesses inherit
# unbuffered stdout so their prints show up promptly — confirm intent.
os.environ["PYTHONUNBUFFERED"] = "1"

import time
import random
from datetime import datetime

from dask.distributed import (
    Client,
    LocalCluster,
    get_worker,
    wait,
    Lock,
)

# NOTE: adjust the number of workers as needed. The more workers there are,
# the sooner the crash occurs.
n_workers = 6


def dummy_task(i):
    """Simulate a unit of work guarded by the shared ``marten.lock`` file lock.

    Keeps retrying (2 s acquire attempts, 0.5 s pause between them) until the
    lock is obtained, holds it for a random 1.5-3.0 s, prints a timestamped
    line naming the current dask worker and task number ``i``, and always
    releases the lock afterwards. Returns None.
    """
    file_lock = FileLock("marten.lock")
    while True:
        if file_lock.acquire(timeout=2):
            break
        # Back off briefly before the next acquisition attempt.
        time.sleep(0.5)

    try:
        hold_seconds = random.uniform(1.5, 3.0)
        time.sleep(hold_seconds)
        message = f'{datetime.now().strftime("%Y-%m-%d %H:%M:%S")} worker#{get_worker().name} acquired lock and completed task #{i}'
        print(message)
    finally:
        file_lock.release()


def main():
    """Start a local dask cluster and feed it ``dummy_task`` jobs forever.

    Submits tasks with increasing indices. Once more than ``n_workers * 2``
    futures are outstanding, it waits for at least one to complete and keeps
    only the unfinished ones before submitting more. The loop never exits on
    its own; it ends only through an exception such as KeyboardInterrupt.
    """
    # Context managers close the client and the cluster when the loop is
    # interrupted, instead of leaving them open.
    with LocalCluster(
        n_workers=n_workers,
        threads_per_worker=1,
        processes=True,
    ) as cluster, Client(cluster) as client:
        futures = []
        i = 0
        while True:
            futures.append(
                client.submit(
                    dummy_task,
                    i,
                )
            )
            # Throttle: cap the backlog at roughly two tasks per worker.
            if len(futures) > n_workers * 2:
                _, undone = wait(futures, return_when="FIRST_COMPLETED")
                futures = list(undone)
            i += 1


# Entry point: start the endless submission loop when run as the main module.
if __name__ == "__main__":
    main()

Perhaps you already have a cluster running?
Hosting the HTTP server on port 61155 instead


2025-03-03 10:05:23 worker#3 acquired lock and completed task #0
2025-03-03 10:05:24 worker#3 acquired lock and completed task #6
2025-03-03 10:05:26 worker#3 acquired lock and completed task #12
2025-03-03 10:05:28 worker#3 acquired lock and completed task #13
2025-03-03 10:05:31 worker#3 acquired lock and completed task #14
2025-03-03 10:05:33 worker#3 acquired lock and completed task #15
2025-03-03 10:05:35 worker#3 acquired lock and completed task #16
2025-03-03 10:05:36 worker#3 acquired lock and completed task #17
2025-03-03 10:05:39 worker#3 acquired lock and completed task #18
2025-03-03 10:05:42 worker#3 acquired lock and completed task #19
2025-03-03 10:05:45 worker#3 acquired lock and completed task #20
2025-03-03 10:05:47 worker#3 acquired lock and completed task #21
2025-03-03 10:05:48 worker#3 acquired lock and completed task #22
2025-03-03 10:05:51 worker#3 acquired lock and completed task #23
2025-03-03 10:05:53 worker#3 acquired lock and completed task #24
2025-03-03 1

KeyboardInterrupt: 



2025-03-03 10:05:58 worker#3 acquired lock and completed task #26


2025-03-03 10:06:00,041 - distributed.nanny - ERROR - Worker process died unexpectedly
Process Dask Worker process (from Nanny):
Traceback (most recent call last):
  File "/Users/jx/.pyenv/versions/3.12.2/lib/python3.12/asyncio/runners.py", line 118, in run
    return self._loop.run_until_complete(task)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/jx/.pyenv/versions/3.12.2/lib/python3.12/asyncio/base_events.py", line 685, in run_until_complete
    return future.result()
           ^^^^^^^^^^^^^^^
  File "/Users/jx/.pyenv/versions/3.12.2/envs/venv_3.12.2/lib/python3.12/site-packages/distributed/nanny.py", line 985, in run
    await worker.finished()
  File "/Users/jx/.pyenv/versions/3.12.2/envs/venv_3.12.2/lib/python3.12/site-packages/distributed/core.py", line 494, in finished
    await self._event_finished.wait()
  File "/Users/jx/.pyenv/versions/3.12.2/lib/python3.12/asyncio/locks.py", line 212, in wait
    await fut
asyncio.exceptions.CancelledError

During handling

2025-03-03 10:06:00 worker#0 acquired lock and completed task #3


Process Dask Worker process (from Nanny):
Traceback (most recent call last):
  File "/Users/jx/.pyenv/versions/3.12.2/lib/python3.12/asyncio/runners.py", line 118, in run
    return self._loop.run_until_complete(task)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/jx/.pyenv/versions/3.12.2/lib/python3.12/asyncio/base_events.py", line 685, in run_until_complete
    return future.result()
           ^^^^^^^^^^^^^^^
  File "/Users/jx/.pyenv/versions/3.12.2/envs/venv_3.12.2/lib/python3.12/site-packages/distributed/nanny.py", line 985, in run
    await worker.finished()
  File "/Users/jx/.pyenv/versions/3.12.2/envs/venv_3.12.2/lib/python3.12/site-packages/distributed/core.py", line 494, in finished
    await self._event_finished.wait()
  File "/Users/jx/.pyenv/versions/3.12.2/lib/python3.12/asyncio/locks.py", line 212, in wait
    await fut
asyncio.exceptions.CancelledError

During handling of the above exception, another exception occurred:

Traceback (most recent call last)

2025-03-03 10:06:02 worker#4 acquired lock and completed task #5


2025-03-03 10:06:04,528 - distributed.nanny - ERROR - Worker process died unexpectedly
Process Dask Worker process (from Nanny):
Traceback (most recent call last):
  File "/Users/jx/.pyenv/versions/3.12.2/lib/python3.12/asyncio/runners.py", line 118, in run
    return self._loop.run_until_complete(task)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/jx/.pyenv/versions/3.12.2/lib/python3.12/asyncio/base_events.py", line 685, in run_until_complete
    return future.result()
           ^^^^^^^^^^^^^^^
  File "/Users/jx/.pyenv/versions/3.12.2/envs/venv_3.12.2/lib/python3.12/site-packages/distributed/nanny.py", line 985, in run
    await worker.finished()
  File "/Users/jx/.pyenv/versions/3.12.2/envs/venv_3.12.2/lib/python3.12/site-packages/distributed/core.py", line 494, in finished
    await self._event_finished.wait()
  File "/Users/jx/.pyenv/versions/3.12.2/lib/python3.12/asyncio/locks.py", line 212, in wait
    await fut
asyncio.exceptions.CancelledError

During handling

2025-03-03 10:06:04 worker#1 acquired lock and completed task #4


2025-03-03 10:06:06,702 - distributed.nanny - ERROR - Worker process died unexpectedly
Process Dask Worker process (from Nanny):
Traceback (most recent call last):
  File "/Users/jx/.pyenv/versions/3.12.2/lib/python3.12/asyncio/runners.py", line 118, in run
    return self._loop.run_until_complete(task)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/jx/.pyenv/versions/3.12.2/lib/python3.12/asyncio/base_events.py", line 685, in run_until_complete
    return future.result()
           ^^^^^^^^^^^^^^^
  File "/Users/jx/.pyenv/versions/3.12.2/envs/venv_3.12.2/lib/python3.12/site-packages/distributed/nanny.py", line 985, in run
    await worker.finished()
  File "/Users/jx/.pyenv/versions/3.12.2/envs/venv_3.12.2/lib/python3.12/site-packages/distributed/core.py", line 494, in finished
    await self._event_finished.wait()
  File "/Users/jx/.pyenv/versions/3.12.2/lib/python3.12/asyncio/locks.py", line 212, in wait
    await fut
asyncio.exceptions.CancelledError

During handling

2025-03-03 10:06:07 worker#5 acquired lock and completed task #2
2025-03-03 10:06:08 worker#2 acquired lock and completed task #1


Process Dask Worker process (from Nanny):
Traceback (most recent call last):
  File "/Users/jx/.pyenv/versions/3.12.2/lib/python3.12/asyncio/runners.py", line 118, in run
    return self._loop.run_until_complete(task)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/jx/.pyenv/versions/3.12.2/lib/python3.12/asyncio/base_events.py", line 685, in run_until_complete
    return future.result()
           ^^^^^^^^^^^^^^^
  File "/Users/jx/.pyenv/versions/3.12.2/envs/venv_3.12.2/lib/python3.12/site-packages/distributed/nanny.py", line 985, in run
    await worker.finished()
  File "/Users/jx/.pyenv/versions/3.12.2/envs/venv_3.12.2/lib/python3.12/site-packages/distributed/core.py", line 494, in finished
    await self._event_finished.wait()
  File "/Users/jx/.pyenv/versions/3.12.2/lib/python3.12/asyncio/locks.py", line 212, in wait
    await fut
asyncio.exceptions.CancelledError

During handling of the above exception, another exception occurred:

Traceback (most recent call last)