In [2]:
import posix_ipc
import time
import os

class IPCLock:
    def __init__(self, name):
        self.sem = posix_ipc.Semaphore(
            name,
            flags=posix_ipc.O_CREAT,  # 不存在则创建
            # | posix_ipc.SEM_UNDO,
            # mode=0o600,  # 权限：用户读写
            mode=384,
            initial_value=1,  # 初始为可用状态
        )
        self.owner_pid = None  # 记录持有者进程ID

    def acquire(self, blocking=True, timeout=None):
        """获取锁核心逻辑"""
        if blocking and timeout is not None:
            return self._acquire_with_timeout(timeout)

        acquired = self.sem.acquire(blocking)
        if acquired:
            self.owner_pid = os.getpid()
        return acquired

    def release(self):
        """释放锁并校验持有者"""
        if self.owner_pid != os.getpid():
            raise PermissionError("非持有进程禁止释放锁")
        self.sem.release()
        self.owner_pid = None

    def _acquire_with_timeout(self, timeout):
        """带超时的获取锁实现"""
        end_time = time.time() + timeout
        while time.time() < end_time:
            if self.sem.acquire(blocking=False):
                self.owner_pid = os.getpid()
                return True
            time.sleep(0.05)  # 避免CPU空转
        return False

    def __enter__(self):
        self.acquire()
        return self

    def __exit__(self, *args):
        self.release()

In [None]:
import os

os.environ["PYTHONUNBUFFERED"] = "1"

import time
import random
from datetime import datetime

from dask.distributed import (
    Client,
    LocalCluster,
    get_worker,
    wait,
    Lock,
)

# NOTE: adjust the number of workers as needed. The more the sooner of crash.
n_workers = 6


def dummy_task(i):
    lock = IPCLock("/my_lock")
    if lock.acquire(blocking=False, timeout=2):
        try:
            print(
                f'{datetime.now().strftime("%Y-%m-%d %H:%M:%S")} worker#{get_worker().name} acquired lock and completed task #{i}'
            )
        finally:
            lock.release()
    else:
        print(
            f'{datetime.now().strftime("%Y-%m-%d %H:%M:%S")} worker#{get_worker().name} lock not acquired due to timeout. task #{i}'
        )

    return None


def main():
    cluster = LocalCluster(
        n_workers=n_workers,
        threads_per_worker=1,
        processes=True,
    )
    client = Client(cluster)

    futures = []
    i = 0
    while True:
        futures.append(
            client.submit(
                dummy_task,
                i,
            )
        )
        if len(futures) > n_workers * 2:
            _, undone = wait(futures, return_when="FIRST_COMPLETED")
            futures = list(undone)
        i += 1


if __name__ == "__main__":
    main()

2025-03-01 17:57:36 worker#1 lock not acquired due to timeout. task #0


2025-03-01 17:57:36,800 - distributed.worker - ERROR - Compute Failed
Key:       dummy_task-b54ba9f7e0d5de2698b731fd89d57c25
State:     executing
Task:  <Task 'dummy_task-b54ba9f7e0d5de2698b731fd89d57c25' dummy_task(...)>
Exception: "BusyError('Semaphore is busy')"
Traceback: '  File "/var/folders/cy/bhfxm8116cq_nrfpdywmf9w00000gn/T/ipykernel_83820/990653539.py", line 23, in dummy_task\n  File "/var/folders/cy/bhfxm8116cq_nrfpdywmf9w00000gn/T/ipykernel_83820/398838656.py", line 22, in acquire\n'

2025-03-01 17:57:36,801 - distributed.worker - ERROR - Compute Failed
Key:       dummy_task-a950c5810101d037ec29bc5edaf36680
State:     executing
Task:  <Task 'dummy_task-a950c5810101d037ec29bc5edaf36680' dummy_task(...)>
Exception: "BusyError('Semaphore is busy')"
Traceback: '  File "/var/folders/cy/bhfxm8116cq_nrfpdywmf9w00000gn/T/ipykernel_83820/990653539.py", line 23, in dummy_task\n  File "/var/folders/cy/bhfxm8116cq_nrfpdywmf9w00000gn/T/ipykernel_83820/398838656.py", line 22, in acquire\

KeyboardInterrupt: 

