From 7b6b5a1c245b48b71edff37cebd1941d43c6ab47 Mon Sep 17 00:00:00 2001 From: Gus Monod Date: Tue, 21 May 2024 14:46:27 -0400 Subject: [PATCH] Improve deadlock tutorial documentation about use of locals argument Signed-off-by: Matheus Nascimento --- docs/tutorials/deadlock.py | 42 +++------- docs/tutorials/deadlock.rst | 160 +++++++++++++++++++++++++++--------- 2 files changed, 133 insertions(+), 69 deletions(-) diff --git a/docs/tutorials/deadlock.py b/docs/tutorials/deadlock.py index 35030b7..5619157 100644 --- a/docs/tutorials/deadlock.py +++ b/docs/tutorials/deadlock.py @@ -1,48 +1,30 @@ -#!/usr/bin/env - import os import threading import time -def first_thread(lock1, lock2): - print("Thread 1: Trying to acquire lock1") +def background(lock1, lock2): with lock1: - print("Thread 1: lock1 acquired") - + print(" First lock acquired") time.sleep(1) - - print("Thread 1: Trying to acquire lock2") with lock2: - print("Thread 1: lock2 acquired") - print("Thread 1: Released both locks") - - -def second_thread(lock1, lock2): - print("Thread 2: Trying to acquire lock2") - with lock2: - print("Thread 2: lock2 acquired") - - time.sleep(1) - - print("Thread 2: Trying to acquire lock1") - with lock1: - print("Thread 2: lock1 acquired") - print("Thread 2: Released both locks") + print(" Second lock acquired") if __name__ == "__main__": - print(f"thread ID {os.getpid()}") + print(f"Process ID: {os.getpid()}") lock1 = threading.Lock() lock2 = threading.Lock() - p1 = threading.Thread(target=first_thread, args=(lock1, lock2)) - p2 = threading.Thread(target=second_thread, args=(lock1, lock2)) + t1 = threading.Thread(target=background, args=(lock1, lock2)) + t2 = threading.Thread(target=background, args=(lock2, lock1)) - p1.start() - p2.start() + print("Stating First Thread") + t1.start() + print("Stating Second Thread") + t2.start() - p1.join() - p2.join() + t1.join() + t2.join() print("Finished execution") diff --git a/docs/tutorials/deadlock.rst b/docs/tutorials/deadlock.rst index 07c0240..f743218 100644 --- a/docs/tutorials/deadlock.rst +++ b/docs/tutorials/deadlock.rst @@ -21,7 +21,7 @@ source code. You can either clone it, or just download the zip, whatever is your You will also need a terminal with ``python3`` installed, preferably one of the latest versions. -Once you have the repo ready to navigate, ``cd`` into the docs/tutorials folder: +Once you have the repo ready to navigate, ``cd`` into the ``docs/tutorials`` folder: .. code:: shell @@ -62,18 +62,27 @@ PyStack remote has the ability to analyze the status of a running (remote) proce Executing the deadlock ^^^^^^^^^^^^^^^^^^^^^^ -In the ``tutorials`` directory, there will be a script called ``deadlock.py``: +In the ``docs/tutorials`` directory, there will be a script called ``deadlock.py``: .. include:: deadlock.py :literal: -Assuming that we are in the ``tutorials`` directory, we can run the deadlock script with: +Assuming that we are in the ``docs/tutorials`` directory, we can run the deadlock script with: .. code:: shell python3 deadlock.py -This script will intentionally perform a deadlock and the first message will contain the process ID. +This script will intentionally perform a deadlock and the first message will contain the process ID, so this is the expected output: + +.. code:: shell + + Process ID: + Stating First Thread + First lock acquired + Stating Second Thread + First lock acquired + Another option is to find it with: .. code:: shell @@ -81,30 +90,19 @@ Another option is to find it with: ps aux | grep deadlock.py After the deadlock occurs we can use the PyStack command to analyze the -process (replace with the process ID from the previous step): +process (replace ```` with the process ID from the previous step): .. code:: shell pystack remote -Potentially it will appear an ``Operation not permitted`` error. If it is your case use this command instead: +If you see ``Operation not permitted``, you may want to instead run it with: .. code:: shell sudo -E pystack remote -The output should be this: - -.. code:: shell - - Thread 1: Trying to acquire lock1 - Thread 1: lock1 acquired - Thread 2: Trying to acquire lock2 - Thread 2: lock2 acquired - Thread 1: Trying to acquire lock2 - Thread 2: Trying to acquire lock1 - Understanding the results ^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -113,38 +111,122 @@ The excepted result is the following code: .. code:: shell - Traceback for thread 518222 (python3) [] (most recent call last): - (Python) File "//threading.py", line 966, in _bootstrap + Traceback for thread 789 (python3) [] (most recent call last): + (Python) File "//threading.py", line 966, in _bootstrap self._bootstrap_inner() - (Python) File "//threading.py", line 1009, in _bootstrap_inner + (Python) File "//threading.py", line 1009, in _bootstrap_inner self.run() - (Python) File "//threading.py", line 946, in run + (Python) File "//threading.py", line 946, in run self._target(*self._args, **self._kwargs) - (Python) File "//deadlock.py", line 30, in process2 - lock1.acquire() + (Python) File "//deadlock.py", line 10, in background + with lock2: - Traceback for thread 518221 (python3) [] (most recent call last): - (Python) File "//threading.py", line 966, in _bootstrap + Traceback for thread 456 (python3) [] (most recent call last): + (Python) File "//threading.py", line 966, in _bootstrap self._bootstrap_inner() - (Python) File "//threading.py", line 1009, in _bootstrap_inner + (Python) File "//threading.py", line 1009, in _bootstrap_inner self.run() - (Python) File "//threading.py", line 946, in run + (Python) File "//threading.py", line 946, in run self._target(*self._args, **self._kwargs) - (Python) File "//deadlock.py", line 16, in process1 - lock2.acquire() + (Python) File "//deadlock.py", line 10, in background + with lock2: - Traceback for thread 518220 (python3) [] (most recent call last): - (Python) File "//deadlock.py", line 46, in - p1.join() - (Python) File "//threading.py", line 1089, in join + Traceback for thread 123 (python3) [] (most recent call last): + (Python) File "//deadlock.py", line 28, in + t1.join() + (Python) File "//threading.py", line 1089, in join self._wait_for_tstate_lock() - (Python) File "//threading.py", line 1109, in _wait_for_tstate_lock + (Python) File "//threading.py", line 1109, in _wait_for_tstate_lock if lock.acquire(block, timeout): -Notice that each section is displaying a running thread. -Each thread has a flow of actions that can be audited to find the potential problem: - - The thread 518222 is trying to acquire lock1 but is blocked because lock1 is already held by the other thread. - - The thread 518221 is trying to acquire lock2 but is blocked because lock2 is already held by the other thread. - - The main thread 518220 is calling join() on p1, waiting for it to finish. However, p1 cannot finish because it is stuck waiting to acquire a lock, leading to a deadlock. +Notice that each section is displaying a running thread, and there are three threads here: + +1. Thread 123 is the original thread that creates the other two, and waits for them +2. Thread 456 is ``t1`` +3. Thread 789 is ``t2`` + +Each thread has a stack trace: + +- The thread 789 is trying to acquire lock1 but is blocked because lock1 is already held by thread 456. +- The thread 456 is trying to acquire lock2 but is blocked because lock2 is already held by thread 789. +- The main thread 123 is calling ``join()`` on t1, waiting for it to finish. However, t1 cannot finish because it is stuck waiting to acquire a lock, leading to a deadlock. This way we can successful diagnose that there are a improper ordering of lock acquisition. + +Exploring more features +^^^^^^^^^^^^^^^^^^^^^^^ + +PyStack has some features that can help the process of diagnose the problem. +Using ``--locals`` you can obtain a simple string representation of the local variables +in the different frames as well as the function arguments. + +When you run: + +.. code:: shell + + pystack remote --locals + +The excepted result is: + +.. code:: shell + + Traceback for thread 789 (python3) [] (most recent call last): + (Python) File "//threading.py", line 966, in _bootstrap + self._bootstrap_inner() + Arguments: + self: + (Python) File "//threading.py", line 1009, in _bootstrap_inner + self.run() + Arguments: + self: + (Python) File "//threading.py", line 946, in run + self._target(*self._args, **self._kwargs) + Arguments: + self: + (Python) File "//deadlock.py", line 10, in background + with lock2: + Arguments: + lock2: <_thread.lock at 0x7f0c04b90900> + lock1: <_thread.lock at 0x7f0c04b90b40> + + Traceback for thread 456 (python3) [] (most recent call last): + (Python) File "//threading.py", line 966, in _bootstrap + self._bootstrap_inner() + Arguments: + self: + (Python) File "//threading.py", line 1009, in _bootstrap_inner + self.run() + Arguments: + self: + (Python) File "//threading.py", line 946, in run + self._target(*self._args, **self._kwargs) + Arguments: + self: + (Python) File "//deadlock.py", line 10, in background + with lock2: + Arguments: + lock2: <_thread.lock at 0x7f0c04b90b40> + lock1: <_thread.lock at 0x7f0c04b90900> + + Traceback for thread 123 (python3) [] (most recent call last): + (Python) File "//deadlock.py", line 28, in + t1.join() + (Python) File "//threading.py", line 1089, in join + self._wait_for_tstate_lock() + Arguments: + timeout: None + self: + (Python) File "//threading.py", line 1109, in _wait_for_tstate_lock + if lock.acquire(block, timeout): + Arguments: + timeout: -1 + block: True + self: + Locals: + lock: <_thread.lock at 0x7f0c04bae100> + +Observe that we have the same format of result, with one section for each thread. +However now, there are more information, the local variables and function arguments. + +- In thread 789 and 456 we can identify the ID of each lock. +- The main thread 123 we can verify the Arguments of ``lock.acquire()``, that no timeout was set ``timeout: None`` and ``self`` refers to the thread object ````. Moreover, ``timeout`` is ``-1``, which usually an indefinite ``wait``, and ``block`` is ``True``, meaning it will block until the lock is acquired.