Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add gzip support #171

Merged
merged 9 commits into from
Jun 4, 2024
Merged
2 changes: 2 additions & 0 deletions docs/corefile.rst
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ In most cases, you just need to provide the location of the core to use PyStack
(Python) File "/test.py", line 16, in third_func
time.sleep(1000)

PyStack can automatically extract core dumps from ``.gz`` files: ``pystack core ./archived_core_file.gz``

To learn more about the different options you can use to customize what is reported, check the :ref:`customizing-the-reports` section.

Providing the executable
Expand Down
4 changes: 4 additions & 0 deletions news/add_gzip_support.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
Features
~~~~~~~~

- Add support for Gzip compressed corefiles (#171)
5 changes: 5 additions & 0 deletions src/pystack/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,9 @@
from typing import Set

from pystack.errors import InvalidPythonProcess
from pystack.process import decompress_gzip
from pystack.process import is_elf
from pystack.process import is_gzip

from . import errors
from . import print_thread
Expand Down Expand Up @@ -319,6 +321,9 @@ def process_core(parser: argparse.ArgumentParser, args: argparse.Namespace) -> N
if not corefile.exists():
parser.error(f"Core {corefile} does not exist")

if is_gzip(corefile):
corefile = decompress_gzip(corefile)

if args.executable is None:
corefile_analyzer = CoreFileAnalyzer(corefile)
executable = pathlib.Path(corefile_analyzer.extract_executable())
Expand Down
42 changes: 42 additions & 0 deletions src/pystack/process.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
import gzip
import logging
import pathlib
import re
import subprocess
import tempfile
from typing import Optional
from typing import Tuple

Expand Down Expand Up @@ -129,3 +131,43 @@ def get_thread_name(pid: int, tid: int) -> Optional[str]:
return comm.read().strip()
except OSError:
return None


def is_gzip(filename: pathlib.Path) -> bool:
    """
    Report whether a file begins with the two-byte Gzip magic number.

    Only the leading bytes of the file are inspected; the rest of the
    content is neither read nor validated.

    Args:
        filename (pathlib.Path): Location of the file to inspect.

    Returns:
        bool: True when the first two bytes are ``1f 8b``, False otherwise
        (including when the file holds fewer than two bytes).
    """
    magic = bytes((0x1F, 0x8B))
    with open(filename, "rb") as stream:
        leading_bytes = stream.read(len(magic))
    return leading_bytes == magic


def decompress_gzip(
    filename: pathlib.Path, chunk_size: int = 4 * 1024 * 1024
) -> pathlib.Path:
    """Decompresses a Gzip file and writes the contents to a temporary file.

    The temporary file is created with ``delete=False``, so on success it
    outlives this call and the caller is responsible for removing it. If
    decompression fails part-way through, the partially written temporary
    file is removed before the error is propagated.

    Args:
        filename: The path to the gzip file to decompress.
        chunk_size: Size of chunks to read and write at a time; defaults to 4MB.

    Returns:
        The path to the temporary file containing the decompressed data.

    Raises:
        gzip.BadGzipFile: If the file is not a valid gzip file.
        EOFError: If the compressed stream is truncated.
        OSError: If reading the input or writing the temporary file fails.
    """
    with tempfile.NamedTemporaryFile(delete=False) as temp_file:
        try:
            with gzip.open(filename, "rb") as file_handle:
                while True:
                    chunk = file_handle.read(chunk_size)
                    if not chunk:
                        break
                    temp_file.write(chunk)
        except BaseException:
            # delete=False means nobody else will clean this file up, so
            # do not leave a partial core behind when decompression fails.
            temp_file.close()
            try:
                pathlib.Path(temp_file.name).unlink()
            except OSError:
                # Best effort: the original error is the one worth reporting.
                pass
            raise
    return pathlib.Path(temp_file.name)
88 changes: 87 additions & 1 deletion tests/unit/test_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from textwrap import dedent
from unittest.mock import Mock
from unittest.mock import call
from unittest.mock import mock_open
from unittest.mock import patch

import pytest
Expand Down Expand Up @@ -409,6 +410,8 @@ def test_process_core_default_without_executable():
"pathlib.Path.exists", return_value=True
), patch(
"pystack.__main__.is_elf", return_value=True
), patch(
"pystack.__main__.is_gzip", return_value=False
), patch(
"pystack.__main__.CoreFileAnalyzer"
) as core_file_analizer_mock:
Expand All @@ -431,6 +434,61 @@ def test_process_core_default_without_executable():
assert print_thread_mock.mock_calls == [call(thread, False) for thread in threads]


def test_process_core_default_gzip_without_executable():
    """A gzip-compressed core is decompressed and the temp path is analyzed."""
    # GIVEN

    cli_args = ["pystack", "core", "corefile.gz"]
    decompressed_core = Path("/tmp/file")
    fake_threads = [Mock() for _ in range(3)]

    # Stand-in for what ``tempfile.NamedTemporaryFile(...)`` returns: entering
    # it yields a file-like mock whose ``name`` is the decompressed core path.
    fake_temp_file = Mock()
    fake_temp_file.name = decompressed_core
    fake_temp_cm = Mock()
    fake_temp_cm.__enter__ = Mock(return_value=fake_temp_file)
    fake_temp_cm.__exit__ = Mock(return_value=None)

    # WHEN

    with patch("sys.argv", cli_args), patch(
        "pathlib.Path.exists", return_value=True
    ), patch("pystack.__main__.is_elf", return_value=True), patch(
        "pystack.__main__.CoreFileAnalyzer"
    ) as analyzer_cls_mock, patch(
        "pystack.__main__.get_process_threads_for_core"
    ) as threads_for_core_mock, patch(
        "pystack.__main__.print_thread"
    ) as print_mock, patch(
        "tempfile.NamedTemporaryFile", return_value=fake_temp_cm
    ), patch(
        "gzip.open", mock_open(read_data=b"")
    ) as gzip_open_mock, patch(
        # The gzip magic bytes make the header check treat the core as gzip.
        "builtins.open",
        mock_open(read_data=b"\x1f\x8b"),
    ):
        analyzer_mock = analyzer_cls_mock()
        analyzer_mock.extract_executable.return_value = "extracted_executable"
        threads_for_core_mock.return_value = fake_threads

        main()

    # THEN

    gzip_open_mock.assert_called_with(Path("corefile.gz"), "rb")
    threads_for_core_mock.assert_called_with(
        decompressed_core,
        Path("extracted_executable"),
        library_search_path="",
        native_mode=NativeReportingMode.OFF,
        locals=False,
        method=StackMethod.AUTO,
    )
    expected_print_calls = [call(thread, False) for thread in fake_threads]
    assert print_mock.mock_calls == expected_print_calls


def test_process_core_default_without_executable_and_executable_does_not_exist(capsys):
# GIVEN

Expand All @@ -440,7 +498,7 @@ def test_process_core_default_without_executable_and_executable_does_not_exist(c

with patch("sys.argv", argv), patch(
"pathlib.Path.exists"
) as path_exists_mock, patch(
) as path_exists_mock, patch("pystack.__main__.is_gzip", return_value=False), patch(
"pystack.__main__.CoreFileAnalyzer"
) as core_file_analizer_mock:
core_file_analizer_mock().extract_executable.return_value = (
Expand Down Expand Up @@ -473,6 +531,8 @@ def test_process_core_executable_not_elf_file(capsys):
"pystack.__main__.CoreFileAnalyzer"
), patch(
"pystack.__main__.is_elf", return_value=False
), patch(
"pystack.__main__.is_gzip", return_value=False
):
get_process_threads_mock.return_value = threads
with pytest.raises(SystemExit):
Expand Down Expand Up @@ -503,6 +563,8 @@ def test_process_core_default_with_executable():
"pystack.__main__.CoreFileAnalyzer"
), patch(
"pystack.__main__.is_elf", return_value=True
), patch(
"pystack.__main__.is_gzip", return_value=False
):
get_process_threads_mock.return_value = threads
main()
Expand Down Expand Up @@ -548,6 +610,8 @@ def test_process_core_native(argument, mode):
"pystack.__main__.CoreFileAnalyzer"
), patch(
"pystack.__main__.is_elf", return_value=True
), patch(
"pystack.__main__.is_gzip", return_value=False
):
get_process_threads_mock.return_value = threads
main()
Expand Down Expand Up @@ -586,6 +650,8 @@ def test_process_core_locals():
"pystack.__main__.CoreFileAnalyzer"
), patch(
"pystack.__main__.is_elf", return_value=True
), patch(
"pystack.__main__.is_gzip", return_value=False
):
get_process_threads_mock.return_value = threads
main()
Expand Down Expand Up @@ -631,6 +697,8 @@ def test_process_core_with_search_path():
"pystack.__main__.CoreFileAnalyzer"
), patch(
"pystack.__main__.is_elf", return_value=True
), patch(
"pystack.__main__.is_gzip", return_value=False
):
get_process_threads_mock.return_value = threads
main()
Expand Down Expand Up @@ -677,6 +745,8 @@ def test_process_core_with_search_root():
return_value=True,
), patch(
"pystack.__main__.is_elf", return_value=True
), patch(
"pystack.__main__.is_gzip", return_value=False
):
get_process_threads_mock.return_value = threads
main()
Expand Down Expand Up @@ -785,6 +855,8 @@ def does_exit(what):
) as get_process_threads_mock, patch(
"pystack.__main__.print_thread"
) as print_thread_mock, patch(
"pystack.__main__.is_gzip", return_value=False
), patch(
"sys.argv", argv
), patch.object(
Path, "exists", does_exit
Expand Down Expand Up @@ -821,6 +893,8 @@ def test_process_core_error(exception, exval, capsys):
"pystack.__main__.CoreFileAnalyzer"
), patch(
"pystack.__main__.is_elf", return_value=True
), patch(
"pystack.__main__.is_gzip", return_value=False
):
# THEN
get_process_threads_mock.side_effect = exception("Oh no!")
Expand Down Expand Up @@ -856,6 +930,8 @@ def test_process_core_exhaustive():
"pystack.__main__.CoreFileAnalyzer"
), patch(
"pystack.__main__.is_elf", return_value=True
), patch(
"pystack.__main__.is_gzip", return_value=False
):
get_process_threads_mock.return_value = threads
main()
Expand Down Expand Up @@ -942,6 +1018,8 @@ def test_nocolor_output_at_the_front_for_core():
"pystack.__main__.CoreFileAnalyzer"
), patch(
"pystack.__main__.is_elf", return_value=True
), patch(
"pystack.__main__.is_gzip", return_value=False
):
main()

Expand All @@ -966,6 +1044,8 @@ def test_global_options_can_be_placed_at_any_point(option):
"pystack.__main__.CoreFileAnalyzer"
), patch(
"pystack.__main__.is_elf", return_value=True
), patch(
"pystack.__main__.is_gzip", return_value=False
):
# THEN

Expand Down Expand Up @@ -1131,6 +1211,8 @@ def test_process_core_does_not_crash_if_core_analyzer_fails(method):
with patch("pystack.__main__.get_process_threads_for_core"), patch(
"pystack.__main__.print_thread"
), patch("pystack.__main__.is_elf", return_value=True), patch(
"pystack.__main__.is_gzip", return_value=False
), patch(
"sys.argv", argv
), patch(
"pathlib.Path.exists", return_value=True
Expand All @@ -1156,6 +1238,8 @@ def test_core_file_missing_modules_are_logged(caplog, native):
with patch("pystack.__main__.get_process_threads_for_core"), patch(
"pystack.__main__.print_thread"
), patch("pystack.__main__.is_elf", return_value=True), patch(
"pystack.__main__.is_gzip", return_value=False
), patch(
"sys.argv", argv
), patch(
"pathlib.Path.exists", return_value=True
Expand Down Expand Up @@ -1187,6 +1271,8 @@ def test_core_file_missing_build_ids_are_logged(caplog, native):
with patch("pystack.__main__.get_process_threads_for_core"), patch(
"pystack.__main__.print_thread"
), patch("pystack.__main__.is_elf", return_value=True), patch(
"pystack.__main__.is_gzip", return_value=False
), patch(
"sys.argv", argv
), patch(
"pathlib.Path.exists", return_value=True
Expand Down
Loading