Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ cmake_minimum_required(VERSION 3.26)

project(
DIE
VERSION 0.5.0
VERSION 0.5.1
LANGUAGES CXX
DESCRIPTION "DIE Library implementation"
)
Expand Down
17 changes: 6 additions & 11 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ import die, pathlib
print(die.scan_file("c:/windows/system32/ntdll.dll", die.ScanFlags.DEEP_SCAN))
'PE64'

print(die.scan_file("../upx.exe", die.ScanFlags.RESULT_AS_JSON, str(die.database_path/'db') ))
print(die.scan_file("../upx.exe", die.ScanFlags.RESULT_AS_JSON, str(die.database_path) ))
{
"detects": [
{
Expand Down Expand Up @@ -86,16 +86,11 @@ print(die.scan_file("../upx.exe", die.ScanFlags.RESULT_AS_JSON, str(die.database

for db in die.databases():
print(db)
C:\Users\User\AppData\Roaming\Python\Python312\site-packages\die\db\db\ACE
C:\Users\User\AppData\Roaming\Python\Python312\site-packages\die\db\db\APK\PackageName.1.sg
C:\Users\User\AppData\Roaming\Python\Python312\site-packages\die\db\db\APK\SingleJar.3.sg
C:\Users\User\AppData\Roaming\Python\Python312\site-packages\die\db\db\APK\_APK.0.sg
C:\Users\User\AppData\Roaming\Python\Python312\site-packages\die\db\db\APK\_init
C:\Users\User\AppData\Roaming\Python\Python312\site-packages\die\db\db\Archive\_init
C:\Users\User\AppData\Roaming\Python\Python312\site-packages\die\db\db\archive-file
C:\Users\User\AppData\Roaming\Python\Python312\site-packages\die\db\db\arj
C:\Users\User\AppData\Roaming\Python\Python312\site-packages\die\db\db\Binary\Amiga loadable.1.sg
C:\Users\User\AppData\Roaming\Python\Python312\site-packages\die\db\db\Binary\archive.7z.1.sg
\path\to\your\pyenv\site-packages\die\db\ACE
\path\to\your\pyenv\site-packages\die\db\Amiga\DeliTracker.1.sg
\path\to\your\pyenv\site-packages\die\db\Amiga\_Amiga.0.sg
\path\to\your\pyenv\site-packages\die\db\Amiga\_init
\path\to\your\pyenv\site-packages\die\db\APK\AlibabaProtection.2.sg
[...]
```

Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "scikit_build_core.build"

[project]
name = "die_python"
version = "0.5.0"
version = "0.5.1"
description = "Python bindings for Detect It Easy (DIE)."
readme = "./README.md"
license.file = "./LICENSE"
Expand Down
44 changes: 41 additions & 3 deletions python/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
project(
die-python
LANGUAGES CXX
VERSION 0.5.0
VERSION 0.5.1
)

find_package(Python 3
Expand Down Expand Up @@ -68,11 +68,45 @@ target_link_libraries(_die PRIVATE Qt6::Qml)
target_link_libraries(_die PRIVATE Qt6::Concurrent)
target_link_libraries(_die PRIVATE Qt6::Network)

# Workaround: die_library on Windows installs files to incorrect locations.
# Remove these before our correct install rules:
#   - db/ directory      (should be die/db, not site-packages/db)
#   - die.lib            (should be die/die.lib, not root die.lib)
#   - include/ directory (C++ headers not needed in Python wheel)
if(WIN32)
  install(CODE [[
    # This is a bracket argument, so ${CMAKE_INSTALL_PREFIX} is expanded when
    # the generated install script runs rather than at configure time.
    foreach(stale_path IN ITEMS
        "${CMAKE_INSTALL_PREFIX}/db"
        "${CMAKE_INSTALL_PREFIX}/die.lib"
        "${CMAKE_INSTALL_PREFIX}/include")
      if(NOT EXISTS "${stale_path}")
        continue()
      endif()

      # Work out the label before deleting; afterwards the path is gone.
      if(IS_DIRECTORY "${stale_path}")
        set(stale_kind "directory")
      else()
        set(stale_kind "file")
      endif()

      # file(REMOVE_RECURSE) handles both files and directories, avoiding a
      # child process and the deprecated `cmake -E remove` command.
      file(REMOVE_RECURSE "${stale_path}")

      # Only report success when the path really disappeared.
      if(EXISTS "${stale_path}")
        message(WARNING "Could not remove ${stale_kind}: ${stale_path}")
      else()
        message(STATUS "Removed ${stale_kind}: ${stale_path}")
      endif()
    endforeach()
  ]])
endif()

install(DIRECTORY die DESTINATION .)
install(TARGETS _die DESTINATION die/)
install(TARGETS die DESTINATION die/)
install(DIRECTORY ${DIELIB_BASE_ROOT}/dep/Detect-It-Easy/db DESTINATION die/db)
install(DIRECTORY ${DIELIB_BASE_ROOT}/dep/Detect-It-Easy/db_custom DESTINATION die/db)
# Fix: Install database to die/db instead of die/db/db
install(DIRECTORY ${DIELIB_BASE_ROOT}/dep/Detect-It-Easy/db DESTINATION die)
install(DIRECTORY ${DIELIB_BASE_ROOT}/dep/Detect-It-Easy/db_custom DESTINATION die)

if(LINUX OR APPLE)
install(
Expand All @@ -93,6 +127,7 @@ if(LINUX OR APPLE)
PATTERN "pkgconfig" EXCLUDE
)
else()
# Windows: Install Qt DLLs and ICU libraries
install(
DIRECTORY
${Qt6_DIR}/../../../bin/
Expand All @@ -103,5 +138,8 @@ else()
PATTERN "Qt6Qml.*"
PATTERN "Qt6Concurrent.*"
PATTERN "Qt6Network.*"
PATTERN "icudt*.dll"
PATTERN "icuin*.dll"
PATTERN "icuuc*.dll"
)
endif()
107 changes: 105 additions & 2 deletions python/die/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import enum
import pathlib
import warnings

from typing import Generator, Optional, Union

Expand All @@ -16,8 +17,110 @@

version_major, version_minor, version_patch = map(int, __version__.split("."))

database_path = pathlib.Path(__path__[0]) / "db"
"""Path to the DIE signature database"""

# Use the concrete Path type (PosixPath / WindowsPath) as the base class so
# that instances keep passing isinstance(x, pathlib.Path) checks.
_BasePath = type(pathlib.Path())


class _DatabasePath(_BasePath):
    """
    Database path that tolerates both on-disk layouts of the signature DB.

    Two layouts are recognised, relative to the raw (as-constructed) path:

    1. ``<path>/PE`` exists (flat layout): the raw path is the database.
    2. otherwise, ``<path>/db/PE`` exists (nested layout): the database is
       ``<path>/db`` and string / filesystem conversions are redirected there.

    If neither marker exists the raw path is used unchanged.

    Usage patterns:

    - New code: use ``database_path`` directly.
    - Old code: ``database_path / 'db'`` keeps working. In the flat layout the
      extra segment is redundant, so it is dropped and a
      ``DeprecationWarning`` is emitted.

    Joining any other segment builds the child from the *resolved* location,
    so ``str(p / "x")`` always lies underneath ``str(p)``.
    """

    def __new__(cls, *args, **kwargs):
        """Create the path and initialise the resolution cache.

        ``kwargs`` are accepted for signature compatibility but are not
        forwarded to the base class.
        """
        obj = super().__new__(cls, *args)
        obj._resolved_path_str = None
        return obj

    @staticmethod
    def _is_bare_db_segment(segment):
        """Return True when *segment* is exactly the single component ``db``.

        Accepts plain strings as well as pathlib objects, so ``"db"``,
        ``"db/"`` and ``pathlib.Path("db")`` are all treated alike.
        """
        if not isinstance(segment, (str, pathlib.PurePath)):
            return False
        return pathlib.PurePath(segment).parts == ("db",)

    def _get_resolved_str(self):
        """Resolve and return the actual database path as a string.

        The result is cached on the instance after the first call.
        """
        # getattr with a default: on older Python versions pathlib may create
        # derived paths without calling __new__, leaving the cache unset.
        # See: https://github.com/python/cpython/issues/100479
        resolved = getattr(self, "_resolved_path_str", None)

        if resolved is None:
            # Ask the parent class for the raw string; calling str(self) here
            # would recurse through our own __str__.
            raw = super().__str__()
            base = pathlib.Path(raw)

            if not (base / "PE").exists() and (base / "db" / "PE").exists():
                # Nested layout: the database sits one level deeper.
                resolved = str(base / "db")
            else:
                # Flat layout, or no marker at all: keep the raw path.
                resolved = raw

            self._resolved_path_str = resolved

        return resolved

    def __truediv__(self, other):
        """Join *other* onto this path, keeping legacy ``/ 'db'`` usage working.

        - ``/ 'db'`` in the flat layout: warn and return ``self`` unchanged.
        - ``/ 'db'`` otherwise: ordinary join on the raw path.
        - any other segment while the nested redirect is active: join onto the
          resolved directory, so the child is consistent with ``str(self)``.
        - everything else: ordinary pathlib join.
        """
        raw = super().__str__()
        legacy_db = self._is_bare_db_segment(other)

        if legacy_db and (pathlib.Path(raw) / "PE").exists():
            # Flat layout: the database is already at this path, so the old
            # workaround segment is redundant.
            warnings.warn(
                "Using 'database_path / \"db\"' is deprecated and no longer needed. "
                "The database is now directly at 'database_path'. "
                "Simply use 'database_path' instead.",
                DeprecationWarning,
                stacklevel=2,
            )
            return self

        resolved = self._get_resolved_str()
        if not legacy_db and resolved != raw:
            # Nested layout: str(self) already points at <raw>/db, so children
            # must be built from there instead of from the raw parts.
            return type(self)(resolved).__truediv__(other)

        # Default behaviour: plain pathlib concatenation on the raw path.
        return super().__truediv__(other)

    def __str__(self):
        """Return the resolved database path as a string."""
        return self._get_resolved_str()

    def __fspath__(self):
        """Return the resolved database path for os.fspath()."""
        return self._get_resolved_str()

    def exists(self, **kwargs):
        """Check if the resolved database path exists.

        Keyword arguments (e.g. ``follow_symlinks`` on Python 3.12+) are passed
        through to ``pathlib.Path.exists`` unchanged.
        """
        return pathlib.Path(self._get_resolved_str()).exists(**kwargs)

    def iterdir(self):
        """Iterate over the entries of the resolved database directory."""
        return pathlib.Path(self._get_resolved_str()).iterdir()


# Public module attribute: the signature database location, built by joining
# "db" onto the package directory (__path__[0]) through _DatabasePath so the
# layout detection implemented by that class applies to it.
database_path = _DatabasePath(__path__[0]) / "db"
"""Path to the DIE signature database

This path automatically points to the correct database location,
regardless of how the package is laid out:
- When the database directory is installed directly at die/db/
- When the database directory is installed at die/db/db/

Usage:
# Recommended:
die.scan_file(file, flags, str(die.database_path))

# Legacy code (still works, but may show a deprecation warning):
die.scan_file(file, flags, str(die.database_path / 'db'))
"""


class ScanFlags(enum.IntFlag):
Expand Down
2 changes: 1 addition & 1 deletion python/src/die.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ NB_MODULE(_die, m)
.export_values();

m.doc() = "The native `die` module";
m.attr("__version__") = "0.5.0";
m.attr("__version__") = "0.5.1";

Copilot AI Feb 9, 2026

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

m.attr("__version__") is bumped to 0.5.1 here, but the build/package versions still appear to be 0.5.0 (e.g., pyproject.toml and python/CMakeLists.txt). This will lead to inconsistent version reporting (wheel metadata vs die.__version__). Please update all version sources in the repo together or derive the extension version from the project/package version to keep them in sync.

Suggested change
m.attr("__version__") = "0.5.1";
m.attr("__version__") = DIE_VERSION;

Copilot uses AI. Check for mistakes.
m.attr("die_version") = DIE_VERSION;
m.attr("dielib_version") = DIELIB_VERSION;

Expand Down
82 changes: 80 additions & 2 deletions python/tests/test_die.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,8 @@ def test_constants():
assert die.dielib_version

# validate die database
assert isinstance(die.database_path, pathlib.Path)
assert isinstance(die.database_path, die._DatabasePath)
assert die.database_path.exists()
Comment on lines 21 to 23

Copilot AI Feb 9, 2026

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This assertion now couples the public API to the private _DatabasePath implementation. If _DatabasePath is meant to be an internal compatibility shim, tests should validate the public contract instead (e.g., os.fspath(die.database_path) returns an existing directory, and it behaves like a PathLike) rather than asserting the internal type.

Copilot uses AI. Check for mistakes.
assert die.database_path.is_dir()

# validate scan flags
assert die._DieFlags.Deepscan.value == die.ScanFlags.DEEP_SCAN
Expand Down Expand Up @@ -156,3 +155,82 @@ def test_basic_databases():
assert isinstance(db, pathlib.Path)
assert db.exists()
assert db.is_file()


def test_database_path_backward_compatibility():
    """Check that both the direct and the legacy ``/ 'db'`` spellings work."""
    import warnings

    # Step 1: converting database_path to a string must be warning-free.
    with warnings.catch_warnings(record=True) as direct_warnings:
        warnings.simplefilter("always")
        path_new = str(die.database_path)
        assert len(direct_warnings) == 0, "New usage should not produce warnings"

    # Step 2: the resolved location exists and holds the PE/ signatures.
    resolved_dir = pathlib.Path(path_new)
    assert resolved_dir.exists(), f"Database path does not exist: {resolved_dir}"
    assert (resolved_dir / "PE").exists(), f"PE directory not found at {resolved_dir}"

    # Step 3: the legacy spelling must still produce a usable path, whichever
    # layout is installed.
    with warnings.catch_warnings(record=True) as legacy_warnings:
        warnings.simplefilter("always")
        path_old = str(die.database_path / "db")

        legacy_dir = pathlib.Path(path_old)
        assert legacy_dir.exists(), f"Old usage path doesn't exist: {path_old}"

        if not legacy_warnings:
            # Nested layout: no warning is raised because / 'db' is required,
            # and both spellings end up as the very same string.
            assert path_new == path_old
        else:
            # Flat layout: exactly one DeprecationWarning that mentions
            # database_path, and both spellings name the same location.
            assert len(legacy_warnings) == 1
            first = legacy_warnings[0]
            assert issubclass(first.category, DeprecationWarning)
            assert "database_path" in str(first.message).lower()
            assert pathlib.Path(path_new) == legacy_dir


def test_database_path_resolves_correctly():
    """Check that the resolved database directory holds the expected folders."""
    root = pathlib.Path(str(die.database_path))

    # PE/ is the main signature database, so it is checked first on its own.
    pe_dir = root / "PE"
    assert pe_dir.exists(), f"PE directory not found at {root}"

    # Then every expected format directory, PE included.
    for folder in ("PE", "ELF", "MACH"):
        candidate = root / folder
        assert candidate.exists(), \
            f"Expected directory {folder} not found at {root}"


def test_scan_with_explicit_database_path(target_binary: pathlib.Path):
"""Test that scan_file works with explicit database path."""
import warnings

# Test with new usage (no /'db')
with warnings.catch_warnings(record=True):
warnings.simplefilter("always")
res = die.scan_file(
target_binary,
die.ScanFlags.DEEP_SCAN,
database=str(die.database_path),
)
Comment on lines +219 to +223

Copilot AI Feb 9, 2026

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These multi-line scan_file calls are not formatted consistently with the rest of the file (missing trailing comma on the last argument, so Black would reformat it). Please add trailing commas so formatting is stable and consistent with existing calls above.

Copilot uses AI. Check for mistakes.
assert res
assert isinstance(res, str)

# Test with old usage (with /'db')
with warnings.catch_warnings(record=True):
warnings.simplefilter("always")
res = die.scan_file(
target_binary,
die.ScanFlags.DEEP_SCAN,
database=str(die.database_path / 'db'),
)
assert res
assert isinstance(res, str)
4 changes: 2 additions & 2 deletions python/tests/test_regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@

TESTS_FOLDER = pathlib.Path(__file__).parent.absolute()
DATA_FOLDER = TESTS_FOLDER / "data"
DB_FOLDER = die.database_path / "db"
DB_FOLDER = die.database_path


def test_issue_48():
def test_issue_28():
# issue https://github.com/elastic/die-python/issues/28
# pr https://github.com/elastic/die-python/pull/30
fpath = DATA_FOLDER / "test.rar"
Expand Down
Loading