Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -84,8 +84,20 @@ async def _asave_documents(self) -> None:
raise ValueError("No storage found to save documents.")

def convert_to_path(self, path: Path | str) -> Path:
"""Convert a path to a Path object."""
return Path(KNOWLEDGE_DIRECTORY + "/" + path) if isinstance(path, str) else path
"""Convert a path to a Path object.

Validates that the resolved path stays within the knowledge directory
to prevent path traversal attacks.
"""
if isinstance(path, str):
result = (Path(KNOWLEDGE_DIRECTORY) / path).resolve()
knowledge_dir = Path(KNOWLEDGE_DIRECTORY).resolve()
if not result.is_relative_to(knowledge_dir):
raise ValueError(
f"Path '{path}' is outside the allowed knowledge directory"
)
return result
return path

def _process_file_paths(self) -> list[Path]:
"""Convert file_path to a list of Path objects."""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,12 @@ def validate_content(self) -> list[Path | str]:
except Exception as e:
raise ValueError(f"Invalid URL: {path}. Error: {e!s}") from e
else:
local_path = Path(KNOWLEDGE_DIRECTORY + "/" + path)
local_path = (Path(KNOWLEDGE_DIRECTORY) / path).resolve()
knowledge_dir = Path(KNOWLEDGE_DIRECTORY).resolve()
if not local_path.is_relative_to(knowledge_dir):
raise ValueError(
f"Path '{path}' is outside the allowed knowledge directory"
)
if local_path.exists():
processed_paths.append(local_path)
else:
Expand Down
16 changes: 14 additions & 2 deletions lib/crewai/src/crewai/knowledge/source/excel_knowledge_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,8 +130,20 @@ def _load_content(self) -> dict[Path, dict[str, str]]:
return content_dict

def convert_to_path(self, path: Path | str) -> Path:
"""Convert a path to a Path object."""
return Path(KNOWLEDGE_DIRECTORY + "/" + path) if isinstance(path, str) else path
"""Convert a path to a Path object.

Validates that the resolved path stays within the knowledge directory
to prevent path traversal attacks.
"""
if isinstance(path, str):
result = (Path(KNOWLEDGE_DIRECTORY) / path).resolve()
knowledge_dir = Path(KNOWLEDGE_DIRECTORY).resolve()
if not result.is_relative_to(knowledge_dir):
raise ValueError(
f"Path '{path}' is outside the allowed knowledge directory"
)
return result
return path

def _import_dependencies(self) -> ModuleType:
"""Dynamically import dependencies."""
Expand Down
64 changes: 64 additions & 0 deletions lib/crewai/tests/knowledge/test_knowledge.py
Original file line number Diff line number Diff line change
Expand Up @@ -603,6 +603,70 @@ def test_file_path_validation():
PDFKnowledgeSource()


def test_path_traversal_blocked_in_base_file_knowledge_source(tmpdir):
"""Test that path traversal sequences are blocked in BaseFileKnowledgeSource.convert_to_path()."""
traversal_paths = [
"../../../etc/passwd",
"../../secret_file.txt",
"subdir/../../.env",
"../outside.txt",
]
for malicious_path in traversal_paths:
with pytest.raises(ValueError, match="outside the allowed knowledge directory"):
TextFileKnowledgeSource(file_paths=[malicious_path])


def test_path_traversal_blocked_in_excel_knowledge_source():
"""Test that path traversal sequences are blocked in ExcelKnowledgeSource.convert_to_path()."""
traversal_paths = [
"../../../etc/passwd",
"../../secret_file.xlsx",
"subdir/../../.env",
]
for malicious_path in traversal_paths:
with pytest.raises(ValueError, match="outside the allowed knowledge directory"):
ExcelKnowledgeSource(file_paths=[malicious_path])


def test_path_traversal_blocked_in_crew_docling_source():
"""Test that path traversal sequences are blocked in CrewDoclingSource.validate_content()."""
pytest.importorskip("docling")

traversal_paths = [
"../../../etc/passwd",
"../../secret_file.pdf",
"subdir/../../.env",
]
for malicious_path in traversal_paths:
with pytest.raises(ValueError, match="outside the allowed knowledge directory"):
CrewDoclingSource(file_paths=[malicious_path])


def test_valid_paths_still_work_in_convert_to_path(tmpdir):
"""Test that valid paths within the knowledge directory are accepted."""
import os

knowledge_dir = Path("knowledge")
knowledge_dir.mkdir(exist_ok=True)
test_file = knowledge_dir / "test_valid.txt"
test_file.write_text("test content")

try:
source = TextFileKnowledgeSource(file_paths=[test_file])
assert len(source.safe_file_paths) == 1
finally:
test_file.unlink(missing_ok=True)


def test_path_object_bypasses_traversal_check(tmpdir):
"""Test that Path objects (not strings) bypass the traversal check since they are explicit."""
file_path = Path(tmpdir.join("test.txt"))
file_path.write_text("test content")

source = TextFileKnowledgeSource(file_paths=[file_path])
assert source.safe_file_paths == [file_path]


def test_hash_based_id_generation_without_doc_id(mock_vector_db):
"""Test that documents without doc_id generate hash-based IDs. Duplicates are deduplicated before upsert."""
import hashlib
Expand Down
Loading