diff --git a/src/codegen/extensions/tools/list_directory.py b/src/codegen/extensions/tools/list_directory.py index 6774f43a7..b2668c435 100644 --- a/src/codegen/extensions/tools/list_directory.py +++ b/src/codegen/extensions/tools/list_directory.py @@ -140,13 +140,13 @@ def get_directory_info(dir_obj: Directory, current_depth: int) -> DirectoryInfo: """Helper function to get directory info recursively.""" # Get direct files (always include files unless at max depth) all_files = [] - for file in dir_obj.files: + for file in dir_obj.files(recursive=True): if file.directory == dir_obj: all_files.append(file.filepath.split("/")[-1]) # Get direct subdirectories subdirs = [] - for subdir in dir_obj.subdirectories: + for subdir in dir_obj.subdirectories(recursive=True): # Only include direct descendants if subdir.parent == dir_obj: if current_depth > 1 or current_depth == -1: diff --git a/src/codegen/git/repo_operator/repo_operator.py b/src/codegen/git/repo_operator/repo_operator.py index cd6d9b37d..0b66decf1 100644 --- a/src/codegen/git/repo_operator/repo_operator.py +++ b/src/codegen/git/repo_operator/repo_operator.py @@ -571,7 +571,11 @@ def delete_file(self, path: str) -> None: def get_filepaths_for_repo(self, ignore_list): # Get list of files to iterate over based on gitignore setting if self.repo_config.respect_gitignore: - filepaths = self.git_cli.git.ls_files().split("\n") + # ls-files flags: + # -c: show cached files + # -o: show other / untracked files + # --exclude-standard: skip files matched by the standard git ignore rules + filepaths = self.git_cli.git.ls_files("-co", "--exclude-standard").split("\n") else: filepaths = glob.glob("**", root_dir=self.repo_path, recursive=True, include_hidden=True) # Filter filepaths by ignore list. 
diff --git a/src/codegen/sdk/codebase/codebase_context.py b/src/codegen/sdk/codebase/codebase_context.py index f6c333e02..5b59480ea 100644 --- a/src/codegen/sdk/codebase/codebase_context.py +++ b/src/codegen/sdk/codebase/codebase_context.py @@ -46,7 +46,7 @@ from codegen.sdk.core.dataclasses.usage import Usage from codegen.sdk.core.expressions import Expression from codegen.sdk.core.external_module import ExternalModule - from codegen.sdk.core.file import SourceFile + from codegen.sdk.core.file import File, SourceFile from codegen.sdk.core.interfaces.importable import Importable from codegen.sdk.core.node_id_factory import NodeId from codegen.sdk.core.parser import Parser @@ -343,33 +343,15 @@ def prune_graph(self) -> None: self.remove_node(module.node_id) self._ext_module_idx.pop(module._idx_key, None) - def build_directory_tree(self, files: list[SourceFile]) -> None: + def build_directory_tree(self) -> None: """Builds the directory tree for the codebase""" # Reset and rebuild the directory tree self.directories = dict() - created_dirs = set() - for file in files: - directory = self.get_directory(file.path.parent, create_on_missing=True) - directory.add_file(file) - file._set_directory(directory) - created_dirs.add(file.path.parent) - - def _dir_has_file(filepath): - gen = os.scandir(filepath) - while entry := next(gen, None): - if entry.is_file(): - return True - return False - - for ctx in self.projects: - for rel_filepath in ctx.repo_operator.get_filepaths_for_repo(GLOBAL_FILE_IGNORE_LIST): - abs_filepath = self.to_absolute(rel_filepath) - if not abs_filepath.is_dir(): - abs_filepath = abs_filepath.parent - - if abs_filepath not in created_dirs and self.is_subdir(abs_filepath) and _dir_has_file(abs_filepath): - directory = self.get_directory(abs_filepath, create_on_missing=True) - created_dirs.add(abs_filepath) + + for file_path, _ in self.projects[0].repo_operator.iter_files(subdirs=self.projects[0].subdirectories, ignore_list=GLOBAL_FILE_IGNORE_LIST): + 
file_path = Path(file_path) + directory = self.get_directory(file_path.parent, create_on_missing=True) + directory._add_file(file_path.name) def get_directory(self, directory_path: PathLike, create_on_missing: bool = False, ignore_case: bool = False) -> Directory | None: """Returns the directory object for the given path, or None if the directory does not exist. @@ -397,16 +379,16 @@ def get_directory(self, directory_path: PathLike, create_on_missing: bool = Fals # Base Case if str(absolute_path) == str(self.repo_path) or str(absolute_path) == str(parent_path): - root_directory = Directory(path=absolute_path, dirpath="", parent=None) + root_directory = Directory(ctx=self, path=absolute_path, dirpath="") self.directories[absolute_path] = root_directory return root_directory # Recursively create the parent directory parent = self.get_directory(parent_path, create_on_missing=True) # Create the directory - directory = Directory(path=absolute_path, dirpath=str(self.to_relative(absolute_path)), parent=parent) + directory = Directory(ctx=self, path=absolute_path, dirpath=str(self.to_relative(absolute_path))) # Add the directory to the parent - parent.add_subdirectory(directory) + parent._add_subdirectory(directory.name) # Add the directory to the tree self.directories[absolute_path] = directory return directory @@ -512,7 +494,7 @@ def _process_diff_files(self, files_to_sync: Mapping[SyncType, list[Path]], incr # Step 6: Build directory tree logger.info("> Building directory tree") files = [f for f in sort_editables(self.get_nodes(NodeType.FILE), alphabetical=True, dedupe=False)] - self.build_directory_tree(files) + self.build_directory_tree() # Step 7: Build configs if self.config_parser is not None: @@ -611,13 +593,20 @@ def get_file(self, file_path: os.PathLike, ignore_case: bool = False) -> SourceF if node_id is not None: return self.get_node(node_id) if ignore_case: - parent = self.to_absolute(file_path).parent - if parent == Path(self.repo_path): - for file in 
self.to_absolute(self.repo_path).iterdir(): - if str(file_path).lower() == str(self.to_absolute(file)).lower(): - return self.get_file(file, ignore_case=False) - if directory := self.get_directory(parent, ignore_case=ignore_case): - return directory.get_file(os.path.basename(file_path), ignore_case=ignore_case) + # Using `get_directory` so that the case insensitive lookup works + parent = self.get_directory(self.to_absolute(file_path).parent, ignore_case=ignore_case).path + for file in parent.iterdir(): + if str(file_path).lower() == str(self.to_relative(file)).lower(): + return self.get_file(file, ignore_case=False) + + def _get_raw_file_from_path(self, path: Path) -> File | None: + from codegen.sdk.core.file import File + + try: + return File.from_content(path, self.io.read_text(path), self, sync=False) + except UnicodeDecodeError: + # Handle when file is a binary file + return File.from_content(path, self.io.read_bytes(path), self, sync=False, binary=True) def get_external_module(self, module: str, import_name: str) -> ExternalModule | None: node_id = self._ext_module_idx.get(module + "::" + import_name, None) diff --git a/src/codegen/sdk/core/codebase.py b/src/codegen/sdk/core/codebase.py index 090e87c50..1979e5f89 100644 --- a/src/codegen/sdk/core/codebase.py +++ b/src/codegen/sdk/core/codebase.py @@ -522,37 +522,24 @@ def get_file(self, filepath: str, *, optional: bool = False, ignore_case: bool = Raises: ValueError: If file not found and optional=False. 
""" - - def get_file_from_path(path: Path) -> File | None: - try: - return File.from_content(path, self.ctx.io.read_text(path), self.ctx, sync=False) - except UnicodeDecodeError: - # Handle when file is a binary file - return File.from_content(path, self.ctx.io.read_bytes(path), self.ctx, sync=False, binary=True) - # Try to get the file from the graph first file = self.ctx.get_file(filepath, ignore_case=ignore_case) if file is not None: return file + # If the file is not in the graph, check the filesystem absolute_path = self.ctx.to_absolute(filepath) - if absolute_path.suffix in self.ctx.extensions and not self.ctx.io.file_exists(absolute_path): - return None if self.ctx.io.file_exists(absolute_path): - return get_file_from_path(absolute_path) - elif ignore_case: - parent = absolute_path.parent - if parent == Path(self.ctx.repo_path): - for file in self.ctx.to_absolute(self.ctx.repo_path).iterdir(): - if str(absolute_path).lower() == str(file).lower(): - return get_file_from_path(file) - else: - dir = self.ctx.get_directory(parent, ignore_case=ignore_case) - if dir is None: - return None - for file in dir.path.iterdir(): - if str(absolute_path).lower() == str(file).lower(): - return get_file_from_path(file) - elif not optional: + return self.ctx._get_raw_file_from_path(absolute_path) + # If the file is not in the graph, check the filesystem + if absolute_path.parent.exists(): + for file in absolute_path.parent.iterdir(): + if ignore_case and str(absolute_path).lower() == str(file).lower(): + return self.ctx._get_raw_file_from_path(file) + elif not ignore_case and str(absolute_path) == str(file): + return self.ctx._get_raw_file_from_path(file) + + # If we get here, the file is not found + if not optional: msg = f"File {filepath} not found in codebase. Use optional=True to return None instead." 
raise ValueError(msg) return None diff --git a/src/codegen/sdk/core/directory.py b/src/codegen/sdk/core/directory.py index eb54adde0..5193015ff 100644 --- a/src/codegen/sdk/core/directory.py +++ b/src/codegen/sdk/core/directory.py @@ -2,9 +2,11 @@ import os from collections.abc import Iterator from pathlib import Path -from typing import Generic, Self +from typing import TYPE_CHECKING, Generic, Literal, Self +from codegen.sdk._proxy import proxy_property from codegen.sdk.core.interfaces.has_symbols import ( + FilesParam, HasSymbols, TClass, TFile, @@ -15,11 +17,17 @@ TSymbol, ) from codegen.sdk.core.utils.cache_utils import cached_generator +from codegen.sdk.enums import NodeType +from codegen.sdk.extensions.sort import sort_editables from codegen.shared.decorators.docs import apidoc, noapidoc logger = logging.getLogger(__name__) +if TYPE_CHECKING: + from codegen.sdk.codebase.codebase_context import CodebaseContext + + @apidoc class Directory( HasSymbols[TFile, TSymbol, TImportStatement, TGlobalVar, TClass, TFunction, TImport], @@ -36,21 +44,23 @@ class Directory( items: A dictionary containing files and subdirectories within the directory. 
""" + ctx: "CodebaseContext" path: Path # Absolute Path dirpath: str # Relative Path - parent: Self | None - items: dict[str, TFile | Self] + _files: list[str] # List of file names + _subdirectories: list[str] # List of subdirectory names - def __init__(self, path: Path, dirpath: str, parent: Self | None): + def __init__(self, ctx: "CodebaseContext", path: Path, dirpath: str): + self.ctx = ctx self.path = path self.dirpath = dirpath - self.parent = parent - self.items = {} + self._files = [] + self._subdirectories = [] def __iter__(self): - return iter(self.items.values()) + return iter(self.items) - def _is_a_subdirectory_of(self, target_directory: "Directory"): + def _is_a_subdirectory_of(self, target_directory: Self): """Checks whether this directory is a subdirectory of another directory""" if self.parent == target_directory: return True @@ -59,36 +69,38 @@ def _is_a_subdirectory_of(self, target_directory: "Directory"): return self.parent._is_a_subdirectory_of(target_directory=target_directory) def __contains__(self, item: str | TFile | Self) -> bool: + from codegen.sdk.core.file import File + + # Try to match all file and subdirectory names if isinstance(item, str): - return item in self.items + if item in self.item_names: + return True + # Try to match all subdirectories elif isinstance(item, Directory): - return item._is_a_subdirectory_of(self) - else: - # It could only ever be a file here, at least according to item's types... 
- match item.directory: - case None: - return False - case _ if item.directory == self: - return True - case _: - return item.directory._is_a_subdirectory_of(self) + if item.name in [directory.name for directory in self.subdirectories]: + return True + # Try to match all files + elif isinstance(item, File): + if item.name in [file.name for file in self.files(extensions="*")]: + return True - def __len__(self) -> int: - return len(self.items) + # Attempt to match recursively + for directory in self.subdirectories(recursive=False): + if item in directory: + return True - def __getitem__(self, item_name: str) -> TFile | Self: - return self.items[item_name] + # If no match, return False + return False - def __setitem__(self, item_name: str, item: TFile | Self) -> None: - self.items[item_name] = item + def __len__(self) -> int: + # Using item names here as items will cause an infinite loop + return len(self.item_names) - def __delitem__(self, item_name: str) -> None: - del self.items[item_name] - msg = f"Item {item_name} not found in directory {self.dirpath}" - raise KeyError(msg) + def __getitem__(self, item_name: str) -> TFile | Self: + return next((item for item in self.items if item.name == item_name), None) def __repr__(self) -> str: - return f"Directory({self.dirpath}, {self.items.keys()})" + return f"Directory(name='{self.name}', items={self.item_names})" @property def name(self) -> str: @@ -101,98 +113,140 @@ def name(self) -> str: """ return os.path.basename(self.dirpath) - @property - def files(self) -> list[TFile]: - """Get a recursive list of all files in the directory and its subdirectories.""" + @proxy_property + def files(self, *, extensions: list[str] | Literal["*"] | None = None, recursive: bool = False) -> list[TFile]: + """Gets a list of all top level files in the directory. + + Set `recursive=True` to get all files recursively. + + By default, this only returns source files. 
Setting `extensions='*'` will return all files, and + `extensions=[...]` will return all files with the specified extensions. + + For Python and TypeScript repos WITH file parsing enabled, + `extensions='*'` is REQUIRED for listing all non-source-code files. + Otherwise, this will ONLY return source files (e.g. .py, .ts). + + For repos with file parsing disabled or repos with other languages, this will return all files in the directory. + + Returns the files in the directory, sorted alphabetically. For Python codebases, returns PyFiles (python files). + For TypeScript codebases, returns TSFiles (typescript files). + + Returns: + list[TFile]: A sorted list of files in the directory. + """ + # If there are no source files, return ALL files + if len(self.ctx.get_nodes(NodeType.FILE)) == 0: + extensions = "*" + # If extensions is not set, use the extensions from the codebase + elif extensions is None: + extensions = self.ctx.extensions + files = [] + for file_name in self._files: + if extensions == "*": + files.append(self.get_file(file_name)) + elif extensions is not None: + if any(file_name.endswith(ext) for ext in extensions): + files.append(self.get_file(file_name)) - def _get_files(directory: Directory): - for item in directory.items.values(): - if isinstance(item, Directory): - _get_files(item) - else: - files.append(item) + if recursive: + for directory in self.subdirectories: + files.extend(directory.files(extensions=extensions, recursive=True)) - _get_files(self) - return files + return sort_editables(files, alphabetical=True, dedupe=False) - @property - def subdirectories(self) -> list[Self]: - """Get a recursive list of all subdirectories in the directory and its subdirectories.""" + @proxy_property + def subdirectories(self, recursive: bool = False) -> list[Self]: + """Get a list of all top level subdirectories in the directory. + + Set `recursive=True` to get all subdirectories recursively. 
+ + Returns: + list[Directory]: A sorted list of subdirectories in the directory. + """ subdirectories = [] + for directory_name in self._subdirectories: + subdirectories.append(self.get_subdirectory(directory_name)) - def _get_subdirectories(directory: Directory): - for item in directory.items.values(): - if isinstance(item, Directory): - subdirectories.append(item) - _get_subdirectories(item) + if recursive: + for directory in self.subdirectories: + subdirectories.extend(directory.subdirectories(recursive=True)) - _get_subdirectories(self) - return subdirectories + return sorted(subdirectories, key=lambda x: x.name) - @noapidoc - @cached_generator() - def files_generator(self) -> Iterator[TFile]: - """Yield files recursively from the directory.""" - yield from self.files + @proxy_property + def items(self, recursive: bool = False) -> list[Self | TFile]: + """Get a list of all files and subdirectories in the directory. - # Directory-specific methods - def add_file(self, file: TFile) -> None: - """Add a file to the directory.""" - rel_path = os.path.relpath(file.file_path, self.dirpath) - self.items[rel_path] = file + Set `recursive=True` to get all files and subdirectories recursively. - def remove_file(self, file: TFile) -> None: - """Remove a file from the directory.""" - rel_path = os.path.relpath(file.file_path, self.dirpath) - del self.items[rel_path] + Returns: + list[Self | TFile]: A sorted list of files and subdirectories in the directory. + """ + return self.files(extensions="*", recursive=recursive) + self.subdirectories(recursive=recursive) - def remove_file_by_path(self, file_path: os.PathLike) -> None: - """Remove a file from the directory by its path.""" - rel_path = str(Path(file_path).relative_to(self.dirpath)) - del self.items[rel_path] + @property + def item_names(self, recursive: bool = False) -> list[str]: + """Get a list of all file and subdirectory names in the directory. 
- def get_file(self, filename: str, ignore_case: bool = False) -> TFile | None: - """Get a file by its name relative to the directory.""" - from codegen.sdk.core.file import File + Only direct children are listed; the `recursive` argument is ignored by the implementation. - if ignore_case: - return next( - (f for name, f in self.items.items() if name.lower() == filename.lower() and isinstance(f, File)), - None, - ) - return self.items.get(filename, None) + Returns: + list[str]: A list of file and subdirectory names in the directory. + """ + return self._files + self._subdirectories - def add_subdirectory(self, subdirectory: Self) -> None: - """Add a subdirectory to the directory.""" - rel_path = os.path.relpath(subdirectory.dirpath, self.dirpath) - self.items[rel_path] = subdirectory + @property + def tree(self) -> list[Self | TFile]: + """Get a recursive list of all files and subdirectories in the directory. - def remove_subdirectory(self, subdirectory: Self) -> None: - """Remove a subdirectory from the directory.""" - rel_path = os.path.relpath(subdirectory.dirpath, self.dirpath) - del self.items[rel_path] + Returns: + list[Self | TFile]: A recursive list of files and subdirectories in the directory. 
+ """ + return self.items(recursive=True) - def remove_subdirectory_by_path(self, subdirectory_path: str) -> None: - """Remove a subdirectory from the directory by its path.""" - rel_path = os.path.relpath(subdirectory_path, self.dirpath) - del self.items[rel_path] + @property + def parent(self) -> Self | None: + """Get the parent directory of the current directory.""" + return self.ctx.get_directory(self.path.parent) + + @noapidoc + @cached_generator() + def files_generator(self, *args: FilesParam.args, **kwargs: FilesParam.kwargs) -> Iterator[TFile]: + """Yield files recursively from the directory.""" + yield from self.files(*args, extensions="*", **kwargs, recursive=True) + + def get_file(self, filename: str, ignore_case: bool = False) -> TFile | None: + """Get a file by its name relative to the directory.""" + file_path = os.path.join(self.dirpath, filename) + absolute_path = self.ctx.to_absolute(file_path) + # Try to get the file from the graph first + file = self.ctx.get_file(file_path, ignore_case=ignore_case) + if file is not None: + return file + # If the file is not in the graph, check the filesystem + for file in absolute_path.parent.iterdir(): + if ignore_case and str(absolute_path).lower() == str(file).lower(): + return self.ctx._get_raw_file_from_path(file) + elif not ignore_case and str(absolute_path) == str(file): + return self.ctx._get_raw_file_from_path(file) + return None def get_subdirectory(self, subdirectory_name: str) -> Self | None: """Get a subdirectory by its name (relative to the directory).""" - return self.items.get(subdirectory_name, None) + return self.ctx.get_directory(os.path.join(self.dirpath, subdirectory_name)) def update_filepath(self, new_filepath: str) -> None: """Update the filepath of the directory and its contained files.""" old_path = self.dirpath new_path = new_filepath - for file in self.files: + for file in self.files(recursive=True): new_file_path = os.path.join(new_path, os.path.relpath(file.file_path, old_path)) 
file.update_filepath(new_file_path) def remove(self) -> None: """Remove all the files in the files container.""" - for f in self.files: + for f in self.files(recursive=True): f.remove() def rename(self, new_name: str) -> None: @@ -200,3 +254,11 @@ def rename(self, new_name: str) -> None: parent_dir, _ = os.path.split(self.dirpath) new_path = os.path.join(parent_dir, new_name) self.update_filepath(new_path) + + def _add_file(self, file_name: str) -> None: + """Add a file to the directory.""" + self._files.append(file_name) + + def _add_subdirectory(self, subdirectory_name: str) -> None: + """Add a subdirectory to the directory.""" + self._subdirectories.append(subdirectory_name) diff --git a/src/codegen/sdk/core/file.py b/src/codegen/sdk/core/file.py index e5f0836f3..5520481f5 100644 --- a/src/codegen/sdk/core/file.py +++ b/src/codegen/sdk/core/file.py @@ -65,7 +65,6 @@ class File(Editable[None]): file_path: str path: Path node_type: Literal[NodeType.FILE] = NodeType.FILE - _directory: Directory | None _pending_imports: set[str] _binary: bool = False _range_index: RangeIndex @@ -80,7 +79,6 @@ def __init__(self, filepath: PathLike, ctx: CodebaseContext, ts_node: TSNode | N self.path = self.ctx.to_absolute(filepath) self.file_path = str(self.ctx.to_relative(self.path)) self.name = self.path.stem - self._directory = None self._binary = binary @property @@ -178,11 +176,7 @@ def directory(self) -> Directory | None: Returns: Directory | None: The directory containing this file, or None if the file is not in any directory. 
""" - return self._directory - - @noapidoc - def _set_directory(self, directory: Directory | None) -> None: - self._directory = directory + return self.ctx.get_directory(self.path.parent) @property def is_binary(self) -> bool: @@ -441,7 +435,6 @@ def __init__(self, ts_node: TSNode, filepath: PathLike, ctx: CodebaseContext) -> super().__init__(filepath, ctx, ts_node=ts_node) self._nodes.clear() self.ctx.filepath_idx[self.file_path] = self.node_id - self._directory = None self._pending_imports = set() try: self.parse(ctx) diff --git a/src/codegen/sdk/typescript/ts_config.py b/src/codegen/sdk/typescript/ts_config.py index ead6434b7..2c51b312e 100644 --- a/src/codegen/sdk/typescript/ts_config.py +++ b/src/codegen/sdk/typescript/ts_config.py @@ -188,14 +188,14 @@ def _precompute_import_aliases(self): continue # With the directory, try to grab the next available file and get its tsconfig. - if reference_dir and reference_dir.files: - next_file: TSFile = reference_dir.files[0] + if reference_dir and reference_dir.files(recursive=True): + next_file: TSFile = reference_dir.files(recursive=True)[0] else: - logger.warning(f"No next file found for reference during self_reference_import_aliases computation in _precompute_import_aliases: {reference.filepath}") + logger.warning(f"No next file found for reference during self_reference_import_aliases computation in _precompute_import_aliases: {reference.dirpath}") continue target_ts_config = next_file.ts_config if target_ts_config is None: - logger.warning(f"No tsconfig found for reference during self_reference_import_aliases computation in _precompute_import_aliases: {reference.filepath}") + logger.warning(f"No tsconfig found for reference during self_reference_import_aliases computation in _precompute_import_aliases: {reference.dirpath}") continue # With the tsconfig, grab its rootDirs and outDir diff --git a/tests/unit/codegen/sdk/core/test_directory.py b/tests/unit/codegen/sdk/core/test_directory.py index 32725ccc3..9d5e5fed4 
100644 --- a/tests/unit/codegen/sdk/core/test_directory.py +++ b/tests/unit/codegen/sdk/core/test_directory.py @@ -1,227 +1,200 @@ -import os -import types from pathlib import Path -from unittest.mock import MagicMock -import pytest - -from codegen.configs.models.codebase import DefaultCodebaseConfig -from codegen.sdk.codebase.codebase_context import CodebaseContext from codegen.sdk.codebase.factory.get_session import get_codebase_session -from codegen.sdk.core.directory import Directory -from codegen.sdk.core.file import File from codegen.shared.enums.programming_language import ProgrammingLanguage -@pytest.fixture -def mock_codebase_context(tmp_path): - mock = MagicMock(spec=CodebaseContext) - mock.transaction_manager = MagicMock() - mock.config = DefaultCodebaseConfig - mock.repo_path = tmp_path - mock.to_absolute = types.MethodType(CodebaseContext.to_absolute, mock) - mock.to_relative = types.MethodType(CodebaseContext.to_relative, mock) - mock.io = MagicMock() - return mock - - -@pytest.fixture -def subdir_path(tmp_path): - return tmp_path / "mock_dir" / "subdir" - - -@pytest.fixture -def dir_path(tmp_path): - return tmp_path / "mock_dir" - - -@pytest.fixture -def sub_dir(subdir_path, tmp_path): - return Directory(path=subdir_path.absolute(), dirpath=str(subdir_path.relative_to(tmp_path)), parent=None) - - -@pytest.fixture -def mock_file(dir_path, mock_codebase_context): - return File(filepath=dir_path / "example.py", ctx=mock_codebase_context) - - -@pytest.fixture -def mock_directory(tmp_path, dir_path, sub_dir, mock_file): - directory = Directory(path=dir_path.absolute(), dirpath=str(dir_path.relative_to(tmp_path)), parent=None) - directory.add_file(mock_file) - directory.add_subdirectory(sub_dir) - return directory - - -def test_directory_init(tmp_path, mock_directory): - """Test initialization of Directory object.""" - assert mock_directory.path == tmp_path / "mock_dir" - assert mock_directory.dirpath == "mock_dir" - assert mock_directory.parent is None - 
assert len(mock_directory.items) == 2 - assert mock_directory.items["subdir"] is not None - assert mock_directory.items["example.py"] is not None - - -def test_name_property(mock_directory): - """Test name property returns the basename of the dirpath.""" - assert mock_directory.name == "mock_dir" - - -def test_add_and_file(mock_directory, mock_codebase_context): - """Test adding a file to the directory.""" - mock_file = File(filepath=Path("mock_dir/example_2.py"), ctx=mock_codebase_context) - mock_directory.add_file(mock_file) - rel_path = os.path.relpath(mock_file.file_path, mock_directory.dirpath) - assert rel_path in mock_directory.items - assert mock_directory.items[rel_path] is mock_file - - -def test_remove_file(mock_directory, mock_file): - """Test removing a file from the directory.""" - mock_directory.remove_file(mock_file) - - rel_path = os.path.relpath(mock_file.file_path, mock_directory.dirpath) - assert rel_path not in mock_directory.items - - -def test_remove_file_by_path(mock_directory, mock_file): - """Test removing a file by path.""" - mock_directory.remove_file_by_path(Path(mock_file.file_path)) - - rel_path = os.path.relpath(mock_file.file_path, mock_directory.dirpath) - assert rel_path not in mock_directory.items - - -def test_get_file(mock_directory, mock_file): - """Test retrieving a file by name.""" - retrieved_file = mock_directory.get_file("example.py") - assert retrieved_file is mock_file - - # Case-insensitive match - retrieved_file_ci = mock_directory.get_file("EXAMPLE.PY", ignore_case=True) - assert retrieved_file_ci is mock_file - - -def test_get_file_not_found(mock_directory): - """Test retrieving a non-existing file returns None.""" - assert mock_directory.get_file("nonexistent.py") is None - - -def test_add_subdirectory(mock_directory, dir_path): - """Test adding a subdirectory.""" - new_subdir_path = dir_path / "new_subdir" - subdir = Directory(path=new_subdir_path.absolute(), dirpath=str(new_subdir_path.relative_to(dir_path)), 
parent=mock_directory) - mock_directory.add_subdirectory(subdir) - rel_path = os.path.relpath(subdir.dirpath, mock_directory.dirpath) - assert rel_path in mock_directory.items - assert mock_directory.items[rel_path] is subdir - - -def test_remove_subdirectory(mock_directory, sub_dir): - """Test removing a subdirectory.""" - mock_directory.add_subdirectory(sub_dir) - mock_directory.remove_subdirectory(sub_dir) - - rel_path = os.path.relpath(sub_dir.dirpath, mock_directory.dirpath) - assert rel_path not in mock_directory.items - - -def test_remove_subdirectory_by_path(mock_directory, sub_dir): - """Test removing a subdirectory by path.""" - mock_directory.remove_subdirectory_by_path(sub_dir.dirpath) - - rel_path = os.path.relpath(sub_dir.dirpath, mock_directory.dirpath) - assert rel_path not in mock_directory.items - - -def test_get_subdirectory(mock_directory, sub_dir): - """Test retrieving a subdirectory by name.""" - retrieved_subdir = mock_directory.get_subdirectory("subdir") - assert retrieved_subdir is sub_dir - - -def test_files_property(mock_directory, sub_dir, mock_codebase_context): - """Test the 'files' property returns all files recursively.""" - all_files = mock_directory.files - assert len(all_files) == 1 - - new_file = File(filepath=Path("mock_dir/example_2.py"), ctx=mock_codebase_context) - sub_dir.add_file(new_file) - - all_files = mock_directory.files - assert len(all_files) == 2 - assert new_file in all_files - - gen = mock_directory.files_generator() - files_list = list(gen) - assert len(files_list) == 2 - assert new_file in files_list - - -def test_subdirectories_property(mock_directory, sub_dir): - """Test the 'subdirectories' property returns all directories recursively.""" - all_subdirs = mock_directory.subdirectories - assert len(all_subdirs) == 1 - assert sub_dir in all_subdirs - - new_sub_dir = Directory(path=sub_dir.path / "new_subdir", dirpath=str(Path(sub_dir.dirpath) / "new_subdir"), parent=sub_dir) - 
sub_dir.add_subdirectory(new_sub_dir) - - all_subdirs = mock_directory.subdirectories - assert len(all_subdirs) == 2 - assert new_sub_dir in all_subdirs - - -def test_update_filepath(mock_directory, mock_codebase_context, mock_file): - """Test updating file paths when the directory path changes.""" - mock_directory.update_filepath("/absolute/new_mock_dir") - - # Verify the files have updated file paths - mock_codebase_context.transaction_manager.add_file_rename_transaction.assert_called_once_with(mock_file, "/absolute/new_mock_dir/example.py") +def test_directory_init(tmpdir) -> None: + with get_codebase_session( + tmpdir=tmpdir, + files={"mock_dir/example.py": "", "mock_dir/subdir/empty.py": ""}, + programming_language=ProgrammingLanguage.PYTHON, + ) as codebase: + # Get the directory and check its attributes + directory = codebase.get_directory("mock_dir") + assert directory.path == Path(tmpdir) / "mock_dir" + assert directory.dirpath == "mock_dir" + assert directory.parent is not None + assert len(directory.items) == 2 + assert set(directory.item_names) == {"example.py", "subdir"} -def test_remove(mock_directory, sub_dir, mock_codebase_context, mock_file): - mock_directory.remove() +def test_name_property(tmpdir) -> None: + with get_codebase_session( + tmpdir=tmpdir, + files={"mock_dir/example.py": ""}, + programming_language=ProgrammingLanguage.PYTHON, + ) as codebase: + # Get the directory and check its name + directory = codebase.get_directory("mock_dir") + assert directory.name == "mock_dir" - mock_codebase_context.transaction_manager.add_file_remove_transaction.assert_called_once_with(mock_file) +def test_add_and_file(tmpdir) -> None: + with get_codebase_session( + tmpdir=tmpdir, + files={"mock_dir/example.py": ""}, + programming_language=ProgrammingLanguage.PYTHON, + ) as codebase: + # Create a new file + codebase.create_file("mock_dir/example_2.py", "print('Hello, world!')") + codebase.commit() -def test_rename(mock_directory, mock_codebase_context, 
mock_file): - """Test renaming the directory.""" - mock_directory.rename("renamed_dir") - # This fails because it is not implemented to rename the directory itself. - # assert mock_directory.dirpath == "/absolute/renamed_dir" - mock_codebase_context.transaction_manager.add_file_rename_transaction.assert_called_once_with(mock_file, "renamed_dir/example.py") + # Check that the file was added + directory = codebase.get_directory("mock_dir") + assert len(directory.files) == 2 + assert "example_2.py" in directory.item_names -def test_iteration(mock_directory): - """Test iterating over the directory items.""" - items = list(mock_directory) # uses Directory.__iter__ - assert len(items) == 2 - assert mock_directory.items["subdir"] in items - assert mock_directory.items["example.py"] in items +def test_remove_file(tmpdir) -> None: + with get_codebase_session( + tmpdir=tmpdir, + files={"mock_dir/example.py": ""}, + programming_language=ProgrammingLanguage.PYTHON, + ) as codebase: + # Remove the file + file = codebase.get_file("mock_dir/example.py") + file.remove() + codebase.commit() + # Check that the file was removed + directory = codebase.get_directory("mock_dir", optional=True) + assert directory is None -def test_contains(mock_directory): - """Test the containment checks using the 'in' operator.""" - assert "subdir" in mock_directory - assert "example.py" in mock_directory +def test_get_file(tmpdir) -> None: + with get_codebase_session( + tmpdir=tmpdir, + files={"mock_dir/example.py": ""}, + programming_language=ProgrammingLanguage.PYTHON, + ) as codebase: + # Get the directory and get the file + directory = codebase.get_directory("mock_dir") + mock_file = codebase.get_file("mock_dir/example.py") + retrieved_file = directory.get_file("example.py") + assert retrieved_file is mock_file + + # Case-insensitive match + retrieved_file_ci = directory.get_file("EXAMPLE.PY", ignore_case=True) + assert retrieved_file_ci is mock_file + + +def test_get_file_not_found(tmpdir) -> 
None: + with get_codebase_session(tmpdir=tmpdir, files={"mock_dir/example.py": ""}) as codebase: + # Get the directory and check that the file is not found + directory = codebase.get_directory("mock_dir") + assert directory.get_file("nonexistent.py", ignore_case=True) is None + + +def test_add_subdirectory(tmpdir) -> None: + with get_codebase_session(tmpdir=tmpdir, files={"mock_dir/example.py": ""}) as codebase: + # Get the directory and add a file in a subdirectory + directory = codebase.get_directory("mock_dir") + codebase.create_file("mock_dir/subdir/example.py", "print('Hello, world!')") + codebase.commit() + + # Get the directory and check that the file is in the subdirectory + directory = codebase.get_directory("mock_dir") + assert directory.get_file("subdir/example.py") is not None + + # Get the subdirectory and check that the file is in the subdirectory + subdir = codebase.get_directory("mock_dir/subdir") + assert subdir.get_file("example.py") is not None + + +def test_remove_subdirectory(tmpdir) -> None: + with get_codebase_session(tmpdir=tmpdir, files={"mock_dir/subdir/empty.py": ""}) as codebase: + # Get the directory and remove the subdirectory + directory = codebase.get_directory("mock_dir") + subdir = codebase.get_directory("mock_dir/subdir") + directory.remove() + codebase.commit() + + # Check that the subdirectory was removed + assert codebase.get_directory("mock_dir/subdir", optional=True) is None + + +def test_get_subdirectory(tmpdir) -> None: + with get_codebase_session(tmpdir=tmpdir, files={"mock_dir/subdir/empty.py": ""}) as codebase: + # Get the directory and get the subdirectory + directory = codebase.get_directory("mock_dir") + sub_dir = codebase.get_directory("mock_dir/subdir") + retrieved_subdir = directory.get_subdirectory("subdir") + assert retrieved_subdir is sub_dir + + +def test_update_filepath(tmpdir) -> None: + with get_codebase_session(tmpdir=tmpdir, files={"mock_dir/example.py": ""}) as codebase: + # Get the directory and update 
the filepath + directory = codebase.get_directory("mock_dir") + directory.update_filepath("new_mock_dir/new_mock_subdir") + codebase.commit() + + # Check that the directory was updated + directory = codebase.get_directory("new_mock_dir/new_mock_subdir") + assert directory is not None + assert codebase.get_directory("mock_dir", optional=True) is None + assert directory.dirpath == "new_mock_dir/new_mock_subdir" + assert directory.name == "new_mock_subdir" + assert directory.path == Path(tmpdir) / "new_mock_dir/new_mock_subdir" + assert directory.parent is not None + assert len(directory.items) == 1 + assert "example.py" in directory.item_names + + # Check that the file was updated + file = codebase.get_file("new_mock_dir/new_mock_subdir/example.py") + assert file is not None + assert codebase.get_file("mock_dir/example.py", optional=True) is None + + +def test_remove(tmpdir) -> None: + with get_codebase_session(tmpdir=tmpdir, files={"mock_dir/example.py": ""}) as codebase: + # Get the directory and remove it + directory = codebase.get_directory("mock_dir") + directory.remove() + codebase.commit() + + # Check that the directory was removed + assert codebase.get_directory("mock_dir", optional=True) is None + assert codebase.get_file("mock_dir/example.py", optional=True) is None + + +def test_rename(tmpdir) -> None: + with get_codebase_session(tmpdir=tmpdir, files={"mock_dir/example.py": ""}) as codebase: + # Get the directory and rename it + directory = codebase.get_directory("mock_dir") + directory.rename("renamed_dir") + codebase.commit() + # Check that the directory was renamed + directory = codebase.get_directory("renamed_dir") + assert directory is not None + assert codebase.get_directory("mock_dir", optional=True) is None + assert directory.dirpath == "renamed_dir" + assert directory.name == "renamed_dir" + assert directory.path == Path(tmpdir) / "renamed_dir" + assert directory.parent is not None + assert len(directory.items) == 1 + assert "example.py" in 
directory.item_names + + # Check that the file was renamed + file = codebase.get_file("renamed_dir/example.py") + assert file is not None + assert codebase.get_file("mock_dir/example.py", optional=True) is None -def test_len(mock_directory): - """Test the __len__ method returns the number of items.""" - assert len(mock_directory) == 2 +def test_contains(tmpdir) -> None: + with get_codebase_session(tmpdir=tmpdir, files={"mock_dir/example.py": "", "mock_dir/subdir/empty.py": ""}) as codebase: + # Get the directory and check the contains + directory = codebase.get_directory("mock_dir") + assert "subdir" in directory + assert "example.py" in directory -def test_get_set_delete_item(mock_directory): - """Test __getitem__, __setitem__, and __delitem__ methods.""" - mock_file = mock_directory.items["example.py"] - mock_directory["example.py"] = mock_file - assert mock_directory["example.py"] == mock_file - with pytest.raises(KeyError, match="subdir_2"): - del mock_directory["subdir_2"] +def test_len(tmpdir) -> None: + with get_codebase_session(tmpdir=tmpdir, files={"mock_dir/example.py": "", "mock_dir/subdir/empty.py": ""}) as codebase: + # Get the directory and check the length + directory = codebase.get_directory("mock_dir") + assert len(directory) == 2 def test_unicode_in_filename(tmpdir) -> None: @@ -231,110 +204,7 @@ def test_unicode_in_filename(tmpdir) -> None: programming_language=ProgrammingLanguage.PYTHON, verify_output=True, ) as codebase: + # Get the file and check the content file = codebase.get_file("test/我很喜欢冰激淋/test-file 12'3_🍦.py") assert file is not None assert file.content == "print('Hello, world!')" - - -def test_contains_dirs_and_files(tmpdir) -> None: - # language=python - with get_codebase_session( - tmpdir=tmpdir, - files={ - "file0.py": "", - "main_dir/file1.py": "", - "main_dir/file2.py": "", - "main_dir/sub_dir/file3.py": "", - "main_dir/sub_dir/sub_sub_dir/file4.py": "", - "main_dir/sub_dir/sub_sub_dir/file5.py": "", - 
"main_dir/sub_dir/sub_sub_dir/sub_sub_sub_dir/file6.py": "", - "main_dir/sub_dir/sub_sub_dir/sub_sub_sub_dir/sub_sub_sub_sub_dir/sub_sub_sub_sub_sub_dir/file7.py": "", - "lonely_dir/file_lonely.py": "", - }, - ) as codebase: - file0 = codebase.get_file("file0.py") - main_dir = codebase.get_directory("main_dir") - file1 = codebase.get_file("main_dir/file1.py") - file2 = codebase.get_file("main_dir/file2.py") - sub_dir = codebase.get_directory("main_dir/sub_dir") - file3 = codebase.get_file("main_dir/sub_dir/file3.py") - sub_sub_dir = codebase.get_directory("main_dir/sub_dir/sub_sub_dir") - file4 = codebase.get_file("main_dir/sub_dir/sub_sub_dir/file4.py") - file5 = codebase.get_file("main_dir/sub_dir/sub_sub_dir/file5.py") - sub_sub_sub_dir = codebase.get_directory("main_dir/sub_dir/sub_sub_dir/sub_sub_sub_dir") - file6 = codebase.get_file("main_dir/sub_dir/sub_sub_dir/sub_sub_sub_dir/file6.py") - sub_sub_sub_sub_dir = codebase.get_directory("main_dir/sub_dir/sub_sub_dir/sub_sub_sub_dir/sub_sub_sub_sub_dir") - sub_sub_sub_sub_sub_dir = codebase.get_directory("main_dir/sub_dir/sub_sub_dir/sub_sub_sub_dir/sub_sub_sub_sub_dir/sub_sub_sub_sub_sub_dir") - file7 = codebase.get_file("main_dir/sub_dir/sub_sub_dir/sub_sub_sub_dir/sub_sub_sub_sub_dir/sub_sub_sub_sub_sub_dir/file7.py") - directory_stack = main_dir.subdirectories - directory_stack.append(main_dir) - main_directory_stack_no_root = directory_stack - file_stack = [file7] - for directory in directory_stack: - # ignore self - if directory != sub_sub_sub_sub_sub_dir: - assert sub_sub_sub_sub_sub_dir in directory - else: - # A dir is not in itself! 
- assert sub_sub_sub_sub_sub_dir not in directory - for file in file_stack: - assert file in directory - - directory_stack.remove(sub_sub_sub_sub_sub_dir) - - for directory in directory_stack: - if directory != sub_sub_sub_sub_dir: - assert sub_sub_sub_sub_dir in directory - - for file in file_stack: - assert file in directory - - directory_stack.remove(sub_sub_sub_sub_dir) - file_stack.append(file6) - - for directory in directory_stack: - if directory != sub_sub_sub_dir: - assert sub_sub_sub_dir in directory - - for file in file_stack: - assert file in directory - - directory_stack.remove(sub_sub_sub_dir) - file_stack.append(file5) - file_stack.append(file4) - - for directory in directory_stack: - if directory != sub_sub_dir: - assert sub_sub_dir in directory - - for file in file_stack: - assert file in directory - - directory_stack.remove(sub_sub_dir) - file_stack.append(file3) - - for directory in directory_stack: - if directory != sub_dir: - assert sub_dir in directory - for file in file_stack: - assert file in directory - - directory_stack.remove(sub_dir) - file_stack.append(file2) - file_stack.append(file1) - - for directory in directory_stack: - if directory != main_dir: - assert main_dir in directory - for file in file_stack: - assert file in directory - - lonely_dir = codebase.get_directory("lonely_dir") - lonely_file = codebase.get_file("lonely_dir/file_lonely.py") - - for directory in main_directory_stack_no_root: - assert file0 not in directory - assert lonely_dir not in directory - assert lonely_file not in directory - - assert lonely_file in lonely_dir diff --git a/tests/unit/codegen/sdk/python/directory/test_directory.py b/tests/unit/codegen/sdk/python/directory/test_directory.py index 30176e20e..2b65fddfc 100644 --- a/tests/unit/codegen/sdk/python/directory/test_directory.py +++ b/tests/unit/codegen/sdk/python/directory/test_directory.py @@ -57,6 +57,7 @@ class C: assert directory.get_file("file1.py") is not None assert 
directory.get_file("file2.py") is not None assert directory.get_file("file3.py") is not None + # Test ignore_case assert directory.get_file("FILE1.PY", ignore_case=True) is not None assert directory.get_file("file1.py", ignore_case=True) is not None @@ -98,9 +99,12 @@ class B: directory = codebase.get_directory("dir") assert directory.name == "dir" assert directory.parent == codebase.get_directory("") - assert len(directory.files) == 2 + assert len(directory.files) == 1 assert len(directory.subdirectories) == 1 - assert {f.filepath for f in directory.files} == {"dir/file1.py", "dir/subdir/file2.py"} + assert len(directory.files(recursive=True)) == 2 + assert len(directory.tree) == 3 + assert {f.filepath for f in directory.files} == {"dir/file1.py"} + assert {f.filepath for f in directory.files(recursive=True)} == {"dir/file1.py", "dir/subdir/file2.py"} assert directory.symbols == codebase.symbols assert directory.global_vars == codebase.global_vars assert directory.classes == codebase.classes @@ -150,11 +154,28 @@ class B: def test_subdirectories_listing_odd_filetypes(tmpdir) -> None: # language=python - files = {"docs/sub/test_fil``e1.mdx": "", "docs/sub/file2.txt": "", "docs/test.py": "", "docs/py/test.py": "", "docs/json/test.json": "", "docs/rand/odd/1.py": "", "docs/rand/even/2.txt": ""} - expected_tree = {"docs/rand": ["odd", "even"], "docs/rand/odd": ["1.py"], "docs/rand/even": [], "docs/py": ["test.py"], "docs/json": [], "docs/sub": []} + files = { + "docs/test.py": "", + "docs/py/test.py": "", + "docs/json/test.json": "", + "docs/sub/test_file1.mdx": "", + "docs/sub/file2.txt": "", + "docs/rand/odd/1.py": "", + "docs/rand/even/2.txt": "", + } + expected_tree = { + "": {"docs"}, + "docs": {"sub", "rand", "py", "json", "test.py"}, + "docs/py": {"test.py"}, + "docs/json": {"test.json"}, + "docs/sub": {"test_file1.mdx", "file2.txt"}, + "docs/rand": {"odd", "even"}, + "docs/rand/odd": {"1.py"}, + "docs/rand/even": {"2.txt"}, + } with 
get_codebase_session(tmpdir=tmpdir, files=files) as codebase: docs = codebase.get_directory("docs") - subdirectories = docs.subdirectories + subdirectories = docs.subdirectories(recursive=True) for subdir in subdirectories: assert subdir.dirpath in expected_tree.keys() - assert list(subdir.items.keys()) == expected_tree[subdir.dirpath] + assert set(subdir.item_names) == expected_tree[subdir.dirpath] diff --git a/tests/unit/codegen/sdk/python/directory/test_directory_remove.py b/tests/unit/codegen/sdk/python/directory/test_directory_remove.py index 7514fa920..307949649 100644 --- a/tests/unit/codegen/sdk/python/directory/test_directory_remove.py +++ b/tests/unit/codegen/sdk/python/directory/test_directory_remove.py @@ -23,7 +23,7 @@ def c(): assert codebase.get_file("dir/subdir/file3.py") is not None assert codebase.get_directory("dir") is not None assert codebase.get_directory("dir/subdir") is not None - assert {f.filepath for f in codebase.get_directory("dir").files} == {"dir/file1.py", "dir/file2.py", "dir/subdir/file3.py"} + assert {f.filepath for f in codebase.get_directory("dir").files(recursive=True)} == {"dir/file1.py", "dir/file2.py", "dir/subdir/file3.py"} assert {d.dirpath for d in codebase.directories} == {"", "dir", "dir/subdir"} codebase.get_directory("dir").remove() codebase.commit()