diff --git a/.github/actions/report/action.yml b/.github/actions/report/action.yml index 80f1170c4..4a9a92123 100644 --- a/.github/actions/report/action.yml +++ b/.github/actions/report/action.yml @@ -10,18 +10,18 @@ inputs: runs: using: "composite" steps: - - name: Upload test results to Codecov - if: ${{ !cancelled() }} - uses: codecov/test-results-action@v1 - with: - token: ${{ inputs.codecov_token }} - files: build/test-results/test/TEST.xml - - name: Upload coverage reports to Codecov - if: (success() || failure()) # always upload coverage reports even if the tests fail - continue-on-error: true - uses: codecov/codecov-action@v5.4.3 - with: - token: ${{ inputs.codecov_token }} - files: coverage.xml - flags: ${{ inputs.flag }} - plugins: pycoverage,compress-pycoverage + - name: Upload test results to Codecov + if: ${{ !cancelled() }} + uses: codecov/test-results-action@v1 + with: + token: ${{ inputs.codecov_token }} + files: build/test-results/test/TEST.xml + - name: Upload coverage reports to Codecov + if: (success() || failure()) # always upload coverage reports even if the tests fail + continue-on-error: true + uses: codecov/codecov-action@v5.4.3 + with: + token: ${{ inputs.codecov_token }} + files: coverage.xml + flags: ${{ inputs.flag }} + plugins: pycoverage diff --git a/.github/codecov.yml b/.github/codecov.yml index 009c99717..d918be7c4 100644 --- a/.github/codecov.yml +++ b/.github/codecov.yml @@ -62,7 +62,7 @@ cli: plugins: pycoverage: report_type: "json" - include_contexts: true + # include_contexts: true runners: pytest: coverage_root: "./" diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 54f6f66e0..6aa953d41 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -38,6 +38,7 @@ jobs: uv run pytest \ -n auto \ --cov src \ + --cov-report=json \ --timeout 15 \ -o junit_suite_name="${{github.job}}" \ tests/unit @@ -92,7 +93,7 @@ jobs: parse-tests: needs: access-check if: contains(github.event.pull_request.labels.*.name, 'parse-tests') || github.event_name == 'push' || github.event_name == 'workflow_dispatch' - runs-on: ubuntu-latest-32 + runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 with: diff --git a/README.md b/README.md index 33e7e6f73..e3cb04bbd 100644 --- a/README.md +++ b/README.md @@ -76,7 +76,7 @@ gs notebook See [Getting Started](https://graph-sitter.com/introduction/getting-started) for a full tutorial. ``` -from codegen import Codebase +from graph_sitter import Codebase ``` ## Troubleshooting diff --git a/src/graph_sitter/cli/commands/run/run_local.py b/src/graph_sitter/cli/commands/run/run_local.py index 7e0b42da9..f3d7fd73c 100644 --- a/src/graph_sitter/cli/commands/run/run_local.py +++ b/src/graph_sitter/cli/commands/run/run_local.py @@ -1,12 +1,15 @@ from pathlib import Path import rich +import rich.progress from rich.panel import Panel from rich.status import Status from graph_sitter.cli.auth.session import CodegenSession from graph_sitter.cli.utils.function_finder import DecoratedFunction from graph_sitter.codebase.config import ProjectConfig +from graph_sitter.codebase.progress.progress import Progress +from graph_sitter.codebase.progress.task import Task from graph_sitter.core.codebase import Codebase from graph_sitter.git.repo_operator.repo_operator import RepoOperator from graph_sitter.git.schemas.repo_config import RepoConfig @@ -14,6 +17,34 @@ from graph_sitter.shared.enums.programming_language import ProgrammingLanguage +class RichTask(Task): + _task: rich.progress.Task + _progress: rich.progress.Progress + _total: int | None + + def __init__(self, task: rich.progress.Task, progress: rich.progress.Progress, total: int | None = None) -> None: + self._task = task + self._progress = progress + self._total = total + + def update(self, message: str, count: int | None = None) -> None: + self._progress.update(self._task, description=message, completed=count) + + def end(self) -> None: + self._progress.update(self._task, completed=self._total) + + +class RichProgress(Progress[RichTask]): + _progress: rich.progress.Progress + + def __init__(self, progress: rich.progress.Progress) -> None: + self._progress = progress + + def begin(self, message: str, count: int | None = None) -> RichTask: + task = self._progress.add_task(description=message, total=count) + return RichTask(task, progress=self._progress, total=count) + + def parse_codebase( repo_path: Path, subdirectories: list[str] | None = None, @@ -27,15 +58,24 @@ def parse_codebase( Returns: Parsed Codebase object """ - codebase = Codebase( - projects=[ - ProjectConfig( - repo_operator=RepoOperator(repo_config=RepoConfig.from_repo_path(repo_path=repo_path)), - subdirectories=subdirectories, - programming_language=language or determine_project_language(repo_path), - ) - ] - ) + with rich.progress.Progress( + rich.progress.TextColumn("[progress.description]{task.description}"), + rich.progress.BarColumn(bar_width=None), + rich.progress.TaskProgressColumn(), + rich.progress.TimeRemainingColumn(), + rich.progress.TimeElapsedColumn(), + expand=True, + ) as progress: + codebase = Codebase( + projects=[ + ProjectConfig( + repo_operator=RepoOperator(repo_config=RepoConfig.from_repo_path(repo_path=repo_path)), + subdirectories=subdirectories, + programming_language=language or determine_project_language(repo_path), + ) + ], + progress=RichProgress(progress), + ) return codebase @@ -51,12 +91,11 @@ def run_local( function: The function to run diff_preview: Number of lines of diff to preview (None for all) """ + rich.print("Parsing codebase at {session.repo_path} with subdirectories {function.subdirectories or 'ALL'} and language {function.language or 'AUTO'} ...") # Parse codebase and run - with Status(f"[bold]Parsing codebase at {session.repo_path} with subdirectories {function.subdirectories or 'ALL'} and language {function.language or 'AUTO'} ...", spinner="dots") as status: - codebase = parse_codebase(repo_path=session.repo_path, subdirectories=function.subdirectories, language=function.language) - status.update("[bold green]✓ Parsed codebase") - - status.update("[bold]Running codemod...") + codebase = parse_codebase(repo_path=session.repo_path, subdirectories=function.subdirectories, language=function.language) + with Status("[bold]Running codemod...", spinner="dots") as status: + status.update("") function.run(codebase) # Run the function status.update("[bold green]✓ Completed codemod") diff --git a/src/graph_sitter/codebase/codebase_context.py b/src/graph_sitter/codebase/codebase_context.py index f1edb8be5..f18b6438f 100644 --- a/src/graph_sitter/codebase/codebase_context.py +++ b/src/graph_sitter/codebase/codebase_context.py @@ -4,7 +4,7 @@ from collections import Counter, defaultdict from contextlib import contextmanager from enum import IntEnum, auto, unique -from functools import lru_cache +from functools import cached_property, lru_cache from os import PathLike from pathlib import Path from typing import TYPE_CHECKING, Any @@ -215,23 +215,19 @@ def __init__( def __repr__(self): return self.__class__.__name__ - @property + @cached_property def _graph(self) -> PyDiGraph[Importable, Edge]: if not self.__graph_ready: logger.info("Lazily Computing Graph") self.build_graph(self.projects[0].repo_operator) return self.__graph - @_graph.setter - def _graph(self, value: PyDiGraph[Importable, Edge]) -> None: - self.__graph = value - @stopwatch @commiter def build_graph(self, repo_operator: RepoOperator) -> None: """Builds a codebase graph based on the current file state of the given repo operator""" self.__graph_ready = True - self._graph.clear() + self.__graph.clear() # =====[ Add all files to the graph in parallel ]===== syncs = defaultdict(lambda: []) @@ -492,22 +488,22 @@ def _process_diff_files(self, files_to_sync: Mapping[SyncType, list[Path]], incr for file_path in files_to_sync[SyncType.REPARSE]: file = self.get_file(file_path) file.remove_internal_edges() - - task = self.progress.begin("Reparsing updated files", count=len(files_to_sync[SyncType.REPARSE])) files_to_resolve = [] - # Step 4: Reparse updated files - for idx, file_path in enumerate(files_to_sync[SyncType.REPARSE]): - task.update(f"Reparsing {self.to_relative(file_path)}", count=idx) - file = self.get_file(file_path) - to_resolve.extend(file.unparse(reparse=True)) - to_resolve = list(filter(lambda node: self.has_node(node.node_id) and node is not None, to_resolve)) - file.sync_with_file_content() - files_to_resolve.append(file) - task.end() + if len(files_to_sync[SyncType.REPARSE]) > 0: + task = self.progress.begin("Reparsing updated files", count=len(files_to_sync[SyncType.REPARSE])) + # Step 4: Reparse updated files + for idx, file_path in enumerate(files_to_sync[SyncType.REPARSE]): + task.update(f"Reparsing {self.to_relative(file_path)}", count=idx) + file = self.get_file(file_path) + to_resolve.extend(file.unparse(reparse=True)) + to_resolve = list(filter(lambda node: self.has_node(node.node_id) and node is not None, to_resolve)) + file.sync_with_file_content() + files_to_resolve.append(file) + task.end() # Step 5: Add new files as nodes to graph (does not yet add edges) - task = self.progress.begin("Adding new files", count=len(files_to_sync[SyncType.ADD])) + task = self.progress.begin("Parsing new files", count=len(files_to_sync[SyncType.ADD])) for idx, filepath in enumerate(files_to_sync[SyncType.ADD]): - task.update(f"Adding {self.to_relative(filepath)}", count=idx) + task.update(f"Parsing {self.to_relative(filepath)}", count=idx) try: content = self.io.read_text(filepath) except UnicodeDecodeError as e: @@ -624,6 +620,10 @@ def get_edges(self) -> list[tuple[NodeId, NodeId, EdgeType, Usage | None]]: return [(x[0], x[1], x[2].type, x[2].usage) for x in self._graph.weighted_edge_list()] def get_file(self, file_path: os.PathLike, ignore_case: bool = False) -> SourceFile | None: + # Performance hack: just use the relative path + node_id = self.filepath_idx.get(str(file_path), None) + if node_id is not None: + return self.get_node(node_id) # If not part of repo path, return None absolute_path = self.to_absolute(file_path) if not self.is_subdir(absolute_path) and not self.config.allow_external: @@ -752,6 +752,7 @@ def to_relative(self, filepath: PathLike | str) -> Path: return path.relative_to(self.repo_path) return path + @lru_cache(maxsize=10000) def is_subdir(self, path: PathLike | str) -> bool: path = self.to_absolute(path) return path == Path(self.repo_path) or path.is_relative_to(self.repo_path) or Path(self.repo_path) in path.parents diff --git a/src/graph_sitter/codebase/io/file_io.py b/src/graph_sitter/codebase/io/file_io.py index 1bfb763d0..03b08d465 100644 --- a/src/graph_sitter/codebase/io/file_io.py +++ b/src/graph_sitter/codebase/io/file_io.py @@ -1,4 +1,5 @@ from concurrent.futures import ThreadPoolExecutor +from functools import lru_cache from pathlib import Path from graph_sitter.codebase.io.io import IO, BadWriteError @@ -17,6 +18,7 @@ def __init__(self, allowed_paths: list[Path] | None = None): self.files = {} self.allowed_paths = allowed_paths + @lru_cache(maxsize=10000) def _verify_path(self, path: Path) -> None: if self.allowed_paths is not None: if not any(path.resolve().is_relative_to(p.resolve()) for p in self.allowed_paths): diff --git a/src/graph_sitter/core/file.py b/src/graph_sitter/core/file.py index 600411ce1..3ea971d07 100644 --- a/src/graph_sitter/core/file.py +++ b/src/graph_sitter/core/file.py @@ -898,6 +898,8 @@ def resolve_name(self, name: str, start_byte: int | None = None, strict: bool = Symbol | Import | WildcardImport: The resolved symbol, import, or wildcard import that matches the name and scope requirements. Yields at most one result. """ + from graph_sitter.core.function import Function + if resolved := self.valid_symbol_names.get(name): # If we have a start_byte and the resolved symbol is after it, # we need to look for earlier definitions of the symbol @@ -905,7 +907,7 @@ def resolve_name(self, name: str, start_byte: int | None = None, strict: bool = # Search backwards through symbols to find the most recent definition # that comes before our start_byte position for symbol in reversed(self.symbols): - if symbol.start_byte <= start_byte and symbol.name == name: + if symbol.name == name and (start_byte is None or (symbol.start_byte if isinstance(symbol, Class | Function) else symbol.end_byte) <= start_byte): yield symbol return # If strict mode and no valid symbol found, return nothing diff --git a/src/graph_sitter/core/import_resolution.py b/src/graph_sitter/core/import_resolution.py index 4eb485d43..09589574f 100644 --- a/src/graph_sitter/core/import_resolution.py +++ b/src/graph_sitter/core/import_resolution.py @@ -711,6 +711,11 @@ def _compute_dependencies(self, usage_type: UsageKind, dest: HasName | None = No def filepath(self) -> str: return self.imp.filepath + @property + @noapidoc + def parent(self) -> Editable: + return self.imp.parent + class ExternalImportResolver: def resolve(self, imp: Import) -> str | None: diff --git a/src/graph_sitter/core/interfaces/editable.py b/src/graph_sitter/core/interfaces/editable.py index c7bac1fe1..47e929357 100644 --- a/src/graph_sitter/core/interfaces/editable.py +++ b/src/graph_sitter/core/interfaces/editable.py @@ -130,7 +130,7 @@ def __init__(self, ts_node: TSNode, file_node_id: NodeId, ctx: CodebaseContext, assert (parent.ts_node, parent.__class__) not in seen seen.add((parent.ts_node, parent.__class__)) parent = parent.parent - if self.file and self.ctx.config.full_range_index: + if self.ctx.config.full_range_index and self.file: self._add_to_index def __hash__(self): @@ -370,7 +370,7 @@ def previous_named_sibling(self) -> Editable[Parent] | None: return self.parent._parse_expression(previous_named_sibling_node) - @property + @cached_property def file(self) -> SourceFile: """The file object that this Editable instance belongs to. diff --git a/src/graph_sitter/typescript/namespace.py b/src/graph_sitter/typescript/namespace.py index 524b97a5d..e1bf94833 100644 --- a/src/graph_sitter/typescript/namespace.py +++ b/src/graph_sitter/typescript/namespace.py @@ -1,13 +1,15 @@ from __future__ import annotations -from typing import TYPE_CHECKING, override +from typing import TYPE_CHECKING, Self, override from graph_sitter.compiled.autocommit import reader +from graph_sitter.compiled.resolution import ResolutionStack from graph_sitter.compiled.sort import sort_editables from graph_sitter.compiled.utils import cached_property from graph_sitter.core.autocommit import commiter from graph_sitter.core.autocommit.decorators import writer from graph_sitter.core.export import Export +from graph_sitter.core.interfaces.chainable import Chainable from graph_sitter.core.interfaces.has_attribute import HasAttribute from graph_sitter.core.interfaces.has_name import HasName from graph_sitter.enums import SymbolType @@ -22,7 +24,7 @@ from graph_sitter.typescript.type_alias import TSTypeAlias if TYPE_CHECKING: - from collections.abc import Sequence + from collections.abc import Generator, Sequence from tree_sitter import Node as TSNode @@ -41,7 +43,13 @@ @ts_apidoc -class TSNamespace(TSSymbol, TSHasBlock, HasName, HasAttribute): +class TSNamespace( + TSSymbol, + TSHasBlock, + Chainable, + HasName, + HasAttribute, +): """Representation of a namespace module in TypeScript. Attributes: @@ -398,3 +406,11 @@ def resolve_attribute(self, name: str) -> Symbol | None: The resolved symbol or None if not found """ return self.valid_import_names.get(name, None) + + @override + def _resolved_types(self) -> Generator[ResolutionStack[Self], None, None]: + """Returns the resolved types for this namespace. + + This includes all exports and the namespace itself. + """ + yield ResolutionStack(self)