Skip to content

Commit 93c1b71

Browse files
perf: Improve performance and make parse tests pass (#46)
- **perf improvement**
- **use basic runners**

---------

Co-authored-by: Codegen Team (Internal) <135641899+codegen-team@users.noreply.github.com>
1 parent 0a99986 commit 93c1b71

File tree

11 files changed: +124 / -58 lines changed

.github/actions/report/action.yml

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -10,18 +10,18 @@ inputs:
1010
runs:
1111
using: "composite"
1212
steps:
13-
- name: Upload test results to Codecov
14-
if: ${{ !cancelled() }}
15-
uses: codecov/test-results-action@v1
16-
with:
17-
token: ${{ inputs.codecov_token }}
18-
files: build/test-results/test/TEST.xml
19-
- name: Upload coverage reports to Codecov
20-
if: (success() || failure()) # always upload coverage reports even if the tests fail
21-
continue-on-error: true
22-
uses: codecov/codecov-action@v5.4.3
23-
with:
24-
token: ${{ inputs.codecov_token }}
25-
files: coverage.xml
26-
flags: ${{ inputs.flag }}
27-
plugins: pycoverage,compress-pycoverage
13+
- name: Upload test results to Codecov
14+
if: ${{ !cancelled() }}
15+
uses: codecov/test-results-action@v1
16+
with:
17+
token: ${{ inputs.codecov_token }}
18+
files: build/test-results/test/TEST.xml
19+
- name: Upload coverage reports to Codecov
20+
if: (success() || failure()) # always upload coverage reports even if the tests fail
21+
continue-on-error: true
22+
uses: codecov/codecov-action@v5.4.3
23+
with:
24+
token: ${{ inputs.codecov_token }}
25+
files: coverage.xml
26+
flags: ${{ inputs.flag }}
27+
plugins: pycoverage

.github/codecov.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ cli:
6262
plugins:
6363
pycoverage:
6464
report_type: "json"
65-
include_contexts: true
65+
# include_contexts: true
6666
runners:
6767
pytest:
6868
coverage_root: "./"

.github/workflows/test.yml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ jobs:
3838
uv run pytest \
3939
-n auto \
4040
--cov src \
41+
--cov-report=json \
4142
--timeout 15 \
4243
-o junit_suite_name="${{github.job}}" \
4344
tests/unit
@@ -92,7 +93,7 @@ jobs:
9293
parse-tests:
9394
needs: access-check
9495
if: contains(github.event.pull_request.labels.*.name, 'parse-tests') || github.event_name == 'push' || github.event_name == 'workflow_dispatch'
95-
runs-on: ubuntu-latest-32
96+
runs-on: ubuntu-latest
9697
steps:
9798
- uses: actions/checkout@v4
9899
with:

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ gs notebook
7676
See [Getting Started](https://graph-sitter.com/introduction/getting-started) for a full tutorial.
7777

7878
```
79-
from codegen import Codebase
79+
from graph_sitter import Codebase
8080
```
8181

8282
## Troubleshooting

src/graph_sitter/cli/commands/run/run_local.py

Lines changed: 53 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,50 @@
11
from pathlib import Path
22

33
import rich
4+
import rich.progress
45
from rich.panel import Panel
56
from rich.status import Status
67

78
from graph_sitter.cli.auth.session import CodegenSession
89
from graph_sitter.cli.utils.function_finder import DecoratedFunction
910
from graph_sitter.codebase.config import ProjectConfig
11+
from graph_sitter.codebase.progress.progress import Progress
12+
from graph_sitter.codebase.progress.task import Task
1013
from graph_sitter.core.codebase import Codebase
1114
from graph_sitter.git.repo_operator.repo_operator import RepoOperator
1215
from graph_sitter.git.schemas.repo_config import RepoConfig
1316
from graph_sitter.git.utils.language import determine_project_language
1417
from graph_sitter.shared.enums.programming_language import ProgrammingLanguage
1518

1619

20+
class RichTask(Task):
21+
_task: rich.progress.Task
22+
_progress: rich.progress.Progress
23+
_total: int | None
24+
25+
def __init__(self, task: rich.progress.Task, progress: rich.progress.Progress, total: int | None = None) -> None:
26+
self._task = task
27+
self._progress = progress
28+
self._total = total
29+
30+
def update(self, message: str, count: int | None = None) -> None:
31+
self._progress.update(self._task, description=message, completed=count)
32+
33+
def end(self) -> None:
34+
self._progress.update(self._task, completed=self._total)
35+
36+
37+
class RichProgress(Progress[RichTask]):
38+
_progress: rich.progress.Progress
39+
40+
def __init__(self, progress: rich.progress.Progress) -> None:
41+
self._progress = progress
42+
43+
def begin(self, message: str, count: int | None = None) -> RichTask:
44+
task = self._progress.add_task(description=message, total=count)
45+
return RichTask(task, progress=self._progress, total=count)
46+
47+
1748
def parse_codebase(
1849
repo_path: Path,
1950
subdirectories: list[str] | None = None,
@@ -27,15 +58,24 @@ def parse_codebase(
2758
Returns:
2859
Parsed Codebase object
2960
"""
30-
codebase = Codebase(
31-
projects=[
32-
ProjectConfig(
33-
repo_operator=RepoOperator(repo_config=RepoConfig.from_repo_path(repo_path=repo_path)),
34-
subdirectories=subdirectories,
35-
programming_language=language or determine_project_language(repo_path),
36-
)
37-
]
38-
)
61+
with rich.progress.Progress(
62+
rich.progress.TextColumn("[progress.description]{task.description}"),
63+
rich.progress.BarColumn(bar_width=None),
64+
rich.progress.TaskProgressColumn(),
65+
rich.progress.TimeRemainingColumn(),
66+
rich.progress.TimeElapsedColumn(),
67+
expand=True,
68+
) as progress:
69+
codebase = Codebase(
70+
projects=[
71+
ProjectConfig(
72+
repo_operator=RepoOperator(repo_config=RepoConfig.from_repo_path(repo_path=repo_path)),
73+
subdirectories=subdirectories,
74+
programming_language=language or determine_project_language(repo_path),
75+
)
76+
],
77+
progress=RichProgress(progress),
78+
)
3979
return codebase
4080

4181

@@ -51,12 +91,11 @@ def run_local(
5191
function: The function to run
5292
diff_preview: Number of lines of diff to preview (None for all)
5393
"""
94+
rich.print("Parsing codebase at {session.repo_path} with subdirectories {function.subdirectories or 'ALL'} and language {function.language or 'AUTO'} ...")
5495
# Parse codebase and run
55-
with Status(f"[bold]Parsing codebase at {session.repo_path} with subdirectories {function.subdirectories or 'ALL'} and language {function.language or 'AUTO'} ...", spinner="dots") as status:
56-
codebase = parse_codebase(repo_path=session.repo_path, subdirectories=function.subdirectories, language=function.language)
57-
status.update("[bold green]✓ Parsed codebase")
58-
59-
status.update("[bold]Running codemod...")
96+
codebase = parse_codebase(repo_path=session.repo_path, subdirectories=function.subdirectories, language=function.language)
97+
with Status("[bold]Running codemod...", spinner="dots") as status:
98+
status.update("")
6099
function.run(codebase) # Run the function
61100
status.update("[bold green]✓ Completed codemod")
62101

src/graph_sitter/codebase/codebase_context.py

Lines changed: 21 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
from collections import Counter, defaultdict
55
from contextlib import contextmanager
66
from enum import IntEnum, auto, unique
7-
from functools import lru_cache
7+
from functools import cached_property, lru_cache
88
from os import PathLike
99
from pathlib import Path
1010
from typing import TYPE_CHECKING, Any
@@ -215,23 +215,19 @@ def __init__(
215215
def __repr__(self):
216216
return self.__class__.__name__
217217

218-
@property
218+
@cached_property
219219
def _graph(self) -> PyDiGraph[Importable, Edge]:
220220
if not self.__graph_ready:
221221
logger.info("Lazily Computing Graph")
222222
self.build_graph(self.projects[0].repo_operator)
223223
return self.__graph
224224

225-
@_graph.setter
226-
def _graph(self, value: PyDiGraph[Importable, Edge]) -> None:
227-
self.__graph = value
228-
229225
@stopwatch
230226
@commiter
231227
def build_graph(self, repo_operator: RepoOperator) -> None:
232228
"""Builds a codebase graph based on the current file state of the given repo operator"""
233229
self.__graph_ready = True
234-
self._graph.clear()
230+
self.__graph.clear()
235231

236232
# =====[ Add all files to the graph in parallel ]=====
237233
syncs = defaultdict(lambda: [])
@@ -492,22 +488,22 @@ def _process_diff_files(self, files_to_sync: Mapping[SyncType, list[Path]], incr
492488
for file_path in files_to_sync[SyncType.REPARSE]:
493489
file = self.get_file(file_path)
494490
file.remove_internal_edges()
495-
496-
task = self.progress.begin("Reparsing updated files", count=len(files_to_sync[SyncType.REPARSE]))
497491
files_to_resolve = []
498-
# Step 4: Reparse updated files
499-
for idx, file_path in enumerate(files_to_sync[SyncType.REPARSE]):
500-
task.update(f"Reparsing {self.to_relative(file_path)}", count=idx)
501-
file = self.get_file(file_path)
502-
to_resolve.extend(file.unparse(reparse=True))
503-
to_resolve = list(filter(lambda node: self.has_node(node.node_id) and node is not None, to_resolve))
504-
file.sync_with_file_content()
505-
files_to_resolve.append(file)
506-
task.end()
492+
if len(files_to_sync[SyncType.REPARSE]) > 0:
493+
task = self.progress.begin("Reparsing updated files", count=len(files_to_sync[SyncType.REPARSE]))
494+
# Step 4: Reparse updated files
495+
for idx, file_path in enumerate(files_to_sync[SyncType.REPARSE]):
496+
task.update(f"Reparsing {self.to_relative(file_path)}", count=idx)
497+
file = self.get_file(file_path)
498+
to_resolve.extend(file.unparse(reparse=True))
499+
to_resolve = list(filter(lambda node: self.has_node(node.node_id) and node is not None, to_resolve))
500+
file.sync_with_file_content()
501+
files_to_resolve.append(file)
502+
task.end()
507503
# Step 5: Add new files as nodes to graph (does not yet add edges)
508-
task = self.progress.begin("Adding new files", count=len(files_to_sync[SyncType.ADD]))
504+
task = self.progress.begin("Parsing new files", count=len(files_to_sync[SyncType.ADD]))
509505
for idx, filepath in enumerate(files_to_sync[SyncType.ADD]):
510-
task.update(f"Adding {self.to_relative(filepath)}", count=idx)
506+
task.update(f"Parsing {self.to_relative(filepath)}", count=idx)
511507
try:
512508
content = self.io.read_text(filepath)
513509
except UnicodeDecodeError as e:
@@ -624,6 +620,10 @@ def get_edges(self) -> list[tuple[NodeId, NodeId, EdgeType, Usage | None]]:
624620
return [(x[0], x[1], x[2].type, x[2].usage) for x in self._graph.weighted_edge_list()]
625621

626622
def get_file(self, file_path: os.PathLike, ignore_case: bool = False) -> SourceFile | None:
623+
# Performance hack: just use the relative path
624+
node_id = self.filepath_idx.get(str(file_path), None)
625+
if node_id is not None:
626+
return self.get_node(node_id)
627627
# If not part of repo path, return None
628628
absolute_path = self.to_absolute(file_path)
629629
if not self.is_subdir(absolute_path) and not self.config.allow_external:
@@ -752,6 +752,7 @@ def to_relative(self, filepath: PathLike | str) -> Path:
752752
return path.relative_to(self.repo_path)
753753
return path
754754

755+
@lru_cache(maxsize=10000)
755756
def is_subdir(self, path: PathLike | str) -> bool:
756757
path = self.to_absolute(path)
757758
return path == Path(self.repo_path) or path.is_relative_to(self.repo_path) or Path(self.repo_path) in path.parents

src/graph_sitter/codebase/io/file_io.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
from concurrent.futures import ThreadPoolExecutor
2+
from functools import lru_cache
23
from pathlib import Path
34

45
from graph_sitter.codebase.io.io import IO, BadWriteError
@@ -17,6 +18,7 @@ def __init__(self, allowed_paths: list[Path] | None = None):
1718
self.files = {}
1819
self.allowed_paths = allowed_paths
1920

21+
@lru_cache(maxsize=10000)
2022
def _verify_path(self, path: Path) -> None:
2123
if self.allowed_paths is not None:
2224
if not any(path.resolve().is_relative_to(p.resolve()) for p in self.allowed_paths):

src/graph_sitter/core/file.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -898,14 +898,16 @@ def resolve_name(self, name: str, start_byte: int | None = None, strict: bool =
898898
Symbol | Import | WildcardImport: The resolved symbol, import, or wildcard import that matches
899899
the name and scope requirements. Yields at most one result.
900900
"""
901+
from graph_sitter.core.function import Function
902+
901903
if resolved := self.valid_symbol_names.get(name):
902904
# If we have a start_byte and the resolved symbol is after it,
903905
# we need to look for earlier definitions of the symbol
904906
if start_byte is not None and resolved.end_byte > start_byte:
905907
# Search backwards through symbols to find the most recent definition
906908
# that comes before our start_byte position
907909
for symbol in reversed(self.symbols):
908-
if symbol.start_byte <= start_byte and symbol.name == name:
910+
if symbol.name == name and (start_byte is None or (symbol.start_byte if isinstance(symbol, Class | Function) else symbol.end_byte) <= start_byte):
909911
yield symbol
910912
return
911913
# If strict mode and no valid symbol found, return nothing

src/graph_sitter/core/import_resolution.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -711,6 +711,11 @@ def _compute_dependencies(self, usage_type: UsageKind, dest: HasName | None = No
711711
def filepath(self) -> str:
712712
return self.imp.filepath
713713

714+
@property
715+
@noapidoc
716+
def parent(self) -> Editable:
717+
return self.imp.parent
718+
714719

715720
class ExternalImportResolver:
716721
def resolve(self, imp: Import) -> str | None:

src/graph_sitter/core/interfaces/editable.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -130,7 +130,7 @@ def __init__(self, ts_node: TSNode, file_node_id: NodeId, ctx: CodebaseContext,
130130
assert (parent.ts_node, parent.__class__) not in seen
131131
seen.add((parent.ts_node, parent.__class__))
132132
parent = parent.parent
133-
if self.file and self.ctx.config.full_range_index:
133+
if self.ctx.config.full_range_index and self.file:
134134
self._add_to_index
135135

136136
def __hash__(self):
@@ -370,7 +370,7 @@ def previous_named_sibling(self) -> Editable[Parent] | None:
370370

371371
return self.parent._parse_expression(previous_named_sibling_node)
372372

373-
@property
373+
@cached_property
374374
def file(self) -> SourceFile:
375375
"""The file object that this Editable instance belongs to.
376376

0 commit comments

Comments
 (0)