Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ dependencies = [
"colorlog>=6.9.0",
"langsmith",
"langchain-xai>=0.2.1",
"codegen-sdk-pink>=0.1.0",
]

license = { text = "Apache-2.0" }
Expand Down Expand Up @@ -172,6 +173,7 @@ dev-dependencies = [
[tool.uv.workspace]
exclude = ["codegen-examples"]


[tool.cython-lint]
max-line-length = 200

Expand Down
12 changes: 12 additions & 0 deletions src/codegen/configs/models/codebase.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,19 @@
from enum import IntEnum, auto

from pydantic import Field

from codegen.configs.models.base_config import BaseConfig


class PinkMode(IntEnum):
    """Selects which SDK implementation is used for the files of a codebase.

    Members are numbered from 1 in declaration order, so every member,
    including ``OFF``, is truthy; compare against a member explicitly rather
    than relying on truthiness.
    """

    # Use the python SDK for all files
    OFF = 1
    # Use the Rust SDK for all files
    ALL_FILES = 2
    # Use the Rust SDK for files the python SDK can't parse (non-source files)
    NON_SOURCE_FILES = 3


class CodebaseConfig(BaseConfig):
def __init__(self, prefix: str = "CODEBASE", *args, **kwargs) -> None:
super().__init__(prefix=prefix, *args, **kwargs)
Expand All @@ -25,6 +36,7 @@ def __init__(self, prefix: str = "CODEBASE", *args, **kwargs) -> None:
ts_language_engine: bool = False
v8_ts_engine: bool = False
unpacking_assignment_partial_removal: bool = True
use_pink: PinkMode = PinkMode.OFF


DefaultCodebaseConfig = CodebaseConfig()
4 changes: 2 additions & 2 deletions src/codegen/sdk/codebase/codebase_context.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

from rustworkx import PyDiGraph, WeightedEdgeList

from codegen.configs.models.codebase import CodebaseConfig
from codegen.configs.models.codebase import CodebaseConfig, PinkMode
from codegen.configs.models.secrets import SecretsConfig
from codegen.sdk.codebase.config import ProjectConfig, SessionOptions
from codegen.sdk.codebase.config_parser import ConfigParser, get_config_parser_for_language
Expand Down Expand Up @@ -189,7 +189,7 @@ def __init__(
logger.warning("Some features may not work as expected. Advanced static analysis will be disabled but simple file IO will still work.")

# Build the graph
if not self.config.exp_lazy_graph:
if not self.config.exp_lazy_graph and self.config.use_pink != PinkMode.ALL_FILES:
self.build_graph(context.repo_operator)
try:
self.synced_commit = context.repo_operator.head_commit
Expand Down
21 changes: 20 additions & 1 deletion src/codegen/sdk/core/codebase.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
from rich.console import Console
from typing_extensions import TypeVar, deprecated

from codegen.configs.models.codebase import CodebaseConfig
from codegen.configs.models.codebase import CodebaseConfig, PinkMode
from codegen.configs.models.secrets import SecretsConfig
from codegen.git.repo_operator.repo_operator import RepoOperator
from codegen.git.schemas.enums import CheckoutResult, SetupOption
Expand Down Expand Up @@ -212,6 +212,10 @@ def __init__(
self.repo_path = Path(self._op.repo_path)
self.ctx = CodebaseContext(projects, config=config, secrets=secrets, io=io, progress=progress)
self.console = Console(record=True, soft_wrap=True)
if self.ctx.config.use_pink != PinkMode.OFF:
import codegen_sdk_pink

self._pink_codebase = codegen_sdk_pink.Codebase(self.repo_path)

@noapidoc
def __str__(self) -> str:
Expand Down Expand Up @@ -297,6 +301,8 @@ def files(self, *, extensions: list[str] | Literal["*"] | None = None) -> list[T
Returns:
list[TSourceFile]: A sorted list of source files in the codebase.
"""
if self.ctx.config.use_pink == PinkMode.ALL_FILES:
return self._pink_codebase.files
if extensions is None and len(self.ctx.get_nodes(NodeType.FILE)) > 0:
# If extensions is None AND there is at least one file in the codebase (This checks for unsupported languages or parse-off repos),
# Return all source files
Expand Down Expand Up @@ -528,6 +534,12 @@ def has_file(self, filepath: str, ignore_case: bool = False) -> bool:
Returns:
bool: True if the file exists in the codebase, False otherwise.
"""
if self.ctx.config.use_pink == PinkMode.ALL_FILES:
absolute_path = self.ctx.to_absolute(filepath)
return self._pink_codebase.has_file(absolute_path)
if self.ctx.config.use_pink == PinkMode.NON_SOURCE_FILES:
if self._pink_codebase.has_file(filepath):
return True
return self.get_file(filepath, optional=True, ignore_case=ignore_case) is not None

@overload
Expand All @@ -550,13 +562,20 @@ def get_file(self, filepath: str, *, optional: bool = False, ignore_case: bool =
Raises:
ValueError: If file not found and optional=False.
"""
if self.ctx.config.use_pink == PinkMode.ALL_FILES:
absolute_path = self.ctx.to_absolute(filepath)
return self._pink_codebase.get_file(absolute_path)
# Try to get the file from the graph first
file = self.ctx.get_file(filepath, ignore_case=ignore_case)
if file is not None:
return file

# If the file is not in the graph, check the filesystem
absolute_path = self.ctx.to_absolute(filepath)
if self.ctx.io.file_exists(absolute_path):
if self.ctx.config.use_pink != PinkMode.OFF:
if file := self._pink_codebase.get_file(absolute_path):
return file
return self.ctx._get_raw_file_from_path(absolute_path)
# If the file is not in the graph, check the filesystem
if absolute_path.parent.exists():
Expand Down
191 changes: 191 additions & 0 deletions tests/unit/codegen/sdk/codebase/file/test_file_pink.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,191 @@
import os
import sys

import pytest

from codegen.configs.models.codebase import PinkMode
from codegen.sdk.codebase.config import TestFlags
from codegen.sdk.codebase.factory.get_session import get_codebase_session
from codegen.sdk.core.file import File, SourceFile
from codegen.shared.enums.programming_language import ProgrammingLanguage

# Shared test configuration: the default test flags with the pink backend enabled for all files.
Config = TestFlags.model_copy(update={"use_pink": PinkMode.ALL_FILES})


@pytest.mark.xfail(reason="Blocked on CG-11949")
def test_file(tmpdir) -> None:
    """Check ``get_file`` for a text file, a Python file and a binary file, plus missing paths."""
    text_content = "Hello world!"
    python_content = "print(123)"
    binary_content = b"\x89PNG"
    fixture_files = {"file1.txt": text_content, "file2.py": python_content, "file3.bin": binary_content}

    with get_codebase_session(tmpdir=tmpdir, files=fixture_files, config=Config) as codebase:
        # Plain text: a File, but not a SourceFile
        text_file = codebase.get_file("file1.txt")
        assert isinstance(text_file, File)
        assert not isinstance(text_file, SourceFile)
        assert text_file is not None
        assert text_file.filepath == "file1.txt"
        assert text_file.content == text_content
        assert text_file.is_binary is False

        # Python source: a SourceFile
        python_file = codebase.get_file("file2.py")
        assert isinstance(python_file, SourceFile)
        assert python_file is not None
        assert python_file.filepath == "file2.py"
        assert python_file.content == python_content
        assert python_file.is_binary is False

        # Binary payload: a File, but not a SourceFile, exposing its raw bytes
        binary_file = codebase.get_file("file3.bin")
        assert isinstance(binary_file, File)
        assert not isinstance(binary_file, SourceFile)
        assert binary_file is not None
        assert binary_file.filepath == "file3.bin"
        assert binary_file.is_binary is True
        assert binary_file.content_bytes == binary_content

        # Paths that were never created must raise instead of returning a value
        with pytest.raises(ValueError):
            codebase.get_file("file4.txt")
        with pytest.raises(ValueError):
            codebase.get_directory("file4/")


@pytest.mark.xfail(reason="Blocked on CG-11949")
def test_codebase_files(tmpdir) -> None:
    """Check ``codebase.files`` with no filter, with ``"*"`` and with explicit extension lists."""
    fixture_files = {"file1.py": "print(123)", "file2.py": "print(456)", "file3.bin": b"\x89PNG", "file4": "Hello world!"}
    with get_codebase_session(tmpdir=tmpdir, files=fixture_files, config=Config) as codebase:
        file1 = codebase.get_file("file1.py")
        file2 = codebase.get_file("file2.py")
        file3 = codebase.get_file("file3.bin")
        file4 = codebase.get_file("file4")

        # Unfiltered access is expected to yield only the two Python files
        assert len(codebase.files) == 2
        assert set(codebase.files) == {file1, file2}

        # (extensions filter, expected count, expected members)
        filtered_cases = (
            ("*", 4, {file1, file2, file3, file4}),
            ([".py"], 2, {file1, file2}),
            ([".bin"], 1, {file3}),
        )
        for extensions, expected_count, expected_files in filtered_cases:
            assert len(codebase.files(extensions=extensions)) == expected_count
            assert set(codebase.files(extensions=extensions)) == expected_files


@pytest.mark.xfail(reason="Blocked on CG-11949")
def test_codebase_files_other_language(tmpdir) -> None:
    """Check ``codebase.files`` filtering when the session language is ``ProgrammingLanguage.OTHER``."""
    fixture_files = {"file1.py": "print(123)", "file2.py": "print(456)", "file3.bin": b"\x89PNG", "file4": "Hello world!"}
    with get_codebase_session(
        tmpdir=tmpdir,
        files=fixture_files,
        programming_language=ProgrammingLanguage.OTHER,
        config=Config,
    ) as codebase:
        file1 = codebase.get_file("file1.py")
        file2 = codebase.get_file("file2.py")
        file3 = codebase.get_file("file3.bin")
        file4 = codebase.get_file("file4")
        every_file = {file1, file2, file3, file4}

        # Match all files if the language is OTHER
        assert len(codebase.files) == 4
        assert set(codebase.files) == every_file

        # (extensions filter, expected count, expected members)
        filtered_cases = (
            ("*", 4, every_file),
            ([".py"], 2, {file1, file2}),
            ([".bin"], 1, {file3}),
        )
        for extensions, expected_count, expected_files in filtered_cases:
            assert len(codebase.files(extensions=extensions)) == expected_count
            assert set(codebase.files(extensions=extensions)) == expected_files


@pytest.mark.skipif(sys.platform == "darwin", reason="macOS is case-insensitive")
@pytest.mark.xfail(reason="Blocked on CG-11949")
def test_file_extensions_ignore_case(tmpdir) -> None:
    """Check an unmatched extension filter and ``get_file`` lookups with and without ``ignore_case``."""
    fixture_files = {"file1.py": "print(123)", "file2.py": "print(456)", "file3.bin": b"\x89PNG", "file4": "Hello world!"}
    with get_codebase_session(tmpdir=tmpdir, files=fixture_files, config=Config) as codebase:
        file1 = codebase.get_file("file1.py")
        file2 = codebase.get_file("file2.py")
        file3 = codebase.get_file("file3.bin")
        # Looked up only to confirm it resolves; the result is not compared below
        codebase.get_file("file4")

        # An extension none of the fixture files use yields nothing
        assert len(codebase.files(extensions=[".pyi"])) == 0
        assert set(codebase.files(extensions=[".pyi"])) == set()

        # Test ignore_case
        miscased_lookups = (
            ("FILE1.PY", file1),
            ("FiLe2.Py", file2),
            ("FILE3.BIN", file3),
        )
        for spelling, expected in miscased_lookups:
            found = codebase.get_file(spelling, ignore_case=True)
            assert found is not None
            assert found == expected

        # Test ignore_case=False (default)
        for spelling, _ in miscased_lookups:
            assert codebase.get_file(spelling, ignore_case=False, optional=True) is None


@pytest.mark.skipif(sys.platform == "darwin", reason="macOS is case-insensitive")
@pytest.mark.xfail(reason="Blocked on CG-11949")
def test_file_case_sensitivity_has_file(tmpdir) -> None:
    """Check ``has_file`` for exact and mis-cased paths with ``ignore_case`` on and off."""
    fixture_files = {"file1.py": "print(123)", "file2.py": "print(456)", "file3.bin": b"\x89PNG"}
    # Each row: the exact on-disk name followed by mis-cased spellings of it
    spellings_by_file = (
        ("file1.py", "FILE1.PY", "FiLe1.Py"),
        ("file2.py", "FILE2.PY", "FiLe2.Py"),
        ("file3.bin", "FILE3.BIN", "FiLe3.BiN"),
    )
    with get_codebase_session(tmpdir=tmpdir, files=fixture_files, config=Config) as codebase:
        # Test has_file with ignore_case=True
        for spellings in spellings_by_file:
            for spelling in spellings:
                assert codebase.has_file(spelling, ignore_case=True)

        # Test has_file with ignore_case=False (default)
        for exact_name, *miscased_names in spellings_by_file:
            assert codebase.has_file(exact_name, ignore_case=False)
            for spelling in miscased_names:
                assert not codebase.has_file(spelling, ignore_case=False)


@pytest.mark.skipif(sys.platform == "darwin", reason="macOS is case-insensitive")
@pytest.mark.xfail(reason="Blocked on CG-11949")
def test_file_case_sensitivity_get_file(tmpdir) -> None:
    """Check ``get_file`` for mis-cased paths with ``ignore_case`` on and off."""
    fixture_files = {"file1.py": "print(123)", "file2.py": "print(456)", "file3.bin": b"\x89PNG"}
    with get_codebase_session(tmpdir=tmpdir, files=fixture_files, config=Config) as codebase:
        file1 = codebase.get_file("file1.py")
        file2 = codebase.get_file("file2.py")
        file3 = codebase.get_file("file3.bin")

        # Each row: mis-cased spellings and the file they refer to
        miscased_lookups = (
            (("FILE1.PY", "FiLe1.Py"), file1),
            (("FILE2.PY", "FiLe2.Py"), file2),
            (("FILE3.BIN", "FiLe3.BiN"), file3),
        )

        # Test get_file with ignore_case=True
        for spellings, expected in miscased_lookups:
            for spelling in spellings:
                assert codebase.get_file(spelling, ignore_case=True) == expected

        # Test get_file with ignore_case=False (default)
        for spellings, _ in miscased_lookups:
            for spelling in spellings:
                assert codebase.get_file(spelling, ignore_case=False, optional=True) is None


def test_minified_file(tmpdir) -> None:
    """Check that ``codebase.ctx.get_file`` returns ``None`` for the two minified JavaScript fixtures.

    ``file1.min.js`` is minified by name; ``file2.js`` takes its content from
    the ``example.min.js`` fixture that sits next to this test module.
    """
    fixture_path = os.path.join(os.path.dirname(__file__), "example.min.js")
    # Read through a context manager so the handle is closed deterministically,
    # and pin the encoding so the result does not depend on the platform locale.
    with open(fixture_path, encoding="utf-8") as fixture:
        minified_source = fixture.read()

    with get_codebase_session(
        tmpdir=tmpdir,
        files={
            "file1.min.js": "console.log(123)",
            "file2.js": minified_source,
        },
        programming_language=ProgrammingLanguage.TYPESCRIPT,
        config=Config,
    ) as codebase:
        # This should match the `*.min.js` pattern
        file1 = codebase.ctx.get_file("file1.min.js")
        assert file1 is None

        # This should match the maximum line length threshold
        file2 = codebase.ctx.get_file("file2.js")
        assert file2 is None
Loading
Loading