diff --git a/src/codegen/sdk/core/codebase.py b/src/codegen/sdk/core/codebase.py
index 1979e5f89..359830e22 100644
--- a/src/codegen/sdk/core/codebase.py
+++ b/src/codegen/sdk/core/codebase.py
@@ -5,6 +5,7 @@
import logging
import os
import re
+import tempfile
from collections.abc import Generator
from contextlib import contextmanager
from functools import cached_property
@@ -1298,6 +1299,135 @@ def from_repo(
logger.exception(f"Failed to initialize codebase: {e}")
raise
+    @classmethod
+    def from_string(
+        cls,
+        code: str,
+        *,
+        language: Literal["python", "typescript"] | ProgrammingLanguage,
+    ) -> "Codebase":
+        """Build a single-file Codebase from a string of source code.
+
+        The code is handed to ``CodebaseFactory.get_codebase_from_files`` as one
+        file named ``test.ts`` (TypeScript) or ``test.py`` (any other language),
+        with a freshly created temporary directory as the repo path.
+
+        Args:
+            code: Source text that becomes the content of the single file.
+            language: Language of the code, either a ``ProgrammingLanguage`` member
+                or its name as a string (upper-cased before the enum lookup).
+                Required and keyword-only; there is no default.
+
+        Returns:
+            Codebase: The codebase the factory builds for the one file.
+
+        Raises:
+            TypeError: If ``language`` is falsy (for example ``None`` or ``""``).
+            ValueError: If ``language`` is a string that is not a valid
+                ``ProgrammingLanguage`` value.
+
+        Example:
+            >>> # Python code
+            >>> code = "def add(a, b): return a + b"
+            >>> codebase = Codebase.from_string(code, language="python")
+
+            >>> # TypeScript code
+            >>> code = "function add(a: number, b: number): number { return a + b; }"
+            >>> codebase = Codebase.from_string(code, language="typescript")
+        """
+        # Keyword-only parameter without a default: an explicit falsy value is
+        # reported the same way as an omitted one.
+        if not language:
+            error_message = "missing required argument language"
+            raise TypeError(error_message)
+
+        logger.info("Creating codebase from string")
+
+        # Normalize the language to the enum, then pick the matching filename
+        if isinstance(language, str):
+            resolved_lang = ProgrammingLanguage(language.upper())
+        else:
+            resolved_lang = language
+
+        if resolved_lang == ProgrammingLanguage.TYPESCRIPT:
+            single_filename = "test.ts"
+        else:
+            single_filename = "test.py"
+
+        # Imported locally, as in from_files
+        from codegen.sdk.codebase.factory.codebase_factory import CodebaseFactory
+
+        # NOTE(review): the temporary directory is removed when this block exits,
+        # i.e. as soon as the method returns - confirm the returned codebase does
+        # not need it on disk afterwards.
+        with tempfile.TemporaryDirectory(prefix="codegen_") as tmp_dir:
+            logger.info(f"Using directory: {tmp_dir}")
+            codebase = CodebaseFactory.get_codebase_from_files(
+                repo_path=tmp_dir,
+                files={single_filename: code},
+                programming_language=resolved_lang,
+            )
+            logger.info("Codebase initialization complete")
+            return codebase
+
+    @classmethod
+    def from_files(
+        cls,
+        files: dict[str, str],
+        *,
+        language: Literal["python", "typescript"] | ProgrammingLanguage | None = None,
+    ) -> "Codebase":
+        """Creates a Codebase instance from multiple in-memory files.
+
+        The language is always inferred from the file extensions; an explicitly
+        passed ``language`` is only used to cross-check that inference. The files
+        are handed to ``CodebaseFactory.get_codebase_from_files`` together with a
+        temporary directory in which ``git init`` has been run.
+
+        Args:
+            files: Dictionary mapping filenames to their content, e.g. {"main.py": "print('hello')"}
+            language: Optional language to validate against. If provided, it must
+                match the language inferred from the file extensions.
+                All files must have extensions matching the same language.
+
+        Returns:
+            Codebase: A Codebase instance initialized with the provided files
+
+        Raises:
+            ValueError: If ``files`` is empty, if the file extensions don't match a
+                single language, if the explicitly provided language doesn't match
+                the extensions, or if a string ``language`` is not a valid
+                ``ProgrammingLanguage`` value
+            subprocess.CalledProcessError: If ``git init`` fails in the temporary directory
+
+        Example:
+            >>> # Language inferred as Python
+            >>> files = {"main.py": "print('hello')", "utils.py": "def add(a, b): return a + b"}
+            >>> codebase = Codebase.from_files(files)
+
+            >>> # Language inferred as TypeScript
+            >>> files = {"index.ts": "console.log('hello')", "utils.tsx": "export const App = () => <div>Hello</div>"}
+            >>> codebase = Codebase.from_files(files)
+        """
+        # Create codebase using factory
+        from codegen.sdk.codebase.factory.codebase_factory import CodebaseFactory
+
+        if not files:
+            msg = "No files provided"
+            raise ValueError(msg)
+
+        logger.info("Creating codebase from files")
+
+        py_extensions = {".py"}
+        ts_extensions = {".ts", ".tsx", ".js", ".jsx"}
+
+        # `files` is non-empty here, so `extensions` holds at least one entry
+        # (the empty string for a filename without an extension).
+        extensions = {os.path.splitext(f)[1].lower() for f in files}
+
+        # The strict one-language check keeps from_files limited to small,
+        # test-style inputs. To parse an actual repo, use Codebase("path/to/repo").
+        if extensions.issubset(py_extensions):
+            prog_lang = ProgrammingLanguage.PYTHON
+        elif extensions.issubset(ts_extensions):
+            prog_lang = ProgrammingLanguage.TYPESCRIPT
+        else:
+            msg = f"Cannot determine single language from extensions: {extensions}. Files must all be Python (.py) or TypeScript (.ts, .tsx, .js, .jsx)"
+            raise ValueError(msg)
+
+        # An explicit language never overrides the inference; it must agree with it
+        if language is not None:
+            explicit_lang = ProgrammingLanguage(language.upper()) if isinstance(language, str) else language
+            if explicit_lang != prog_lang:
+                msg = f"Provided language {explicit_lang} doesn't match inferred language {prog_lang} from file extensions"
+                raise ValueError(msg)
+
+        logger.info(f"Using language: {prog_lang}")
+
+        # NOTE(review): the temporary directory is removed when this block exits,
+        # i.e. as soon as the method returns - confirm the returned codebase does
+        # not need it on disk afterwards.
+        with tempfile.TemporaryDirectory(prefix="codegen_") as tmp_dir:
+            logger.info(f"Using directory: {tmp_dir}")
+
+            # Initialize git repo to avoid "not in a git repository" error
+            import subprocess
+
+            subprocess.run(["git", "init"], cwd=tmp_dir, check=True, capture_output=True)
+
+            codebase = CodebaseFactory.get_codebase_from_files(repo_path=tmp_dir, files=files, programming_language=prog_lang)
+            logger.info("Codebase initialization complete")
+            return codebase
+
def get_modified_symbols_in_pr(self, pr_id: int) -> tuple[str, dict[str, str], list[str]]:
"""Get all modified symbols in a pull request"""
pr = self._op.get_pull_request(pr_id)
diff --git a/tests/unit/codegen/sdk/codebase/session/test_codebase_from_files.py b/tests/unit/codegen/sdk/codebase/session/test_codebase_from_files.py
new file mode 100644
index 000000000..5415b0ffc
--- /dev/null
+++ b/tests/unit/codegen/sdk/codebase/session/test_codebase_from_files.py
@@ -0,0 +1,65 @@
+import pytest
+
+from codegen.sdk.core.codebase import Codebase
+
+
+def test_from_files_python():
+    """Two .py files produce a two-file codebase with the language inferred."""
+    py_sources = {
+        "main.py": "from utils import add\nprint(add(1, 2))",
+        "utils.py": "def add(a, b):\n return a + b",
+    }
+    # No language argument: it is inferred from the .py extensions
+    codebase = Codebase.from_files(py_sources)
+    assert len(codebase.files) == 2
+    # Both files must be present, matched by filename suffix
+    for expected_name in ("main.py", "utils.py"):
+        assert any(f.filepath.endswith(expected_name) for f in codebase.files)
+    # At least one file carries the import statement from main.py
+    assert any("from utils import add" in f.source for f in codebase.files)
+
+
+def test_from_files_typescript():
+    """Two .ts files produce a two-file codebase with the language inferred."""
+    ts_sources = {
+        "index.ts": "import { add } from './utils';\nconsole.log(add(1, 2));",
+        "utils.ts": "export function add(a: number, b: number): number {\n return a + b;\n}",
+    }
+    # No language argument: it is inferred from the .ts extensions
+    codebase = Codebase.from_files(ts_sources)
+    assert len(codebase.files) == 2
+    # Both files must be present, matched by filename suffix
+    for expected_name in ("index.ts", "utils.ts"):
+        assert any(f.filepath.endswith(expected_name) for f in codebase.files)
+    # At least one file carries the import statement from index.ts
+    assert any("import { add }" in f.source for f in codebase.files)
+
+
+def test_from_files_empty():
+    """An empty mapping is rejected with a "No files provided" ValueError."""
+    no_files = {}
+    with pytest.raises(ValueError, match="No files provided"):
+        Codebase.from_files(no_files)
+
+
+def test_from_files_mixed_extensions():
+    """Mixing a .py file with a .ts file is rejected with ValueError."""
+    mixed_sources = {
+        "main.py": "print('hello')",
+        "test.ts": "console.log('world')",
+    }
+    expected_error = "Cannot determine single language from extensions"
+    with pytest.raises(ValueError, match=expected_error):
+        Codebase.from_files(mixed_sources)
+
+
+def test_from_files_typescript_multiple_extensions():
+    """Test TypeScript codebase with various valid extensions (.ts, .tsx, .js, .jsx)"""
+    # NOTE(review): the JSX bodies of component.tsx and button.jsx were restored
+    # from a garbled copy of this file; the button.jsx content in particular is
+    # a reconstruction - confirm against the upstream test.
+    files = {
+        "index.ts": "console.log('hi')",
+        "component.tsx": "export const App = () => <div>Hello</div>",
+        "utils.js": "module.exports = { add: (a, b) => a + b }",
+        "button.jsx": "export const Button = () => <button>Click</button>",
+    }
+    # Language is optional, will be inferred as TypeScript
+    codebase = Codebase.from_files(files)
+    assert len(codebase.files) == 4
+
+
+def test_from_files_explicit_language_mismatch():
+    """Passing language="typescript" for .py files is rejected with ValueError."""
+    py_sources = {
+        "main.py": "print('hello')",
+        "utils.py": "def add(a, b): return a + b",
+    }
+    expected_error = "Provided language.*doesn't match inferred language"
+    with pytest.raises(ValueError, match=expected_error):
+        Codebase.from_files(py_sources, language="typescript")
+
+
+def test_from_files_explicit_language_match():
+    """Passing language="python" for .py files is accepted."""
+    py_sources = {
+        "main.py": "print('hello')",
+        "utils.py": "def add(a, b): return a + b",
+    }
+    codebase = Codebase.from_files(py_sources, language="python")
+    assert len(codebase.files) == 2
diff --git a/tests/unit/codegen/sdk/codebase/session/test_codebase_from_string.py b/tests/unit/codegen/sdk/codebase/session/test_codebase_from_string.py
new file mode 100644
index 000000000..328b318a9
--- /dev/null
+++ b/tests/unit/codegen/sdk/codebase/session/test_codebase_from_string.py
@@ -0,0 +1,78 @@
+import pytest
+
+from codegen.sdk.core.codebase import Codebase
+from codegen.shared.enums.programming_language import ProgrammingLanguage
+
+
+def test_from_string_python():
+    """A Python snippet becomes a single test.py file holding that source."""
+    code = """
+def hello():
+ return "world"
+ """
+    codebase = Codebase.from_string(code, language="python")
+    assert len(codebase.files) == 1
+    # The lone file is the one from_string created for the snippet
+    only_file = codebase.files[0]
+    assert only_file.filepath.endswith("test.py")
+    assert "def hello" in only_file.source
+
+
+def test_from_string_typescript():
+    """A TypeScript snippet becomes a single test.ts file holding that source."""
+    code = """
+function hello(): string {
+ return "world";
+}
+ """
+    codebase = Codebase.from_string(code, language="typescript")
+    assert len(codebase.files) == 1
+    # The lone file is the one from_string created for the snippet
+    only_file = codebase.files[0]
+    assert only_file.filepath.endswith("test.ts")
+    assert "function hello" in only_file.source
+
+
+def test_from_string_with_enum():
+    """The language may be given as a ProgrammingLanguage member instead of a string."""
+    ts_snippet = "const x = 42;"
+    codebase = Codebase.from_string(ts_snippet, language=ProgrammingLanguage.TYPESCRIPT)
+    assert len(codebase.files) == 1
+    # TYPESCRIPT maps to the test.ts filename
+    assert codebase.files[0].filepath.endswith("test.ts")
+
+
+def test_from_string_invalid_syntax():
+    """Text that is not valid Python is still accepted and stored verbatim."""
+    not_python = "this is not valid python"
+    codebase = Codebase.from_string(not_python, language="python")
+    assert len(codebase.files) == 1
+    # The source must round-trip unchanged
+    assert codebase.files[0].source == not_python
+
+
+def test_from_string_empty():
+    """An empty string still yields one file, whose source is empty."""
+    empty_code = ""
+    codebase = Codebase.from_string(empty_code, language="python")
+    assert len(codebase.files) == 1
+    assert codebase.files[0].source == ""
+
+
+def test_from_string_missing_language():
+    """Omitting the keyword-only language argument raises TypeError."""
+    expected_error = "missing.*required.*argument.*language"
+    with pytest.raises(TypeError, match=expected_error):
+        Codebase.from_string("print('hello')")
+
+
+def test_from_string_invalid_language():
+    """A language string that names no known language raises ValueError."""
+    unknown_language = "invalid"
+    with pytest.raises(ValueError):
+        Codebase.from_string("print('hello')", language=unknown_language)
+
+
+def test_from_string_multifile():
+    """Text that looks like several files is not split; it stays one file."""
+    code = """
+# file1.py
+def hello(): pass
+
+# file2.py
+def world(): pass
+ """
+    # The "# fileN.py" markers are ordinary comments, so everything lands in one file
+    codebase = Codebase.from_string(code, language="python")
+    assert len(codebase.files) == 1