diff --git a/codeflash/verification/__init__.py b/codeflash/verification/__init__.py
index e69de29bb..9732b6753 100644
--- a/codeflash/verification/__init__.py
+++ b/codeflash/verification/__init__.py
@@ -0,0 +1,31 @@
+"""Verification module for codeflash.
+
+This module provides test running and verification functionality.
+"""
+
+
+def __getattr__(name: str):  # noqa: ANN202
+    """Lazy import for LLM tools to avoid circular imports."""
+    if name in (
+        "AVAILABLE_TOOLS",
+        "RUN_BEHAVIORAL_TESTS_TOOL_SCHEMA",
+        "execute_tool",
+        "get_all_tool_schemas",
+        "get_tool_schema",
+        "run_behavioral_tests_tool",
+    ):
+        from codeflash.verification import llm_tools
+
+        return getattr(llm_tools, name)
+    msg = f"module {__name__!r} has no attribute {name!r}"
+    raise AttributeError(msg)
+
+
+__all__ = [
+    "AVAILABLE_TOOLS",
+    "RUN_BEHAVIORAL_TESTS_TOOL_SCHEMA",
+    "execute_tool",
+    "get_all_tool_schemas",
+    "get_tool_schema",
+    "run_behavioral_tests_tool",
+]
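
The package-level __getattr__ above is PEP 562 lazy loading: the tool names are only imported from llm_tools the first time they are accessed, which avoids a circular import at package import time. A minimal sketch of how a caller is expected to use the re-export (assuming the package is importable; the printed output is illustrative):

    # Sketch: accessing a re-exported name triggers the lazy import of
    # codeflash.verification.llm_tools via the package-level __getattr__.
    from codeflash.verification import get_all_tool_schemas

    schemas = get_all_tool_schemas()  # llm_tools is imported on this first access
    print([s["function"]["name"] for s in schemas])  # expected to include "run_behavioral_tests"
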
diff --git a/codeflash/verification/llm_tools.py b/codeflash/verification/llm_tools.py
new file mode 100644
index 000000000..960b70309
--- /dev/null
+++ b/codeflash/verification/llm_tools.py
@@ -0,0 +1,321 @@
+"""LLM Tool definitions for verification functions.
+
+This module exposes verification functions as tools that can be called by LLMs.
+Each tool has a JSON schema definition and a simplified wrapper function.
+"""
+
+from __future__ import annotations
+
+import os
+from pathlib import Path
+from typing import Any
+
+from pydantic import BaseModel, Field
+
+from codeflash.models.models import TestFile, TestFiles, TestType
+from codeflash.verification.parse_test_output import parse_test_xml
+from codeflash.verification.test_runner import run_behavioral_tests
+from codeflash.verification.verification_utils import TestConfig
+
+
+class TestFileInput(BaseModel):
+    """Input schema for a single test file."""
+
+    test_file_path: str = Field(description="Absolute path to the test file to run")
+    test_type: str = Field(
+        default="existing_unit_test",
+        description="Type of test: 'existing_unit_test', 'generated_regression', 'replay_test', or 'concolic_coverage_test'",
+    )
+
+
+class RunBehavioralTestsInput(BaseModel):
+    """Input schema for the run_behavioral_tests tool."""
+
+    test_files: list[TestFileInput] = Field(description="List of test files to run")
+    test_framework: str = Field(default="pytest", description="Test framework to use: 'pytest' or 'unittest'")
+    project_root: str = Field(description="Absolute path to the project root directory")
+    pytest_timeout: int | None = Field(default=30, description="Timeout in seconds for each pytest test")
+    verbose: bool = Field(default=False, description="Enable verbose output")
+
+
+class TestResultOutput(BaseModel):
+    """Output schema for a single test result."""
+
+    test_id: str = Field(description="Unique identifier for the test")
+    test_file: str = Field(description="Path to the test file")
+    test_function: str | None = Field(description="Name of the test function")
+    passed: bool = Field(description="Whether the test passed")
+    runtime_ns: int | None = Field(description="Runtime in nanoseconds, if available")
+    timed_out: bool = Field(description="Whether the test timed out")
+
+
+class RunBehavioralTestsOutput(BaseModel):
+    """Output schema for the run_behavioral_tests tool."""
+
+    success: bool = Field(description="Whether the test run completed successfully")
+    total_tests: int = Field(description="Total number of tests run")
+    passed_tests: int = Field(description="Number of tests that passed")
+    failed_tests: int = Field(description="Number of tests that failed")
+    results: list[TestResultOutput] = Field(description="Detailed results for each test")
+    stdout: str = Field(description="Standard output from the test run")
+    stderr: str = Field(description="Standard error from the test run")
+    error: str | None = Field(default=None, description="Error message if the run failed")
+
+
+# JSON Schema for OpenAI-style function calling
+RUN_BEHAVIORAL_TESTS_TOOL_SCHEMA = {
+    "type": "function",
+    "function": {
+        "name": "run_behavioral_tests",
+        "description": (
+            "Run behavioral tests to verify code correctness. "
+            "This executes test files using pytest or unittest and returns detailed results "
+            "including pass/fail status, runtime information, and any errors encountered."
+        ),
+        "parameters": {
+            "type": "object",
+            "properties": {
+                "test_files": {
+                    "type": "array",
+                    "description": "List of test files to run",
+                    "items": {
+                        "type": "object",
+                        "properties": {
+                            "test_file_path": {
+                                "type": "string",
+                                "description": "Absolute path to the test file to run",
+                            },
+                            "test_type": {
+                                "type": "string",
+                                "enum": [
+                                    "existing_unit_test",
+                                    "generated_regression",
+                                    "replay_test",
+                                    "concolic_coverage_test",
+                                ],
+                                "default": "existing_unit_test",
+                                "description": "Type of test being run",
+                            },
+                        },
+                        "required": ["test_file_path"],
+                    },
+                },
+                "test_framework": {
+                    "type": "string",
+                    "enum": ["pytest", "unittest"],
+                    "default": "pytest",
+                    "description": "Test framework to use",
+                },
+                "project_root": {"type": "string", "description": "Absolute path to the project root directory"},
+                "pytest_timeout": {
+                    "type": "integer",
+                    "default": 30,
+                    "description": "Timeout in seconds for each pytest test",
+                },
+                "verbose": {"type": "boolean", "default": False, "description": "Enable verbose output"},
+            },
+            "required": ["test_files", "project_root"],
+        },
+    },
+}
+
+
+def _test_type_from_string(test_type_str: str) -> TestType:
+    """Convert a string test type to TestType enum."""
+    mapping = {
+        "existing_unit_test": TestType.EXISTING_UNIT_TEST,
+        "generated_regression": TestType.GENERATED_REGRESSION,
+        "replay_test": TestType.REPLAY_TEST,
+        "concolic_test": TestType.CONCOLIC_COVERAGE_TEST,
+        "concolic_coverage_test": TestType.CONCOLIC_COVERAGE_TEST,
+    }
+    return mapping.get(test_type_str.lower(), TestType.EXISTING_UNIT_TEST)
+
+
+def run_behavioral_tests_tool(
+    test_files: list[dict[str, Any]],
+    project_root: str,
+    test_framework: str = "pytest",
+    pytest_timeout: int | None = 30,
+    verbose: bool = False,  # noqa: FBT002, FBT001
+) -> dict[str, Any]:
+    """Run behavioral tests and return results in an LLM-friendly format.
+
+    This is a simplified wrapper around run_behavioral_tests that accepts
+    primitive types suitable for LLM tool calling and returns a structured
+    dictionary response.
+
+    Args:
+        test_files: List of dicts with 'test_file_path' and optional 'test_type'
+        project_root: Absolute path to the project root directory
+        test_framework: Test framework to use ('pytest' or 'unittest')
+        pytest_timeout: Timeout in seconds for each pytest test
+        verbose: Enable verbose output
+
+    Returns:
+        Dictionary containing test results with success status, counts, and details
+
+    Example:
+        >>> result = run_behavioral_tests_tool(
+        ...     test_files=[{"test_file_path": "/path/to/test_example.py"}], project_root="/path/to/project"
+        ... )
+        >>> print(result["passed_tests"], "tests passed")
+
+    """
+    try:
+        project_root_path = Path(project_root).resolve()
+
+        # Build TestFiles structure
+        test_file_objects = []
+        for tf in test_files:
+            test_file_path = Path(tf["test_file_path"]).resolve()
+            test_type_str = tf.get("test_type", "existing_unit_test")
+            test_type = _test_type_from_string(test_type_str)
+
+            test_file_objects.append(
+                TestFile(
+                    instrumented_behavior_file_path=test_file_path,
+                    benchmarking_file_path=test_file_path,
+                    original_file_path=test_file_path,
+                    test_type=test_type,
+                )
+            )
+
+        test_files_model = TestFiles(test_files=test_file_objects)
+
+        # Set up test environment
+        test_env = os.environ.copy()
+        test_env["CODEFLASH_TEST_ITERATION"] = "0"
+        test_env["CODEFLASH_TRACER_DISABLE"] = "1"
+
+        # Ensure PYTHONPATH includes project root
+        if "PYTHONPATH" not in test_env:
+            test_env["PYTHONPATH"] = str(project_root_path)
+        else:
+            test_env["PYTHONPATH"] += os.pathsep + str(project_root_path)
+
+        # Run the tests
+        result_file_path, process, _, _ = run_behavioral_tests(
+            test_paths=test_files_model,
+            test_framework=test_framework,
+            test_env=test_env,
+            cwd=project_root_path,
+            pytest_timeout=pytest_timeout,
+            verbose=verbose,
+        )
+
+        # Create test config for parsing results
+        test_config = TestConfig(
+            tests_root=project_root_path,
+            project_root_path=project_root_path,
+            test_framework=test_framework,
+            tests_project_rootdir=project_root_path,
+        )
+
+        # Parse test results
+        test_results = parse_test_xml(
+            test_xml_file_path=result_file_path,
+            test_files=test_files_model,
+            test_config=test_config,
+            run_result=process,
+        )
+
+        # Clean up result file
+        result_file_path.unlink(missing_ok=True)
+
+        # Build response
+        results_list = []
+        passed_count = 0
+        failed_count = 0
+
+        for result in test_results:
+            passed = result.did_pass
+            if passed:
+                passed_count += 1
+            else:
+                failed_count += 1
+
+            results_list.append(
+                {
+                    "test_id": result.id.id() if result.id else "",
+                    "test_file": str(result.file_name) if result.file_name else "",
+                    "test_function": result.id.test_function_name if result.id else None,
+                    "passed": passed,
+                    "runtime_ns": result.runtime,
+                    "timed_out": result.timed_out or False,
+                }
+            )
+
+        return {
+            "success": True,
+            "total_tests": len(test_results),
+            "passed_tests": passed_count,
+            "failed_tests": failed_count,
+            "results": results_list,
+            "stdout": process.stdout if process.stdout else "",
+            "stderr": process.stderr if process.stderr else "",
+            "error": None,
+        }
+
+    except Exception as e:
+        return {
+            "success": False,
+            "total_tests": 0,
+            "passed_tests": 0,
+            "failed_tests": 0,
+            "results": [],
+            "stdout": "",
+            "stderr": "",
+            "error": str(e),
+        }
+
+
+# Registry of available tools
+AVAILABLE_TOOLS = {
+    "run_behavioral_tests": {"schema": RUN_BEHAVIORAL_TESTS_TOOL_SCHEMA, "function": run_behavioral_tests_tool}
+}
+
+
+def get_tool_schema(tool_name: str) -> dict[str, Any] | None:
+    """Get the JSON schema for a tool by name.
+
+    Args:
+        tool_name: Name of the tool to get schema for
+
+    Returns:
+        JSON schema dict or None if tool not found
+
+    """
+    tool = AVAILABLE_TOOLS.get(tool_name)
+    return tool["schema"] if tool else None
+
+
+def get_all_tool_schemas() -> list[dict[str, Any]]:
+    """Get JSON schemas for all available tools.
+
+    Returns:
+        List of JSON schema dicts for all tools
+
+    """
+    return [tool["schema"] for tool in AVAILABLE_TOOLS.values()]
+
+
+def execute_tool(tool_name: str, **kwargs: Any) -> dict[str, Any]:  # noqa: ANN401
+    """Execute a tool by name with the given arguments.
+
+    Args:
+        tool_name: Name of the tool to execute
+        **kwargs: Arguments to pass to the tool function
+
+    Returns:
+        Tool execution result as a dictionary
+
+    Raises:
+        ValueError: If tool_name is not found
+
+    """
+    tool = AVAILABLE_TOOLS.get(tool_name)
+    if not tool:
+        msg = f"Unknown tool: {tool_name}"
+        raise ValueError(msg)
+    return tool["function"](**kwargs)
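
Taken together, RUN_BEHAVIORAL_TESTS_TOOL_SCHEMA, get_all_tool_schemas, and execute_tool are the pieces an LLM client needs for a function-calling loop: advertise the schemas to the model, then dispatch whatever tool call it returns. The dispatch step might look roughly like the sketch below, which assumes an OpenAI-style tool-call payload (a function name plus a JSON-encoded argument string); the handle_tool_call helper and the example paths are illustrative, not part of this patch.

    import json

    from codeflash.verification import execute_tool, get_all_tool_schemas

    # Advertise the tools to the model, e.g. as the `tools` argument of an
    # OpenAI-style chat completion request.
    tools = get_all_tool_schemas()


    def handle_tool_call(name: str, arguments: str) -> str:
        """Dispatch one model-issued tool call and serialize the result back to the model.

        `arguments` is assumed to be the JSON string an OpenAI-style API returns for a
        function call; adapt the unpacking for other providers.
        """
        kwargs = json.loads(arguments)
        result = execute_tool(name, **kwargs)  # raises ValueError for unknown tool names
        return json.dumps(result)


    # Hypothetical payload a model might emit after seeing the schema:
    reply = handle_tool_call(
        "run_behavioral_tests",
        json.dumps(
            {
                "test_files": [{"test_file_path": "/abs/path/to/test_example.py"}],
                "project_root": "/abs/path/to/project",
            }
        ),
    )
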
test_files=[{"test_file_path": str(test_file_path)}], + project_root=str(repo_root), + test_framework="pytest", + pytest_timeout=30, + ) + + assert result["success"] is True + assert result["total_tests"] >= 1 + assert result["passed_tests"] >= 1 + assert result["failed_tests"] == 0 + assert result["error"] is None + assert isinstance(result["results"], list) + + +def test_run_behavioral_tests_tool_failing_test(): + """Test running a failing test through the LLM tool.""" + test_code = """ +def test_failing(): + assert 1 == 2, "This test should fail" +""" + # Use repo root for project_root to avoid path resolution issues + repo_root = Path(__file__).resolve().parent.parent + + with tempfile.TemporaryDirectory(dir=repo_root) as temp_dir: + test_file_path = Path(temp_dir) / "test_failing.py" + test_file_path.write_text(test_code, encoding="utf-8") + + result = run_behavioral_tests_tool( + test_files=[{"test_file_path": str(test_file_path)}], + project_root=str(repo_root), + test_framework="pytest", + pytest_timeout=30, + ) + + assert result["success"] is True # The run completed, even if tests failed + assert result["failed_tests"] >= 1 + + +def test_run_behavioral_tests_tool_via_execute(): + """Test running tests through the execute_tool interface.""" + test_code = """ +def test_simple(): + assert True +""" + # Use repo root for project_root to avoid path resolution issues + repo_root = Path(__file__).resolve().parent.parent + + with tempfile.TemporaryDirectory(dir=repo_root) as temp_dir: + test_file_path = Path(temp_dir) / "test_simple.py" + test_file_path.write_text(test_code, encoding="utf-8") + + result = execute_tool( + "run_behavioral_tests", + test_files=[{"test_file_path": str(test_file_path)}], + project_root=str(repo_root), + ) + + assert result["success"] is True + assert result["error"] is None + + +def test_run_behavioral_tests_tool_invalid_path(): + """Test handling of invalid test file path.""" + # Use repo root for project_root + repo_root = Path(__file__).resolve().parent.parent + + result = run_behavioral_tests_tool( + test_files=[{"test_file_path": "/non/existent/test_file.py"}], + project_root=str(repo_root), + test_framework="pytest", + ) + + # Should complete but with no tests found + assert result["success"] is True + assert result["total_tests"] == 0 + + +def test_run_behavioral_tests_tool_with_test_type(): + """Test specifying test type.""" + test_code = """ +def test_with_type(): + assert True +""" + # Use repo root for project_root to avoid path resolution issues + repo_root = Path(__file__).resolve().parent.parent + + with tempfile.TemporaryDirectory(dir=repo_root) as temp_dir: + test_file_path = Path(temp_dir) / "test_typed.py" + test_file_path.write_text(test_code, encoding="utf-8") + + result = run_behavioral_tests_tool( + test_files=[ + { + "test_file_path": str(test_file_path), + "test_type": "existing_unit_test", + } + ], + project_root=str(repo_root), + ) + + assert result["success"] is True