refactor folder structure and update prompt

codeintegrity-ai · Jul 11, 2024 · f81412d · f81412d
1 parent b0ab5fa
commit f81412d
Show file tree

Hide file tree

Showing 35 changed files with 136 additions and 253 deletions.
diff --git a/MANIFEST.in b/MANIFEST.in
@@ -1 +1 @@
-include src/mutahunter/core/pilot/aider/queries/*.scm
+include src/mutahunter/core/queries/*.scm
diff --git a/README.md b/README.md
@@ -30,7 +30,7 @@ Mutahunter uses LLM models to inject context-aware faults into your codebase. Th
 
 ## Features
 
-- **Extreme Mutation Testing:** Leverages language agnostic [TreeSitter](https://tree-sitter.github.io/) parser to apply extreme mutations to the codebase without using LLMs. [Research](https://arxiv.org/abs/2103.08480) shows that this approach is effective at detecting pseudo-tested methods with significantly lower computational cost. Currently supports Python, Java, JavaScript, and Go. Check the [scheme files](/src/mutahunter/core/pilot/aider/queries/) to see the supported operators. We welcome contributions to add more operators and languages.
+- **Extreme Mutation Testing:** Leverages language agnostic [TreeSitter](https://tree-sitter.github.io/) parser to apply extreme mutations to the codebase without using LLMs. [Research](https://arxiv.org/abs/2103.08480) shows that this approach is effective at detecting pseudo-tested methods with significantly lower computational cost. Currently supports Python, Java, JavaScript, and Go. Check the [scheme files](/src/mutahunter/core/queries/) to see the supported operators. We welcome contributions to add more operators and languages.
 - **LLM Context-aware Mutations:** Utilizes LLM models to generate context-aware mutants. [Research](https://arxiv.org/abs/2406.09843) indicates that LLM-generated mutants have higher fault detection potential, fewer equivalent mutants, and higher coupling and semantic similarity to real faults. It uses a map of your entire git repository to generate contextually relevant mutants using [aider's repomap](https://aider.chat/docs/repomap.html). Supports self-hosted LLMs, Anthropic, OpenAI, and any LLM models via [LiteLLM](https://github.com/BerriAI/litellm).
 - **Change-Based Testing:** Runs mutation tests on modified files and lines based on the latest commit or pull request changes, ensuring that only relevant parts of the code are tested.
 - **Language Agnostic:** Compatible with languages that provide coverage reports in Cobertura XML, Jacoco XML, and lcov formats. Extensible to additional languages and testing frameworks.
@@ -172,7 +172,6 @@ jobs:
 
 Help us improve Mutahunter and get rewarded! We have a cash bounty program to incentivize contributions to the project. Check out the [bounty board](https://docs.google.com/spreadsheets/d/1cT2_O55m5txrUgZV81g1gtqE_ZDu9LlzgbpNa_HIisc/edit?gid=0#gid=0) to see the available bounties and claim one today!
 
-
 ## Acknowledgements
 
 Mutahunter makes use of the following open-source libraries:

diff --git a/pyproject.toml b/pyproject.toml
@@ -25,12 +25,10 @@ dependencies = [
     'scipy',
     'setuptools',
     'jinja2',
-    'litellm'
+    'litellm',
 ]
 
-authors = [
-    { name = "Steven Jung", email = "jungsteven@codeintegrity.ai" },
-]
+authors = [{ name = "Steven Jung", email = "jungsteven@codeintegrity.ai" }]
 
 [project.optional-dependencies]
 dev = ['isort', 'black']
@@ -40,4 +38,4 @@ testing = ['pytest']
 mutahunter = "mutahunter.main:run"
 
 [tool.setuptools.package-data]
-mutahunter = ['src/mutahunter/core/pilot/aider/queries/*.scm']
+mutahunter = ['src/mutahunter/core/queries/*.scm']
diff --git a/src/mutahunter/core/analyzer.py b/src/mutahunter/core/analyzer.py
@@ -334,7 +334,7 @@ def _load_query_scm(self, lang: str) -> str:
         """
         try:
             scm_fname = resources.files(__package__).joinpath(
-                "pilot", "aider", "queries", f"tree-sitter-{lang}-tags.scm"
+                "queries", f"tree-sitter-{lang}-tags.scm"
             )
         except KeyError:
             return ""

diff --git a/src/mutahunter/core/mutator.py b/src/mutahunter/core/mutator.py
@@ -6,8 +6,8 @@
 from jinja2 import Template
 
 from mutahunter.core.logger import logger
-from mutahunter.core.pilot.aider.repomap import RepoMap
-from mutahunter.core.pilot.prompts.factory import PromptFactory
+from mutahunter.core.repomap import RepoMap
+from mutahunter.core.prompts.factory import PromptFactory
 
 
 class MutationStrategy:
@@ -42,15 +42,12 @@ def generate_mutations(
         ):
             start_byte = function_block.start_byte
             end_byte = function_block.end_byte
-            function_name = function_block.child_by_field_name("name").text.decode(
-                "utf8"
-            )
+
             mutant_generator = MutantGenerator(
                 config=hunter.config,
                 executed_lines=executed_lines,
                 cov_files=list(hunter.analyzer.file_lines_executed.keys()),
                 source_file_path=file_path,
-                function_name=function_name,
                 start_byte=start_byte,
                 end_byte=end_byte,
                 router=hunter.router,
@@ -96,7 +93,6 @@ def __init__(
         executed_lines,
         cov_files,
         source_file_path,  # file_path for the source code
-        function_name,
         start_byte,
         end_byte,
         router,
@@ -105,7 +101,6 @@ def __init__(
         self.executed_lines = executed_lines
         self.cov_files = cov_files
         self.source_file_path = source_file_path
-        self.function_name = function_name
         self.start_byte = start_byte
         self.end_byte = end_byte
         self.router = router
@@ -121,22 +116,36 @@ def get_function_block_source_code(self):
         return src_code[self.start_byte : self.end_byte].decode("utf-8")
 
     def generate_mutant(self, repo_map_result):
-        system_template = Template(self.prompt.system_prompt).render()
+        """Render the system and user prompts and return the model's response."""
+        # Number lines from 1 within the block; the prompt asks the model to omit them.
+        function_block_with_line_num = "\n".join(
+            [
+                f"{i + 1} {line}"
+                for i, line in enumerate(self.function_block_source_code.splitlines())
+            ]
+        )
+        system_template = Template(self.prompt.system_prompt).render(
+            language=self.language
+        )
         user_template = Template(self.prompt.user_prompt).render(
             language=self.language,
-            covered_lines=self.executed_lines,
             ast=repo_map_result,
-            function_name=self.function_name,
+            covered_lines=self.executed_lines,
             example_output=self.prompt.example_output,
-            function_block=self.function_block_source_code,
+            function_block=function_block_with_line_num,
+            maximum_num_of_mutants_per_function_block=3,
         )
         prompt = {
             "system": system_template,
             "user": user_template,
         }
+        # Log the rendered prompts at debug level; do not terminate the process here.
+        logger.debug("system_template: %s", system_template)
+        logger.debug("user_template: %s", user_template)
         model_response, _, _ = self.router.generate_response(
             prompt=prompt, streaming=True
         )
+        # The reply is returned as-is; generate() extracts the JSON from it.
         return model_response
 
     def generate(self):
@@ -150,6 +159,7 @@ def generate(self):
         ai_reply = self.generate_mutant(repo_map_result)
         mutation_info = self.extract_json_from_reply(ai_reply)
         changes = mutation_info["changes"]
+        print("changes:", len(changes))
         original_lines = self.function_block_source_code.splitlines(keepends=True)
         for change in changes:
             original_line = change["original_line"]

diff --git a/src/mutahunter/core/pilot/aider/queries/__init__.py b/src/mutahunter/core/pilot/aider/queries/__init__.py
diff --git a/src/mutahunter/core/pilot/prompts/__init__.py b/src/mutahunter/core/pilot/prompts/__init__.py
diff --git a/src/mutahunter/core/pilot/prompts/system.py b/src/mutahunter/core/pilot/prompts/system.py
diff --git a/src/mutahunter/core/pilot/prompts/user.py b/src/mutahunter/core/pilot/prompts/user.py
diff --git a/src/mutahunter/core/pilot/__init__.py → src/mutahunter/core/prompts/__init__.py b/src/mutahunter/core/pilot/__init__.py → src/mutahunter/core/prompts/__init__.py
diff --git a/...mutahunter/core/pilot/prompts/examples.py → src/mutahunter/core/prompts/examples.py b/...mutahunter/core/pilot/prompts/examples.py → src/mutahunter/core/prompts/examples.py
diff --git a/src/mutahunter/core/pilot/prompts/factory.py → src/mutahunter/core/prompts/factory.py b/src/mutahunter/core/pilot/prompts/factory.py → src/mutahunter/core/prompts/factory.py
@@ -2,12 +2,14 @@
 Module for generating prompts based on the programming language.
 """
 
-from mutahunter.core.pilot.prompts.examples import (GO_EXAMPLE_OUTPUT,
-                                                    JAVA_EXAMPLE_OUTPUT,
-                                                    JAVASCRIPT_EXAMPLE_OUTPUT,
-                                                    PYTHON_EXAMPLE_OUTPUT)
-from mutahunter.core.pilot.prompts.system import SYSTEM_PROMPT
-from mutahunter.core.pilot.prompts.user import USER_PROMPT
+from mutahunter.core.prompts.examples import (
+    GO_EXAMPLE_OUTPUT,
+    JAVA_EXAMPLE_OUTPUT,
+    JAVASCRIPT_EXAMPLE_OUTPUT,
+    PYTHON_EXAMPLE_OUTPUT,
+)
+from mutahunter.core.prompts.system import SYSTEM_PROMPT
+from mutahunter.core.prompts.user import USER_PROMPT
 
 
 class PromptFactory:

diff --git a/src/mutahunter/core/prompts/system.py b/src/mutahunter/core/prompts/system.py
@@ -0,0 +1,4 @@
+SYSTEM_PROMPT = """
+# Context:
+You are an AI Agent named Mutahunter, part of the Software Quality Assurance Team. Your task is to mutate the {{language}} code provided to you. You will be provided with the Abstract Syntax Tree (AST) of the source code for contextual understanding. This AST will help you understand the entire source code. Make sure to read the AST before proceeding with the mutation.
+"""
diff --git a/src/mutahunter/core/prompts/user.py b/src/mutahunter/core/prompts/user.py
@@ -0,0 +1,30 @@
+USER_PROMPT = """
+## Abstract Syntax Tree (AST) for Context
+```ast
+{{ast}}
+```
+
+## Response
+The output must be in JSON format, wrapped in a fenced code block (```json ... ```), and adhere to the following Pydantic definitions.
+```
+class SingleMutant(BaseModel):
+    type: str = Field(description="The type of the mutation operator.(e.g., Off-by-One Error, Boundary Condition, Arithmetic, Block removal, Relational Operator, etc.)")
+    description: str = Field(description="Description of the mutation.")
+    context_before: str = Field(description="Line of code context before the mutation.")
+    original_line: str = Field(description="The original line of code before mutation.")
+    mutated_line: str = Field(description="The line of code after mutation, including a comment with the mutation description.")
+    context_after: str = Field(description="Line of code context after the mutation.")
+
+class Mutants(BaseModel):
+    changes: List[SingleMutant] = Field(description="A list of changes representing the mutants.")
+```
+
+## Function Block to Mutate
+Lines Covered: {{covered_lines}}. Only mutate lines that are covered by execution.
+Note that we have manually added line numbers for each line of code. Do not include line numbers in your mutation.
+```{{language}}
+{{function_block}}
+```
+
+Generate 1~{{maximum_num_of_mutants_per_function_block}} mutants for the function block provided to you. Ensure that the mutants are semantically different from the original code. Focus on critical areas such as error handling, boundary conditions, and logical branches.
+"""
diff --git a/src/mutahunter/core/pilot/aider/__init__.py → src/mutahunter/core/queries/__init__.py b/src/mutahunter/core/pilot/aider/__init__.py → src/mutahunter/core/queries/__init__.py
diff --git a/...ilot/aider/queries/tree-sitter-c-tags.scm → ...unter/core/queries/tree-sitter-c-tags.scm b/...ilot/aider/queries/tree-sitter-c-tags.scm → ...unter/core/queries/tree-sitter-c-tags.scm
diff --git a/...ider/queries/tree-sitter-c_sharp-tags.scm → ...core/queries/tree-sitter-c_sharp-tags.scm b/...ider/queries/tree-sitter-c_sharp-tags.scm → ...core/queries/tree-sitter-c_sharp-tags.scm
diff --git a/...ot/aider/queries/tree-sitter-cpp-tags.scm → ...ter/core/queries/tree-sitter-cpp-tags.scm b/...ot/aider/queries/tree-sitter-cpp-tags.scm → ...ter/core/queries/tree-sitter-cpp-tags.scm
diff --git a/.../aider/queries/tree-sitter-elisp-tags.scm → ...r/core/queries/tree-sitter-elisp-tags.scm b/.../aider/queries/tree-sitter-elisp-tags.scm → ...r/core/queries/tree-sitter-elisp-tags.scm
diff --git a/...aider/queries/tree-sitter-elixir-tags.scm → .../core/queries/tree-sitter-elixir-tags.scm b/...aider/queries/tree-sitter-elixir-tags.scm → .../core/queries/tree-sitter-elixir-tags.scm
diff --git a/...ot/aider/queries/tree-sitter-elm-tags.scm → ...ter/core/queries/tree-sitter-elm-tags.scm b/...ot/aider/queries/tree-sitter-elm-tags.scm → ...ter/core/queries/tree-sitter-elm-tags.scm
diff --git a/...lot/aider/queries/tree-sitter-go-tags.scm → ...nter/core/queries/tree-sitter-go-tags.scm b/...lot/aider/queries/tree-sitter-go-tags.scm → ...nter/core/queries/tree-sitter-go-tags.scm
diff --git a/...t/aider/queries/tree-sitter-java-tags.scm → ...er/core/queries/tree-sitter-java-tags.scm b/...t/aider/queries/tree-sitter-java-tags.scm → ...er/core/queries/tree-sitter-java-tags.scm
diff --git a/...r/queries/tree-sitter-javascript-tags.scm → ...e/queries/tree-sitter-javascript-tags.scm b/...r/queries/tree-sitter-javascript-tags.scm → ...e/queries/tree-sitter-javascript-tags.scm
diff --git a/.../aider/queries/tree-sitter-ocaml-tags.scm → ...r/core/queries/tree-sitter-ocaml-tags.scm b/.../aider/queries/tree-sitter-ocaml-tags.scm → ...r/core/queries/tree-sitter-ocaml-tags.scm
diff --git a/...ot/aider/queries/tree-sitter-php-tags.scm → ...ter/core/queries/tree-sitter-php-tags.scm b/...ot/aider/queries/tree-sitter-php-tags.scm → ...ter/core/queries/tree-sitter-php-tags.scm
diff --git a/...aider/queries/tree-sitter-python-tags.scm → .../core/queries/tree-sitter-python-tags.scm b/...aider/queries/tree-sitter-python-tags.scm → .../core/queries/tree-sitter-python-tags.scm
diff --git a/...lot/aider/queries/tree-sitter-ql-tags.scm → ...nter/core/queries/tree-sitter-ql-tags.scm b/...lot/aider/queries/tree-sitter-ql-tags.scm → ...nter/core/queries/tree-sitter-ql-tags.scm
diff --git a/...t/aider/queries/tree-sitter-ruby-tags.scm → ...er/core/queries/tree-sitter-ruby-tags.scm b/...t/aider/queries/tree-sitter-ruby-tags.scm → ...er/core/queries/tree-sitter-ruby-tags.scm
diff --git a/...t/aider/queries/tree-sitter-rust-tags.scm → ...er/core/queries/tree-sitter-rust-tags.scm b/...t/aider/queries/tree-sitter-rust-tags.scm → ...er/core/queries/tree-sitter-rust-tags.scm
diff --git a/...r/queries/tree-sitter-typescript-tags.scm → ...e/queries/tree-sitter-typescript-tags.scm b/...r/queries/tree-sitter-typescript-tags.scm → ...e/queries/tree-sitter-typescript-tags.scm
diff --git a/src/mutahunter/core/pilot/aider/repomap.py → src/mutahunter/core/repomap.py b/src/mutahunter/core/pilot/aider/repomap.py → src/mutahunter/core/repomap.py
diff --git a/src/mutahunter/core/report.py b/src/mutahunter/core/report.py
@@ -100,21 +100,20 @@ def _format_summary(self, data: dict, total_cost: float, line_rate: float) -> st
             str: Formatted summary report.
         """
         line_coverage = f"{line_rate * 100:.2f}%"
-        summary = [
-            "Mutation Coverage:",
-            f"📊 Line Coverage: {line_coverage} 📊",
-            f"🎯 Mutation Coverage: {data['mutation_coverage']} 🎯",
-            f"🦠 Total Mutants: {data['total_mutants']} 🦠",
-            f"🛡️ Survived Mutants: {data['survived_mutants']} 🛡️",
-            f"🗡️ Killed Mutants: {data['killed_mutants']} 🗡️",
-            f"🕒 Timeout Mutants: {data['timeout_mutants']} 🕒",
-            f"🔥 Compile Error Mutants: {data['compile_error_mutants']} 🔥",
-        ]
+        details = []
+        details.append("📊 Overall Mutation Coverage 📊")
+        details.append(f"📈 Line Coverage: {line_coverage} 📈")
+        details.append(f"🎯 Mutation Coverage: {data['mutation_coverage']} 🎯")
+        details.append(f"🦠 Total Mutants: {data['total_mutants']} 🦠")
+        details.append(f"🛡️ Survived Mutants: {data['survived_mutants']} 🛡️")
+        details.append(f"🗡️ Killed Mutants: {data['killed_mutants']} 🗡️")
+        details.append(f"🕒 Timeout Mutants: {data['timeout_mutants']} 🕒")
+        details.append(f"🔥 Compile Error Mutants: {data['compile_error_mutants']} 🔥")
         if self.config.extreme:
-            summary.append("💰 No Cost for extreme mutation testing 💰")
+            details.append("💰 No Cost for extreme mutation testing 💰")
         else:
-            summary.append(f"💰 Expected Cost: ${total_cost:.5f} USD 💰")
-        return "\n".join(summary)
+            details.append(f"💰 Expected Cost: ${total_cost:.5f} USD 💰")
+        return "\n".join(details)
 
     def _generate_detailed_report(self, mutants: List[dict]) -> None:
         """
@@ -125,7 +124,7 @@ def _generate_detailed_report(self, mutants: List[dict]) -> None:
         """
         report_detail = self._compute_detailed_data(mutants)
         detailed_text = self._format_detailed_report(report_detail)
-        self._log_and_write("\nDetailed Mutation Coverage:\n" + detailed_text)
+        self._log_and_write(detailed_text)
 
     def _compute_detailed_data(self, mutants: List[dict]) -> dict:
         """
@@ -181,7 +180,7 @@ def _format_detailed_report(self, report_detail: dict) -> str:
         Returns:
             str: Formatted detailed report.
         """
-        details = []
+        details = ["📂 Detailed Mutation Coverage 📂"]
         for source_path, detail in report_detail.items():
             details.append(f"📂 Source File: {source_path} 📂")
             details.append(f"🎯 Mutation Coverage: {detail['mutation_coverage']} 🎯")

diff --git a/tests/test_analyzer.py b/tests/test_analyzer.py
@@ -373,69 +373,3 @@ def test_find_function_blocks_nodes(
     mock_find_blocks_nodes.assert_called_once_with(
         source_file_path, source_code, ["definition.function", "definition.method"]
     )
-
-
-# @patch("xml.etree.ElementTree.parse")
-# @patch("mutahunter.core.analyzer.filename_to_lang", return_value="python")
-# @patch("mutahunter.core.analyzer.get_parser")
-# @patch("mutahunter.core.analyzer.get_language")
-# @patch(
-#     "mutahunter.core.analyzer.Analyzer._load_query_scm",
-#     return_value="(function_definition) @definition.function",
-# )
-# def test_find_blocks_nodes(
-#     mock_load_query_scm,
-#     mock_get_language,
-#     mock_get_parser,
-#     mock_filename_to_lang,
-#     mock_parse,
-#     config,
-#     cobertura_xml_content,
-# ):
-#     source_code = b"def foo():\n    pass"
-#     source_file_path = "test_file.py"
-#     tags = ["definition.function", "definition.method"]
-#     mock_parse.return_value = ET.ElementTree(ET.fromstring(cobertura_xml_content))
-
-#     mock_parser = mock_get_parser.return_value
-#     mock_language = mock_get_language.return_value
-#     mock_tree = Mock()
-#     mock_parser.parse.return_value = mock_tree
-#     mock_query = Mock()
-#     mock_query.captures.return_value = [(Mock(), "definition.function")]
-#     mock_language.query.return_value = mock_query
-
-#     analyzer = Analyzer(config)
-#     result = analyzer._find_blocks_nodes(source_file_path, source_code, tags)
-
-#     # Assertions
-#     assert len(result) == 1
-#     mock_get_parser.assert_called_once_with("python")
-#     mock_get_language.assert_called_once_with("python")
-#     mock_parser.parse.assert_called_once_with(source_code)
-#     mock_language.query.assert_called_once_with(
-#         "(function_definition) @definition.function"
-#     )
-#     mock_query.captures.assert_called_once_with(mock_tree.root_node)
-
-
-# @patch("xml.etree.ElementTree.parse")
-# @patch("importlib.resources.files")
-# def test_load_query_scm(mock_resources, mock_parse, config, cobertura_xml_content):
-#     lang = "python"
-#     scm_content = "(function_definition) @definition.function"
-#     mock_parse.return_value = ET.ElementTree(ET.fromstring(cobertura_xml_content))
-
-#     mock_path = Mock()
-#     mock_path.exists.return_value = True
-#     mock_path.read_text.return_value = scm_content
-#     mock_resources.return_value.joinpath.return_value = mock_path
-
-#     analyzer = Analyzer(config)
-#     result = analyzer._load_query_scm(lang)
-
-#     # Assertions
-#     assert result == scm_content
-#     mock_resources.return_value.joinpath.assert_called_once_with(
-#         "pilot", "aider", "queries", f"tree-sitter-{lang}-tags.scm"
-#     )