# Mutation Injection Notebook

This notebook scans every JSON file in the `results` folder and, for each entry in that file's `results` array, checks whether the mutation testing fields (`mutation_score_percent`, `mutation_killed`, `mutation_survived`) under `metrics` are present and non-null.

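If any of them is missing or null, mutation testing is run via `get_mutation_metrics` (from `utils.mutmut_runner`) and the results are written back to the JSON file. Entries whose `status` is not `success`, or whose source or test file cannot be found, are skipped.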

In [1]:
import json
import os
import sys
from pathlib import Path

# Add the src folder to the path so the project modules can be imported
sys.path.insert(0, str(Path.cwd() / "src"))

from utils.mutmut_runner import get_mutation_metrics

In [2]:
# Project folders, each resolved against the current working directory
# (the notebook is expected to be run from the repository root).
RESULTS_DIR = Path.cwd().joinpath("results")
INPUT_CODE_DIR = Path.cwd().joinpath("data", "input_code")
OUTPUT_TESTS_DIR = Path.cwd().joinpath("data", "output_tests")

# Echo the resolved locations so a wrong working directory is spotted early
print(f"Results directory: {RESULTS_DIR}")
print(f"Input code directory: {INPUT_CODE_DIR}")
print(f"Output tests directory: {OUTPUT_TESTS_DIR}")

Results directory: /Users/luigi/Documents/MSc-PoliTO/II_anno/LLM4SE/LLM-Agents-for-Collaborative-Test-Case/results
Input code directory: /Users/luigi/Documents/MSc-PoliTO/II_anno/LLM4SE/LLM-Agents-for-Collaborative-Test-Case/data/input_code
Output tests directory: /Users/luigi/Documents/MSc-PoliTO/II_anno/LLM4SE/LLM-Agents-for-Collaborative-Test-Case/data/output_tests


In [3]:
def needs_mutation_testing(result: dict) -> bool:
    """
    Decide whether a result entry still lacks mutation-testing data.

    Args:
        result: One entry of a results file. Its optional "metrics" mapping
            is inspected for the mutation fields.

    Returns:
        True if any of mutation_score_percent, mutation_killed or
        mutation_survived is absent or null; False when all three are set.
        An entry with no "metrics" key, or whose "metrics" is null (or any
        other non-dict value), is treated as having no mutation data.
    """
    metrics = result.get("metrics")
    if not isinstance(metrics, dict):
        # A JSON `"metrics": null` is loaded as None; a membership test on it
        # would raise TypeError, so report "needs testing" instead.
        return True

    mutation_fields = ("mutation_score_percent", "mutation_killed", "mutation_survived")

    # .get() returns None both for a missing key and for an explicit null
    return any(metrics.get(field) is None for field in mutation_fields)


def get_test_file_path(run_id: str, source_file: str) -> Path:
    """
    Locate the generated test file that belongs to a source file of a run.

    The returned path is OUTPUT_TESTS_DIR / <run_id> / test_<source_file>.
    The path is only constructed here; its existence is not checked.
    """
    return OUTPUT_TESTS_DIR.joinpath(run_id, f"test_{source_file}")


def process_json_file(json_path: Path) -> bool:
    """
    Processes a single JSON file, running mutation testing where necessary.
    Returns True if the file was modified.
    """
    print(f"\n{'='*60}")
    print(f"Processing: {json_path.name}")
    print(f"{'='*60}")

    # Load the JSON
    with open(json_path, "r") as f:
        data = json.load(f)

    run_id = data.get("run_id", "")
    results = data.get("results", [])

    modified = False

    for i, result in enumerate(results):
        source_file = result.get("file", "")
        status = result.get("status", "")

        print(f"\n[{i+1}/{len(results)}] {source_file}")

        # Skip if status is not success
        if status != "success":
            print(f"  ‚è≠Ô∏è  Skipped (status: {status})")
            continue

        # Check if mutation testing is needed
        if not needs_mutation_testing(result):
            metrics = result.get("metrics", {})
            print(f"  ‚úÖ Mutation data already present:")
            print(f"     Score: {metrics.get('mutation_score_percent')}%")
            print(f"     Killed: {metrics.get('mutation_killed')}")
            print(f"     Survived: {metrics.get('mutation_survived')}")
            continue

        # Build paths
        source_path = INPUT_CODE_DIR / source_file
        test_path = get_test_file_path(run_id, source_file)

        # Verify that files exist
        if not source_path.exists():
            print(f"  ‚ùå Source file not found: {source_path}")
            continue

        if not test_path.exists():
            print(f"  ‚ùå Test file not found: {test_path}")
            continue

        print(f"  üî¨ Running mutation testing...")
        print(f"     Source: {source_path}")
        print(f"     Test: {test_path}")

        # Run mutation testing
        mutation_metrics = get_mutation_metrics(str(source_path), str(test_path))

        if mutation_metrics is None:
            print(f"  ‚ö†Ô∏è  Mutation testing failed or timed out")
            # Set null values in case of failure
            if "metrics" not in result:
                result["metrics"] = {}
            result["metrics"]["mutation_score_percent"] = None
            result["metrics"]["mutation_killed"] = None
            result["metrics"]["mutation_survived"] = None
        else:
            print(f"  ‚úÖ Mutation testing completed:")
            print(f"     Score: {mutation_metrics['mutation_score_percent']}%")
            print(f"     Killed: {mutation_metrics['mutation_killed']}")
            print(f"     Survived: {mutation_metrics['mutation_survived']}")

            # Update the metrics
            if "metrics" not in result:
                result["metrics"] = {}
            result["metrics"].update(mutation_metrics)

        modified = True

    # Save JSON if modified
    if modified:
        with open(json_path, "w") as f:
            json.dump(data, f, indent=4)
        print(f"\nüíæ File saved: {json_path.name}")
    else:
        print(f"\nüìÑ No changes needed for: {json_path.name}")

    return modified

In [4]:
# Find all JSON files in the results folder.
# Path.glob yields entries in arbitrary (filesystem) order, so sort them to make
# the processing order and the printed log reproducible across runs.
json_files = sorted(RESULTS_DIR.glob("*.json"))
print(f"Found {len(json_files)} JSON files in results folder:")
for json_file in json_files:
    print(f"  - {json_file.name}")


# Process all JSON files
modified_count = 0
failed_files = []  # names of files whose processing raised

for json_file in json_files:
    try:
        if process_json_file(json_file):
            modified_count += 1
    except Exception as e:
        # Best-effort batch: report the failure and continue with the next file
        failed_files.append(json_file.name)
        print(f"\n‚ùå Error processing {json_file.name}: {e}")

print(f"\n{'='*60}")
print(f"SUMMARY")
print(f"{'='*60}")
print(f"Total files processed: {len(json_files)}")
print(f"Files modified: {modified_count}")
if failed_files:
    # Only shown when something went wrong, so failures are not lost in the log
    print(f"Files failed: {len(failed_files)}")

Found 23 JSON files in results folder:
  - collaborative_llamaScout17B_gptoss20B_2026-01-20T12-49-15.json
  - competitive_gptoss120B_gptoss120B_llama70B_2026-01-21T18-00-33.json
  - single_gptoss120B_2026-01-20T12-18-59.json
  - competitive_llama70B_gptoss20B_llamaScout17B_2026-01-21T18-06-16.json
  - collaborative_llama70B_llamaScout17B_2026-01-19T20-29-36.json
  - collaborative_gptoss20B_gptoss120B_2026-01-18T20-11-18.json
  - collaborative_gptoss20B_llamaScout17B_2026-01-18T17-21-50.json
  - collaborative_gptoss120B_llama70B_2026-01-23T13-03-04.json
  - competitive_llamaScout17B_gptoss20B_llamaScout17B_2026-01-19T09-31-01.json
  - collaborative_llama70B_gptoss120B_2026-01-18T17-37-57.json
  - single_llamaScout17B_2026-01-18T11-02-55.json
  - competitive_llama70B_gptoss120B_llama70B_2026-01-19T20-23-18.json
  - single_llama70B_2026-01-18T17-44-42.json
  - collaborative_gptoss20B_gptoss20B_2026-01-23T12-09-03.json
  - collaborative_llama70B_llama70B_2026-01-21T17-51-51.json
  - compet